Hi Aaron,

On Wed, Aug 22, 2018 at 12:31:51PM -0500, Aaron Sawdey wrote:
> This patch teaches rs6000 inline expansion of strcmp/strncmp how to
> generate vector/vsx code for power8/power9 processors. Compares 16
> bytes and longer are generated using the vector code, which is
> considerably faster than the gpr based code. 

:-)

>       (expand_strn_compare): Support for vector strncmp.

"Add support"?

> -(define_insn "*altivec_vcmpequ<VI_char>_p"
> +(define_insn "altivec_vcmpequ<VI_char>_p"
>    [(set (reg:CC CR6_REGNO)
>       (unspec:CC [(eq:CC (match_operand:VI2 1 "register_operand" "v")
>                          (match_operand:VI2 2 "register_operand" "v"))]

You missed this one in the changelog.

> --- gcc/config/rs6000/rs6000-string.c (revision 263753)
> +++ gcc/config/rs6000/rs6000-string.c (working copy)
> @@ -157,6 +157,29 @@
>  {
>    switch (GET_MODE (reg))
>      {
> +    case E_V16QImode:
> +      switch (mode) {
> +      case E_V16QImode:
> +     if (!BYTES_BIG_ENDIAN) 

This has a trailing space.

> +       if (TARGET_P9_VECTOR)
> +         emit_insn (gen_vsx_ld_elemrev_v16qi_internal (reg, mem));
> +       else
> +         {
> +           rtx reg_v2di = simplify_gen_subreg (V2DImode, reg, V16QImode, 0);
> +           gcc_assert (MEM_P (mem));
> +           rtx addr = XEXP (mem, 0);
> +           rtx mem_v2di = gen_rtx_MEM (V2DImode, addr);
> +           MEM_COPY_ATTRIBUTES (mem_v2di, mem);
> +           set_mem_size (mem, GET_MODE_SIZE (V2DImode));
> +           emit_insn (gen_vsx_ld_elemrev_v2di (reg_v2di, mem_v2di));
> +         }
> +     else

If you have to outdent more than one, you usually should put the inner if
in a block of its own.  Another option is to make a helper function, if you
can come up with something for which you can make a sane and short name.

> +       emit_insn (gen_vsx_movv2di_64bit (reg, mem));
> +     break;
> +      default:
> +     gcc_unreachable ();
> +      }
> +      break;

This cannot be correct (same indent for break and preceding } ).

> +      switch (mode)
> +     {
> +     case E_QImode:
> +       emit_move_insn (reg, mem);
> +       break;
> +     default:
> +       debug_rtx(reg);
> +       gcc_unreachable ();
> +       break;
> +     }

Unless you have a reason you want to keep debug_rtx here, why not just do

    gcc_assert (mode == E_QImode);
    emit_move_insn (reg, mem);

> +static void
> +expand_strncmp_vec_sequence(unsigned HOST_WIDE_INT bytes_to_compare,
> +                         rtx orig_src1, rtx orig_src2,
> +                         rtx s1addr, rtx s2addr, rtx off_reg,
> +                         rtx s1data, rtx s2data,
> +                         rtx vec_result, bool equality_compare_rest,
> +                         rtx &cleanup_label, rtx final_move_label)

Space before (.  Wow what a monster :-)

Why use a reference instead of a pointer for cleanup_label?  Will that work
even, it seems to allow NULL?

> +  unsigned int i;
> +  rtx zr[16];
> +  for (i = 0; i < 16; i++)
> +    zr[i] = GEN_INT (0);
> +  rtvec zv = gen_rtvec_v (16, zr);
> +  rtx zero_reg = gen_reg_rtx (V16QImode);
> +  rs6000_expand_vector_init (zero_reg, gen_rtx_PARALLEL (V16QImode, zv));

Is there no helper for this already?  Should there be?

> +       if (extra_bytes < offset)
> +         {
> +           offset -= extra_bytes;
> +           cmp_bytes = load_mode_size;
> +           bytes_to_compare = cmp_bytes;
> +         }
> +       else
> +         /* We do not call this with less than 16 bytes.  */
> +         gcc_unreachable ();

          gcc_assert (offset > extra_bytes);
          offset -= extra_bytes;
          cmp_bytes = load_mode_size;
          bytes_to_compare = cmp_bytes;

> +  /* Is it OK to use vec/vsx for this. TARGET_VSX means we have at
> +     least POWER7 but we use TARGET_EFFICIENT_UNALIGNED_VSX which is
> +     at least POWER8. That way we can rely on overlapping compares to
> +     do the final comparison of less than 16 bytes. Also I do not want
> +     to deal with making this work for 32 bits.  */

Two spaces after full stop.

> +  if (use_vec)
> +    {
> +      s1addr = gen_reg_rtx (Pmode);
> +      s2addr = gen_reg_rtx (Pmode);
> +      off_reg = gen_reg_rtx (Pmode);
> +      vec_result = gen_reg_rtx (load_mode);
> +      emit_move_insn (result_reg, GEN_INT (0));
> +      expand_strncmp_vec_sequence(compare_length,

Space before (.

Please fix these nits, and then it is okay for trunk.  Thanks!


Segher

Reply via email to