On Wed, Dec 04, 2019 at 10:07:05AM +0800, Hongtao Liu wrote:
> Changelog
> gcc/
>       PR target/92686
>       * config/i386/sse.md
>       (*<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>,
>       *<avx512>_cmp<mode>3<mask_scalar_merge_name>,
>       *<avx512>_ucmp<mode>3<mask_scalar_merge_name>,
>       *<avx512>_ucmp<mode>3<mask_scalar_merge_name>): New.
>       * config/i386/i386.c (ix86_print_operand): New operand substitution.
>       * config/i386/i386-expand.c (ix86_valid_mask_cmp_mode):
>       New function.
>       (ix86_expand_sse_cmp): Relax condition for integer mask from
>       512-bit vector to all 128/256/512-bit vector. Delete code gen
>       for avx512f compare patterns since we have generic pattern now.
>       (ix86_expand_sse_movcc): Adjust condition and codegen for
>       maskcmp.
>       (ix86_expand_int_sse_cmp): Don't canonicalize the comparison
>       when corresponding vector compare is available.
> 
> gcc/testsuite/
>       * gcc.target/i386/pr92686.inc: New file.
>       * gcc.target/i386/avx512bw-pr92686-vpcmp-1.c: New test.
>       * gcc.target/i386/avx512bw-pr92686-vpcmp-2.c: Ditto.
>       * gcc.target/i386/avx512vl-pr92686-vpcmp-1.c: Ditto.
>       * gcc.target/i386/avx512vl-pr92686-vpcmp-2.c: Ditto.
>       * gcc.target/i386/avx512bw-pr92686-movcc-1.c: Ditto.
>       * gcc.target/i386/avx512bw-pr92686-movcc-2.c: Ditto.
>       * gcc.target/i386/avx512vl-pr92686-movcc-1.c: Ditto.
>       * gcc.target/i386/avx512vl-pr92686-movcc-2.c: Ditto.
>       * gcc.target/i386/avx512vl-pr88547-1.c: Adjust testcase.
>       * gcc.target/i386/pr88547-1.c: Ditto.

See comments below.

> +  /* AVX512BW is needed for vector QI/HImode,
> +     AVX512VL is needed for 128/256-bit vector.  */
> +  machine_mode inner_mode = GET_MODE_INNER (mode);
> +  int vector_size = GET_MODE_SIZE (mode);
> +  if ((inner_mode == QImode || inner_mode == HImode)
> +      && !TARGET_AVX512BW)

There is no reason not to keep && !TARGET_AVX512BW) on the previous line.

> +  if (ix86_valid_mask_cmp_mode (cmp_ops_mode))
>      {
>        unsigned int nbits = GET_MODE_NUNITS (cmp_ops_mode);
> -      cmp_mode = int_mode_for_size (nbits, 0).require ();
>        maskcmp = true;
> +      cmp_mode = nbits > 8 ?
> +     int_mode_for_size (nbits, 0).require ()
> +     : E_QImode;

Formatting.  ? never goes at the end of line, similarly :.
So, you want either
      cmp_mode
        = nbits > 8 ? int_mode_for_size (nbits, 0).require () : E_QImode;
or
      cmp_mode = (nbits > 8 ? int_mode_for_size (nbits, 0).require ()
                  : E_QImode);
or
      cmp_mode = (nbits > 8
                  ? int_mode_for_size (nbits, 0).require () : E_QImode);
or
      cmp_mode = (nbits > 8
                  ? int_mode_for_size (nbits, 0).require ()
                  : E_QImode);
- the parens around to help emacs.

> @@ -3515,7 +3510,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, 
> rtx op_false)
>    machine_mode cmpmode = GET_MODE (cmp);
>  
>    /* In AVX512F the result of comparison is an integer mask.  */
> -  bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
> +  bool maskcmp = ((mode != cmpmode) && ix86_valid_mask_cmp_mode (mode));

No reason for either pair of ()s here, of course except the function call
argument list.

> +      /* Using vector move with mask register.  */
> +      cmp = force_reg (cmpmode, cmp);
> +      /* Optimize for mask zero.  */
> +      op_true = op_true != CONST0_RTX (mode)
> +     ? force_reg (mode, op_true)
> +     : op_true;

Same thing as above, just in this case ? wasn't incorrectly at the end.

> +      op_false = op_false != CONST0_RTX (mode)
> +     ? force_reg (mode, op_false)
> +     : op_false;

And again.

> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -12468,6 +12468,38 @@ ix86_print_operand (FILE *file, rtx x, int code)
>           }
>         return;
>  
> +     case 'I':
> +       switch (GET_CODE (x))
> +         {
> +         case EQ:
> +           fputs ("$0", file);
> +           break;
> +         case NE:
> +           fputs ("$4", file);
> +           break;
> +         case GE:
> +         case GEU:
> +           fputs ("$5", file);
> +           break;
> +         case GT:
> +         case GTU:
> +           fputs ("$6", file);
> +           break;
> +         case LE:
> +         case LEU:
> +           fputs ("$2", file);
> +           break;
> +         case LT:
> +         case LTU:
> +           fputs ("$1", file);
> +           break;

Does this work with -masm=intel?  I'd guess that $1 etc. isn't valid.
If not, either I should print just the number without $ and $ should be
added only for the AT&T syntax, or the above should print or not print the
$ depending on the asm syntax.

> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -3050,6 +3050,18 @@
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "<sseinsnmode>")])
>  
> +(define_insn 
> "*<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
> +  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
> +     (match_operator:<avx512fmaskmode> 3 "ix86_comparison_int_operator"
> +       [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
> +        (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" 
> "<round_saeonly_constraint>")]))]
> +  "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
> +  "vpcmp<ssemodesuffix>\t{%I3, <round_saeonly_mask_scalar_merge_op4>%2, %1, 
> %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, 
> %2<round_saeonly_mask_scalar_merge_op4>, %I3}"
> +  [(set_attr "type" "ssecmp")
> +   (set_attr "length_immediate" "1")
> +   (set_attr "prefix" "evex")
> +   (set_attr "mode" "<sseinsnmode>")])
> +
>  (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
>    [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
>       (unspec:<avx512fmaskmode>

> +      abort();
> +    }
> +  abort();
> +}
> \ No newline at end of file

Please make sure all files are newline terminated.

        Jakub

Reply via email to