On Wed, Jan 8, 2020 at 9:09 AM Jakub Jelinek <ja...@redhat.com> wrote:
>
> Hi!
>
> This is very similar to the previous PR93141 addv<mode>4 half and
> improves signed __builtin_sub_overflow on double-words rather than
> __builtin_add_overflow.
>
> I have left out the uaddv<mode>4 double-word stuff, because I ran into
> issues with it - as the pattern uses (set (reg:CC flags) (compare:CC (reg:TI) 
> (reg:TI)))
> as the first set, ifcvt.c (canonicalize_condition it calls) extracts
> the TImode comparison arguments from it, but as we don't have a cmpti3
> pattern, noce_try_store_flag fails and we don't manage to convert branchy
> code into setc.  Even for smaller modes ifcvt in that case does weird
> things, after the sub instruction which sets the flags too it emits
> a cmp with the same arguments and only postreload we notice the compare is
> redundant and remove it.  So, dunno if we want to improve somehow ifcvt
> and for conditions where we'd look through a multiple set instructions
> somehow try harder to reuse the original instruction, or if usub<dwi>4
> should not use compare:CC of the operands and instead use some obfuscation
> to only set CCCmode to prevent ifcvt from punting on it.
>
> Anyway, this part works, bootstrapped/regtested on x86_64-linux and
> i686-linux, ok for trunk?
>
> 2020-01-07  Jakub Jelinek  <ja...@redhat.com>
>
>         PR target/93141
>         (subv<mode>4): Use SWIDWI iterator instead of SWI.  Use
>         <general_hilo_operand> instead of <general_operand>.  Use
>         CONST_SCALAR_INT_P instead of CONST_INT_P.
>         (*subv<mode>4_1): Rename to ...
>         (subv<mode>4_1): ... this.
>         (*subv<dwi>4_doubleword, *addv<dwi>4_doubleword_1): New
>         define_insn_and_split patterns.
>         (*subv<mode>4_overflow_1, *addv<mode>4_overflow_2): New define_insn
>         patterns.
>
>         * gcc.target/i386/pr93141-1.c: Add tests with constants that have MSB
>         of the low half of the constant set.
>         * gcc.target/i386/pr93141-2.c: New test.

LGTM.

Thanks,
Uros.

> --- gcc/config/i386/i386.md.jj  2020-01-07 08:01:22.618613702 +0100
> +++ gcc/config/i386/i386.md     2020-01-07 10:07:01.770587027 +0100
> @@ -6569,16 +6569,17 @@ (define_insn "*subsi_2_zext"
>  ;; Subtract with jump on overflow.
>  (define_expand "subv<mode>4"
>    [(parallel [(set (reg:CCO FLAGS_REG)
> -                  (eq:CCO (minus:<DWI>
> -                             (sign_extend:<DWI>
> -                                (match_operand:SWI 1 "nonimmediate_operand"))
> -                             (match_dup 4))
> -                          (sign_extend:<DWI>
> -                             (minus:SWI (match_dup 1)
> -                                        (match_operand:SWI 2
> -                                           "<general_operand>")))))
> -             (set (match_operand:SWI 0 "register_operand")
> -                  (minus:SWI (match_dup 1) (match_dup 2)))])
> +                  (eq:CCO
> +                    (minus:<DWI>
> +                      (sign_extend:<DWI>
> +                        (match_operand:SWIDWI 1 "nonimmediate_operand"))
> +                      (match_dup 4))
> +                    (sign_extend:<DWI>
> +                      (minus:SWIDWI (match_dup 1)
> +                                    (match_operand:SWIDWI 2
> +                                               "<general_hilo_operand>")))))
> +             (set (match_operand:SWIDWI 0 "register_operand")
> +                  (minus:SWIDWI (match_dup 1) (match_dup 2)))])
>     (set (pc) (if_then_else
>                (eq (reg:CCO FLAGS_REG) (const_int 0))
>                (label_ref (match_operand 3))
> @@ -6586,7 +6587,7 @@ (define_expand "subv<mode>4"
>    ""
>  {
>    ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);
> -  if (CONST_INT_P (operands[2]))
> +  if (CONST_SCALAR_INT_P (operands[2]))
>      operands[4] = operands[2];
>    else
>      operands[4] = gen_rtx_SIGN_EXTEND (<DWI>mode, operands[2]);
> @@ -6608,7 +6609,7 @@ (define_insn "*subv<mode>4"
>    [(set_attr "type" "alu")
>     (set_attr "mode" "<MODE>")])
>
> -(define_insn "*subv<mode>4_1"
> +(define_insn "subv<mode>4_1"
>    [(set (reg:CCO FLAGS_REG)
>         (eq:CCO (minus:<DWI>
>                    (sign_extend:<DWI>
> @@ -6633,6 +6634,162 @@ (define_insn "*subv<mode>4_1"
>                   (const_string "4")]
>               (const_string "<MODE_SIZE>")))])
>
> +(define_insn_and_split "*subv<dwi>4_doubleword"
> +  [(set (reg:CCO FLAGS_REG)
> +       (eq:CCO
> +         (minus:<QWI>
> +           (sign_extend:<QWI>
> +             (match_operand:<DWI> 1 "nonimmediate_operand" "0,0"))
> +           (sign_extend:<QWI>
> +             (match_operand:<DWI> 2 "x86_64_hilo_general_operand" 
> "r<di>,o")))
> +         (sign_extend:<QWI>
> +           (minus:<DWI> (match_dup 1) (match_dup 2)))))
> +   (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
> +       (minus:<DWI> (match_dup 1) (match_dup 2)))]
> +  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
> +  "#"
> +  "reload_completed"
> +  [(parallel [(set (reg:CC FLAGS_REG)
> +                  (compare:CC (match_dup 1) (match_dup 2)))
> +             (set (match_dup 0)
> +                  (minus:DWIH (match_dup 1) (match_dup 2)))])
> +   (parallel [(set (reg:CCO FLAGS_REG)
> +                  (eq:CCO
> +                    (minus:<DWI>
> +                      (minus:<DWI>
> +                        (sign_extend:<DWI> (match_dup 4))
> +                        (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)))
> +                      (sign_extend:<DWI> (match_dup 5)))
> +                    (sign_extend:<DWI>
> +                      (minus:DWIH
> +                        (minus:DWIH
> +                          (match_dup 4)
> +                          (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
> +                        (match_dup 5)))))
> +             (set (match_dup 3)
> +                  (minus:DWIH
> +                    (minus:DWIH
> +                      (match_dup 4)
> +                      (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
> +                    (match_dup 5)))])]
> +{
> +  split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
> +})
> +
> +(define_insn_and_split "*subv<dwi>4_doubleword_1"
> +  [(set (reg:CCO FLAGS_REG)
> +       (eq:CCO
> +         (minus:<QWI>
> +           (sign_extend:<QWI>
> +             (match_operand:<DWI> 1 "nonimmediate_operand" "0"))
> +           (match_operand:<QWI> 3 "const_scalar_int_operand" ""))
> +         (sign_extend:<QWI>
> +           (minus:<DWI>
> +             (match_dup 1)
> +             (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>")))))
> +   (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
> +       (minus:<DWI> (match_dup 1) (match_dup 2)))]
> +  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
> +   && CONST_SCALAR_INT_P (operands[2])
> +   && rtx_equal_p (operands[2], operands[3])"
> +  "#"
> +  "reload_completed"
> +  [(parallel [(set (reg:CC FLAGS_REG)
> +                  (compare:CC (match_dup 1) (match_dup 2)))
> +             (set (match_dup 0)
> +                  (minus:DWIH (match_dup 1) (match_dup 2)))])
> +   (parallel [(set (reg:CCO FLAGS_REG)
> +                  (eq:CCO
> +                    (minus:<DWI>
> +                      (minus:<DWI>
> +                        (sign_extend:<DWI> (match_dup 4))
> +                        (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)))
> +                      (match_dup 5))
> +                    (sign_extend:<DWI>
> +                      (minus:DWIH
> +                        (minus:DWIH
> +                          (match_dup 4)
> +                          (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
> +                        (match_dup 5)))))
> +             (set (match_dup 3)
> +                  (minus:DWIH
> +                    (minus:DWIH
> +                      (match_dup 4)
> +                      (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
> +                    (match_dup 5)))])]
> +{
> +  split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
> +  if (operands[2] == const0_rtx)
> +    {
> +      emit_insn (gen_subv<mode>4_1 (operands[3], operands[4], operands[5],
> +                                   operands[5]));
> +      DONE;
> +    }
> +})
> +
> +(define_insn "*subv<mode>4_overflow_1"
> +  [(set (reg:CCO FLAGS_REG)
> +       (eq:CCO
> +         (minus:<DWI>
> +           (minus:<DWI>
> +             (sign_extend:<DWI>
> +               (match_operand:SWI 1 "nonimmediate_operand" "%0,0"))
> +             (match_operator:<DWI> 4 "ix86_carry_flag_operator"
> +               [(match_operand 3 "flags_reg_operand") (const_int 0)]))
> +           (sign_extend:<DWI>
> +             (match_operand:SWI 2 "<general_sext_operand>" "rWe,m")))
> +         (sign_extend:<DWI>
> +           (minus:SWI
> +             (minus:SWI
> +               (match_dup 1)
> +               (match_operator:SWI 5 "ix86_carry_flag_operator"
> +                 [(match_dup 3) (const_int 0)]))
> +             (match_dup 2)))))
> +   (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r")
> +       (minus:SWI
> +         (minus:SWI
> +           (match_dup 1)
> +           (match_op_dup 5 [(match_dup 3) (const_int 0)]))
> +         (match_dup 2)))]
> +  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
> +  "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
> +  [(set_attr "type" "alu")
> +   (set_attr "mode" "<MODE>")])
> +
> +(define_insn "*subv<mode>4_overflow_2"
> +  [(set (reg:CCO FLAGS_REG)
> +       (eq:CCO
> +         (minus:<DWI>
> +           (minus:<DWI>
> +             (sign_extend:<DWI>
> +               (match_operand:SWI 1 "nonimmediate_operand" "%0"))
> +             (match_operator:<DWI> 4 "ix86_carry_flag_operator"
> +               [(match_operand 3 "flags_reg_operand") (const_int 0)]))
> +           (match_operand:<DWI> 6 "const_int_operand" ""))
> +         (sign_extend:<DWI>
> +           (minus:SWI
> +             (minus:SWI
> +               (match_dup 1)
> +               (match_operator:SWI 5 "ix86_carry_flag_operator"
> +                 [(match_dup 3) (const_int 0)]))
> +             (match_operand:SWI 2 "x86_64_immediate_operand" "e")))))
> +   (set (match_operand:SWI 0 "nonimmediate_operand" "=rm")
> +       (minus:SWI
> +         (minus:SWI
> +           (match_dup 1)
> +           (match_op_dup 5 [(match_dup 3) (const_int 0)]))
> +         (match_dup 2)))]
> +  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
> +   && CONST_INT_P (operands[2])
> +   && INTVAL (operands[2]) == INTVAL (operands[6])"
> +  "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
> +  [(set_attr "type" "alu")
> +   (set_attr "mode" "<MODE>")
> +   (set (attr "length_immediate")
> +     (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
> +       (const_string "1")
> +       (const_string "4")))])
> +
>  (define_expand "usubv<mode>4"
>    [(parallel [(set (reg:CC FLAGS_REG)
>                    (compare:CC
> --- gcc/testsuite/gcc.target/i386/pr93141-1.c.jj        2020-01-05 
> 13:51:34.452627004 +0100
> +++ gcc/testsuite/gcc.target/i386/pr93141-1.c   2020-01-07 10:45:36.213489037 
> +0100
> @@ -2,15 +2,17 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -masm=att" } */
>  /* { dg-final { scan-assembler-not "cmp\[lq]\t" } } */
> -/* { dg-final { scan-assembler-times "setc\t%" 3 } } */
> -/* { dg-final { scan-assembler-times "seto\t%" 5 } } */
> -/* { dg-final { scan-assembler-times "adc\[lq]\t" 5 } } */
> +/* { dg-final { scan-assembler-times "setc\t%" 5 } } */
> +/* { dg-final { scan-assembler-times "seto\t%" 7 } } */
> +/* { dg-final { scan-assembler-times "adc\[lq]\t" 9 } } */
>
>  #ifdef __x86_64__
>  typedef unsigned __int128 U;
> +typedef unsigned long long HU;
>  typedef signed __int128 S;
>  #else
>  typedef unsigned long long U;
> +typedef unsigned int HU;
>  typedef signed long long S;
>  #endif
>  int o;
> @@ -81,3 +83,39 @@ garply (S x)
>                                  | (S) 0xbeedead, &z);
>    return z;
>  }
> +
> +S
> +waldo (S x)
> +{
> +  S z;
> +  o = __builtin_add_overflow (x, (S) ((((S) 0xdeadbee) << (sizeof (S) * 
> __CHAR_BIT__ / 2))
> +                                     | -(HU) 0xbeedead), &z);
> +  return z;
> +}
> +
> +S
> +fred (S x)
> +{
> +  S z;
> +  o = __builtin_add_overflow (x, (S) ((-(((S) 0xdeadbee) << (sizeof (S) * 
> __CHAR_BIT__ / 2)))
> +                                     | -(HU) 0xbeedead), &z);
> +  return z;
> +}
> +
> +U
> +plugh (U x)
> +{
> +  U z;
> +  o = __builtin_add_overflow (x, (U) ((((U) 0xdeadbee) << (sizeof (U) * 
> __CHAR_BIT__ / 2))
> +                                     | -(HU) 0xbeedead), &z);
> +  return z;
> +}
> +
> +U
> +xyzzy (U x)
> +{
> +  U z;
> +  o = __builtin_add_overflow (x, (U) ((-(((U) 0xdeadbee) << (sizeof (U) * 
> __CHAR_BIT__ / 2)))
> +                                     | -(HU) 0xbeedead), &z);
> +  return z;
> +}
> --- gcc/testsuite/gcc.target/i386/pr93141-2.c.jj        2020-01-07 
> 10:30:53.966831451 +0100
> +++ gcc/testsuite/gcc.target/i386/pr93141-2.c   2020-01-07 10:43:27.602433611 
> +0100
> @@ -0,0 +1,78 @@
> +/* PR target/93141 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -masm=att" } */
> +/* { dg-final { scan-assembler-not "cmp\[lq]\t" } } */
> +/* { dg-final { scan-assembler-not "adc\[lq]\t" } } */
> +/* { dg-final { scan-assembler-times "seto\t%" 7 } } */
> +/* { dg-final { scan-assembler-times "sbb\[lq]\t" 5 } } */
> +
> +#ifdef __x86_64__
> +typedef unsigned __int128 U;
> +typedef unsigned long long HU;
> +typedef signed __int128 S;
> +#else
> +typedef unsigned long long U;
> +typedef signed int HU;
> +typedef signed long long S;
> +#endif
> +int o;
> +
> +S
> +qux (S x, S y)
> +{
> +  S z;
> +  o = __builtin_sub_overflow (x, y, &z);
> +  return z;
> +}
> +
> +S
> +quux (S x)
> +{
> +  S z;
> +  o = __builtin_sub_overflow (x, ((S) 0xdeadbee) << (sizeof (S) * 
> __CHAR_BIT__ / 2), &z);
> +  return z;
> +}
> +
> +S
> +corge (S x)
> +{
> +  S z;
> +  o = __builtin_sub_overflow (x, (((S) 0xdeadbee) << (sizeof (S) * 
> __CHAR_BIT__ / 2))
> +                                | (S) 0xbeedead, &z);
> +  return z;
> +}
> +
> +S
> +grault (S x)
> +{
> +  S z;
> +  o = __builtin_sub_overflow (x, -((S) 0xdeadbee) << (sizeof (S) * 
> __CHAR_BIT__ / 2), &z);
> +  return z;
> +}
> +
> +S
> +garply (S x)
> +{
> +  S z;
> +  o = __builtin_sub_overflow (x, (-(((S) 0xdeadbee) << (sizeof (S) * 
> __CHAR_BIT__ / 2)))
> +                                | (S) 0xbeedead, &z);
> +  return z;
> +}
> +
> +S
> +waldo (S x)
> +{
> +  S z;
> +  o = __builtin_sub_overflow (x, (S) ((((S) 0xdeadbee) << (sizeof (S) * 
> __CHAR_BIT__ / 2))
> +                                     | -(HU) 0xbeedead), &z);
> +  return z;
> +}
> +
> +S
> +fred (S x)
> +{
> +  S z;
> +  o = __builtin_sub_overflow (x, (S) ((-(((S) 0xdeadbee) << (sizeof (S) * 
> __CHAR_BIT__ / 2)))
> +                                     | -(HU) 0xbeedead), &z);
> +  return z;
> +}
>
>         Jakub
>

Reply via email to