On Mon, May 26, 2025 at 4:55 PM Hu, Lin1 <lin1...@intel.com> wrote:
>
> Hi, all
>
> Enabling -mapxf changes some of the adc/sbb patterns.
>
> As a result, GCC emits an extra mov, for example
>          movq    8(%rdi), %rax
>          adcq    %rax, 8(%rsi), %rax
>          movq    %rax, 8(%rdi)
> rather than
>          movq    8(%rsi), %rax
>          adcq    %rax, 8(%rdi)
>
> This patch adds more peephole2 patterns to eliminate the extra mov.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu, OK for trunk?
Ok.
>
> BRs,
> Lin
>
> gcc/ChangeLog:
>
>         * config/i386/i386.md: Add 4 new peephole2 patterns, swapping the
>         operand order of the existing ones to match the new patterns.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr79173-13.c: New test.
>         * gcc.target/i386/pr79173-14.c: Ditto.
>         * gcc.target/i386/pr79173-15.c: Ditto.
>         * gcc.target/i386/pr79173-16.c: Ditto.
>         * gcc.target/i386/pr79173-17.c: Ditto.
>         * gcc.target/i386/pr79173-18.c: Ditto.
> ---
>  gcc/config/i386/i386.md                    | 186 +++++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr79173-13.c |  59 +++++++
>  gcc/testsuite/gcc.target/i386/pr79173-14.c |  59 +++++++
>  gcc/testsuite/gcc.target/i386/pr79173-15.c |  61 +++++++
>  gcc/testsuite/gcc.target/i386/pr79173-16.c |  61 +++++++
>  gcc/testsuite/gcc.target/i386/pr79173-17.c |  32 ++++
>  gcc/testsuite/gcc.target/i386/pr79173-18.c |  33 ++++
>  7 files changed, 491 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr79173-13.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr79173-14.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr79173-15.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr79173-16.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr79173-17.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr79173-18.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index b7a18d583da..4c9cb81d5f9 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -8719,6 +8719,34 @@ (define_peephole2
>               (set (match_dup 1)
>                    (minus:SWI (match_dup 1) (match_dup 0)))])])
>
> +;; Under APX NDD, 'sub reg, mem, reg' is valid.
> +;; New format for
> +;; mov reg0, mem1
> +;; sub reg0, mem2, reg0
> +;; mov mem2, reg0
> +;; to
> +;; mov reg0, mem1
> +;; sub mem2, reg0
> +(define_peephole2
> +  [(set (match_operand:SWI 0 "general_reg_operand")
> +       (match_operand:SWI 1 "memory_operand"))
> +   (parallel [(set (reg:CC FLAGS_REG)
> +                  (compare:CC (match_operand:SWI 2 "memory_operand")
> +                              (match_dup 0)))
> +             (set (match_dup 0)
> +                  (minus:SWI (match_dup 2) (match_dup 0)))])
> +   (set (match_dup 2) (match_dup 0))]
> +  "TARGET_APX_NDD
> +   && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
> +   && peep2_reg_dead_p (3, operands[0])
> +   && !reg_overlap_mentioned_p (operands[0], operands[1])
> +   && !reg_overlap_mentioned_p (operands[0], operands[2])"
> +  [(set (match_dup 0) (match_dup 1))
> +   (parallel [(set (reg:CC FLAGS_REG)
> +                  (compare:CC (match_dup 2) (match_dup 0)))
> +             (set (match_dup 2)
> +                  (minus:SWI (match_dup 2) (match_dup 0)))])])
> +
>  ;; decl %eax; cmpl $-1, %eax; jne .Lxx; can be optimized into
>  ;; subl $1, %eax; jnc .Lxx;
>  (define_peephole2
> @@ -9166,6 +9194,118 @@ (define_peephole2
>                                            (match_dup 1))
>                                (match_dup 0)))])])
>
> +;; Under APX NDD, 'adc reg, mem, reg' is valid.
> +;;
> +;; New format for
> +;; mov reg0, mem1
> +;; adc reg0, mem2, reg0
> +;; mov mem1, reg0
> +;; to
> +;; mov reg0, mem2
> +;; adc mem1, reg0
> +(define_peephole2
> +  [(set (match_operand:SWI48 0 "general_reg_operand")
> +       (match_operand:SWI48 1 "memory_operand"))
> +   (parallel [(set (reg:CCC FLAGS_REG)
> +                  (compare:CCC
> +                    (zero_extend:<DWI>
> +                      (plus:SWI48
> +                        (plus:SWI48
> +                          (match_operator:SWI48 5 "ix86_carry_flag_operator"
> +                            [(match_operand 3 "flags_reg_operand")
> +                             (const_int 0)])
> +                          (match_operand:SWI48 2 "memory_operand"))
> +                        (match_dup 0)))
> +                    (plus:<DWI>
> +                      (match_operator:<DWI> 4 "ix86_carry_flag_operator"
> +                        [(match_dup 3) (const_int 0)])
> +                      (zero_extend:<DWI> (match_dup 0)))))
> +             (set (match_dup 0)
> +                  (plus:SWI48 (plus:SWI48 (match_op_dup 5
> +                                            [(match_dup 3) (const_int 0)])
> +                                          (match_dup 2))
> +                              (match_dup 0)))])
> +   (set (match_dup 1) (match_dup 0))]
> +  "TARGET_APX_NDD
> +   && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
> +   && peep2_reg_dead_p (3, operands[0])
> +   && !reg_overlap_mentioned_p (operands[0], operands[1])
> +   && !reg_overlap_mentioned_p (operands[0], operands[2])"
> +  [(set (match_dup 0) (match_dup 2))
> +   (parallel [(set (reg:CCC FLAGS_REG)
> +                  (compare:CCC
> +                    (zero_extend:<DWI>
> +                      (plus:SWI48
> +                        (plus:SWI48
> +                          (match_op_dup 5
> +                            [(match_dup 3) (const_int 0)])
> +                          (match_dup 1))
> +                        (match_dup 0)))
> +                    (plus:<DWI>
> +                      (match_op_dup 4
> +                        [(match_dup 3) (const_int 0)])
> +                      (zero_extend:<DWI> (match_dup 0)))))
> +             (set (match_dup 1)
> +                  (plus:SWI48 (plus:SWI48 (match_op_dup 5
> +                                            [(match_dup 3) (const_int 0)])
> +                                          (match_dup 1))
> +                              (match_dup 0)))])])
> +
> +;; New format for
> +;; mov reg0, mem1
> +;; adc reg0, mem2, reg0
> +;; mov mem2, reg0
> +;; to
> +;; mov reg0, mem1
> +;; adc mem2, reg0
> +(define_peephole2
> +  [(set (match_operand:SWI48 0 "general_reg_operand")
> +       (match_operand:SWI48 1 "memory_operand"))
> +   (parallel [(set (reg:CCC FLAGS_REG)
> +                  (compare:CCC
> +                    (zero_extend:<DWI>
> +                      (plus:SWI48
> +                        (plus:SWI48
> +                          (match_operator:SWI48 5 "ix86_carry_flag_operator"
> +                            [(match_operand 3 "flags_reg_operand")
> +                             (const_int 0)])
> +                          (match_operand:SWI48 2 "memory_operand"))
> +                        (match_dup 0)))
> +                    (plus:<DWI>
> +                      (match_operator:<DWI> 4 "ix86_carry_flag_operator"
> +                        [(match_dup 3) (const_int 0)])
> +                      (zero_extend:<DWI> (match_dup 0)))))
> +             (set (match_dup 0)
> +                  (plus:SWI48 (plus:SWI48 (match_op_dup 5
> +                                            [(match_dup 3) (const_int 0)])
> +                                          (match_dup 2))
> +                              (match_dup 0)))])
> +   (set (match_dup 2) (match_dup 0))]
> +  "TARGET_APX_NDD
> +   && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
> +   && peep2_reg_dead_p (3, operands[0])
> +   && !reg_overlap_mentioned_p (operands[0], operands[1])
> +   && !reg_overlap_mentioned_p (operands[0], operands[2])"
> +  [(set (match_dup 0) (match_dup 1))
> +   (parallel [(set (reg:CCC FLAGS_REG)
> +                  (compare:CCC
> +                    (zero_extend:<DWI>
> +                      (plus:SWI48
> +                        (plus:SWI48
> +                          (match_op_dup 5
> +                            [(match_dup 3) (const_int 0)])
> +                          (match_dup 2))
> +                        (match_dup 0)))
> +                    (plus:<DWI>
> +                      (match_op_dup 4
> +                        [(match_dup 3) (const_int 0)])
> +                      (zero_extend:<DWI> (match_dup 0)))))
> +             (set (match_dup 2)
> +                  (plus:SWI48 (plus:SWI48 (match_op_dup 5
> +                                            [(match_dup 3) (const_int 0)])
> +                                          (match_dup 2))
> +                              (match_dup 0)))])])
> +
>  (define_peephole2
>    [(parallel [(set (reg:CCC FLAGS_REG)
>                    (compare:CCC
> @@ -9646,6 +9786,52 @@ (define_peephole2
>                                                [(match_dup 3) (const_int 0)]))
>                                 (match_dup 0)))])])
>
> +;; Under APX NDD, 'sbb reg, mem, reg' is valid.
> +;;
> +;; New format for
> +;; mov reg0, mem1
> +;; sbb reg0, mem2, reg0
> +;; mov mem2, reg0
> +;; to
> +;; mov reg0, mem1
> +;; sbb mem2, reg0
> +(define_peephole2
> +  [(set (match_operand:SWI48 0 "general_reg_operand")
> +       (match_operand:SWI48 1 "memory_operand"))
> +   (parallel [(set (reg:CCC FLAGS_REG)
> +                  (compare:CCC
> +                    (zero_extend:<DWI> (match_operand:SWI48 2 
> "memory_operand"))
> +                    (plus:<DWI>
> +                      (match_operator:<DWI> 4 "ix86_carry_flag_operator"
> +                        [(match_operand 3 "flags_reg_operand") (const_int 
> 0)])
> +                      (zero_extend:<DWI>
> +                        (match_dup 0)))))
> +             (set (match_dup 0)
> +                  (minus:SWI48
> +                    (minus:SWI48
> +                      (match_dup 2)
> +                      (match_operator:SWI48 5 "ix86_carry_flag_operator"
> +                        [(match_dup 3) (const_int 0)]))
> +                    (match_dup 0)))])
> +   (set (match_dup 2) (match_dup 0))]
> +  "TARGET_APX_NDD
> +   && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
> +   && peep2_reg_dead_p (3, operands[0])
> +   && !reg_overlap_mentioned_p (operands[0], operands[1])
> +   && !reg_overlap_mentioned_p (operands[0], operands[2])"
> +  [(set (match_dup 0) (match_dup 1))
> +   (parallel [(set (reg:CCC FLAGS_REG)
> +                  (compare:CCC
> +                    (zero_extend:<DWI> (match_dup 2))
> +                    (plus:<DWI> (match_op_dup 4
> +                                  [(match_dup 3) (const_int 0)])
> +                                (zero_extend:<DWI> (match_dup 0)))))
> +             (set (match_dup 2)
> +                  (minus:SWI48 (minus:SWI48 (match_dup 2)
> +                                            (match_op_dup 5
> +                                              [(match_dup 3) (const_int 0)]))
> +                               (match_dup 0)))])])
> +
>  (define_peephole2
>    [(set (match_operand:SWI48 6 "general_reg_operand")
>         (match_operand:SWI48 7 "memory_operand"))
> diff --git a/gcc/testsuite/gcc.target/i386/pr79173-13.c 
> b/gcc/testsuite/gcc.target/i386/pr79173-13.c
> new file mode 100644
> index 00000000000..7d5818b125b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr79173-13.c
> @@ -0,0 +1,59 @@
> +/* PR middle-end/79173 */
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-stack-protector -masm=att -mapxf" } */
> +/* { dg-final { scan-assembler-times "addq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "subq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "addl\t%e\[^\n\r]*, 
> \\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 
> 4\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 
> 8\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 
> 12\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "subl\t%e\[^\n\r]*, 
> \\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 
> 4\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 
> 8\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 
> 12\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +
> +static unsigned long
> +uaddc (unsigned long x, unsigned long y, unsigned long carry_in, unsigned 
> long *carry_out)
> +{
> +  unsigned long r;
> +  unsigned long c1 = __builtin_add_overflow (x, y, &r);
> +  unsigned long c2 = __builtin_add_overflow (r, carry_in, &r);
> +  *carry_out = c1 + c2;
> +  return r;
> +}
> +
> +static unsigned long
> +usubc (unsigned long x, unsigned long y, unsigned long carry_in, unsigned 
> long *carry_out)
> +{
> +  unsigned long r;
> +  unsigned long c1 = __builtin_sub_overflow (x, y, &r);
> +  unsigned long c2 = __builtin_sub_overflow (r, carry_in, &r);
> +  *carry_out = c1 + c2;
> +  return r;
> +}
> +
> +void
> +foo (unsigned long *p, unsigned long *q)
> +{
> +  unsigned long c;
> +  p[0] = uaddc (p[0], q[0], 0, &c);
> +  p[1] = uaddc (p[1], q[1], c, &c);
> +  p[2] = uaddc (p[2], q[2], c, &c);
> +  p[3] = uaddc (p[3], q[3], c, &c);
> +}
> +
> +void
> +bar (unsigned long *p, unsigned long *q)
> +{
> +  unsigned long c;
> +  p[0] = usubc (p[0], q[0], 0, &c);
> +  p[1] = usubc (p[1], q[1], c, &c);
> +  p[2] = usubc (p[2], q[2], c, &c);
> +  p[3] = usubc (p[3], q[3], c, &c);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr79173-14.c 
> b/gcc/testsuite/gcc.target/i386/pr79173-14.c
> new file mode 100644
> index 00000000000..de85051fbdd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr79173-14.c
> @@ -0,0 +1,59 @@
> +/* PR middle-end/79173 */
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-stack-protector -masm=att -mapxf" } */
> +/* { dg-final { scan-assembler-times "addq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "subq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "addl\t%e\[^\n\r]*, 
> \\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 
> 4\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 
> 8\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 
> 12\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "subl\t%e\[^\n\r]*, 
> \\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 
> 4\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 
> 8\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 
> 12\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +
> +static unsigned long
> +uaddc (unsigned long x, unsigned long y, _Bool carry_in, _Bool *carry_out)
> +{
> +  unsigned long r;
> +  _Bool c1 = __builtin_add_overflow (x, y, &r);
> +  _Bool c2 = __builtin_add_overflow (r, carry_in, &r);
> +  *carry_out = c1 | c2;
> +  return r;
> +}
> +
> +static unsigned long
> +usubc (unsigned long x, unsigned long y, _Bool carry_in, _Bool *carry_out)
> +{
> +  unsigned long r;
> +  _Bool c1 = __builtin_sub_overflow (x, y, &r);
> +  _Bool c2 = __builtin_sub_overflow (r, carry_in, &r);
> +  *carry_out = c1 | c2;
> +  return r;
> +}
> +
> +void
> +foo (unsigned long *p, unsigned long *q)
> +{
> +  _Bool c;
> +  p[0] = uaddc (p[0], q[0], 0, &c);
> +  p[1] = uaddc (p[1], q[1], c, &c);
> +  p[2] = uaddc (p[2], q[2], c, &c);
> +  p[3] = uaddc (p[3], q[3], c, &c);
> +}
> +
> +void
> +bar (unsigned long *p, unsigned long *q)
> +{
> +  _Bool c;
> +  p[0] = usubc (p[0], q[0], 0, &c);
> +  p[1] = usubc (p[1], q[1], c, &c);
> +  p[2] = usubc (p[2], q[2], c, &c);
> +  p[3] = usubc (p[3], q[3], c, &c);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr79173-15.c 
> b/gcc/testsuite/gcc.target/i386/pr79173-15.c
> new file mode 100644
> index 00000000000..c3017f76a0f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr79173-15.c
> @@ -0,0 +1,61 @@
> +/* PR middle-end/79173 */
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-stack-protector -masm=att -mapxf" } */
> +/* { dg-final { scan-assembler-times "addq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "subq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "addl\t%e\[^\n\r]*, 
> \\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 
> 4\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 
> 8\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 
> 12\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "subl\t%e\[^\n\r]*, 
> \\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 
> 4\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 
> 8\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 
> 12\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +
> +static unsigned long
> +uaddc (unsigned long x, unsigned long y, unsigned long carry_in, unsigned 
> long *carry_out)
> +{
> +  unsigned long r;
> +  unsigned long c1 = __builtin_add_overflow (x, y, &r);
> +  unsigned long c2 = __builtin_add_overflow (r, carry_in, &r);
> +  *carry_out = c1 + c2;
> +  return r;
> +}
> +
> +static unsigned long
> +usubc (unsigned long x, unsigned long y, unsigned long carry_in, unsigned 
> long *carry_out)
> +{
> +  unsigned long r;
> +  unsigned long c1 = __builtin_sub_overflow (x, y, &r);
> +  unsigned long c2 = __builtin_sub_overflow (r, carry_in, &r);
> +  *carry_out = c1 + c2;
> +  return r;
> +}
> +
> +unsigned long
> +foo (unsigned long *p, unsigned long *q)
> +{
> +  unsigned long c;
> +  p[0] = uaddc (p[0], q[0], 0, &c);
> +  p[1] = uaddc (p[1], q[1], c, &c);
> +  p[2] = uaddc (p[2], q[2], c, &c);
> +  p[3] = uaddc (p[3], q[3], c, &c);
> +  return c;
> +}
> +
> +unsigned long
> +bar (unsigned long *p, unsigned long *q)
> +{
> +  unsigned long c;
> +  p[0] = usubc (p[0], q[0], 0, &c);
> +  p[1] = usubc (p[1], q[1], c, &c);
> +  p[2] = usubc (p[2], q[2], c, &c);
> +  p[3] = usubc (p[3], q[3], c, &c);
> +  return c;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr79173-16.c 
> b/gcc/testsuite/gcc.target/i386/pr79173-16.c
> new file mode 100644
> index 00000000000..91062fbd56e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr79173-16.c
> @@ -0,0 +1,61 @@
> +/* PR middle-end/79173 */
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-stack-protector -masm=att -mapxf" } */
> +/* { dg-final { scan-assembler-times "addq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "subq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "addl\t%e\[^\n\r]*, 
> \\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 
> 4\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 
> 8\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 
> 12\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "subl\t%e\[^\n\r]*, 
> \\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 
> 4\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 
> 8\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 
> 12\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +
> +static unsigned long
> +uaddc (unsigned long x, unsigned long y, _Bool carry_in, _Bool *carry_out)
> +{
> +  unsigned long r;
> +  _Bool c1 = __builtin_add_overflow (x, y, &r);
> +  _Bool c2 = __builtin_add_overflow (r, carry_in, &r);
> +  *carry_out = c1 ^ c2;
> +  return r;
> +}
> +
> +static unsigned long
> +usubc (unsigned long x, unsigned long y, _Bool carry_in, _Bool *carry_out)
> +{
> +  unsigned long r;
> +  _Bool c1 = __builtin_sub_overflow (x, y, &r);
> +  _Bool c2 = __builtin_sub_overflow (r, carry_in, &r);
> +  *carry_out = c1 ^ c2;
> +  return r;
> +}
> +
> +_Bool
> +foo (unsigned long *p, unsigned long *q)
> +{
> +  _Bool c;
> +  p[0] = uaddc (p[0], q[0], 0, &c);
> +  p[1] = uaddc (p[1], q[1], c, &c);
> +  p[2] = uaddc (p[2], q[2], c, &c);
> +  p[3] = uaddc (p[3], q[3], c, &c);
> +  return c;
> +}
> +
> +_Bool
> +bar (unsigned long *p, unsigned long *q)
> +{
> +  _Bool c;
> +  p[0] = usubc (p[0], q[0], 0, &c);
> +  p[1] = usubc (p[1], q[1], c, &c);
> +  p[2] = usubc (p[2], q[2], c, &c);
> +  p[3] = usubc (p[3], q[3], c, &c);
> +  return c;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr79173-17.c 
> b/gcc/testsuite/gcc.target/i386/pr79173-17.c
> new file mode 100644
> index 00000000000..e27f4b95292
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr79173-17.c
> @@ -0,0 +1,32 @@
> +/* PR middle-end/79173 */
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-stack-protector -masm=att -mapxf" } */
> +/* { dg-final { scan-assembler-times "addq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "addl\t%e\[^\n\r]*, 
> \\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 
> 4\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 
> 8\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 
> 12\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +
> +static unsigned long
> +uaddc (unsigned long x, unsigned long y, unsigned long carry_in, unsigned 
> long *carry_out)
> +{
> +  unsigned long r = x + y;
> +  unsigned long c1 = r < x;
> +  r += carry_in;
> +  unsigned long c2 = r < carry_in;
> +  *carry_out = c1 + c2;
> +  return r;
> +}
> +
> +void
> +foo (unsigned long *p, unsigned long *q)
> +{
> +  unsigned long c;
> +  p[0] = uaddc (p[0], q[0], 0, &c);
> +  p[1] = uaddc (p[1], q[1], c, &c);
> +  p[2] = uaddc (p[2], q[2], c, &c);
> +  p[3] = uaddc (p[3], q[3], c, &c);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr79173-18.c 
> b/gcc/testsuite/gcc.target/i386/pr79173-18.c
> new file mode 100644
> index 00000000000..2728ae7fbeb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr79173-18.c
> @@ -0,0 +1,33 @@
> +/* PR middle-end/79173 */
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -fno-stack-protector -masm=att -mapxf" } */
> +/* { dg-final { scan-assembler-times "addq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 { 
> target lp64 } } } */
> +/* { dg-final { scan-assembler-times "addl\t%e\[^\n\r]*, 
> \\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 
> 4\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 
> 8\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 
> 12\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */
> +
> +static unsigned long
> +uaddc (unsigned long x, unsigned long y, unsigned long carry_in, unsigned 
> long *carry_out)
> +{
> +  unsigned long r = x + y;
> +  unsigned long c1 = r < x;
> +  r += carry_in;
> +  unsigned long c2 = r < carry_in;
> +  *carry_out = c1 + c2;
> +  return r;
> +}
> +
> +unsigned long
> +foo (unsigned long *p, unsigned long *q)
> +{
> +  unsigned long c;
> +  p[0] = uaddc (p[0], q[0], 0, &c);
> +  p[1] = uaddc (p[1], q[1], c, &c);
> +  p[2] = uaddc (p[2], q[2], c, &c);
> +  p[3] = uaddc (p[3], q[3], c, &c);
> +  return c;
> +}
> --
> 2.31.1
>


-- 
BR,
Hongtao

Reply via email to