Ping.
> -----Original Message----- > From: Tamar Christina <tamar.christ...@arm.com> > Sent: Monday, October 31, 2022 12:00 PM > To: gcc-patches@gcc.gnu.org > Cc: nd <n...@arm.com>; Richard Earnshaw <richard.earns...@arm.com>; > Marcus Shawcroft <marcus.shawcr...@arm.com>; Kyrylo Tkachov > <kyrylo.tkac...@arm.com>; Richard Sandiford > <richard.sandif...@arm.com> > Subject: [PATCH 7/8]AArch64: Consolidate zero and sign extension patterns > and add missing ones. > > Hi All, > > The target has various zero and sign extension patterns. These however live > in various locations around the MD file and almost all of them are split > differently. Due to the various patterns we also ended up missing valid > extensions. For instance smov is almost never generated. > > This change tries to make this more manageable by consolidating the > patterns as much as possible and in doing so fix the missing alternatives. > > There were also some duplicate patterns. Note that the > zero_extend<*_ONLY:mode><SD_HSDI:mode>2 patterns are nearly > identical; however, QImode lacks an alternative that the others have, so > I have left them as > 3 different patterns next to each other. > > In a lot of cases the wrong iterator was used leaving out cases that should > exist. > > I've also changed the masks used for zero extensions to hex instead of > decimal as it's more clear what they do that way, and aligns better with > output of other compilers. > > This leaves the bulk of the extensions in just 3 patterns. > > Bootstrapped and regtested on aarch64-none-linux-gnu with no issues. > > Ok for master? > > Thanks, > Tamar > > gcc/ChangeLog: > > * config/aarch64/aarch64-simd.md > (*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>): > Changed to ... > (*aarch64_get_lane_zero_extend<GPI:mode><VDQV_L:mode>): ... > This. > (*aarch64_get_lane_extenddi<VS:mode>): New. 
> * config/aarch64/aarch64.md (<optab>sidi2, *extendsidi2_aarch64, > <optab>qihi2, *extendqihi2_aarch64, *zero_extendsidi2_aarch64): > Remove > duplicate patterns. > (<ANY_EXTEND:optab><SHORT:mode><GPI:mode>2, > *extend<SHORT:mode><GPI:mode>2_aarch64): Remove, > consolidate > into ... > (extend<ALLX:mode><SD_HSDI:mode>2): ... This. > (*zero_extendqihi2_aarch64, > *zero_extend<SHORT:mode><GPI:mode>2_aarch64): Remove, > consolidate into > ... > (zero_extend<SI_ONLY:mode><SD_HSDI:mode>2, > zero_extend<HI_ONLY:mode><SD_HSDI:mode>2, > zero_extend<QI_ONLY:mode><SD_HSDI:mode>2): > (*ands<GPI:mode>_compare0): Renamed to ... > (*ands<SD_HSDI:mode>_compare0): ... This. > * config/aarch64/iterators.md (HI_ONLY, QI_ONLY): New. > (short_mask): Use hex rather than dec and add SI. > > gcc/testsuite/ChangeLog: > > * gcc.target/aarch64/ands_3.c: Update codegen. > * gcc.target/aarch64/sve/slp_1.c: Likewise. > * gcc.target/aarch64/tst_5.c: Likewise. > * gcc.target/aarch64/tst_6.c: Likewise. > > --- inline copy of patch -- > diff --git a/gcc/config/aarch64/aarch64-simd.md > b/gcc/config/aarch64/aarch64-simd.md > index > 8a84a8560e982b8155b18541f5504801b3330124..d0b37c4dd48aeafd3d87c90dc > 3270e71af5a72b9 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -4237,19 +4237,34 @@ (define_insn > "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>" > [(set_attr "type" "neon_to_gp<VDQQH:q>")] > ) > > -(define_insn > "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>" > +(define_insn "*aarch64_get_lane_extenddi<VS:mode>" > + [(set (match_operand:DI 0 "register_operand" "=r") > + (sign_extend:DI > + (vec_select:<VS:VEL> > + (match_operand:VS 1 "register_operand" "w") > + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] > + "TARGET_SIMD" > + { > + operands[2] = aarch64_endian_lane_rtx (<VS:MODE>mode, > + INTVAL (operands[2])); > + return "smov\\t%x0, %1.<VS:Vetype>[%2]"; > + } > + [(set_attr "type" "neon_to_gp<VS:q>")] > +) > + > 
+(define_insn > "*aarch64_get_lane_zero_extend<GPI:mode><VDQV_L:mode>" > [(set (match_operand:GPI 0 "register_operand" "=r") > (zero_extend:GPI > - (vec_select:<VDQQH:VEL> > - (match_operand:VDQQH 1 "register_operand" "w") > + (vec_select:<VDQV_L:VEL> > + (match_operand:VDQV_L 1 "register_operand" "w") > (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] > "TARGET_SIMD" > { > - operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode, > + operands[2] = aarch64_endian_lane_rtx (<VDQV_L:MODE>mode, > INTVAL (operands[2])); > - return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]"; > + return "umov\\t%w0, %1.<VDQV_L:Vetype>[%2]"; > } > - [(set_attr "type" "neon_to_gp<VDQQH:q>")] > + [(set_attr "type" "neon_to_gp<VDQV_L:q>")] > ) > > ;; Lane extraction of a value, neither sign nor zero extension diff --git > a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index > 3ea16dbc2557c6a4f37104d44a49f77f768eb53d..09ae1118371f82ca63146fceb9 > 53eb9e820d05a4 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -1911,22 +1911,6 @@ (define_insn > "storewb_pair<TX:mode>_<P:mode>" > ;; Sign/Zero extension > ;; ------------------------------------------------------------------- > > -(define_expand "<optab>sidi2" > - [(set (match_operand:DI 0 "register_operand") > - (ANY_EXTEND:DI (match_operand:SI 1 "nonimmediate_operand")))] > - "" > -) > - > -(define_insn "*extendsidi2_aarch64" > - [(set (match_operand:DI 0 "register_operand" "=r,r") > - (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" > "r,m")))] > - "" > - "@ > - sxtw\t%0, %w1 > - ldrsw\t%0, %1" > - [(set_attr "type" "extend,load_4")] > -) > - > (define_insn "*load_pair_extendsidi2_aarch64" > [(set (match_operand:DI 0 "register_operand" "=r") > (sign_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" > "Ump"))) @@ -1940,21 +1924,6 @@ (define_insn > "*load_pair_extendsidi2_aarch64" > [(set_attr "type" "load_8")] > ) > > -(define_insn "*zero_extendsidi2_aarch64" > - 
[(set (match_operand:DI 0 "register_operand" "=r,r,w,w,r,w") > - (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" > "r,m,r,m,w,w")))] > - "" > - "@ > - uxtw\t%0, %w1 > - ldr\t%w0, %1 > - fmov\t%s0, %w1 > - ldr\t%s0, %1 > - fmov\t%w0, %s1 > - fmov\t%s0, %s1" > - [(set_attr "type" "mov_reg,load_4,f_mcr,f_loads,f_mrc,fmov") > - (set_attr "arch" "*,*,fp,fp,fp,fp")] > -) > - > (define_insn "*load_pair_zero_extendsidi2_aarch64" > [(set (match_operand:DI 0 "register_operand" "=r,w") > (zero_extend:DI (match_operand:SI 1 > "aarch64_mem_pair_operand" "Ump,Ump"))) @@ -1971,61 +1940,64 @@ > (define_insn "*load_pair_zero_extendsidi2_aarch64" > (set_attr "arch" "*,fp")] > ) > > -(define_expand "<ANY_EXTEND:optab><SHORT:mode><GPI:mode>2" > - [(set (match_operand:GPI 0 "register_operand") > - (ANY_EXTEND:GPI (match_operand:SHORT 1 > "nonimmediate_operand")))] > - "" > -) > - > -(define_insn "*extend<SHORT:mode><GPI:mode>2_aarch64" > - [(set (match_operand:GPI 0 "register_operand" "=r,r,r") > - (sign_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" > "r,m,w")))] > +(define_insn "extend<ALLX:mode><SD_HSDI:mode>2" > + [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,r") > + (sign_extend:SD_HSDI > + (match_operand:ALLX 1 "nonimmediate_operand" "r,m,w")))] > "" > "@ > - sxt<SHORT:size>\t%<GPI:w>0, %w1 > - ldrs<SHORT:size>\t%<GPI:w>0, %1 > - smov\t%<GPI:w>0, %1.<SHORT:size>[0]" > + sxt<ALLX:size>\t%<SD_HSDI:w>0, %w1 > + ldrs<ALLX:size>\t%<SD_HSDI:w>0, %1 > + smov\t%<SD_HSDI:w>0, %1.<ALLX:Vetype>[0]" > [(set_attr "type" "extend,load_4,neon_to_gp") > (set_attr "arch" "*,*,fp")] > ) > > -(define_insn "*zero_extend<SHORT:mode><GPI:mode>2_aarch64" > - [(set (match_operand:GPI 0 "register_operand" "=r,r,w,r") > - (zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" > "r,m,m,w")))] > +(define_insn "zero_extend<SI_ONLY:mode><SD_HSDI:mode>2" > + [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,w,r,w") > + (zero_extend:SD_HSDI > + 
(match_operand:SI_ONLY 1 "nonimmediate_operand" > "r,m,r,m,w,w")))] > "" > "@ > - and\t%<GPI:w>0, %<GPI:w>1, <SHORT:short_mask> > - ldr<SHORT:size>\t%w0, %1 > - ldr\t%<SHORT:size>0, %1 > - umov\t%w0, %1.<SHORT:size>[0]" > - [(set_attr "type" "logic_imm,load_4,f_loads,neon_to_gp") > - (set_attr "arch" "*,*,fp,fp")] > -) > - > -(define_expand "<optab>qihi2" > - [(set (match_operand:HI 0 "register_operand") > - (ANY_EXTEND:HI (match_operand:QI 1 "nonimmediate_operand")))] > - "" > + uxt<SI_ONLY:size>\t%<SD_HSDI:w>0, %w1 > + ldr<SI_ONLY:sizel>\t%w0, %1 > + fmov\t%<SI_ONLY:Vetype>0, %w1 > + ldr\t%<SI_ONLY:Vetype>0, %1 > + fmov\t%w0, %<SI_ONLY:Vetype>1 > + fmov\t%<SI_ONLY:Vetype>0, %<SI_ONLY:Vetype>1" > + [(set_attr "type" "mov_reg,load_4,f_mcr,f_loads,f_mrc,fmov") > + (set_attr "arch" "*,*,fp,fp,fp,fp")] > ) > > -(define_insn "*extendqihi2_aarch64" > - [(set (match_operand:HI 0 "register_operand" "=r,r") > - (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" > "r,m")))] > +(define_insn "zero_extend<HI_ONLY:mode><SD_HSDI:mode>2" > + [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,w,r,w") > + (zero_extend:SD_HSDI > + (match_operand:HI_ONLY 1 "nonimmediate_operand" > "r,m,r,m,w,w")))] > "" > "@ > - sxtb\t%w0, %w1 > - ldrsb\t%w0, %1" > - [(set_attr "type" "extend,load_4")] > + uxt<HI_ONLY:size>\t%<SD_HSDI:w>0, %w1 > + ldr<HI_ONLY:sizel>\t%w0, %1 > + fmov\t%<HI_ONLY:Vetype>0, %w1 > + ldr\t%<HI_ONLY:Vetype>0, %1 > + umov\t%w0, %1.<HI_ONLY:Vetype>[0] > + fmov\t%<HI_ONLY:Vetype>0, %<HI_ONLY:Vetype>1" > + [(set_attr "type" "mov_reg,load_4,f_mcr,f_loads,f_mrc,fmov") > + (set_attr "arch" "*,*,fp16,fp,fp,fp16")] > ) > > -(define_insn "*zero_extendqihi2_aarch64" > - [(set (match_operand:HI 0 "register_operand" "=r,r") > - (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" > "r,m")))] > +(define_insn "zero_extend<QI_ONLY:mode><SD_HSDI:mode>2" > + [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,r,w") > + (zero_extend:SD_HSDI > + 
(match_operand:QI_ONLY 1 "nonimmediate_operand" > "r,m,m,w,w")))] > "" > "@ > - and\t%w0, %w1, 255 > - ldrb\t%w0, %1" > - [(set_attr "type" "logic_imm,load_4")] > + uxt<QI_ONLY:size>\t%<SD_HSDI:w>0, %w1 > + ldr<QI_ONLY:sizel>\t%w0, %1 > + ldr\t%<QI_ONLY:Vetype>0, %1 > + umov\t%w0, %1.<QI_ONLY:Vetype>[0] > + dup\t%<QI_ONLY:Vetype>0, %1.<QI_ONLY:Vetype>[0]" > + [(set_attr "type" "mov_reg,load_4,f_loads,f_mrc,fmov") > + (set_attr "arch" "*,*,fp,fp,fp")] > ) > > ;; ------------------------------------------------------------------- > @@ -5029,15 +5001,15 @@ (define_insn "*and<mode>_compare0" > [(set_attr "type" "alus_imm")] > ) > > -(define_insn "*ands<GPI:mode>_compare0" > +(define_insn "*ands<SD_HSDI:mode>_compare0" > [(set (reg:CC_NZ CC_REGNUM) > (compare:CC_NZ > - (zero_extend:GPI (match_operand:SHORT 1 "register_operand" > "r")) > + (zero_extend:SD_HSDI (match_operand:ALLX 1 "register_operand" > "r")) > (const_int 0))) > - (set (match_operand:GPI 0 "register_operand" "=r") > - (zero_extend:GPI (match_dup 1)))] > + (set (match_operand:SD_HSDI 0 "register_operand" "=r") > + (zero_extend:SD_HSDI (match_dup 1)))] > "" > - "ands\\t%<GPI:w>0, %<GPI:w>1, <short_mask>" > + "ands\\t%<SD_HSDI:w>0, %<SD_HSDI:w>1, <ALLX:short_mask>" > [(set_attr "type" "alus_imm")] > ) > > diff --git a/gcc/config/aarch64/iterators.md > b/gcc/config/aarch64/iterators.md index > 1df09f7fe2eb35aed96113476541e0faa5393551..e904407b2169e589b7007ff966 > b2d9347a6d0fd2 100644 > --- a/gcc/config/aarch64/iterators.md > +++ b/gcc/config/aarch64/iterators.md > @@ -41,6 +41,8 @@ (define_mode_iterator SHORT [QI HI]) ;; Iterators for > single modes, for "@" patterns. 
> (define_mode_iterator SI_ONLY [SI]) > (define_mode_iterator DI_ONLY [DI]) > +(define_mode_iterator HI_ONLY [HI]) > +(define_mode_iterator QI_ONLY [QI]) > > ;; Iterator for all integer modes (up to 64-bit) (define_mode_iterator ALLI > [QI HI SI DI]) @@ -1033,7 +1035,7 @@ (define_mode_attr w2 [(HF "x") (SF > "x") (DF "w")]) ;; For width of fp registers in fcvt instruction > (define_mode_attr fpw [(DI "s") (SI "d")]) > > -(define_mode_attr short_mask [(HI "65535") (QI "255")]) > +(define_mode_attr short_mask [(SI "0xffffffff") (HI "0xffff") (QI > +"0xff")]) > > ;; For constraints used in scalar immediate vector moves (define_mode_attr > hq [(HI "h") (QI "q")]) diff --git a/gcc/testsuite/gcc.target/aarch64/ands_3.c > b/gcc/testsuite/gcc.target/aarch64/ands_3.c > index > 42cb7f0f0bc86a4aceb09851c31eb2e888d93403..421aa5cea7a51ad810cc9c5653 > a149cb21bb871c 100644 > --- a/gcc/testsuite/gcc.target/aarch64/ands_3.c > +++ b/gcc/testsuite/gcc.target/aarch64/ands_3.c > @@ -9,4 +9,4 @@ f9 (unsigned char x, int y) > return x; > } > > -/* { dg-final { scan-assembler "ands\t(x|w)\[0-9\]+,\[ \t\]*(x|w)\[0-9\]+,\[ > \t\]*255" } } */ > +/* { dg-final { scan-assembler "ands\t(x|w)\[0-9\]+,\[ > +\t\]*(x|w)\[0-9\]+,\[ \t\]*0xff" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c > b/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c > index > 8e35e0b574d49913b43c7d8d4f4ba75f127f42e9..03288976b3397cdbe0e822f94 > f2a6448d9fa9a52 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c > @@ -51,7 +51,6 @@ TEST_ALL (VEC_PERM) > /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */ > /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */ > /* { dg-final { scan-assembler-not {\tldr} } } */ > -/* { dg-final { scan-assembler-times {\tstr} 2 } } */ > -/* { dg-final { scan-assembler-times {\tstr\th[0-9]+} 2 } } */ > +/* { dg-final { scan-assembler-times {\tins\tv[0-9]+\.h\[1\], > +v[0-9]+\.h\[0\]} 1 } 
} */ > > /* { dg-final { scan-assembler-not {\tuqdec} } } */ diff --git > a/gcc/testsuite/gcc.target/aarch64/tst_5.c > b/gcc/testsuite/gcc.target/aarch64/tst_5.c > index > 0de40a6c47a7d63c1b7a81aeba438a096c0041b8..19034cd74ed07ea4d670c25d > 9ab3d1cff805a483 100644 > --- a/gcc/testsuite/gcc.target/aarch64/tst_5.c > +++ b/gcc/testsuite/gcc.target/aarch64/tst_5.c > @@ -4,7 +4,7 @@ > int > f255 (int x) > { > - if (x & 255) > + if (x & 0xff) > return 1; > return x; > } > @@ -12,10 +12,10 @@ f255 (int x) > int > f65535 (int x) > { > - if (x & 65535) > + if (x & 0xffff) > return 1; > return x; > } > > -/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*255" } } */ > -/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*65535" } } */ > +/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*0xff" } } */ > +/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*0xffff" } } > +*/ > diff --git a/gcc/testsuite/gcc.target/aarch64/tst_6.c > b/gcc/testsuite/gcc.target/aarch64/tst_6.c > index > f15ec114c391fed79cc43b7740fde83fb3d4ea53..1c047cfae214b60e5bf003e678 > 1a277202fcc588 100644 > --- a/gcc/testsuite/gcc.target/aarch64/tst_6.c > +++ b/gcc/testsuite/gcc.target/aarch64/tst_6.c > @@ -7,4 +7,4 @@ foo (long x) > return ((short) x != 0) ? x : 1; > } > > -/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*65535" } } */ > +/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*0xffff" } } > +*/ > > > > > --