Ping.

> -----Original Message-----
> From: Tamar Christina <tamar.christ...@arm.com>
> Sent: Monday, October 31, 2022 12:00 PM
> To: gcc-patches@gcc.gnu.org
> Cc: nd <n...@arm.com>; Richard Earnshaw <richard.earns...@arm.com>;
> Marcus Shawcroft <marcus.shawcr...@arm.com>; Kyrylo Tkachov
> <kyrylo.tkac...@arm.com>; Richard Sandiford
> <richard.sandif...@arm.com>
> Subject: [PATCH 7/8]AArch64: Consolidate zero and sign extension patterns
> and add missing ones.
> 
> Hi All,
> 
> The target has various zero and sign extension patterns.  These however live
> in various locations around the MD file and almost all of them are split
> differently.  Due to the various patterns we also ended up missing valid
> extensions.  For instance smov is almost never generated.
> 
> This change tries to make this more manageable by consolidating the
> patterns as much as possible and in doing so fix the missing alternatives.
> 
> There were also some duplicate patterns.  Note that the
> zero_extend<*_ONLY:mode><SD_HSDI:mode>2 patterns are nearly
> identical; however, QImode lacks an alternative that the others have, so
> I have left them as 3 different patterns next to each other.
> 
> In a lot of cases the wrong iterator was used leaving out cases that should
> exist.
> 
> I've also changed the masks used for zero extensions to hex instead of
> decimal, as it is clearer what they do that way and it aligns better with
> the output of other compilers.
> 
> This leaves the bulk of the extensions in just 3 patterns.
> 
> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>       * config/aarch64/aarch64-simd.md
>       (*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>):
> Changed to ...
>       (*aarch64_get_lane_zero_extend<GPI:mode><VDQV_L:mode>): ...
> This.
>       (*aarch64_get_lane_extenddi<VS:mode>): New.
>       * config/aarch64/aarch64.md (<optab>sidi2, *extendsidi2_aarch64,
>       <optab>qihi2, *extendqihi2_aarch64, *zero_extendsidi2_aarch64):
> Remove
>       duplicate patterns.
>       (<ANY_EXTEND:optab><SHORT:mode><GPI:mode>2,
>       *extend<SHORT:mode><GPI:mode>2_aarch64): Remove,
> consolidate
>       into ...
>       (extend<ALLX:mode><SD_HSDI:mode>2): ... This.
>       (*zero_extendqihi2_aarch64,
>       *zero_extend<SHORT:mode><GPI:mode>2_aarch64): Remove,
> consolidate into
>       ...
>       (zero_extend<SI_ONLY:mode><SD_HSDI:mode>2,
>       zero_extend<HI_ONLY:mode><SD_HSDI:mode>2,
>       zero_extend<QI_ONLY:mode><SD_HSDI:mode>2): ... These.
>       (*ands<GPI:mode>_compare0): Renamed to ...
>       (*ands<SD_HSDI:mode>_compare0): ... This.
>       * config/aarch64/iterators.md (HI_ONLY, QI_ONLY): New.
>       (short_mask): Use hex rather than dec and add SI.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.target/aarch64/ands_3.c: Update codegen.
>       * gcc.target/aarch64/sve/slp_1.c: Likewise.
>       * gcc.target/aarch64/tst_5.c: Likewise.
>       * gcc.target/aarch64/tst_6.c: Likewise.
> 
> --- inline copy of patch --
> diff --git a/gcc/config/aarch64/aarch64-simd.md
> b/gcc/config/aarch64/aarch64-simd.md
> index
> 8a84a8560e982b8155b18541f5504801b3330124..d0b37c4dd48aeafd3d87c90dc
> 3270e71af5a72b9 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -4237,19 +4237,34 @@ (define_insn
> "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
>    [(set_attr "type" "neon_to_gp<VDQQH:q>")]
>  )
> 
> -(define_insn
> "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
> +(define_insn "*aarch64_get_lane_extenddi<VS:mode>"
> +  [(set (match_operand:DI 0 "register_operand" "=r")
> +     (sign_extend:DI
> +       (vec_select:<VS:VEL>
> +         (match_operand:VS 1 "register_operand" "w")
> +         (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
> +  "TARGET_SIMD"
> +  {
> +    operands[2] = aarch64_endian_lane_rtx (<VS:MODE>mode,
> +                                        INTVAL (operands[2]));
> +    return "smov\\t%x0, %1.<VS:Vetype>[%2]";
> +  }
> +  [(set_attr "type" "neon_to_gp<VS:q>")]
> +)
> +
> +(define_insn
> "*aarch64_get_lane_zero_extend<GPI:mode><VDQV_L:mode>"
>    [(set (match_operand:GPI 0 "register_operand" "=r")
>       (zero_extend:GPI
> -       (vec_select:<VDQQH:VEL>
> -         (match_operand:VDQQH 1 "register_operand" "w")
> +       (vec_select:<VDQV_L:VEL>
> +         (match_operand:VDQV_L 1 "register_operand" "w")
>           (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
>    "TARGET_SIMD"
>    {
> -    operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
> +    operands[2] = aarch64_endian_lane_rtx (<VDQV_L:MODE>mode,
>                                          INTVAL (operands[2]));
> -    return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
> +    return "umov\\t%w0, %1.<VDQV_L:Vetype>[%2]";
>    }
> -  [(set_attr "type" "neon_to_gp<VDQQH:q>")]
> +  [(set_attr "type" "neon_to_gp<VDQV_L:q>")]
>  )
> 
>  ;; Lane extraction of a value, neither sign nor zero extension diff --git
> a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index
> 3ea16dbc2557c6a4f37104d44a49f77f768eb53d..09ae1118371f82ca63146fceb9
> 53eb9e820d05a4 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1911,22 +1911,6 @@ (define_insn
> "storewb_pair<TX:mode>_<P:mode>"
>  ;; Sign/Zero extension
>  ;; -------------------------------------------------------------------
> 
> -(define_expand "<optab>sidi2"
> -  [(set (match_operand:DI 0 "register_operand")
> -     (ANY_EXTEND:DI (match_operand:SI 1 "nonimmediate_operand")))]
> -  ""
> -)
> -
> -(define_insn "*extendsidi2_aarch64"
> -  [(set (match_operand:DI 0 "register_operand" "=r,r")
> -        (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand"
> "r,m")))]
> -  ""
> -  "@
> -   sxtw\t%0, %w1
> -   ldrsw\t%0, %1"
> -  [(set_attr "type" "extend,load_4")]
> -)
> -
>  (define_insn "*load_pair_extendsidi2_aarch64"
>    [(set (match_operand:DI 0 "register_operand" "=r")
>       (sign_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand"
> "Ump"))) @@ -1940,21 +1924,6 @@ (define_insn
> "*load_pair_extendsidi2_aarch64"
>    [(set_attr "type" "load_8")]
>  )
> 
> -(define_insn "*zero_extendsidi2_aarch64"
> -  [(set (match_operand:DI 0 "register_operand" "=r,r,w,w,r,w")
> -        (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand"
> "r,m,r,m,w,w")))]
> -  ""
> -  "@
> -   uxtw\t%0, %w1
> -   ldr\t%w0, %1
> -   fmov\t%s0, %w1
> -   ldr\t%s0, %1
> -   fmov\t%w0, %s1
> -   fmov\t%s0, %s1"
> -  [(set_attr "type" "mov_reg,load_4,f_mcr,f_loads,f_mrc,fmov")
> -   (set_attr "arch" "*,*,fp,fp,fp,fp")]
> -)
> -
>  (define_insn "*load_pair_zero_extendsidi2_aarch64"
>    [(set (match_operand:DI 0 "register_operand" "=r,w")
>       (zero_extend:DI (match_operand:SI 1
> "aarch64_mem_pair_operand" "Ump,Ump"))) @@ -1971,61 +1940,64 @@
> (define_insn "*load_pair_zero_extendsidi2_aarch64"
>     (set_attr "arch" "*,fp")]
>  )
> 
> -(define_expand "<ANY_EXTEND:optab><SHORT:mode><GPI:mode>2"
> -  [(set (match_operand:GPI 0 "register_operand")
> -        (ANY_EXTEND:GPI (match_operand:SHORT 1
> "nonimmediate_operand")))]
> -  ""
> -)
> -
> -(define_insn "*extend<SHORT:mode><GPI:mode>2_aarch64"
> -  [(set (match_operand:GPI 0 "register_operand" "=r,r,r")
> -        (sign_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand"
> "r,m,w")))]
> +(define_insn "extend<ALLX:mode><SD_HSDI:mode>2"
> +  [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,r")
> +        (sign_extend:SD_HSDI
> +       (match_operand:ALLX 1 "nonimmediate_operand" "r,m,w")))]
>    ""
>    "@
> -   sxt<SHORT:size>\t%<GPI:w>0, %w1
> -   ldrs<SHORT:size>\t%<GPI:w>0, %1
> -   smov\t%<GPI:w>0, %1.<SHORT:size>[0]"
> +   sxt<ALLX:size>\t%<SD_HSDI:w>0, %w1
> +   ldrs<ALLX:size>\t%<SD_HSDI:w>0, %1
> +   smov\t%<SD_HSDI:w>0, %1.<ALLX:Vetype>[0]"
>    [(set_attr "type" "extend,load_4,neon_to_gp")
>     (set_attr "arch" "*,*,fp")]
>  )
> 
> -(define_insn "*zero_extend<SHORT:mode><GPI:mode>2_aarch64"
> -  [(set (match_operand:GPI 0 "register_operand" "=r,r,w,r")
> -        (zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand"
> "r,m,m,w")))]
> +(define_insn "zero_extend<SI_ONLY:mode><SD_HSDI:mode>2"
> +  [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,w,r,w")
> +        (zero_extend:SD_HSDI
> +       (match_operand:SI_ONLY 1 "nonimmediate_operand"
> "r,m,r,m,w,w")))]
>    ""
>    "@
> -   and\t%<GPI:w>0, %<GPI:w>1, <SHORT:short_mask>
> -   ldr<SHORT:size>\t%w0, %1
> -   ldr\t%<SHORT:size>0, %1
> -   umov\t%w0, %1.<SHORT:size>[0]"
> -  [(set_attr "type" "logic_imm,load_4,f_loads,neon_to_gp")
> -   (set_attr "arch" "*,*,fp,fp")]
> -)
> -
> -(define_expand "<optab>qihi2"
> -  [(set (match_operand:HI 0 "register_operand")
> -        (ANY_EXTEND:HI (match_operand:QI 1 "nonimmediate_operand")))]
> -  ""
> +   uxt<SI_ONLY:size>\t%<SD_HSDI:w>0, %w1
> +   ldr<SI_ONLY:sizel>\t%w0, %1
> +   fmov\t%<SI_ONLY:Vetype>0, %w1
> +   ldr\t%<SI_ONLY:Vetype>0, %1
> +   fmov\t%w0, %<SI_ONLY:Vetype>1
> +   fmov\t%<SI_ONLY:Vetype>0, %<SI_ONLY:Vetype>1"
> +  [(set_attr "type" "mov_reg,load_4,f_mcr,f_loads,f_mrc,fmov")
> +   (set_attr "arch" "*,*,fp,fp,fp,fp")]
>  )
> 
> -(define_insn "*extendqihi2_aarch64"
> -  [(set (match_operand:HI 0 "register_operand" "=r,r")
> -     (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand"
> "r,m")))]
> +(define_insn "zero_extend<HI_ONLY:mode><SD_HSDI:mode>2"
> +  [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,w,r,w")
> +        (zero_extend:SD_HSDI
> +       (match_operand:HI_ONLY 1 "nonimmediate_operand"
> "r,m,r,m,w,w")))]
>    ""
>    "@
> -   sxtb\t%w0, %w1
> -   ldrsb\t%w0, %1"
> -  [(set_attr "type" "extend,load_4")]
> +   uxt<HI_ONLY:size>\t%<SD_HSDI:w>0, %w1
> +   ldr<HI_ONLY:sizel>\t%w0, %1
> +   fmov\t%<HI_ONLY:Vetype>0, %w1
> +   ldr\t%<HI_ONLY:Vetype>0, %1
> +   umov\t%w0, %1.<HI_ONLY:Vetype>[0]
> +   fmov\t%<HI_ONLY:Vetype>0, %<HI_ONLY:Vetype>1"
> +  [(set_attr "type" "mov_reg,load_4,f_mcr,f_loads,f_mrc,fmov")
> +   (set_attr "arch" "*,*,fp16,fp,fp,fp16")]
>  )
> 
> -(define_insn "*zero_extendqihi2_aarch64"
> -  [(set (match_operand:HI 0 "register_operand" "=r,r")
> -     (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand"
> "r,m")))]
> +(define_insn "zero_extend<QI_ONLY:mode><SD_HSDI:mode>2"
> +  [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,r,w")
> +        (zero_extend:SD_HSDI
> +       (match_operand:QI_ONLY 1 "nonimmediate_operand"
> "r,m,m,w,w")))]
>    ""
>    "@
> -   and\t%w0, %w1, 255
> -   ldrb\t%w0, %1"
> -  [(set_attr "type" "logic_imm,load_4")]
> +   uxt<QI_ONLY:size>\t%<SD_HSDI:w>0, %w1
> +   ldr<QI_ONLY:sizel>\t%w0, %1
> +   ldr\t%<QI_ONLY:Vetype>0, %1
> +   umov\t%w0, %1.<QI_ONLY:Vetype>[0]
> +   dup\t%<QI_ONLY:Vetype>0, %1.<QI_ONLY:Vetype>[0]"
> +  [(set_attr "type" "mov_reg,load_4,f_loads,f_mrc,fmov")
> +   (set_attr "arch" "*,*,fp,fp,fp")]
>  )
> 
>  ;; -------------------------------------------------------------------
> @@ -5029,15 +5001,15 @@ (define_insn "*and<mode>_compare0"
>    [(set_attr "type" "alus_imm")]
>  )
> 
> -(define_insn "*ands<GPI:mode>_compare0"
> +(define_insn "*ands<SD_HSDI:mode>_compare0"
>    [(set (reg:CC_NZ CC_REGNUM)
>       (compare:CC_NZ
> -      (zero_extend:GPI (match_operand:SHORT 1 "register_operand"
> "r"))
> +      (zero_extend:SD_HSDI (match_operand:ALLX 1 "register_operand"
> "r"))
>        (const_int 0)))
> -   (set (match_operand:GPI 0 "register_operand" "=r")
> -     (zero_extend:GPI (match_dup 1)))]
> +   (set (match_operand:SD_HSDI 0 "register_operand" "=r")
> +     (zero_extend:SD_HSDI (match_dup 1)))]
>    ""
> -  "ands\\t%<GPI:w>0, %<GPI:w>1, <short_mask>"
> +  "ands\\t%<SD_HSDI:w>0, %<SD_HSDI:w>1, <ALLX:short_mask>"
>    [(set_attr "type" "alus_imm")]
>  )
> 
> diff --git a/gcc/config/aarch64/iterators.md
> b/gcc/config/aarch64/iterators.md index
> 1df09f7fe2eb35aed96113476541e0faa5393551..e904407b2169e589b7007ff966
> b2d9347a6d0fd2 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -41,6 +41,8 @@ (define_mode_iterator SHORT [QI HI])  ;; Iterators for
> single modes, for "@" patterns.
>  (define_mode_iterator SI_ONLY [SI])
>  (define_mode_iterator DI_ONLY [DI])
> +(define_mode_iterator HI_ONLY [HI])
> +(define_mode_iterator QI_ONLY [QI])
> 
>  ;; Iterator for all integer modes (up to 64-bit)  (define_mode_iterator ALLI
> [QI HI SI DI]) @@ -1033,7 +1035,7 @@ (define_mode_attr w2 [(HF "x") (SF
> "x") (DF "w")])  ;; For width of fp registers in fcvt instruction
> (define_mode_attr fpw [(DI "s") (SI "d")])
> 
> -(define_mode_attr short_mask [(HI "65535") (QI "255")])
> +(define_mode_attr short_mask [(SI "0xffffffff") (HI "0xffff") (QI
> +"0xff")])
> 
>  ;; For constraints used in scalar immediate vector moves  (define_mode_attr
> hq [(HI "h") (QI "q")]) diff --git a/gcc/testsuite/gcc.target/aarch64/ands_3.c
> b/gcc/testsuite/gcc.target/aarch64/ands_3.c
> index
> 42cb7f0f0bc86a4aceb09851c31eb2e888d93403..421aa5cea7a51ad810cc9c5653
> a149cb21bb871c 100644
> --- a/gcc/testsuite/gcc.target/aarch64/ands_3.c
> +++ b/gcc/testsuite/gcc.target/aarch64/ands_3.c
> @@ -9,4 +9,4 @@ f9 (unsigned char x, int y)
>    return x;
>  }
> 
> -/* { dg-final { scan-assembler "ands\t(x|w)\[0-9\]+,\[ \t\]*(x|w)\[0-9\]+,\[
> \t\]*255" } } */
> +/* { dg-final { scan-assembler "ands\t(x|w)\[0-9\]+,\[
> +\t\]*(x|w)\[0-9\]+,\[ \t\]*0xff" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
> b/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
> index
> 8e35e0b574d49913b43c7d8d4f4ba75f127f42e9..03288976b3397cdbe0e822f94
> f2a6448d9fa9a52 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
> @@ -51,7 +51,6 @@ TEST_ALL (VEC_PERM)
>  /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
>  /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
>  /* { dg-final { scan-assembler-not {\tldr} } } */
> -/* { dg-final { scan-assembler-times {\tstr} 2 } } */
> -/* { dg-final { scan-assembler-times {\tstr\th[0-9]+} 2 } } */
> +/* { dg-final { scan-assembler-times {\tins\tv[0-9]+\.h\[1\],
> +v[0-9]+\.h\[0\]} 1 } } */
> 
>  /* { dg-final { scan-assembler-not {\tuqdec} } } */ diff --git
> a/gcc/testsuite/gcc.target/aarch64/tst_5.c
> b/gcc/testsuite/gcc.target/aarch64/tst_5.c
> index
> 0de40a6c47a7d63c1b7a81aeba438a096c0041b8..19034cd74ed07ea4d670c25d
> 9ab3d1cff805a483 100644
> --- a/gcc/testsuite/gcc.target/aarch64/tst_5.c
> +++ b/gcc/testsuite/gcc.target/aarch64/tst_5.c
> @@ -4,7 +4,7 @@
>  int
>  f255 (int x)
>  {
> -  if (x & 255)
> +  if (x & 0xff)
>      return 1;
>    return x;
>  }
> @@ -12,10 +12,10 @@ f255 (int x)
>  int
>  f65535 (int x)
>  {
> -  if (x & 65535)
> +  if (x & 0xffff)
>      return 1;
>    return x;
>  }
> 
> -/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*255" } } */
> -/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*65535" } } */
> +/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*0xff" } } */
> +/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*0xffff" } }
> +*/
> diff --git a/gcc/testsuite/gcc.target/aarch64/tst_6.c
> b/gcc/testsuite/gcc.target/aarch64/tst_6.c
> index
> f15ec114c391fed79cc43b7740fde83fb3d4ea53..1c047cfae214b60e5bf003e678
> 1a277202fcc588 100644
> --- a/gcc/testsuite/gcc.target/aarch64/tst_6.c
> +++ b/gcc/testsuite/gcc.target/aarch64/tst_6.c
> @@ -7,4 +7,4 @@ foo (long x)
>     return ((short) x != 0) ? x : 1;
>  }
> 
> -/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*65535" } } */
> +/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]+,\[ \t\]*0xffff" } }
> +*/
> 
> 
> 
> 
> --

Reply via email to