On 10/04/2019 21:31, Steve Ellcey wrote:
> On Wed, 2019-04-10 at 11:10 +0100, Richard Earnshaw (lists) wrote:
>>
>> OK with those changes.
>>
>> R.
> 
> I made the changes you suggested and checked in the patch.  Just to be
> complete, here is the final version of the patch that I checked in.
> 
> 2019-04-10  Steve Ellcey  <sell...@marvell.com>
> 
>       PR rtl-optimization/87763
>       * config/aarch64/aarch64-protos.h (aarch64_masks_and_shift_for_bfi_p):
>       New prototype.
>       * config/aarch64/aarch64.c (aarch64_masks_and_shift_for_bfi_p):
>       New function.
>       * config/aarch64/aarch64.md (*aarch64_bfi<GPI:mode>5_shift):
>       New instruction.
>       (*aarch64_bfi<GPI:mode>5_shift_alt): Ditto.
>       (*aarch64_bfi<GPI:mode>4_noand): Ditto.
>       (*aarch64_bfi<GPI:mode>4_noand_alt): Ditto.
>       (*aarch64_bfi<GPI:mode>4_noshift): Ditto.
> 

You've removed the ..._noshift_alt variant.  That wasn't my intention,
so perhaps you misunderstood what I was trying to say.

The two versions are both needed: the register tie is not orthogonal to
the constants used in the masks, and canonicalization will not generate
a consistent ordering of the operands.
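
For instance, with a hypothetical pair of functions like these (my own
reduction, not taken from the PR), combine can present the two IOR arms
in either order, and only the operand numbering distinguishes the
patterns:

  int g1 (int x, int y)
  {
    return (y & ~0xff) | (x & 0xff);   /* tied operand in the first arm */
  }

  int g2 (int x, int y)
  {
    return (x & 0xff) | (y & ~0xff);   /* tied operand in the second arm */
  }

Both should match down to a single "bfi w0, w1, 0, 8" (with the
destination tied to y), but without both patterns one of the two
orderings is missed.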

My point is that you can make the 'alt' version from

+(define_insn "*aarch64_bfi<GPI:mode>4_noshift"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+        (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0")
+                          (match_operand:GPI 2 "const_int_operand" "n"))
+                 (and:GPI (match_operand:GPI 3 "register_operand" "r")
+                          (match_operand:GPI 4 "const_int_operand" "n"))))]
+  "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[2]), 0,
+                                     UINTVAL (operands[4]))"
+  "bfi\t%<GPI:w>0, %<GPI:w>3, 0, %P4"
+  [(set_attr "type" "bfm")]
+)

by simply duplicating it and permuting the operand numbers in the
pattern part; everything else, including the final condition and the
insn output string, remains identical:

+(define_insn "*aarch64_bfi<GPI:mode>4_noshift_alt"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+        (ior:GPI (and:GPI (match_operand:GPI 3 "register_operand" "r")
+                          (match_operand:GPI 4 "const_int_operand" "n"))
+                 (and:GPI (match_operand:GPI 1 "register_operand" "0")
+                          (match_operand:GPI 2 "const_int_operand" "n"))))]

Operand order permuted

+  "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[2]), 0,
+                                     UINTVAL (operands[4]))"

does not change

+  "bfi\t%<GPI:w>0, %<GPI:w>3, 0, %P4"

does not change

+  [(set_attr "type" "bfm")]
+)
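
To convince yourself that the condition really is order-insensitive,
note that operands 2 and 4 keep their numbers in the permuted pattern;
only their positions inside the IOR change.  For reference, here is a
standalone sketch of the contiguity test the predicate performs (my own
naming, not part of the patch):

  #include <stdbool.h>
  #include <stdint.h>

  /* mask2 plus the bit shifted in at shft_amnt must be a power of two,
     i.e. mask2 is a contiguous run of ones starting at bit shft_amnt.  */
  static bool
  bfi_mask_ok (uint64_t mask2, unsigned shft_amnt)
  {
    uint64_t t = mask2 + ((uint64_t) 1 << shft_amnt);
    return t == (t & -t);   /* power of two, or zero on overflow  */
  }

  /* bfi_mask_ok (0xff, 0)        -> true  (contiguous low byte)
     bfi_mask_ok (0xf0000000, 28) -> true  (the f1/f2 test case below)
     bfi_mask_ok (0x101, 0)       -> false (non-contiguous mask)  */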

R.

> 2019-04-10  Steve Ellcey  <sell...@marvell.com>
> 
>       PR rtl-optimization/87763
>       * gcc.target/aarch64/combine_bfxil.c: Change some bfxil checks
>       to bfi.
>       * gcc.target/aarch64/combine_bfi_2.c: New test.
> 
> 
> combine.patch
> 
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index b035e35..b6c0d0a 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -429,6 +429,9 @@ bool aarch64_label_mentioned_p (rtx);
>  void aarch64_declare_function_name (FILE *, const char*, tree);
>  bool aarch64_legitimate_pic_operand_p (rtx);
>  bool aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode, rtx, rtx);
> +bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT,
> +                                     unsigned HOST_WIDE_INT,
> +                                     unsigned HOST_WIDE_INT);
>  bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
>  bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
>  opt_machine_mode aarch64_sve_pred_mode (unsigned int);
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 95e5b03..9be7548 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -9336,6 +9336,35 @@ aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode mode, rtx mask,
>            & ((HOST_WIDE_INT_1U << INTVAL (shft_amnt)) - 1)) == 0;
>  }
>  
> +/* Return true if the masks and a shift amount from an RTX of the form
> +   ((x & MASK1) | ((y << SHIFT_AMNT) & MASK2)) are valid to combine into
> +   a BFI instruction of mode MODE.  See *aarch64_bfi patterns.  */
> +
> +bool
> +aarch64_masks_and_shift_for_bfi_p (scalar_int_mode mode,
> +                                unsigned HOST_WIDE_INT mask1,
> +                                unsigned HOST_WIDE_INT shft_amnt,
> +                                unsigned HOST_WIDE_INT mask2)
> +{
> +  unsigned HOST_WIDE_INT t;
> +
> +  /* Verify that the two masks are complementary (no overlap, no gaps).  */
> +  if (mask1 != ~mask2)
> +    return false;
> +
> +  /* Verify that mask2 is not all zeros or all ones.  */
> +  if (mask2 == 0 || mask2 == HOST_WIDE_INT_M1U)
> +    return false;
> +
> +  /* The shift amount should always be less than the mode size.  */
> +  gcc_assert (shft_amnt < GET_MODE_BITSIZE (mode));
> +
> +  /* Verify that the mask being shifted is contiguous and would be in the
> +     least significant bits after shifting by shft_amnt.  */
> +  t = mask2 + (HOST_WIDE_INT_1U << shft_amnt);
> +  return (t == (t & -t));
> +}
> +
>  /* Calculate the cost of calculating X, storing it in *COST.  Result
>     is true if the total cost of the operation has now been calculated.  */
>  static bool
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index ab8786a..e0df975 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -5491,6 +5491,94 @@
>    [(set_attr "type" "bfm")]
>  )
>  
> +;;  Match a bfi instruction where the shift of OP3 means that we are
> +;;  actually copying the least significant bits of OP3 into OP0 by way
> +;;  of the AND masks and the IOR instruction.  A similar instruction
> +;;  with the two parts of the IOR swapped around was never triggered
> +;;  in a bootstrap build and test of GCC, so it was not included.
> +
> +(define_insn "*aarch64_bfi<GPI:mode>5_shift"
> +  [(set (match_operand:GPI 0 "register_operand" "=r")
> +        (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0")
> +                          (match_operand:GPI 2 "const_int_operand" "n"))
> +                 (and:GPI (ashift:GPI
> +                           (match_operand:GPI 3 "register_operand" "r")
> +                           (match_operand:GPI 4 "aarch64_simd_shift_imm_<mode>" "n"))
> +                          (match_operand:GPI 5 "const_int_operand" "n"))))]
> +  "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[2]),
> +                                   UINTVAL (operands[4]),
> +                                   UINTVAL (operands[5]))"
> +  "bfi\t%<GPI:w>0, %<GPI:w>3, %4, %P5"
> +  [(set_attr "type" "bfm")]
> +)
> +
> +(define_insn "*aarch64_bfi<GPI:mode>5_shift_alt"
> +  [(set (match_operand:GPI 0 "register_operand" "=r")
> +        (ior:GPI (and:GPI (ashift:GPI
> +                           (match_operand:GPI 1 "register_operand" "r")
> +                           (match_operand:GPI 2 "aarch64_simd_shift_imm_<mode>" "n"))
> +                          (match_operand:GPI 3 "const_int_operand" "n"))
> +              (and:GPI (match_operand:GPI 4 "register_operand" "0")
> +                          (match_operand:GPI 5 "const_int_operand" "n"))))]
> +  "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[5]),
> +                                   UINTVAL (operands[2]),
> +                                   UINTVAL (operands[3]))"
> +  "bfi\t%<GPI:w>0, %<GPI:w>1, %2, %P3"
> +  [(set_attr "type" "bfm")]
> +)
> +
> +;; Like *aarch64_bfi<GPI:mode>5_shift but without the AND of the ashift,
> +;; because the shift is large enough to remove the need for an AND instruction.
> +
> +(define_insn "*aarch64_bfi<GPI:mode>4_noand"
> +  [(set (match_operand:GPI 0 "register_operand" "=r")
> +        (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0")
> +                          (match_operand:GPI 2 "const_int_operand" "n"))
> +                 (ashift:GPI
> +                          (match_operand:GPI 3 "register_operand" "r")
> +                          (match_operand:GPI 4 "aarch64_simd_shift_imm_<mode>" "n"))))]
> +  "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[2]),
> +                                   UINTVAL (operands[4]),
> +                                   HOST_WIDE_INT_M1U << UINTVAL (operands[4]))"
> +{
> +  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - UINTVAL (operands[4]));
> +  return "bfi\t%<GPI:w>0, %<GPI:w>3, %4, %5";
> +}
> +  [(set_attr "type" "bfm")]
> +)
> +
> +(define_insn "*aarch64_bfi<GPI:mode>4_noand_alt"
> +  [(set (match_operand:GPI 0 "register_operand" "=r")
> +        (ior:GPI (ashift:GPI
> +                          (match_operand:GPI 1 "register_operand" "r")
> +                          (match_operand:GPI 2 "aarch64_simd_shift_imm_<mode>" "n"))
> +              (and:GPI (match_operand:GPI 3 "register_operand" "0")
> +                          (match_operand:GPI 4 "const_int_operand" "n"))))]
> +  "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[4]),
> +                                   UINTVAL (operands[2]),
> +                                   HOST_WIDE_INT_M1U << UINTVAL (operands[2]))"
> +{
> +  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - UINTVAL (operands[2]));
> +  return "bfi\t%<GPI:w>0, %<GPI:w>1, %2, %5";
> +}
> +  [(set_attr "type" "bfm")]
> +)
> +
> +;; Like *aarch64_bfi<GPI:mode>5_shift but with no shifting; we are just
> +;; copying the least significant bits of OP3 to OP0.
> +
> +(define_insn "*aarch64_bfi<GPI:mode>4_noshift"
> +  [(set (match_operand:GPI 0 "register_operand" "=r")
> +        (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0")
> +                          (match_operand:GPI 2 "const_int_operand" "n"))
> +                 (and:GPI (match_operand:GPI 3 "register_operand" "r")
> +                          (match_operand:GPI 4 "const_int_operand" "n"))))]
> +  "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[2]), 0,
> +                                   UINTVAL (operands[4]))"
> +  "bfi\t%<GPI:w>0, %<GPI:w>3, 0, %P4"
> +  [(set_attr "type" "bfm")]
> +)
> +
>  (define_insn "*extr_insv_lower_reg<mode>"
>    [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
>                         (match_operand 1 "const_int_operand" "n")
> 
> 
> combine-test.patch
> 
> diff --git a/gcc/testsuite/gcc.target/aarch64/combine_bfi_2.c b/gcc/testsuite/gcc.target/aarch64/combine_bfi_2.c
> index e69de29..145282d 100644
> --- a/gcc/testsuite/gcc.target/aarch64/combine_bfi_2.c
> +++ b/gcc/testsuite/gcc.target/aarch64/combine_bfi_2.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +int f1(int x, int y)
> +{
> +  return (y & 0xfffffff) | (((x << 28) & 0xf0000000));
> +}
> +
> +
> +int f2(int x, int y)
> +{
> +  return (((x << 28) & 0xf0000000)) | (y & 0xfffffff);
> +}
> +
> +/* { dg-final { scan-assembler-times {\tbfi\t} 2 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/combine_bfxil.c b/gcc/testsuite/gcc.target/aarch64/combine_bfxil.c
> index 109f989..a2fb31c 100644
> --- a/gcc/testsuite/gcc.target/aarch64/combine_bfxil.c
> +++ b/gcc/testsuite/gcc.target/aarch64/combine_bfxil.c
> @@ -114,4 +114,5 @@ main (void)
>    return 0;
>  }
>  
> -/* { dg-final { scan-assembler-times "bfxil\\t" 18 } } */
> +/* { dg-final { scan-assembler-times "bfxil\\t" 7 } } */
> +/* { dg-final { scan-assembler-times "bfi\\t" 11 } } */
> 
