Here is my attempt at creating a couple of new instruction patterns to
generate more bfi instructions on aarch64.  I haven't finished testing
this yet, but it helps with gcc.target/aarch64/combine_bfi_1.c.
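
For illustration, here are a couple of hand-written examples (my own,
not taken verbatim from the testcase) of the kind of source these
patterns are meant to catch:

int
f1 (int x, int y)
{
  /* Hoped-for output: bfi w0, w1, 0, 8 (the noshift pattern).  */
  return (x & ~0xff) | (y & 0xff);
}

int
f2 (int x, int y)
{
  /* Hoped-for output: bfi w0, w1, 8, 8 (the shift pattern).  */
  return (x & ~0xff00) | ((y << 8) & 0xff00);
}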

Before I went any further with it I wanted to see if anyone
else was working on something like this and if this seems like
a reasonable approach.
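
To make the intent of the new helper concrete, here is a standalone
sketch of the check it performs (illustration only, not part of the
patch; the names and the guard against degenerate widths are mine):

#include <stdbool.h>
#include <stdint.h>

static bool
bfi_masks_ok (unsigned bitsize, uint64_t mask1, uint64_t mask2,
              unsigned shift)
{
  /* The two masks must be exact complements of each other.  */
  if (mask1 + mask2 + 1 != 0)
    return false;

  /* The shift amount must be less than the mode size.  */
  if (shift >= bitsize)
    return false;

  /* MASK2 must be a run of popcount (MASK2) contiguous bits starting
     at SHIFT.  */
  unsigned width = __builtin_popcountll (mask2);
  if (width == 0 || width > 63)
    return false;                       /* avoid undefined shifts  */
  uint64_t field = ((UINT64_C (1) << width) - 1) << shift;
  return field == mask2;
}

/* E.g. bfi_masks_ok (32, ~UINT64_C (0xff00), 0xff00, 8) is true, which
   corresponds to "(x & ~0xff00) | ((y << 8) & 0xff00)"
   -> bfi w0, w1, 8, 8.  */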

Steve Ellcey
sell...@marvell.com


2019-01-24  Steve Ellcey  <sell...@marvell.com>

        PR rtl-optimization/87763
        * config/aarch64/aarch64-protos.h
        (aarch64_masks_and_shift_for_aarch64_bfi_p): New prototype.
        * config/aarch64/aarch64.c (aarch64_masks_and_shift_for_aarch64_bfi_p):
        New function.
        * config/aarch64/aarch64.md (*aarch64_bfi<GPI:mode>4_shift):
        New instruction.
        (*aarch64_bfi<GPI:mode>4_noshift): Ditto.


diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index b035e35..ec90053 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -429,6 +429,7 @@ bool aarch64_label_mentioned_p (rtx);
 void aarch64_declare_function_name (FILE *, const char*, tree);
 bool aarch64_legitimate_pic_operand_p (rtx);
 bool aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode, rtx, rtx);
+bool aarch64_masks_and_shift_for_aarch64_bfi_p (scalar_int_mode, rtx, rtx, rtx);
 bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
 bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
 opt_machine_mode aarch64_sve_pred_mode (unsigned int);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 5df5a8b..69cc69f 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -9294,6 +9294,44 @@ aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode mode, rtx mask,
 	     & ((HOST_WIDE_INT_1U << INTVAL (shft_amnt)) - 1)) == 0;
 }
 
+/* Return true if the masks and shift amount from an RTX of the form
+   ((x & MASK1) | ((y << SHIFT_AMNT) & MASK2)) are valid to combine into
+   a BFI instruction of mode MODE.  See the *aarch64_bfi patterns.  */
+
+bool
+aarch64_masks_and_shift_for_aarch64_bfi_p (scalar_int_mode mode, rtx mask1,
+					   rtx shft_amnt, rtx mask2)
+{
+  unsigned HOST_WIDE_INT m1, m2, s, t;
+
+  if (!CONST_INT_P (mask1) || !CONST_INT_P (mask2) || !CONST_INT_P (shft_amnt))
+    return false;
+
+  m1 = UINTVAL (mask1);
+  m2 = UINTVAL (mask2);
+  s = UINTVAL (shft_amnt);
+
+  /* Verify that the two masks are exact complements of each other.  */
+  if ((m1 + m2 + 1) != 0)
+    return false;
+
+  /* Verify that the shift amount is less than the mode size.  */
+  if (s >= GET_MODE_BITSIZE (mode))
+    return false;
+
+  /* Verify that MASK2 is a contiguous run of set bits starting at the
+     shift amount, i.e. that it covers exactly the bits into which the
+     least significant bits of Y are shifted.  Build such a mask 't'
+     from popcount (MASK2) and the shift amount and compare it to MASK2.  */
+  t = popcount_hwi (m2);
+  t = (HOST_WIDE_INT_1U << t) - 1;
+  t = t << s;
+  if (t != m2)
+    return false;
+
+  return true;
+}
+
 /* Calculate the cost of calculating X, storing it in *COST.  Result
    is true if the total cost of the operation has now been calculated.  */
 static bool
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index b7f6fe0..e1f526b 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5476,6 +5476,41 @@
   [(set_attr "type" "bfm")]
 )
 
+;; Match a bfi instruction where the shift of OP3 means that we are
+;; actually copying the least significant bits of OP3 into OP0 by way
+;; of the AND masks and the IOR instruction.
+
+(define_insn "*aarch64_bfi<GPI:mode>4_shift"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+        (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0")
+                          (match_operand:GPI 2 "const_int_operand" "n"))
+                 (and:GPI (ashift:GPI
+                           (match_operand:GPI 3 "register_operand" "r")
+                           (match_operand:GPI 4 "aarch64_simd_shift_imm_<mode>" "n"))
+                          (match_operand:GPI 5 "const_int_operand" "n"))))]
+  "aarch64_masks_and_shift_for_aarch64_bfi_p (<MODE>mode, operands[2], operands[4], operands[5])"
+{
+  return "bfi\t%<GPI:w>0, %<GPI:w>3, %4, %P5";
+}
+  [(set_attr "type" "bfm")]
+)
+
+;; Like the above instruction but with no shift; here we are simply copying
+;; the least significant bits of OP3 to OP0.
+
+(define_insn "*aarch64_bfi<GPI:mode>4_noshift"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+        (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0")
+                          (match_operand:GPI 2 "const_int_operand" "n"))
+                 (and:GPI (match_operand:GPI 3 "register_operand" "r")
+                          (match_operand:GPI 4 "const_int_operand" "n"))))]
+  "aarch64_masks_and_shift_for_aarch64_bfi_p (<MODE>mode, operands[2], const0_rtx, operands[4])"
+{
+  return "bfi\t%<GPI:w>0, %<GPI:w>3, 0, %P4";
+}
+  [(set_attr "type" "bfm")]
+)
+
 (define_insn "*extr_insv_lower_reg<mode>"
   [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
 			  (match_operand 1 "const_int_operand" "n")
