Hi,

This patch fixes up the lane access patterns to be symmetric to the order in which vectors are stored in registers.

Tested for aarch64-none-elf and aarch64_be-none-elf. OK for trunk?

Thanks,
Tejas Belagod
ARM.

2013-11-21  Tejas Belagod  <tejas.bela...@arm.com>

gcc/
        * config/aarch64/aarch64-simd.md (aarch64_simd_vec_set<mode>): Adjust
        for big-endian element order.
        (aarch64_simd_vec_setv2di): Likewise.
        (*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): Likewise.
        (*aarch64_get_lane_zero_extendsi<mode>): Likewise.
        (aarch64_get_lane<mode>): Likewise.
        (vec_extract<mode>): Expand using aarch64_get_lane<mode>.
        * config/aarch64/aarch64.h (ENDIAN_LANE_N): New.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index b9ebdf5..b4a0a5b 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -428,9 +428,19 @@
            (match_operand:VQ_S 3 "register_operand" "0,0")
            (match_operand:SI 2 "immediate_operand" "i,i")))]
   "TARGET_SIMD"
-  "@
-   ins\t%0.<Vetype>[%p2], %w1
-   ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]"
+  {
+   int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
+   operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
+   switch (which_alternative)
+     {
+     case 0:
+       return "ins\\t%0.<Vetype>[%p2], %w1";
+     case 1:
+       return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
+     default:
+       gcc_unreachable ();
+     }
+  }
   [(set_attr "type" "neon_from_gp<q>, neon_ins<q>")]
 )
 
@@ -692,9 +702,19 @@
            (match_operand:V2DI 3 "register_operand" "0,0")
            (match_operand:SI 2 "immediate_operand" "i,i")))]
   "TARGET_SIMD"
-  "@
-   ins\t%0.d[%p2], %1
-   ins\\t%0.d[%p2], %1.d[0]"
+  {
+    int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
+    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
+    switch (which_alternative)
+      {
+      case 0:
+       return "ins\\t%0.d[%p2], %1";
+      case 1:
+        return "ins\\t%0.d[%p2], %1.d[0]";
+      default:
+       gcc_unreachable ();
+      }
+  }
   [(set_attr "type" "neon_from_gp, neon_ins_q")]
 )
 
@@ -719,7 +739,12 @@
            (match_operand:VDQF 3 "register_operand" "0")
            (match_operand:SI 2 "immediate_operand" "i")))]
   "TARGET_SIMD"
-  "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
+  {
+    int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
+
+    operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
+    return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
+  }
   [(set_attr "type" "neon_ins<q>")]
 )
 
@@ -2022,7 +2047,10 @@
            (match_operand:VDQQH 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
   "TARGET_SIMD"
-  "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]"
+  {
+    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+    return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
+  }
   [(set_attr "type" "neon_to_gp<q>")]
 )
 
@@ -2033,22 +2061,36 @@
            (match_operand:VDQQH 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
   "TARGET_SIMD"
-  "umov\\t%w0, %1.<Vetype>[%2]"
+  {
+    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+    return "umov\\t%w0, %1.<Vetype>[%2]";
+  }
   [(set_attr "type" "neon_to_gp<q>")]
 )
 
 ;; Lane extraction of a value, neither sign nor zero extension
 ;; is guaranteed so upper bits should be considered undefined.
 (define_insn "aarch64_get_lane<mode>"
-  [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
+  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
        (vec_select:<VEL>
-         (match_operand:VALL 1 "register_operand" "w, w")
-         (parallel [(match_operand:SI 2 "immediate_operand" "i, i")])))]
+         (match_operand:VALL 1 "register_operand" "w, w, w")
+         (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
   "TARGET_SIMD"
-  "@
-   umov\\t%<vwcore>0, %1.<Vetype>[%2]
-   dup\\t%<Vetype>0, %1.<Vetype>[%2]"
-  [(set_attr "type" "neon_to_gp<q>, neon_dup<q>")]
+  {
+    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+    switch (which_alternative)
+      {
+       case 0:
+         return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
+       case 1:
+         return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
+       case 2:
+         return "st1\\t{%1.<Vetype>}[%2], %0";
+       default:
+         gcc_unreachable ();
+      }
+  }
+  [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
 )
 
 (define_expand "aarch64_get_lanedi"
@@ -4028,16 +4070,13 @@
 
 ;; Standard pattern name vec_extract<mode>.
 
-(define_insn "vec_extract<mode>"
-  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
-       (vec_select:<VEL>
-         (match_operand:VALL 1 "register_operand" "w, w, w")
-         (parallel [(match_operand:SI 2 "immediate_operand" "i,i,i")])))]
+(define_expand "vec_extract<mode>"
+  [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
+   (match_operand:VALL 1 "register_operand" "")
+   (match_operand:SI 2 "immediate_operand" "")]
   "TARGET_SIMD"
-  "@
-  umov\\t%<vw>0, %1.<Vetype>[%2]
-  dup\\t%<Vetype>0, %1.<Vetype>[%2]
-  st1\\t{%1.<Vetype>}[%2], %0"
-  [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
-)
-
+{
+    emit_insn
+      (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
+    DONE;
+})
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 8b55a7b..ab5e45c 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -854,4 +854,7 @@ extern enum aarch64_code_model aarch64_cmodel;
   ((MODE) == V4SImode || (MODE) == V8HImode || (MODE) == V16QImode \
    || (MODE) == V4SFmode || (MODE) == V2DImode || mode == V2DFmode)
 
+#define ENDIAN_LANE_N(mode, n)  \
+  (BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 - (n) : (n))
+
 #endif /* GCC_AARCH64_H */

Reply via email to