Jonathan Wright via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> Hi,
>
> As subject, this patch renames the two-source-register TBL/TBX RTL
> patterns so that their names better reflect what they do, rather than
> confusing them with tbl3 or tbx4 patterns. Also use the correct
> "neon_tbl2" type attribute for both patterns.
>
> Rename single-source-register TBL/TBX patterns for consistency.
>
> Bootstrapped and regression tested on aarch64-none-linux-gnu - no
> issues.
>
> Ok for master?
OK.  Nice clean-up, thanks.

Richard

> Thanks,
> Jonathan
>
> ---
>
> gcc/ChangeLog:
>
> 2021-07-08  Jonathan Wright  <jonathan.wri...@arm.com>
>
>         * config/aarch64/aarch64-simd-builtins.def: Use two variant
>         generators for all TBL/TBX intrinsics and rename to
>         consistent forms: qtbl[1234] or qtbx[1234].
>         * config/aarch64/aarch64-simd.md (aarch64_tbl1<mode>):
>         Rename to...
>         (aarch64_qtbl1<mode>): This.
>         (aarch64_tbx1<mode>): Rename to...
>         (aarch64_qtbx1<mode>): This.
>         (aarch64_tbl2v16qi): Delete.
>         (aarch64_tbl3<mode>): Rename to...
>         (aarch64_qtbl2<mode>): This.
>         (aarch64_tbx4<mode>): Rename to...
>         (aarch64_qtbx2<mode>): This.
>         * config/aarch64/aarch64.c (aarch64_expand_vec_perm_1): Use
>         renamed qtbl1 and qtbl2 RTL patterns.
>         * config/aarch64/arm_neon.h (vqtbl1_p8): Use renamed qtbl1
>         RTL pattern.
>         (vqtbl1_s8): Likewise.
>         (vqtbl1_u8): Likewise.
>         (vqtbl1q_p8): Likewise.
>         (vqtbl1q_s8): Likewise.
>         (vqtbl1q_u8): Likewise.
>         (vqtbx1_s8): Use renamed qtbx1 RTL pattern.
>         (vqtbx1_u8): Likewise.
>         (vqtbx1_p8): Likewise.
>         (vqtbx1q_s8): Likewise.
>         (vqtbx1q_u8): Likewise.
>         (vqtbx1q_p8): Likewise.
>         (vtbl1_s8): Use renamed qtbl1 RTL pattern.
>         (vtbl1_u8): Likewise.
>         (vtbl1_p8): Likewise.
>         (vtbl2_s8): Likewise.
>         (vtbl2_u8): Likewise.
>         (vtbl2_p8): Likewise.
>         (vtbl3_s8): Use renamed qtbl2 RTL pattern.
>         (vtbl3_u8): Likewise.
>         (vtbl3_p8): Likewise.
>         (vtbl4_s8): Likewise.
>         (vtbl4_u8): Likewise.
>         (vtbl4_p8): Likewise.
>         (vtbx2_s8): Use renamed qtbx2 RTL pattern.
>         (vtbx2_u8): Likewise.
>         (vtbx2_p8): Likewise.
>         (vqtbl2_s8): Use renamed qtbl2 RTL pattern.
>         (vqtbl2_u8): Likewise.
>         (vqtbl2_p8): Likewise.
>         (vqtbl2q_s8): Likewise.
>         (vqtbl2q_u8): Likewise.
>         (vqtbl2q_p8): Likewise.
>         (vqtbx2_s8): Use renamed qtbx2 RTL pattern.
>         (vqtbx2_u8): Likewise.
>         (vqtbx2_p8): Likewise.
>         (vqtbx2q_s8): Likewise.
>         (vqtbx2q_u8): Likewise.
>         (vqtbx2q_p8): Likewise.
>         (vtbx4_s8): Likewise.
>         (vtbx4_u8): Likewise.
>         (vtbx4_p8): Likewise.
>
> diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
> index 063f503ebd96657f017dfaa067cb231991376bda..b7f1237b1ffd0d4ca283c853be1cc94b9fc35260 100644
> --- a/gcc/config/aarch64/aarch64-simd-builtins.def
> +++ b/gcc/config/aarch64/aarch64-simd-builtins.def
> @@ -718,37 +718,31 @@
>    VAR1 (BINOPP, crypto_pmull, 0, NONE, di)
>    VAR1 (BINOPP, crypto_pmull, 0, NONE, v2di)
>
> -  /* Implemented by aarch64_tbl3<mode>.  */
> -  VAR1 (BINOP, tbl3, 0, NONE, v8qi)
> -  VAR1 (BINOP, tbl3, 0, NONE, v16qi)
> +  /* Implemented by aarch64_qtbl1<mode>.  */
> +  VAR2 (BINOP, qtbl1, 0, NONE, v8qi, v16qi)
> +  VAR2 (BINOPU, qtbl1, 0, NONE, v8qi, v16qi)
>
> -  /* Implemented by aarch64_tbl1<mode>.  */
> -  VAR2 (BINOP, tbl1, 0, NONE, v8qi, v16qi)
> -  VAR2 (BINOPU, tbl1, 0, NONE, v8qi, v16qi)
> +  /* Implemented by aarch64_qtbl2<mode>.  */
> +  VAR2 (BINOP, qtbl2, 0, NONE, v8qi, v16qi)
>
>    /* Implemented by aarch64_qtbl3<mode>.  */
> -  VAR1 (BINOP, qtbl3, 0, NONE, v8qi)
> -  VAR1 (BINOP, qtbl3, 0, NONE, v16qi)
> +  VAR2 (BINOP, qtbl3, 0, NONE, v8qi, v16qi)
>
>    /* Implemented by aarch64_qtbl4<mode>.  */
> -  VAR1 (BINOP, qtbl4, 0, NONE, v8qi)
> -  VAR1 (BINOP, qtbl4, 0, NONE, v16qi)
> +  VAR2 (BINOP, qtbl4, 0, NONE, v8qi, v16qi)
>
> -  /* Implemented by aarch64_tbx1<mode>.  */
> -  VAR2 (TERNOP, tbx1, 0, NONE, v8qi, v16qi)
> -  VAR2 (TERNOPU, tbx1, 0, NONE, v8qi, v16qi)
> +  /* Implemented by aarch64_qtbx1<mode>.  */
> +  VAR2 (TERNOP, qtbx1, 0, NONE, v8qi, v16qi)
> +  VAR2 (TERNOPU, qtbx1, 0, NONE, v8qi, v16qi)
>
> -  /* Implemented by aarch64_tbx4<mode>.  */
> -  VAR1 (TERNOP, tbx4, 0, NONE, v8qi)
> -  VAR1 (TERNOP, tbx4, 0, NONE, v16qi)
> +  /* Implemented by aarch64_qtbx2<mode>.  */
> +  VAR2 (TERNOP, qtbx2, 0, NONE, v8qi, v16qi)
>
>    /* Implemented by aarch64_qtbx3<mode>.  */
> -  VAR1 (TERNOP, qtbx3, 0, NONE, v8qi)
> -  VAR1 (TERNOP, qtbx3, 0, NONE, v16qi)
> +  VAR2 (TERNOP, qtbx3, 0, NONE, v8qi, v16qi)
>
>    /* Implemented by aarch64_qtbx4<mode>.  */
> -  VAR1 (TERNOP, qtbx4, 0, NONE, v8qi)
> -  VAR1 (TERNOP, qtbx4, 0, NONE, v16qi)
> +  VAR2 (TERNOP, qtbx4, 0, NONE, v8qi, v16qi)
>
>    /* Builtins for ARMv8.1-A Adv.SIMD instructions.  */
>
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 74890989cb3045798bf8d0241467eaaf72238297..7332a735d35846e0d9375ad2686ed7ecdb09cd29 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -6948,7 +6948,7 @@
>      {
>        rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
>        rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
> -      emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
> +      emit_insn (gen_aarch64_qtbl1v16qi (op0, op1, operands[2]));
>      }
>    DONE;
>  }
> @@ -7425,7 +7425,7 @@
>    DONE;
>  })
>
> -(define_insn "aarch64_tbl1<mode>"
> +(define_insn "aarch64_qtbl1<mode>"
>    [(set (match_operand:VB 0 "register_operand" "=w")
>         (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
>                     (match_operand:VB 2 "register_operand" "w")]
> @@ -7435,7 +7435,7 @@
>    [(set_attr "type" "neon_tbl1<q>")]
>  )
>
> -(define_insn "aarch64_tbx1<mode>"
> +(define_insn "aarch64_qtbx1<mode>"
>    [(set (match_operand:VB 0 "register_operand" "=w")
>         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
>                     (match_operand:V16QI 2 "register_operand" "w")
> @@ -7448,27 +7448,17 @@
>
>  ;; Two source registers.
>
> -(define_insn "aarch64_tbl2v16qi"
> -  [(set (match_operand:V16QI 0 "register_operand" "=w")
> -       (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
> -                      (match_operand:V16QI 2 "register_operand" "w")]
> -                     UNSPEC_TBL))]
> -  "TARGET_SIMD"
> -  "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
> -  [(set_attr "type" "neon_tbl2_q")]
> -)
> -
> -(define_insn "aarch64_tbl3<mode>"
> +(define_insn "aarch64_qtbl2<mode>"
>    [(set (match_operand:VB 0 "register_operand" "=w")
>         (unspec:VB [(match_operand:OI 1 "register_operand" "w")
>                     (match_operand:VB 2 "register_operand" "w")]
>                    UNSPEC_TBL))]
>    "TARGET_SIMD"
>    "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
> -  [(set_attr "type" "neon_tbl3")]
> +  [(set_attr "type" "neon_tbl2")]
>  )
>
> -(define_insn "aarch64_tbx4<mode>"
> +(define_insn "aarch64_qtbx2<mode>"
>    [(set (match_operand:VB 0 "register_operand" "=w")
>         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
>                     (match_operand:OI 2 "register_operand" "w")
> @@ -7476,7 +7466,7 @@
>                    UNSPEC_TBX))]
>    "TARGET_SIMD"
>    "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
> -  [(set_attr "type" "neon_tbl4")]
> +  [(set_attr "type" "neon_tbl2")]
>  )
>
>  ;; Three source registers.
>
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index f5b25a7f7041645921e6ad85714efda73b993492..3bdf19d71b54d0ade8e5648323f6e1f012bc4f8f 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -22047,11 +22047,11 @@ aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
>           /* Expand the argument to a V16QI mode by duplicating it.  */
>           rtx pair = gen_reg_rtx (V16QImode);
>           emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
> -         emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
> +         emit_insn (gen_aarch64_qtbl1v8qi (target, pair, sel));
>         }
>        else
>         {
> -         emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
> +         emit_insn (gen_aarch64_qtbl1v16qi (target, op0, sel));
>         }
>      }
>    else
> @@ -22062,13 +22062,13 @@ aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
>         {
>           pair = gen_reg_rtx (V16QImode);
>           emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
> -         emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
> +         emit_insn (gen_aarch64_qtbl1v8qi (target, pair, sel));
>         }
>        else
>         {
>           pair = gen_reg_rtx (OImode);
>           emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
> -         emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
> +         emit_insn (gen_aarch64_qtbl2v16qi (target, pair, sel));
>         }
>      }
>  }
>
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index 00d76ea937ace5763746478cbdfadf6479e0b15a..1048d7c7eaac14554142eaa7544159a50929b7f1 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -9534,90 +9534,90 @@ __extension__ extern __inline poly8x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vqtbl1_p8 (poly8x16_t __tab, uint8x8_t __idx)
>  {
> -  return (poly8x8_t) __builtin_aarch64_tbl1v8qi ((int8x16_t) __tab,
>                                                  (int8x8_t) __idx);
> +  return (poly8x8_t) __builtin_aarch64_qtbl1v8qi ((int8x16_t) __tab,
>                                                   (int8x8_t) __idx);
>  }
>
>  __extension__ extern __inline int8x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vqtbl1_s8 (int8x16_t __tab, uint8x8_t __idx)
>  {
> -  return __builtin_aarch64_tbl1v8qi (__tab, (int8x8_t) __idx);
> +  return __builtin_aarch64_qtbl1v8qi (__tab, (int8x8_t) __idx);
>  }
>
>  __extension__ extern __inline uint8x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vqtbl1_u8 (uint8x16_t __tab, uint8x8_t __idx)
>  {
> -  return __builtin_aarch64_tbl1v8qi_uuu (__tab, __idx);
> +  return __builtin_aarch64_qtbl1v8qi_uuu (__tab, __idx);
>  }
>
>  __extension__ extern __inline poly8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vqtbl1q_p8 (poly8x16_t __tab, uint8x16_t __idx)
>  {
> -  return (poly8x16_t) __builtin_aarch64_tbl1v16qi ((int8x16_t) __tab,
>                                                    (int8x16_t) __idx);
> +  return (poly8x16_t) __builtin_aarch64_qtbl1v16qi ((int8x16_t) __tab,
>                                                     (int8x16_t) __idx);
>  }
>
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vqtbl1q_s8 (int8x16_t __tab, uint8x16_t __idx)
>  {
> -  return __builtin_aarch64_tbl1v16qi (__tab, (int8x16_t) __idx);
> +  return __builtin_aarch64_qtbl1v16qi (__tab, (int8x16_t) __idx);
>  }
>
>  __extension__ extern __inline uint8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vqtbl1q_u8 (uint8x16_t __tab, uint8x16_t __idx)
>  {
> -  return __builtin_aarch64_tbl1v16qi_uuu (__tab, __idx);
> +  return __builtin_aarch64_qtbl1v16qi_uuu (__tab, __idx);
>  }
>
>  __extension__ extern __inline int8x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vqtbx1_s8 (int8x8_t __r, int8x16_t __tab, uint8x8_t __idx)
>  {
> -  return __builtin_aarch64_tbx1v8qi (__r, __tab, (int8x8_t) __idx);
> +  return __builtin_aarch64_qtbx1v8qi (__r, __tab, (int8x8_t) __idx);
>  }
>
>  __extension__ extern __inline uint8x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vqtbx1_u8 (uint8x8_t __r, uint8x16_t __tab, uint8x8_t __idx)
>  {
> -  return __builtin_aarch64_tbx1v8qi_uuuu (__r, __tab, __idx);
> +  return __builtin_aarch64_qtbx1v8qi_uuuu (__r, __tab, __idx);
>  }
>
>  __extension__ extern __inline poly8x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vqtbx1_p8 (poly8x8_t __r, poly8x16_t __tab, uint8x8_t __idx)
>  {
> -  return (poly8x8_t) __builtin_aarch64_tbx1v8qi ((int8x8_t) __r,
>                                                  (int8x16_t) __tab,
>                                                  (int8x8_t) __idx);
> +  return (poly8x8_t) __builtin_aarch64_qtbx1v8qi ((int8x8_t) __r,
>                                                   (int8x16_t) __tab,
>                                                   (int8x8_t) __idx);
>  }
>
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vqtbx1q_s8 (int8x16_t __r, int8x16_t __tab, uint8x16_t __idx)
>  {
> -  return __builtin_aarch64_tbx1v16qi (__r, __tab, (int8x16_t) __idx);
> +  return __builtin_aarch64_qtbx1v16qi (__r, __tab, (int8x16_t) __idx);
>  }
>
>  __extension__ extern __inline uint8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vqtbx1q_u8 (uint8x16_t __r, uint8x16_t __tab, uint8x16_t __idx)
>  {
> -  return __builtin_aarch64_tbx1v16qi_uuuu (__r, __tab, __idx);
> +  return __builtin_aarch64_qtbx1v16qi_uuuu (__r, __tab, __idx);
>  }
>
>  __extension__ extern __inline poly8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vqtbx1q_p8 (poly8x16_t __r, poly8x16_t __tab, uint8x16_t __idx)
>  {
> -  return (poly8x16_t) __builtin_aarch64_tbx1v16qi ((int8x16_t) __r,
>                                                    (int8x16_t) __tab,
>                                                    (int8x16_t) __idx);
> +  return (poly8x16_t) __builtin_aarch64_qtbx1v16qi ((int8x16_t) __r,
>                                                     (int8x16_t) __tab,
>                                                     (int8x16_t) __idx);
>  }
>
>  /* V7 legacy table intrinsics.  */
> @@ -9628,7 +9628,7 @@ vtbl1_s8 (int8x8_t __tab, int8x8_t __idx)
>  {
>    int8x16_t __temp = vcombine_s8 (__tab,
>                                   vcreate_s8 (__AARCH64_UINT64_C (0x0)));
> -  return __builtin_aarch64_tbl1v8qi (__temp, __idx);
> +  return __builtin_aarch64_qtbl1v8qi (__temp, __idx);
>  }
>
>  __extension__ extern __inline uint8x8_t
> @@ -9637,7 +9637,7 @@ vtbl1_u8 (uint8x8_t __tab, uint8x8_t __idx)
>  {
>    uint8x16_t __temp = vcombine_u8 (__tab,
>                                    vcreate_u8 (__AARCH64_UINT64_C (0x0)));
> -  return __builtin_aarch64_tbl1v8qi_uuu (__temp, __idx);
> +  return __builtin_aarch64_qtbl1v8qi_uuu (__temp, __idx);
>  }
>
>  __extension__ extern __inline poly8x8_t
> @@ -9646,8 +9646,8 @@ vtbl1_p8 (poly8x8_t __tab, uint8x8_t __idx)
>  {
>    poly8x16_t __temp = vcombine_p8 (__tab,
>                                    vcreate_p8 (__AARCH64_UINT64_C (0x0)));
> -  return (poly8x8_t) __builtin_aarch64_tbl1v8qi ((int8x16_t) __temp,
>                                                  (int8x8_t) __idx);
> +  return (poly8x8_t) __builtin_aarch64_qtbl1v8qi ((int8x16_t) __temp,
>                                                   (int8x8_t) __idx);
>  }
>
>  __extension__ extern __inline int8x8_t
> @@ -9655,7 +9655,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vtbl2_s8 (int8x8x2_t __tab, int8x8_t __idx)
>  {
>    int8x16_t __temp = vcombine_s8 (__tab.val[0], __tab.val[1]);
> -  return __builtin_aarch64_tbl1v8qi (__temp, __idx);
> +  return __builtin_aarch64_qtbl1v8qi (__temp, __idx);
>  }
>
>  __extension__ extern __inline uint8x8_t
> @@ -9663,7 +9663,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vtbl2_u8 (uint8x8x2_t __tab, uint8x8_t __idx)
>  {
>    uint8x16_t __temp = vcombine_u8 (__tab.val[0], __tab.val[1]);
> -  return __builtin_aarch64_tbl1v8qi_uuu (__temp, __idx);
> +  return __builtin_aarch64_qtbl1v8qi_uuu (__temp, __idx);
>  }
>
>  __extension__ extern __inline poly8x8_t
> @@ -9671,15 +9671,14 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vtbl2_p8 (poly8x8x2_t __tab, uint8x8_t __idx)
>  {
>    poly8x16_t __temp = vcombine_p8 (__tab.val[0], __tab.val[1]);
> -  return (poly8x8_t) __builtin_aarch64_tbl1v8qi ((int8x16_t) __temp,
>                                                  (int8x8_t) __idx);
> +  return (poly8x8_t) __builtin_aarch64_qtbl1v8qi ((int8x16_t) __temp,
>                                                   (int8x8_t) __idx);
>  }
>
>  __extension__ extern __inline int8x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vtbl3_s8 (int8x8x3_t __tab, int8x8_t __idx)
>  {
> -  int8x8_t __result;
>    int8x16x2_t __temp;
>    __builtin_aarch64_simd_oi __o;
>    __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]);
> @@ -9688,15 +9687,13 @@ vtbl3_s8 (int8x8x3_t __tab, int8x8_t __idx)
>                                            (int8x16_t) __temp.val[0], 0);
>    __o = __builtin_aarch64_set_qregoiv16qi (__o,
>                                            (int8x16_t) __temp.val[1], 1);
> -  __result = __builtin_aarch64_tbl3v8qi (__o, __idx);
> -  return __result;
> +  return __builtin_aarch64_qtbl2v8qi (__o, __idx);
>  }
>
>  __extension__ extern __inline uint8x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vtbl3_u8 (uint8x8x3_t __tab, uint8x8_t __idx)
>  {
> -  uint8x8_t __result;
>    uint8x16x2_t __temp;
>    __builtin_aarch64_simd_oi __o;
>    __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]);
> @@ -9705,15 +9702,13 @@ vtbl3_u8 (uint8x8x3_t __tab, uint8x8_t __idx)
>                                            (int8x16_t) __temp.val[0], 0);
>    __o = __builtin_aarch64_set_qregoiv16qi (__o,
>                                            (int8x16_t) __temp.val[1], 1);
> -  __result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx);
> -  return __result;
> +  return (uint8x8_t)__builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx);
>  }
>
>  __extension__ extern __inline poly8x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vtbl3_p8 (poly8x8x3_t __tab, uint8x8_t __idx)
>  {
> -  poly8x8_t __result;
>    poly8x16x2_t __temp;
>    __builtin_aarch64_simd_oi __o;
>    __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]);
> @@ -9722,15 +9717,13 @@ vtbl3_p8 (poly8x8x3_t __tab, uint8x8_t __idx)
>                                            (int8x16_t) __temp.val[0], 0);
>    __o = __builtin_aarch64_set_qregoiv16qi (__o,
>                                            (int8x16_t) __temp.val[1], 1);
> -  __result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx);
> -  return __result;
> +  return (poly8x8_t)__builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx);
>  }
>
>  __extension__ extern __inline int8x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vtbl4_s8 (int8x8x4_t __tab, int8x8_t __idx)
>  {
> -  int8x8_t __result;
>    int8x16x2_t __temp;
>    __builtin_aarch64_simd_oi __o;
>    __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]);
> @@ -9739,15 +9732,13 @@ vtbl4_s8 (int8x8x4_t __tab, int8x8_t __idx)
>                                            (int8x16_t) __temp.val[0], 0);
>    __o = __builtin_aarch64_set_qregoiv16qi (__o,
>                                            (int8x16_t) __temp.val[1], 1);
> -  __result = __builtin_aarch64_tbl3v8qi (__o, __idx);
> -  return __result;
> +  return __builtin_aarch64_qtbl2v8qi (__o, __idx);
>  }
>
>  __extension__ extern __inline uint8x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vtbl4_u8 (uint8x8x4_t __tab, uint8x8_t __idx)
>  {
> -  uint8x8_t __result;
>    uint8x16x2_t __temp;
>    __builtin_aarch64_simd_oi __o;
>    __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]);
> @@ -9756,15 +9747,13 @@ vtbl4_u8 (uint8x8x4_t __tab, uint8x8_t __idx)
>                                            (int8x16_t) __temp.val[0], 0);
>    __o = __builtin_aarch64_set_qregoiv16qi (__o,
>                                            (int8x16_t) __temp.val[1], 1);
> -  __result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx);
> -  return __result;
> +  return (uint8x8_t)__builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx);
>  }
>
>  __extension__ extern __inline poly8x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vtbl4_p8 (poly8x8x4_t __tab, uint8x8_t __idx)
>  {
> -  poly8x8_t __result;
>    poly8x16x2_t __temp;
>    __builtin_aarch64_simd_oi __o;
>    __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]);
> @@ -9773,8 +9762,7 @@ vtbl4_p8 (poly8x8x4_t __tab, uint8x8_t __idx)
>                                            (int8x16_t) __temp.val[0], 0);
>    __o = __builtin_aarch64_set_qregoiv16qi (__o,
>                                            (int8x16_t) __temp.val[1], 1);
> -  __result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx);
> -  return __result;
> +  return (poly8x8_t)__builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx);
>  }
>
>  __extension__ extern __inline int8x8_t
> @@ -9782,7 +9770,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vtbx2_s8 (int8x8_t __r, int8x8x2_t __tab, int8x8_t __idx)
>  {
>    int8x16_t __temp = vcombine_s8 (__tab.val[0], __tab.val[1]);
> -  return __builtin_aarch64_tbx1v8qi (__r, __temp, __idx);
> +  return __builtin_aarch64_qtbx1v8qi (__r, __temp, __idx);
>  }
>
>  __extension__ extern __inline uint8x8_t
> @@ -9790,7 +9778,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vtbx2_u8 (uint8x8_t __r, uint8x8x2_t __tab, uint8x8_t __idx)
>  {
>    uint8x16_t __temp = vcombine_u8 (__tab.val[0], __tab.val[1]);
> -  return __builtin_aarch64_tbx1v8qi_uuuu (__r, __temp, __idx);
> +  return __builtin_aarch64_qtbx1v8qi_uuuu (__r, __temp, __idx);
>  }
>
>  __extension__ extern __inline poly8x8_t
> @@ -9798,9 +9786,9 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vtbx2_p8 (poly8x8_t __r, poly8x8x2_t __tab, uint8x8_t __idx)
>  {
>    poly8x16_t __temp = vcombine_p8 (__tab.val[0], __tab.val[1]);
> -  return (poly8x8_t) __builtin_aarch64_tbx1v8qi ((int8x8_t) __r,
>                                                  (int8x16_t) __temp,
>                                                  (int8x8_t) __idx);
> +  return (poly8x8_t) __builtin_aarch64_qtbx1v8qi ((int8x8_t) __r,
>                                                   (int8x16_t) __temp,
>                                                   (int8x8_t) __idx);
>  }
>
>  /* End of temporary inline asm.  */
> @@ -23335,7 +23323,7 @@ vqtbl2_s8 (int8x16x2_t __tab, uint8x8_t __idx)
>    __builtin_aarch64_simd_oi __o;
>    __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0);
>    __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1);
> -  return __builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx);
> +  return __builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx);
>  }
>
>  __extension__ extern __inline uint8x8_t
> @@ -23345,7 +23333,7 @@ vqtbl2_u8 (uint8x16x2_t __tab, uint8x8_t __idx)
>    __builtin_aarch64_simd_oi __o;
>    __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
>    __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
> -  return (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx);
> +  return (uint8x8_t)__builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx);
>  }
>
>  __extension__ extern __inline poly8x8_t
> @@ -23355,7 +23343,7 @@ vqtbl2_p8 (poly8x16x2_t __tab, uint8x8_t __idx)
>    __builtin_aarch64_simd_oi __o;
>    __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
>    __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
> -  return (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx);
> +  return (poly8x8_t)__builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx);
>  }
>
>  __extension__ extern __inline int8x16_t
> @@ -23365,7 +23353,7 @@ vqtbl2q_s8 (int8x16x2_t __tab, uint8x16_t __idx)
>    __builtin_aarch64_simd_oi __o;
>    __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
>    __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
> -  return __builtin_aarch64_tbl3v16qi (__o, (int8x16_t)__idx);
> +  return __builtin_aarch64_qtbl2v16qi (__o, (int8x16_t)__idx);
>  }
>
>  __extension__ extern __inline uint8x16_t
> @@ -23375,7 +23363,7 @@ vqtbl2q_u8 (uint8x16x2_t __tab, uint8x16_t __idx)
>    __builtin_aarch64_simd_oi __o;
>    __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
>    __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
> -  return (uint8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)__idx);
> +  return (uint8x16_t)__builtin_aarch64_qtbl2v16qi (__o, (int8x16_t)__idx);
>  }
>
>  __extension__ extern __inline poly8x16_t
> @@ -23385,7 +23373,7 @@ vqtbl2q_p8 (poly8x16x2_t __tab, uint8x16_t __idx)
>    __builtin_aarch64_simd_oi __o;
>    __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
>    __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
> -  return (poly8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)__idx);
> +  return (poly8x16_t)__builtin_aarch64_qtbl2v16qi (__o, (int8x16_t)__idx);
>  }
>
>  /* vqtbl3 */
> @@ -23539,7 +23527,7 @@ vqtbx2_s8 (int8x8_t __r, int8x16x2_t __tab, uint8x8_t __idx)
>    __builtin_aarch64_simd_oi __o;
>    __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0);
>    __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1);
> -  return __builtin_aarch64_tbx4v8qi (__r, __o, (int8x8_t)__idx);
> +  return __builtin_aarch64_qtbx2v8qi (__r, __o, (int8x8_t)__idx);
>  }
>
>  __extension__ extern __inline uint8x8_t
> @@ -23549,8 +23537,8 @@ vqtbx2_u8 (uint8x8_t __r, uint8x16x2_t __tab, uint8x8_t __idx)
>    __builtin_aarch64_simd_oi __o;
>    __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
>    __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
> -  return (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o,
>                                                 (int8x8_t)__idx);
> +  return (uint8x8_t)__builtin_aarch64_qtbx2v8qi ((int8x8_t)__r, __o,
>                                                  (int8x8_t)__idx);
>  }
>
>  __extension__ extern __inline poly8x8_t
> @@ -23560,8 +23548,8 @@ vqtbx2_p8 (poly8x8_t __r, poly8x16x2_t __tab, uint8x8_t __idx)
>    __builtin_aarch64_simd_oi __o;
>    __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
>    __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
> -  return (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o,
>                                                 (int8x8_t)__idx);
> +  return (poly8x8_t)__builtin_aarch64_qtbx2v8qi ((int8x8_t)__r, __o,
>                                                  (int8x8_t)__idx);
>  }
>
>  __extension__ extern __inline int8x16_t
> @@ -23571,7 +23559,7 @@ vqtbx2q_s8 (int8x16_t __r, int8x16x2_t __tab, uint8x16_t __idx)
>    __builtin_aarch64_simd_oi __o;
>    __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0);
>    __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1);
> -  return __builtin_aarch64_tbx4v16qi (__r, __o, (int8x16_t)__idx);
> +  return __builtin_aarch64_qtbx2v16qi (__r, __o, (int8x16_t)__idx);
>  }
>
>  __extension__ extern __inline uint8x16_t
> @@ -23581,7 +23569,7 @@ vqtbx2q_u8 (uint8x16_t __r, uint8x16x2_t __tab, uint8x16_t __idx)
>    __builtin_aarch64_simd_oi __o;
>    __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
>    __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
> -  return (uint8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)__r, __o,
> +  return (uint8x16_t)__builtin_aarch64_qtbx2v16qi ((int8x16_t)__r, __o,
>                                                   (int8x16_t)__idx);
>  }
>
> @@ -23592,8 +23580,8 @@ vqtbx2q_p8 (poly8x16_t __r, poly8x16x2_t __tab, uint8x16_t __idx)
>    __builtin_aarch64_simd_oi __o;
>    __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
>    __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
> -  return (poly8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)__r, __o,
>                                                   (int8x16_t)__idx);
> +  return (poly8x16_t)__builtin_aarch64_qtbx2v16qi ((int8x16_t)__r, __o,
>                                                    (int8x16_t)__idx);
>  }
>
>  /* vqtbx3 */
> @@ -28511,7 +28499,6 @@ __extension__ extern __inline int8x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vtbx4_s8 (int8x8_t __r, int8x8x4_t __tab, int8x8_t __idx)
>  {
> -  int8x8_t __result;
>    int8x16x2_t __temp;
>    __builtin_aarch64_simd_oi __o;
>    __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]);
> @@ -28520,15 +28507,13 @@ vtbx4_s8 (int8x8_t __r, int8x8x4_t __tab, int8x8_t __idx)
>                                            (int8x16_t) __temp.val[0], 0);
>    __o = __builtin_aarch64_set_qregoiv16qi (__o,
>                                            (int8x16_t) __temp.val[1], 1);
> -  __result = __builtin_aarch64_tbx4v8qi (__r, __o, __idx);
> -  return __result;
> +  return __builtin_aarch64_qtbx2v8qi (__r, __o, __idx);
>  }
>
>  __extension__ extern __inline uint8x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vtbx4_u8 (uint8x8_t __r, uint8x8x4_t __tab, uint8x8_t __idx)
>  {
> -  uint8x8_t __result;
>    uint8x16x2_t __temp;
>    __builtin_aarch64_simd_oi __o;
>    __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]);
> @@ -28537,16 +28522,14 @@ vtbx4_u8 (uint8x8_t __r, uint8x8x4_t __tab, uint8x8_t __idx)
>                                            (int8x16_t) __temp.val[0], 0);
>    __o = __builtin_aarch64_set_qregoiv16qi (__o,
>                                            (int8x16_t) __temp.val[1], 1);
> -  __result = (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o,
>                                                     (int8x8_t)__idx);
> -  return __result;
> +  return (uint8x8_t)__builtin_aarch64_qtbx2v8qi ((int8x8_t)__r, __o,
>                                                  (int8x8_t)__idx);
>  }
>
>  __extension__ extern __inline poly8x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vtbx4_p8 (poly8x8_t __r, poly8x8x4_t __tab, uint8x8_t __idx)
>  {
> -  poly8x8_t __result;
>    poly8x16x2_t __temp;
>    __builtin_aarch64_simd_oi __o;
>    __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]);
> @@ -28555,9 +28538,8 @@ vtbx4_p8 (poly8x8_t __r, poly8x8x4_t __tab, uint8x8_t __idx)
>                                            (int8x16_t) __temp.val[0], 0);
>    __o = __builtin_aarch64_set_qregoiv16qi (__o,
>                                            (int8x16_t) __temp.val[1], 1);
> -  __result = (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o,
>                                                     (int8x8_t)__idx);
> -  return __result;
> +  return (poly8x8_t)__builtin_aarch64_qtbx2v8qi ((int8x8_t)__r, __o,
>                                                  (int8x8_t)__idx);
>  }
>
>  /* vtrn */
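
---

For reference, the behaviour behind the qtbl2/qtbx2 naming: the two-register
forms look up a 32-byte table held in a register pair. A minimal sketch of the
intrinsics that now map onto the renamed patterns (illustrative only; the
function names and the exact registers in the commented assembly are invented
for this example, not taken from the patch):

#include <arm_neon.h>

/* Two-register TBL: each byte of idx in the range 0-31 selects a byte
   from the 32-byte table {tab.val[0], tab.val[1]}; out-of-range index
   bytes produce 0.  Maps onto the renamed aarch64_qtbl2 pattern.  */
uint8x16_t
lookup_32byte_table (uint8x16x2_t tab, uint8x16_t idx)
{
  return vqtbl2q_u8 (tab, idx);    /* tbl  v0.16b, {v1.16b - v2.16b}, v3.16b */
}

/* Two-register TBX: as above, except that out-of-range index bytes
   leave the corresponding byte of r unchanged.  Maps onto the renamed
   aarch64_qtbx2 pattern.  */
uint8x16_t
lookup_32byte_table_merge (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx)
{
  return vqtbx2q_u8 (r, tab, idx); /* tbx  v0.16b, {v1.16b - v2.16b}, v3.16b */
}

The differing out-of-range behaviour (TBL zeroes, TBX preserves the
destination byte) is what keeps TBL and TBX as separate patterns; the rename
only makes the pattern names count source registers the same way the qtbl3/
qtbl4 and qtbx3/qtbx4 patterns already do.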