Both a ping and a repost with the new VNx names.  See:

  https://gcc.gnu.org/ml/gcc-patches/2017-11/msg00592.html

for the full series.

Thanks,
Richard


---

This patch adds support for SVE LD[234], ST[234] and associated
structure modes.  Unlike Advanced SIMD, these modes are extra-long
vector modes instead of integer modes.
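As a rough illustration of what this enables (a hypothetical example,
not part of the patch): given a two-way interleaved loop such as

  void
  f (int *restrict dst, int *restrict src, int n)
  {
    for (int i = 0; i < n; ++i)
      /* src[i * 2] and src[i * 2 + 1] form a two-way interleaved
         access, which vec_load_lanes can map to a single LD2W.  */
      dst[i] = src[i * 2] + src[i * 2 + 1];
  }

the vectoriser can now keep the two loaded vectors in a single
structure-mode (VNx8SI) register pair rather than falling back to
less efficient code.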
2017-11-06  Richard Sandiford  <richard.sandif...@linaro.org>
            Alan Hayward  <alan.hayw...@arm.com>
            David Sherwood  <david.sherw...@arm.com>

gcc/
        * config/aarch64/aarch64-modes.def: Define x2, x3 and x4 vector
        modes for SVE.
        * config/aarch64/aarch64-protos.h
        (aarch64_sve_struct_memory_operand_p): Declare.
        * config/aarch64/iterators.md (SVE_STRUCT): New mode iterator.
        (vector_count, insn_length, VSINGLE, vsingle): New mode attributes.
        (VPRED, vpred): Handle SVE structure modes.
        * config/aarch64/constraints.md (Utx): New constraint.
        * config/aarch64/predicates.md (aarch64_sve_struct_memory_operand)
        (aarch64_sve_struct_nonimmediate_operand): New predicates.
        * config/aarch64/aarch64.md (UNSPEC_LDN, UNSPEC_STN): New unspecs.
        * config/aarch64/aarch64-sve.md (mov<mode>, *aarch64_sve_mov<mode>_le)
        (*aarch64_sve_mov<mode>_be, pred_mov<mode>): New patterns for
        structure modes.  Split into pieces after RA.
        (vec_load_lanes<mode><vsingle>, vec_mask_load_lanes<mode><vsingle>)
        (vec_store_lanes<mode><vsingle>, vec_mask_store_lanes<mode><vsingle>):
        New patterns.
        * config/aarch64/aarch64.c (aarch64_classify_vector_mode): Handle
        SVE structure modes.
        (aarch64_classify_address): Likewise.
        (sizetochar): Move earlier in file.
        (aarch64_print_operand): Handle SVE register lists.
        (aarch64_array_mode): New function.
        (aarch64_sve_struct_memory_operand_p): Likewise.
        (TARGET_ARRAY_MODE): Redefine.

Index: gcc/config/aarch64/aarch64-modes.def
===================================================================
--- gcc/config/aarch64/aarch64-modes.def	2017-12-22 16:00:58.471012631 +0000
+++ gcc/config/aarch64/aarch64-modes.def	2017-12-22 16:01:42.042358758 +0000
@@ -87,6 +87,9 @@ INT_MODE (XI, 64);
 /* Give SVE vectors the names normally used for 256-bit vectors.
    The actual number depends on command-line flags.  */
 SVE_MODES (1, VNx16, VNx8, VNx4, VNx2)
+SVE_MODES (2, VNx32, VNx16, VNx8, VNx4)
+SVE_MODES (3, VNx48, VNx24, VNx12, VNx6)
+SVE_MODES (4, VNx64, VNx32, VNx16, VNx8)
 
 /* Quad float: 128-bit floating mode for long doubles.  */
 FLOAT_MODE (TF, 16, ieee_quad_format);
Index: gcc/config/aarch64/aarch64-protos.h
===================================================================
--- gcc/config/aarch64/aarch64-protos.h	2017-12-22 16:00:58.471012631 +0000
+++ gcc/config/aarch64/aarch64-protos.h	2017-12-22 16:01:42.043358720 +0000
@@ -432,6 +432,7 @@ rtx aarch64_simd_gen_const_vector_dup (m
 bool aarch64_simd_mem_operand_p (rtx);
 bool aarch64_sve_ld1r_operand_p (rtx);
 bool aarch64_sve_ldr_operand_p (rtx);
+bool aarch64_sve_struct_memory_operand_p (rtx);
 rtx aarch64_simd_vect_par_cnst_half (machine_mode, int, bool);
 rtx aarch64_tls_get_addr (void);
 tree aarch64_fold_builtin (tree, int, tree *, bool);
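(To make the new modes concrete: the aarch64_array_mode hook added at
the end of the patch maps an array of 2, 3 or 4 SVE vectors to the
corresponding structure mode, so for VNx4SI:

  VNx4SI[2] -> VNx8SI    (LD2W/ST2W)
  VNx4SI[3] -> VNx12SI   (LD3W/ST3W)
  VNx4SI[4] -> VNx16SI   (LD4W/ST4W)

following the SVE_MODES definitions above.)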
Index: gcc/config/aarch64/iterators.md
===================================================================
--- gcc/config/aarch64/iterators.md	2017-12-22 16:00:58.477012402 +0000
+++ gcc/config/aarch64/iterators.md	2017-12-22 16:01:42.045358644 +0000
@@ -250,6 +250,14 @@ (define_mode_iterator VMUL_CHANGE_NLANES
 (define_mode_iterator SVE_ALL [VNx16QI VNx8HI VNx4SI VNx2DI
                                VNx8HF VNx4SF VNx2DF])
 
+;; All SVE vector structure modes.
+(define_mode_iterator SVE_STRUCT [VNx32QI VNx16HI VNx8SI VNx4DI
+                                  VNx16HF VNx8SF VNx4DF
+                                  VNx48QI VNx24HI VNx12SI VNx6DI
+                                  VNx24HF VNx12SF VNx6DF
+                                  VNx64QI VNx32HI VNx16SI VNx8DI
+                                  VNx32HF VNx16SF VNx8DF])
+
 ;; All SVE vector modes that have 8-bit or 16-bit elements.
 (define_mode_iterator SVE_BH [VNx16QI VNx8HI VNx8HF])
@@ -587,9 +595,16 @@ (define_mode_attr Vetype [(V8QI "b") (V1
 ;; Equivalent of "size" for a vector element.
 (define_mode_attr Vesize [(VNx16QI "b")
-                          (VNx8HI "h") (VNx8HF "h")
-                          (VNx4SI "w") (VNx4SF "w")
-                          (VNx2DI "d") (VNx2DF "d")])
+                          (VNx8HI "h") (VNx8HF "h")
+                          (VNx4SI "w") (VNx4SF "w")
+                          (VNx2DI "d") (VNx2DF "d")
+                          (VNx32QI "b") (VNx48QI "b") (VNx64QI "b")
+                          (VNx16HI "h") (VNx24HI "h") (VNx32HI "h")
+                          (VNx16HF "h") (VNx24HF "h") (VNx32HF "h")
+                          (VNx8SI "w") (VNx12SI "w") (VNx16SI "w")
+                          (VNx8SF "w") (VNx12SF "w") (VNx16SF "w")
+                          (VNx4DI "d") (VNx6DI "d") (VNx8DI "d")
+                          (VNx4DF "d") (VNx6DF "d") (VNx8DF "d")])
 
 ;; Vetype is used everywhere in scheduling type and assembly output,
 ;; sometimes they are not the same, for example HF modes on some
@@ -957,17 +972,93 @@ (define_mode_attr insn_count [(OI "8") (
 ;; No need of iterator for -fPIC as it use got_lo12 for both modes.
 (define_mode_attr got_modifier [(SI "gotpage_lo14") (DI "gotpage_lo15")])
 
-;; The predicate mode associated with an SVE data mode.
+;; The number of subvectors in an SVE_STRUCT.
+(define_mode_attr vector_count [(VNx32QI "2") (VNx16HI "2")
+                                (VNx8SI "2") (VNx4DI "2")
+                                (VNx16HF "2") (VNx8SF "2") (VNx4DF "2")
+                                (VNx48QI "3") (VNx24HI "3")
+                                (VNx12SI "3") (VNx6DI "3")
+                                (VNx24HF "3") (VNx12SF "3") (VNx6DF "3")
+                                (VNx64QI "4") (VNx32HI "4")
+                                (VNx16SI "4") (VNx8DI "4")
+                                (VNx32HF "4") (VNx16SF "4") (VNx8DF "4")])
+
+;; The number of instruction bytes needed for an SVE_STRUCT move.  This is
+;; equal to vector_count * 4.
+(define_mode_attr insn_length [(VNx32QI "8") (VNx16HI "8")
+                               (VNx8SI "8") (VNx4DI "8")
+                               (VNx16HF "8") (VNx8SF "8") (VNx4DF "8")
+                               (VNx48QI "12") (VNx24HI "12")
+                               (VNx12SI "12") (VNx6DI "12")
+                               (VNx24HF "12") (VNx12SF "12") (VNx6DF "12")
+                               (VNx64QI "16") (VNx32HI "16")
+                               (VNx16SI "16") (VNx8DI "16")
+                               (VNx32HF "16") (VNx16SF "16") (VNx8DF "16")])
+
+;; The type of a subvector in an SVE_STRUCT.
+(define_mode_attr VSINGLE [(VNx32QI "VNx16QI")
+                           (VNx16HI "VNx8HI") (VNx16HF "VNx8HF")
+                           (VNx8SI "VNx4SI") (VNx8SF "VNx4SF")
+                           (VNx4DI "VNx2DI") (VNx4DF "VNx2DF")
+                           (VNx48QI "VNx16QI")
+                           (VNx24HI "VNx8HI") (VNx24HF "VNx8HF")
+                           (VNx12SI "VNx4SI") (VNx12SF "VNx4SF")
+                           (VNx6DI "VNx2DI") (VNx6DF "VNx2DF")
+                           (VNx64QI "VNx16QI")
+                           (VNx32HI "VNx8HI") (VNx32HF "VNx8HF")
+                           (VNx16SI "VNx4SI") (VNx16SF "VNx4SF")
+                           (VNx8DI "VNx2DI") (VNx8DF "VNx2DF")])
+
+;; ...and again in lower case.
+(define_mode_attr vsingle [(VNx32QI "vnx16qi")
+                           (VNx16HI "vnx8hi") (VNx16HF "vnx8hf")
+                           (VNx8SI "vnx4si") (VNx8SF "vnx4sf")
+                           (VNx4DI "vnx2di") (VNx4DF "vnx2df")
+                           (VNx48QI "vnx16qi")
+                           (VNx24HI "vnx8hi") (VNx24HF "vnx8hf")
+                           (VNx12SI "vnx4si") (VNx12SF "vnx4sf")
+                           (VNx6DI "vnx2di") (VNx6DF "vnx2df")
+                           (VNx64QI "vnx16qi")
+                           (VNx32HI "vnx8hi") (VNx32HF "vnx8hf")
+                           (VNx16SI "vnx4si") (VNx16SF "vnx4sf")
+                           (VNx8DI "vnx2di") (VNx8DF "vnx2df")])
+
+;; The predicate mode associated with an SVE data mode.  For structure modes
+;; this is equivalent to the <VPRED> of the subvector mode.
 (define_mode_attr VPRED [(VNx16QI "VNx16BI")
                          (VNx8HI "VNx8BI") (VNx8HF "VNx8BI")
                          (VNx4SI "VNx4BI") (VNx4SF "VNx4BI")
-                         (VNx2DI "VNx2BI") (VNx2DF "VNx2BI")])
+                         (VNx2DI "VNx2BI") (VNx2DF "VNx2BI")
+                         (VNx32QI "VNx16BI")
+                         (VNx16HI "VNx8BI") (VNx16HF "VNx8BI")
+                         (VNx8SI "VNx4BI") (VNx8SF "VNx4BI")
+                         (VNx4DI "VNx2BI") (VNx4DF "VNx2BI")
+                         (VNx48QI "VNx16BI")
+                         (VNx24HI "VNx8BI") (VNx24HF "VNx8BI")
+                         (VNx12SI "VNx4BI") (VNx12SF "VNx4BI")
+                         (VNx6DI "VNx2BI") (VNx6DF "VNx2BI")
+                         (VNx64QI "VNx16BI")
+                         (VNx32HI "VNx8BI") (VNx32HF "VNx8BI")
+                         (VNx16SI "VNx4BI") (VNx16SF "VNx4BI")
+                         (VNx8DI "VNx2BI") (VNx8DF "VNx2BI")])
 
 ;; ...and again in lower case.
 (define_mode_attr vpred [(VNx16QI "vnx16bi")
                          (VNx8HI "vnx8bi") (VNx8HF "vnx8bi")
                          (VNx4SI "vnx4bi") (VNx4SF "vnx4bi")
-                         (VNx2DI "vnx2bi") (VNx2DF "vnx2bi")])
+                         (VNx2DI "vnx2bi") (VNx2DF "vnx2bi")
+                         (VNx32QI "vnx16bi")
+                         (VNx16HI "vnx8bi") (VNx16HF "vnx8bi")
+                         (VNx8SI "vnx4bi") (VNx8SF "vnx4bi")
+                         (VNx4DI "vnx2bi") (VNx4DF "vnx2bi")
+                         (VNx48QI "vnx16bi")
+                         (VNx24HI "vnx8bi") (VNx24HF "vnx8bi")
+                         (VNx12SI "vnx4bi") (VNx12SF "vnx4bi")
+                         (VNx6DI "vnx2bi") (VNx6DF "vnx2bi")
+                         (VNx64QI "vnx16bi")
+                         (VNx32HI "vnx8bi") (VNx32HF "vnx8bi")
+                         (VNx16SI "vnx4bi") (VNx16SF "vnx4bi")
+                         (VNx8DI "vnx2bi") (VNx8DF "vnx2bi")])
 
 ;; -------------------------------------------------------------------
 ;; Code Iterators
 ;; -------------------------------------------------------------------
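(A worked example of how the new attributes combine: VNx12SI consists
of three VNx4SI subvectors, so <vector_count> is "3", <VSINGLE> is
VNx4SI, <Vesize> is "w" and <VPRED> is VNx4BI.  The LD[234] pattern
later in the patch therefore assembles to "ld3w" under a VNx4BI
governing predicate.)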
Index: gcc/config/aarch64/constraints.md
===================================================================
--- gcc/config/aarch64/constraints.md	2017-12-22 16:00:58.476012440 +0000
+++ gcc/config/aarch64/constraints.md	2017-12-22 16:01:42.045358644 +0000
@@ -237,6 +237,12 @@ (define_memory_constraint "Uty"
   (and (match_code "mem")
        (match_test "aarch64_sve_ld1r_operand_p (op)")))
 
+(define_memory_constraint "Utx"
+  "@internal
+   An address valid for SVE structure mov patterns (as distinct from
+   LD[234] and ST[234] patterns)."
+  (match_operand 0 "aarch64_sve_struct_memory_operand"))
+
 (define_constraint "Ufc"
   "A floating point constant which can be used with an\
 FMOV immediate operation."
Index: gcc/config/aarch64/predicates.md
===================================================================
--- gcc/config/aarch64/predicates.md	2017-12-22 16:00:58.477012402 +0000
+++ gcc/config/aarch64/predicates.md	2017-12-22 16:01:42.045358644 +0000
@@ -482,6 +482,14 @@ (define_predicate "aarch64_sve_general_o
     (match_operand 0 "aarch64_sve_ldr_operand")
     (match_test "aarch64_mov_operand_p (op, mode)"))))
 
+(define_predicate "aarch64_sve_struct_memory_operand"
+  (and (match_code "mem")
+       (match_test "aarch64_sve_struct_memory_operand_p (op)")))
+
+(define_predicate "aarch64_sve_struct_nonimmediate_operand"
+  (ior (match_operand 0 "register_operand")
+       (match_operand 0 "aarch64_sve_struct_memory_operand")))
+
 ;; Doesn't include immediates, since those are handled by the move
 ;; patterns instead.
 (define_predicate "aarch64_sve_dup_operand"
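(To see what Utx accepts for, say, the two-vector mode VNx8SI:
aarch64_sve_struct_memory_operand_p, defined in aarch64.c below,
requires both the first and the last subvector to be in the LD1/ST1
immediate range of [-8, 7] vector lengths.  An address like
[x0, #6, mul vl] is therefore valid, with the second subvector at
#7, mul vl, while [x0, #7, mul vl] is not, since the second subvector
would sit at #8, mul vl.)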
(define_predicate "aarch64_sve_dup_operand" Index: gcc/config/aarch64/aarch64.md =================================================================== --- gcc/config/aarch64/aarch64.md 2017-12-22 16:00:58.476012440 +0000 +++ gcc/config/aarch64/aarch64.md 2017-12-22 16:01:42.045358644 +0000 @@ -161,6 +161,8 @@ (define_c_enum "unspec" [ UNSPEC_PACK UNSPEC_FLOAT_CONVERT UNSPEC_WHILE_LO + UNSPEC_LDN + UNSPEC_STN ]) (define_c_enum "unspecv" [ Index: gcc/config/aarch64/aarch64-sve.md =================================================================== --- gcc/config/aarch64/aarch64-sve.md 2017-12-22 16:00:58.471012631 +0000 +++ gcc/config/aarch64/aarch64-sve.md 2017-12-22 16:01:42.043358720 +0000 @@ -189,6 +189,105 @@ (define_insn "maskstore<mode><vpred>" "st1<Vesize>\t%1.<Vetype>, %2, %0" ) +;; SVE structure moves. +(define_expand "mov<mode>" + [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand") + (match_operand:SVE_STRUCT 1 "general_operand"))] + "TARGET_SVE" + { + /* Big-endian loads and stores need to be done via LD1 and ST1; + see the comment at the head of the file for details. */ + if ((MEM_P (operands[0]) || MEM_P (operands[1])) + && BYTES_BIG_ENDIAN) + { + gcc_assert (can_create_pseudo_p ()); + aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode); + DONE; + } + + if (CONSTANT_P (operands[1])) + { + aarch64_expand_mov_immediate (operands[0], operands[1]); + DONE; + } + } +) + +;; Unpredicated structure moves (little-endian). +(define_insn "*aarch64_sve_mov<mode>_le" + [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w") + (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))] + "TARGET_SVE && !BYTES_BIG_ENDIAN" + "#" + [(set_attr "length" "<insn_length>")] +) + +;; Unpredicated structure moves (big-endian). Memory accesses require +;; secondary reloads. +(define_insn "*aarch64_sve_mov<mode>_le" + [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w") + (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))] + "TARGET_SVE && BYTES_BIG_ENDIAN" + "#" + [(set_attr "length" "<insn_length>")] +) + +;; Split unpredicated structure moves into pieces. This is the same +;; for both big-endian and little-endian code, although it only needs +;; to handle memory operands for little-endian code. +(define_split + [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand") + (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))] + "TARGET_SVE && reload_completed" + [(const_int 0)] + { + rtx dest = operands[0]; + rtx src = operands[1]; + if (REG_P (dest) && REG_P (src)) + aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>); + else + for (unsigned int i = 0; i < <vector_count>; ++i) + { + rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode, + i * BYTES_PER_SVE_VECTOR); + rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode, + i * BYTES_PER_SVE_VECTOR); + emit_insn (gen_rtx_SET (subdest, subsrc)); + } + DONE; + } +) + +;; Predicated structure moves. This works for both endiannesses but in +;; practice is only useful for big-endian. 
+(define_insn_and_split "pred_mov<mode>" + [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, Utx") + (unspec:SVE_STRUCT + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") + (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "Utx, w")] + UNSPEC_MERGE_PTRUE))] + "TARGET_SVE + && (register_operand (operands[0], <MODE>mode) + || register_operand (operands[2], <MODE>mode))" + "#" + "&& reload_completed" + [(const_int 0)] + { + for (unsigned int i = 0; i < <vector_count>; ++i) + { + rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0], + <MODE>mode, + i * BYTES_PER_SVE_VECTOR); + rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2], + <MODE>mode, + i * BYTES_PER_SVE_VECTOR); + aarch64_emit_sve_pred_move (subdest, operands[1], subsrc); + } + DONE; + } + [(set_attr "length" "<insn_length>")] +) + (define_expand "mov<mode>" [(set (match_operand:PRED_ALL 0 "nonimmediate_operand") (match_operand:PRED_ALL 1 "general_operand"))] @@ -460,6 +559,60 @@ (define_insn "*vec_series<mode>_plus" } ) +;; Unpredicated LD[234]. +(define_expand "vec_load_lanes<mode><vsingle>" + [(set (match_operand:SVE_STRUCT 0 "register_operand") + (unspec:SVE_STRUCT + [(match_dup 2) + (match_operand:SVE_STRUCT 1 "memory_operand")] + UNSPEC_LDN))] + "TARGET_SVE" + { + operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); + } +) + +;; Predicated LD[234]. +(define_insn "vec_mask_load_lanes<mode><vsingle>" + [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w") + (unspec:SVE_STRUCT + [(match_operand:<VPRED> 2 "register_operand" "Upl") + (match_operand:SVE_STRUCT 1 "memory_operand" "m")] + UNSPEC_LDN))] + "TARGET_SVE" + "ld<vector_count><Vesize>\t%0, %2/z, %1" +) + +;; Unpredicated ST[234]. This is always a full update, so the dependence +;; on the old value of the memory location (via (match_dup 0)) is redundant. +;; There doesn't seem to be any obvious benefit to treating the all-true +;; case differently though. In particular, it's very unlikely that we'll +;; only find out during RTL that a store_lanes is dead. +(define_expand "vec_store_lanes<mode><vsingle>" + [(set (match_operand:SVE_STRUCT 0 "memory_operand") + (unspec:SVE_STRUCT + [(match_dup 2) + (match_operand:SVE_STRUCT 1 "register_operand") + (match_dup 0)] + UNSPEC_STN))] + "TARGET_SVE" + { + operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); + } +) + +;; Predicated ST[234]. 
+(define_insn "vec_mask_store_lanes<mode><vsingle>" + [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m") + (unspec:SVE_STRUCT + [(match_operand:<VPRED> 2 "register_operand" "Upl") + (match_operand:SVE_STRUCT 1 "register_operand" "w") + (match_dup 0)] + UNSPEC_STN))] + "TARGET_SVE" + "st<vector_count><Vesize>\t%1, %2, %0" +) + (define_expand "vec_perm<mode>" [(match_operand:SVE_ALL 0 "register_operand") (match_operand:SVE_ALL 1 "register_operand") Index: gcc/config/aarch64/aarch64.c =================================================================== --- gcc/config/aarch64/aarch64.c 2017-12-22 16:00:42.829606965 +0000 +++ gcc/config/aarch64/aarch64.c 2017-12-22 16:01:42.044358682 +0000 @@ -1178,9 +1178,15 @@ aarch64_classify_vector_mode (machine_mo || inner == DImode || inner == DFmode)) { - if (TARGET_SVE - && known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR)) - return VEC_SVE_DATA; + if (TARGET_SVE) + { + if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR)) + return VEC_SVE_DATA; + if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 2) + || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 3) + || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 4)) + return VEC_SVE_DATA | VEC_STRUCT; + } /* This includes V1DF but not V1DI (which doesn't exist). */ if (TARGET_SIMD @@ -1208,6 +1214,18 @@ aarch64_sve_data_mode_p (machine_mode mo return aarch64_classify_vector_mode (mode) & VEC_SVE_DATA; } +/* Implement target hook TARGET_ARRAY_MODE. */ +static opt_machine_mode +aarch64_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems) +{ + if (aarch64_classify_vector_mode (mode) == VEC_SVE_DATA + && IN_RANGE (nelems, 2, 4)) + return mode_for_vector (GET_MODE_INNER (mode), + GET_MODE_NUNITS (mode) * nelems); + + return opt_machine_mode (); +} + /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */ static bool aarch64_array_mode_supported_p (machine_mode mode, @@ -5778,6 +5796,18 @@ aarch64_classify_address (struct aarch64 ? offset_4bit_signed_scaled_p (mode, offset) : offset_9bit_signed_scaled_p (mode, offset)); + if (vec_flags == (VEC_SVE_DATA | VEC_STRUCT)) + { + poly_int64 end_offset = (offset + + GET_MODE_SIZE (mode) + - BYTES_PER_SVE_VECTOR); + return (type == ADDR_QUERY_M + ? offset_4bit_signed_scaled_p (mode, offset) + : (offset_9bit_signed_scaled_p (SVE_BYTE_MODE, offset) + && offset_9bit_signed_scaled_p (SVE_BYTE_MODE, + end_offset))); + } + if (vec_flags == VEC_SVE_PRED) return offset_9bit_signed_scaled_p (mode, offset); @@ -6490,6 +6520,20 @@ aarch64_print_vector_float_operand (FILE return true; } +/* Return the equivalent letter for size. */ +static char +sizetochar (int size) +{ + switch (size) + { + case 64: return 'd'; + case 32: return 's'; + case 16: return 'h'; + case 8 : return 'b'; + default: gcc_unreachable (); + } +} + /* Print operand X to file F in a target specific manner according to CODE. 
    The acceptable formatting commands given by CODE are:
      'c':	An integer or symbol address without a preceding #
@@ -6777,7 +6821,18 @@ aarch64_print_operand (FILE *f, rtx x, i
     {
     case REG:
       if (aarch64_sve_data_mode_p (GET_MODE (x)))
-	asm_fprintf (f, "z%d", REGNO (x) - V0_REGNUM);
+	{
+	  if (REG_NREGS (x) == 1)
+	    asm_fprintf (f, "z%d", REGNO (x) - V0_REGNUM);
+	  else
+	    {
+	      char suffix
+		= sizetochar (GET_MODE_UNIT_BITSIZE (GET_MODE (x)));
+	      asm_fprintf (f, "{z%d.%c - z%d.%c}",
+			   REGNO (x) - V0_REGNUM, suffix,
+			   END_REGNO (x) - V0_REGNUM - 1, suffix);
+	    }
+	}
       else
	 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
       break;
@@ -12952,20 +13007,6 @@ aarch64_final_prescan_insn (rtx_insn *in
 }
 
-/* Return the equivalent letter for size.  */
-static char
-sizetochar (int size)
-{
-  switch (size)
-    {
-    case 64: return 'd';
-    case 32: return 's';
-    case 16: return 'h';
-    case 8 : return 'b';
-    default: gcc_unreachable ();
-    }
-}
-
 /* Return true if BASE_OR_STEP is a valid immediate operand for an SVE INDEX
    instruction.  */
@@ -13560,6 +13601,28 @@ aarch64_sve_ldr_operand_p (rtx op)
	  && addr.type == ADDRESS_REG_IMM);
 }
 
+/* Return true if OP is a valid MEM operand for an SVE_STRUCT mode.
+   We need to be able to access the individual pieces, so the range
+   is different from LD[234] and ST[234].  */
+bool
+aarch64_sve_struct_memory_operand_p (rtx op)
+{
+  if (!MEM_P (op))
+    return false;
+
+  machine_mode mode = GET_MODE (op);
+  struct aarch64_address_info addr;
+  if (!aarch64_classify_address (&addr, XEXP (op, 0), SVE_BYTE_MODE, false,
+				 ADDR_QUERY_ANY)
+      || addr.type != ADDRESS_REG_IMM)
+    return false;
+
+  poly_int64 first = addr.const_offset;
+  poly_int64 last = first + GET_MODE_SIZE (mode) - BYTES_PER_SVE_VECTOR;
+  return (offset_4bit_signed_scaled_p (SVE_BYTE_MODE, first)
+	  && offset_4bit_signed_scaled_p (SVE_BYTE_MODE, last));
+}
+
 /* Emit a register copy from operand to operand, taking care not to
    early-clobber source registers in the process.
@@ -17629,6 +17692,9 @@ #define TARGET_VECTOR_MODE_SUPPORTED_P a
 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
   aarch64_builtin_support_vector_misalignment
 
+#undef TARGET_ARRAY_MODE
+#define TARGET_ARRAY_MODE aarch64_array_mode
+
 #undef TARGET_ARRAY_MODE_SUPPORTED_P
 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p