INSR from a GPR involves a move between register files, whereas INSR from an FPR doesn't. We should therefore disparage the GPR alternatives relative to the FPR ones.
The patch also adds MOVPRFX handling, but this is only tested properly
by the ACLE.

Tested on aarch64-linux-gnu (with and without SVE) and aarch64_be-elf.
Applied as r274192.

Richard


2019-08-07  Richard Sandiford  <richard.sandif...@arm.com>

gcc/
	* config/aarch64/aarch64-sve.md (vec_shl_insert_<mode>): Add
	MOVPRFX alternatives.  Make the GPR alternatives more expensive
	than the FPR ones.

gcc/testsuite/
	* gcc.target/aarch64/sve/init_12.c: Expect w1 to be moved into
	a temporary FPR.

Index: gcc/config/aarch64/aarch64-sve.md
===================================================================
--- gcc/config/aarch64/aarch64-sve.md 2019-08-07 20:08:50.880452586 +0100
+++ gcc/config/aarch64/aarch64-sve.md 2019-08-07 20:10:43.051618619 +0100
@@ -835,15 +835,18 @@ (define_expand "vec_init<mode><Vel>"
 
 ;; Shift an SVE vector left and insert a scalar into element 0.
 (define_insn "vec_shl_insert_<mode>"
-  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
+  [(set (match_operand:SVE_ALL 0 "register_operand" "=?w, w, ??&w, ?&w")
        (unspec:SVE_ALL
-         [(match_operand:SVE_ALL 1 "register_operand" "0, 0")
-          (match_operand:<VEL> 2 "register_operand" "rZ, w")]
+         [(match_operand:SVE_ALL 1 "register_operand" "0, 0, w, w")
+          (match_operand:<VEL> 2 "aarch64_reg_or_zero" "rZ, w, rZ, w")]
          UNSPEC_INSR))]
   "TARGET_SVE"
   "@
    insr\t%0.<Vetype>, %<vwcore>2
-   insr\t%0.<Vetype>, %<Vetype>2"
+   insr\t%0.<Vetype>, %<Vetype>2
+   movprfx\t%0, %1\;insr\t%0.<Vetype>, %<vwcore>2
+   movprfx\t%0, %1\;insr\t%0.<Vetype>, %<Vetype>2"
+  [(set_attr "movprfx" "*,*,yes,yes")]
 )
 
 ;; -------------------------------------------------------------------------
Index: gcc/testsuite/gcc.target/aarch64/sve/init_12.c
===================================================================
--- gcc/testsuite/gcc.target/aarch64/sve/init_12.c 2019-07-29 09:46:41.910859821 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/init_12.c 2019-08-07 20:10:43.051618619 +0100
@@ -10,12 +10,13 @@ typedef int32_t vnx4si __attribute__((ve
 
 /*
 ** foo:
+** fmov (s[0-9]+), w1
 ** mov (z[0-9]+\.s), w2
 ** mov (z[0-9]+\.s), w0
-** insr \2, w1
-** insr \2, w1
-** insr \2, w1
-** zip1 \2, \2, \1
+** insr \3, \1
+** insr \3, \1
+** insr \3, \1
+** zip1 \3, \3, \2
 ** ...
 */
 __attribute__((noipa))