For vector move insns like vmovdqa/vmovdqu, the EVEX counterparts require an explicit 64/32/16/8 suffix, and those suffixed forms are unavailable without AVX512F or AVX10.1. So for AVX2+APX_F we select vmovaps/vmovups, which need no suffix, for vector load/store insns that contain an EGPR.
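In ix86_get_ssemov the new egpr_p check slots in between the existing evex_reg_p path and the legacy path. As a standalone sketch (pick_simode_mov is a made-up helper name; evex_reg_p, egpr_p and misaligned_p mirror the variables in the patch), the selection for 4-byte elements is:

  #include <stdbool.h>

  /* Illustrative sketch only, not part of the patch: mirrors the opcode
     selection ix86_get_ssemov performs for 4-byte (SImode) elements.  */
  static const char *
  pick_simode_mov (bool evex_reg_p, bool egpr_p, bool misaligned_p)
  {
    if (evex_reg_p)
      /* 64-byte or xmm16+ operands already require EVEX, so the
	 explicitly suffixed forms are available.  */
      return misaligned_p ? "vmovdqu32" : "vmovdqa32";
    else if (egpr_p)
      /* An EGPR (r16-r31) is mentioned but the suffixed AVX512 moves
	 are unavailable: fall back to unsuffixed vmovaps/vmovups.  */
      return misaligned_p ? "%vmovups" : "%vmovaps";
    else
      return misaligned_p ? "%vmovdqu" : "%vmovdqa";
  }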
gcc/ChangeLog:

	* config/i386/i386.cc (ix86_get_ssemov): Check if egpr is used,
	adjust mnemonic for vmovdqu/vmovdqa.
	* config/i386/sse.md
	(*<extract_type>_vinsert<shuffletype><extract_suf>_0): Check if
	egpr is used, adjust mnemonic for vmovdqu/vmovdqa.
	(avx_vec_concat<mode>): Likewise, and restrict alternative 0 to
	noavx512f.
---
 gcc/config/i386/i386.cc | 29 +++++++++++++++++++++++++++++
 gcc/config/i386/sse.md  | 34 ++++++++++++++++++++++++----------
 2 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 412f3aefc43..f5d642948bc 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -5469,6 +5469,11 @@ ix86_get_ssemov (rtx *operands, unsigned size,
   bool evex_reg_p = (size == 64
		      || EXT_REX_SSE_REG_P (operands[0])
		      || EXT_REX_SSE_REG_P (operands[1]));
+
+  bool egpr_p = (TARGET_APX_EGPR
+		  && (x86_extended_rex2reg_mentioned_p (operands[0])
+		      || x86_extended_rex2reg_mentioned_p (operands[1])));
+
   machine_mode scalar_mode;
 
   const char *opcode = NULL;
@@ -5547,6 +5552,12 @@ ix86_get_ssemov (rtx *operands, unsigned size,
		      ? "vmovdqu16"
		      : "vmovdqu64")
		   : "vmovdqa64");
+      else if (egpr_p)
+	opcode = (misaligned_p
+		  ? (TARGET_AVX512BW
+		     ? "vmovdqu16"
+		     : "%vmovups")
+		  : "%vmovaps");
       else
	 opcode = (misaligned_p
		   ? (TARGET_AVX512BW
@@ -5563,6 +5574,8 @@ ix86_get_ssemov (rtx *operands, unsigned size,
     case E_TFmode:
       if (evex_reg_p)
	 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
+      else if (egpr_p)
+	opcode = misaligned_p ? "%vmovups" : "%vmovaps";
       else
	 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
       break;
@@ -5581,6 +5594,12 @@ ix86_get_ssemov (rtx *operands, unsigned size,
		      ? "vmovdqu8"
		      : "vmovdqu64")
		   : "vmovdqa64");
+      else if (egpr_p)
+	opcode = (misaligned_p
+		  ? (TARGET_AVX512BW
+		     ? "vmovdqu8"
+		     : "%vmovups")
+		  : "%vmovaps");
       else
	 opcode = (misaligned_p
		   ? (TARGET_AVX512BW
@@ -5589,12 +5608,18 @@ ix86_get_ssemov (rtx *operands, unsigned size,
		   : "%vmovdqa");
       break;
     case E_HImode:
       if (evex_reg_p)
	 opcode = (misaligned_p
		   ? (TARGET_AVX512BW
		      ? "vmovdqu16"
		      : "vmovdqu64")
		   : "vmovdqa64");
+      else if (egpr_p)
+	opcode = (misaligned_p
+		  ? (TARGET_AVX512BW
+		     ? "vmovdqu16"
+		     : "%vmovups")
+		  : "%vmovaps");
       else
	 opcode = (misaligned_p
		   ? (TARGET_AVX512BW
@@ -5605,6 +5630,8 @@ ix86_get_ssemov (rtx *operands, unsigned size,
     case E_SImode:
       if (evex_reg_p)
	 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
+      else if (egpr_p)
+	opcode = misaligned_p ? "%vmovups" : "%vmovaps";
       else
	 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
       break;
@@ -5613,6 +5640,8 @@ ix86_get_ssemov (rtx *operands, unsigned size,
     case E_OImode:
       if (evex_reg_p)
	 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
+      else if (egpr_p)
+	opcode = misaligned_p ? "%vmovups" : "%vmovaps";
       else
	 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
       break;
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 192e746fda3..bd6674d34f9 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -18918,6 +18918,12 @@ (define_insn "*<extract_type>_vinsert<shuffletype><extract_suf>_0"
 {
   if (which_alternative == 0)
     return "vinsert<shuffletype><extract_suf>\t{$0, %2, %1, %0|%0, %1, %2, 0}";
+  bool egpr_used = (TARGET_APX_EGPR
+		     && x86_extended_rex2reg_mentioned_p (operands[2]));
+  const char *align_templ = egpr_used ? "vmovaps\t{%2, %x0|%x0, %2}"
+					: "vmovdqa\t{%2, %x0|%x0, %2}";
+  const char *unalign_templ = egpr_used ? "vmovups\t{%2, %x0|%x0, %2}"
+					  : "vmovdqu\t{%2, %x0|%x0, %2}";
   switch (<MODE>mode)
     {
     case E_V8DFmode:
@@ -18933,17 +18939,17 @@ (define_insn "*<extract_type>_vinsert<shuffletype><extract_suf>_0"
     case E_V8DImode:
       if (misaligned_operand (operands[2], <ssequartermode>mode))
	 return which_alternative == 2 ? "vmovdqu64\t{%2, %x0|%x0, %2}"
-				       : "vmovdqu\t{%2, %x0|%x0, %2}";
+				       : unalign_templ;
       else
	 return which_alternative == 2 ? "vmovdqa64\t{%2, %x0|%x0, %2}"
-				       : "vmovdqa\t{%2, %x0|%x0, %2}";
+				       : align_templ;
     case E_V16SImode:
       if (misaligned_operand (operands[2], <ssequartermode>mode))
	 return which_alternative == 2 ? "vmovdqu32\t{%2, %x0|%x0, %2}"
-				       : "vmovdqu\t{%2, %x0|%x0, %2}";
+				       : unalign_templ;
       else
	 return which_alternative == 2 ? "vmovdqa32\t{%2, %x0|%x0, %2}"
-				       : "vmovdqa\t{%2, %x0|%x0, %2}";
+				       : align_templ;
     default:
       gcc_unreachable ();
     }
@@ -27652,11 +27658,13 @@ (define_insn "avx_vec_concat<mode>"
   [(set (match_operand:V_256_512 0 "register_operand" "=x,v,x,Yv")
	 (vec_concat:V_256_512
	   (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "x,v,xm,vm")
-	  (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "xm,vm,C,C")))]
+	  (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "xBt,vm,C,C")))]
   "TARGET_AVX
    && (operands[2] == CONST0_RTX (<ssehalfvecmode>mode)
	|| !MEM_P (operands[1]))"
 {
+  bool egpr_used = (TARGET_APX_EGPR
+		     && x86_extended_rex2reg_mentioned_p (operands[1]));
   switch (which_alternative)
     {
     case 0:
@@ -27704,7 +27712,8 @@ (define_insn "avx_vec_concat<mode>"
	   if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
	     {
	       if (which_alternative == 2)
-		return "vmovdqu\t{%1, %t0|%t0, %1}";
+		return egpr_used ? "vmovups\t{%1, %t0|%t0, %1}"
+				 : "vmovdqu\t{%1, %t0|%t0, %1}";
	       else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
		 return "vmovdqu64\t{%1, %t0|%t0, %1}";
	       else
@@ -27713,7 +27722,8 @@ (define_insn "avx_vec_concat<mode>"
	   else
	     {
	       if (which_alternative == 2)
-		return "vmovdqa\t{%1, %t0|%t0, %1}";
+		return egpr_used ? "vmovaps\t{%1, %t0|%t0, %1}"
+				 : "vmovdqa\t{%1, %t0|%t0, %1}";
	       else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
		 return "vmovdqa64\t{%1, %t0|%t0, %1}";
	       else
@@ -27723,7 +27733,8 @@ (define_insn "avx_vec_concat<mode>"
	   if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
	     {
	       if (which_alternative == 2)
-		return "vmovdqu\t{%1, %x0|%x0, %1}";
+		return egpr_used ? "vmovups\t{%1, %x0|%x0, %1}"
+				 : "vmovdqu\t{%1, %x0|%x0, %1}";
	       else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
		 return "vmovdqu64\t{%1, %x0|%x0, %1}";
	       else
@@ -27732,7 +27743,8 @@ (define_insn "avx_vec_concat<mode>"
	   else
	     {
	       if (which_alternative == 2)
-		return "vmovdqa\t{%1, %x0|%x0, %1}";
+		return egpr_used ? "vmovaps\t{%1, %x0|%x0, %1}"
+				 : "vmovdqa\t{%1, %x0|%x0, %1}";
	       else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
		 return "vmovdqa64\t{%1, %x0|%x0, %1}";
	       else
@@ -27745,7 +27757,9 @@ (define_insn "avx_vec_concat<mode>"
       gcc_unreachable ();
     }
 }
-  [(set_attr "type" "sselog,sselog,ssemov,ssemov")
+  [(set_attr "isa" "noavx512f,avx512f,*,*")
+   (set_attr "gpr32" "0,1,1,1")
+   (set_attr "type" "sselog,sselog,ssemov,ssemov")
    (set_attr "prefix_extra" "1,1,*,*")
    (set_attr "length_immediate" "1,1,*,*")
    (set_attr "prefix" "maybe_evex")
-- 
2.31.1