Hello! This RFC patch illustrates the idea of using the STV pass to load/store any TImode constant using SSE insns. The testcase:
--cut here--
__int128 x;

__int128 test_1 (void)
{
  x = (__int128) 0x00112233;
}

__int128 test_2 (void)
{
  x = ((__int128) 0x0011223344556677 << 64);
}

__int128 test_3 (void)
{
  x = ((__int128) 0x0011223344556677 << 64) + (__int128) 0x0011223344556677;
}
--cut here--

currently compiles (-O2) on x86_64 to:

test_1:
        movq    $1122867, x(%rip)
        movq    $0, x+8(%rip)
        ret

test_2:
        xorl    %eax, %eax
        movabsq $4822678189205111, %rdx
        movq    %rax, x(%rip)
        movq    %rdx, x+8(%rip)
        ret

test_3:
        movabsq $4822678189205111, %rax
        movabsq $4822678189205111, %rdx
        movq    %rax, x(%rip)
        movq    %rdx, x+8(%rip)
        ret

However, using the attached patch, we compile all tests to:

test:
        movdqa  .LC0(%rip), %xmm0
        movaps  %xmm0, x(%rip)
        ret

Ilya, HJ - do you think the new sequences are better, or - as suggested
by Jakub - are they beneficial together with the STV pass, since we are
now able to load any immediate value?

A variant of this patch can also be used to load DImode values in the
32-bit STV pass (a sketch of that case follows the patch below).

Uros.
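For comparison: the code removed below only vectorized the two "standard"
SSE constants, which need no constant pool at all. A sketch of that case
(hypothetical functions; the sequences in the comments are what we'd
expect, not output from an actual build):

--cut here--
__int128 y;

void
store_zero (void)
{
  /* Expected: pxor %xmm0, %xmm0; movaps %xmm0, y(%rip).  */
  y = 0;
}

void
store_all_ones (void)
{
  /* Expected: pcmpeqd %xmm0, %xmm0; movaps %xmm0, y(%rip).  */
  y = -1;
}
--cut here--

Only these two values satisfy standard_sse_constant_p; with the patch,
every other immediate is forced into the constant pool and loaded with
movdqa, as in the sequence above.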
Index: i386.c
===================================================================
--- i386.c      (revision 235526)
+++ i386.c      (working copy)
@@ -2854,29 +2854,16 @@ timode_scalar_to_vector_candidate_p (rtx_insn *ins
 
   if (MEM_P (dst))
     {
-      /* Check for store.  Only support store from register or standard
-         SSE constants.  Memory must be aligned or unaligned store is
-         optimal.  */
-      if (misaligned_operand (dst, TImode)
-          && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
-        return false;
-
-      switch (GET_CODE (src))
-        {
-        default:
-          return false;
-
-        case REG:
-          return true;
-
-        case CONST_INT:
-          return standard_sse_constant_p (src, TImode);
-        }
+      /* Check for store.  Memory must be aligned
+         or unaligned store is optimal.  */
+      return ((REG_P (src) || CONST_SCALAR_INT_P (src))
+              && (!misaligned_operand (dst, TImode)
+                  || TARGET_SSE_UNALIGNED_STORE_OPTIMAL));
     }
   else if (MEM_P (src))
     {
-      /* Check for load.  Memory must be aligned or unaligned load is
-         optimal.  */
+      /* Check for load.  Memory must be aligned
+         or unaligned load is optimal.  */
       return (REG_P (dst)
               && (!misaligned_operand (src, TImode)
                   || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
@@ -3744,6 +3731,7 @@ timode_scalar_chain::convert_insn (rtx_insn *insn)
             PUT_MODE (XEXP (tmp, 0), V1TImode);
         }
       /* FALLTHRU */
+
     case MEM:
       PUT_MODE (dst, V1TImode);
       break;
@@ -3759,28 +3747,26 @@ timode_scalar_chain::convert_insn (rtx_insn *insn)
       PUT_MODE (src, V1TImode);
       break;
 
-    case CONST_INT:
-      switch (standard_sse_constant_p (src, TImode))
-        {
-        case 1:
-          src = CONST0_RTX (GET_MODE (dst));
-          break;
-        case 2:
-          src = CONSTM1_RTX (GET_MODE (dst));
-          break;
-        default:
-          gcc_unreachable ();
-        }
-      if (NONDEBUG_INSN_P (insn))
-        {
-          rtx tmp = gen_reg_rtx (V1TImode);
-          /* Since there are no instructions to store standard SSE
-             constant, temporary register usage is required.  */
-          emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
-          dst = tmp;
-        }
-      break;
+    CASE_CONST_SCALAR_INT:
+      {
+        rtx vec = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
+        if (NONDEBUG_INSN_P (insn))
+          {
+            rtx tmp = gen_reg_rtx (V1TImode);
+
+            if (!standard_sse_constant_p (src, TImode))
+              vec = validize_mem (force_const_mem (V1TImode, vec));
+
+            /* We can only store from a SSE register.  */
+            emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
+            dst = tmp;
+          }
+
+        src = vec;
+        break;
+      }
+
     default:
       gcc_unreachable ();
     }
@@ -14784,8 +14770,7 @@ ix86_legitimate_constant_p (machine_mode mode, rtx
 #endif
       break;
 
-    case CONST_INT:
-    case CONST_WIDE_INT:
+    CASE_CONST_SCALAR_INT:
       switch (mode)
         {
        case TImode:
@@ -14823,10 +14808,7 @@ ix86_cannot_force_const_mem (machine_mode mode, rt
   /* We can always put integral constants and vectors in memory.  */
   switch (GET_CODE (x))
     {
-    case CONST_INT:
-    case CONST_WIDE_INT:
-    case CONST_DOUBLE:
-    case CONST_VECTOR:
+    CASE_CONST_ANY:
       return false;
 
     default:
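For reference, the 32-bit DImode case mentioned above would look
something like this (a sketch; the function name and the sequences in
the comments are illustrative, not from an actual -O2 -m32 build):

--cut here--
long long z;

void
test_di (void)
{
  /* Currently stored with two immediate moves:
       movl $1146447479, z
       movl $1122867, z+4
     An SSE variant could instead use a single constant-pool load:
       movq .LC0, %xmm0
       movq %xmm0, z  */
  z = 0x0011223344556677LL;
}
--cut here--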