Hello!

This RFC patch illustrates the idea of using the STV pass to load/store
any TImode constant using SSE insns. The testcase:

--cut here--
/* 128-bit global that each test function below stores a constant into.  */
__int128 x;

/* Store a small constant (0x00112233) into x; the value fits in the low
   64 bits, so the upper half of x is zero.
   NOTE(review): declared to return __int128 but has no return statement;
   presumably only the store to x matters for this testcase.  */
__int128 test_1 (void)
{
  x = (__int128) 0x00112233;
}

/* Store a constant whose upper 64 bits are 0x0011223344556677 and whose
   lower 64 bits are zero (the 64-bit value is widened, then shifted).
   NOTE(review): declared to return __int128 but has no return statement;
   presumably only the store to x matters for this testcase.  */
__int128 test_2 (void)
{
  x = ((__int128) 0x0011223344556677 << 64);
}

/* Store a constant with 0x0011223344556677 in both the upper and the
   lower 64-bit halves, so neither half is zero.
   NOTE(review): declared to return __int128 but has no return statement;
   presumably only the store to x matters for this testcase.  */
__int128 test_3 (void)
{
  x = ((__int128) 0x0011223344556677 << 64) + (__int128) 0x0011223344556677;
}
--cut here--

currently compiles (-O2) on x86_64 to:

test_1:
        movq    $1122867, x(%rip)
        movq    $0, x+8(%rip)
        ret

test_2:
        xorl    %eax, %eax
        movabsq $4822678189205111, %rdx
        movq    %rax, x(%rip)
        movq    %rdx, x+8(%rip)
        ret

test_3:
        movabsq $4822678189205111, %rax
        movabsq $4822678189205111, %rdx
        movq    %rax, x(%rip)
        movq    %rdx, x+8(%rip)
        ret

However, using the attached patch, we compile all tests to:

test:
        movdqa  .LC0(%rip), %xmm0
        movaps  %xmm0, x(%rip)
        ret

Ilya, HJ - do you think the new sequences are better, or - as suggested by
Jakub - that they are beneficial with the STV pass, as we are now able to
load any immediate value? A variant of this patch can also be used to load
DImode values in the 32-bit STV pass.

Uros.
Index: i386.c
===================================================================
--- i386.c      (revision 235526)
+++ i386.c      (working copy)
@@ -2854,29 +2854,16 @@ timode_scalar_to_vector_candidate_p (rtx_insn *ins
 
   if (MEM_P (dst))
     {
-      /* Check for store.  Only support store from register or standard
-        SSE constants.  Memory must be aligned or unaligned store is
-        optimal.  */
-      if (misaligned_operand (dst, TImode)
-         && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
-       return false;
-
-      switch (GET_CODE (src))
-       {
-       default:
-         return false;
-
-       case REG:
-         return true;
-
-       case CONST_INT:
-         return standard_sse_constant_p (src, TImode);
-       }
+      /* Check for store.  Memory must be aligned
+        or unaligned store is optimal.  */
+      return ((REG_P (src) || CONST_SCALAR_INT_P (src))
+             && (!misaligned_operand (dst, TImode)
+                 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL));
     }
   else if (MEM_P (src))
     {
-      /* Check for load.  Memory must be aligned or unaligned load is
-        optimal.  */
+      /* Check for load.  Memory must be aligned
+        or unaligned load is optimal.  */
       return (REG_P (dst)
              && (!misaligned_operand (src, TImode)
                  || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
@@ -3744,6 +3731,7 @@ timode_scalar_chain::convert_insn (rtx_insn *insn)
          PUT_MODE (XEXP (tmp, 0), V1TImode);
       }
       /* FALLTHRU */
+
     case MEM:
       PUT_MODE (dst, V1TImode);
       break;
@@ -3759,28 +3747,26 @@ timode_scalar_chain::convert_insn (rtx_insn *insn)
       PUT_MODE (src, V1TImode);
       break;
 
-    case CONST_INT:
-      switch (standard_sse_constant_p (src, TImode))
-       {
-       case 1:
-         src = CONST0_RTX (GET_MODE (dst));
-         break;
-       case 2:
-         src = CONSTM1_RTX (GET_MODE (dst));
-         break;
-       default:
-         gcc_unreachable ();
-       }
-      if (NONDEBUG_INSN_P (insn))
-       {
-         rtx tmp = gen_reg_rtx (V1TImode);
-         /* Since there are no instructions to store standard SSE
-            constant, temporary register usage is required.  */
-         emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
-         dst = tmp;
-       }
-      break;
+    CASE_CONST_SCALAR_INT:
+      {
+       rtx vec = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
 
+       if (NONDEBUG_INSN_P (insn))
+         {
+           rtx tmp = gen_reg_rtx (V1TImode);
+
+           if (!standard_sse_constant_p (src, TImode))
+             vec = validize_mem (force_const_mem (V1TImode, vec));
+
+           /* We can only store from a SSE register.  */
+           emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
+           dst = tmp;
+         }
+
+       src = vec;
+       break;
+      }
+  
     default:
       gcc_unreachable ();
     }
@@ -14784,8 +14770,7 @@ ix86_legitimate_constant_p (machine_mode mode, rtx
 #endif
       break;
 
-    case CONST_INT:
-    case CONST_WIDE_INT:
+    CASE_CONST_SCALAR_INT:
       switch (mode)
        {
        case TImode:
@@ -14823,10 +14808,7 @@ ix86_cannot_force_const_mem (machine_mode mode, rt
   /* We can always put integral constants and vectors in memory.  */
   switch (GET_CODE (x))
     {
-    case CONST_INT:
-    case CONST_WIDE_INT:
-    case CONST_DOUBLE:
-    case CONST_VECTOR:
+    CASE_CONST_ANY:
       return false;
 
     default:

Reply via email to