Hello!

The testcase from the PR triggers a split in the *movdf_internal_rex64
move pattern too late in the compilation process.  The attached patch
fixes this by emitting the relevant moves directly, without splitting
them into DImode moves at all [it looks to me that the _rex64 pattern
was copied directly from the 32-bit *movdf_internal, since it doesn't
take into account the fact that we can move a DFmode value with a
movq/movabsq insn].

The only complication is with DFmode immediates. A movabsq insn can
copy the value into a register directly, with a bit of trickery in
ix86_print_operand. However, to encourage gcc to load FP constants
from memory, the "F -> r" case is penalized with "!". The "F -> m" case
is also penalized, since there is no direct DF/DImode move of an
immediate to memory, and this alternative still has to be split (this
alternative is the same as in the DImode case). As an added benefit, all
new instructions (modulo the F->m case) can be marked as "imov" type
instead of "multi" type.

2011-03-23  Uros Bizjak  <ubiz...@gmail.com>

        PR target/48237
        * config/i386/i386.md (*movdf_internal_rex64): Do not split
        alternatives that can be handled with movq or movabsq insn.
        (*movdf_internal): Disable for TARGET_64BIT.
        (*movdf_internal_nointeger): Ditto.
        * config/i386/i386.c (ix86_print_operand): Handle DFmode immediates.

testsuite/ChangeLog:

2011-03-23  Uros Bizjak  <ubiz...@gmail.com>

        PR target/48237
        * gcc.target/i386/pr48237.c: New test.

The newly added code in ix86_print_operand is in fact never triggered
in the testsuite, and I was not able to construct a testcase that
would exercise this part of the compiler, so in order to avoid nasty
surprises, I would kindly ask the other x86 maintainers to review the
newly added code, especially the ix86_print_operand part (the approach
was in fact copied from the output_pic_addr_const function).

The patch was bootstrapped and regression tested on x86_64-pc-linux-gnu {, -m32}.

Thanks,
Uros.
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 171353)
+++ config/i386/i386.md (working copy)
@@ -2915,9 +2915,9 @@
 
 (define_insn "*movdf_internal_rex64"
   [(set (match_operand:DF 0 "nonimmediate_operand"
-               "=f,m,f,r  ,m ,Y2*x,Y2*x,Y2*x,m   ,Yi,r ")
+               "=f,m,f,r ,m,!r,!m,Y2*x,Y2*x,Y2*x,m   ,Yi,r ")
        (match_operand:DF 1 "general_operand"
-               "fm,f,G,rmF,Fr,C   ,Y2*x,m   ,Y2*x,r ,Yi"))]
+               "fm,f,G,rm,r,F ,F ,C   ,Y2*x,m   ,Y2*x,r ,Yi"))]
   "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (reload_in_progress || reload_completed
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
@@ -2938,9 +2938,15 @@
 
     case 3:
     case 4:
-      return "#";
+      return "mov{q}\t{%1, %0|%0, %1}";
 
     case 5:
+      return "movabs{q}\t{%1, %0|%0, %1}";
+
+    case 6:
+      return "#";
+
+    case 7:
       switch (get_attr_mode (insn))
        {
        case MODE_V4SF:
@@ -2958,9 +2964,9 @@
        default:
          gcc_unreachable ();
        }
-    case 6:
-    case 7:
     case 8:
+    case 9:
+    case 10:
       switch (get_attr_mode (insn))
        {
        case MODE_V4SF:
@@ -2995,17 +3001,27 @@
          gcc_unreachable ();
        }
 
-    case 9:
-    case 10:
+    case 11:
+    case 12:
     return "%vmovd\t{%1, %0|%0, %1}";
 
     default:
       gcc_unreachable();
     }
 }
-  [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov,ssemov,ssemov")
+  [(set_attr "type" "fmov,fmov,fmov,imov,imov,imov,multi,sselog1,ssemov,ssemov,ssemov,ssemov,ssemov")
+   (set (attr "modrm")
+     (if_then_else
+       (and (eq_attr "alternative" "5") (eq_attr "type" "imov"))
+        (const_string "0")
+        (const_string "*")))
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (eq_attr "alternative" "5") (eq_attr "type" "imov"))
+        (const_string "8")
+        (const_string "*")))
    (set (attr "prefix")
-     (if_then_else (eq_attr "alternative" "0,1,2,3,4")
+     (if_then_else (eq_attr "alternative" "0,1,2,3,4,5,6")
        (const_string "orig")
        (const_string "maybe_vex")))
    (set (attr "prefix_data16")
@@ -3015,18 +3031,18 @@
    (set (attr "mode")
         (cond [(eq_attr "alternative" "0,1,2")
                 (const_string "DF")
-              (eq_attr "alternative" "3,4,9,10")
+              (eq_attr "alternative" "3,4,5,6,11,12")
                 (const_string "DI")
 
               /* For SSE1, we have many fewer alternatives.  */
               (eq (symbol_ref "TARGET_SSE2") (const_int 0))
-                (cond [(eq_attr "alternative" "5,6")
+                (cond [(eq_attr "alternative" "7,8")
                          (const_string "V4SF")
                       ]
                   (const_string "V2SF"))
 
               /* xorps is one byte shorter.  */
-              (eq_attr "alternative" "5")
+              (eq_attr "alternative" "7")
                 (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
                            (const_int 0))
                          (const_string "V4SF")
@@ -3041,7 +3057,7 @@
                  chains, otherwise use short move to avoid extra work.
 
                  movaps encodes one byte shorter.  */
-              (eq_attr "alternative" "6")
+              (eq_attr "alternative" "8")
                 (cond
                   [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
                        (const_int 0))
@@ -3054,7 +3070,7 @@
               /* For architectures resolving dependencies on register
                  parts we may avoid extra work to zero out upper part
                  of register.  */
-              (eq_attr "alternative" "7")
+              (eq_attr "alternative" "9")
                 (if_then_else
                   (ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
                       (const_int 0))
@@ -3068,7 +3084,7 @@
                "=f,m,f,r  ,o ,Y2*x,Y2*x,Y2*x,m   ")
        (match_operand:DF 1 "general_operand"
                "fm,f,G,roF,Fr,C   ,Y2*x,m   ,Y2*x"))]
-  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
+  "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
    && optimize_function_for_speed_p (cfun)
    && TARGET_INTEGER_DFMODE_MOVES
    && (reload_in_progress || reload_completed
@@ -3208,9 +3224,9 @@
                        "=f,m,f,*r  ,o  ,Y2*x,Y2*x,Y2*x ,m  ")
        (match_operand:DF 1 "general_operand"
                        "fm,f,G,*roF,*Fr,C   ,Y2*x,mY2*x,Y2*x"))]
-  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
-   && ((optimize_function_for_size_p (cfun)
-       || !TARGET_INTEGER_DFMODE_MOVES) && !TARGET_64BIT)
+  "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+   && (optimize_function_for_size_p (cfun)
+       || !TARGET_INTEGER_DFMODE_MOVES)
    && (reload_in_progress || reload_completed
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
        || (!(TARGET_SSE2 && TARGET_SSE_MATH)
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c  (revision 171353)
+++ config/i386/i386.c  (working copy)
@@ -14398,17 +14398,26 @@ ix86_print_operand (FILE *file, rtx x, i
        fprintf (file, "0x%08x", (unsigned int) l);
     }
 
-  /* These float cases don't actually occur as immediate operands.  */
   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
     {
-      char dstr[30];
+      REAL_VALUE_TYPE r;
+      long l[2];
 
-      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
-      fputs (dstr, file);
+      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+      REAL_VALUE_TO_TARGET_DOUBLE (r, l);
+
+      if (ASSEMBLER_DIALECT == ASM_ATT)
+       putc ('$', file);
+      /* We can use %d if the number is <32 bits and positive.  */
+      if (l[1] || l[0] < 0)
+       fprintf (file, "0x%lx%08lx",
+                (unsigned long) l[1], (unsigned long) l[0]);
+      else
+       fprintf (file, HOST_WIDE_INT_PRINT_DEC, l[0]);
     }
 
-  else if (GET_CODE (x) == CONST_DOUBLE
-          && GET_MODE (x) == XFmode)
+  /* These float cases don't actually occur as immediate operands.  */
+  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
     {
       char dstr[30];
 
Index: testsuite/gcc.target/i386/pr48237.c
===================================================================
--- testsuite/gcc.target/i386/pr48237.c (revision 0)
+++ testsuite/gcc.target/i386/pr48237.c (revision 0)
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fcaller-saves -fschedule-insns2 -fselective-scheduling2 -mtune=core2" } */
+
+union double_union
+{
+  double d;
+  int i[2];
+};
+
+void bar (int, ...);
+
+void
+foo (double d)
+{
+  union double_union du = { d };
+  while (1)
+    {
+      du.i[1] -= 0x100000L;
+      bar (0, du.d);
+      du.d += d;
+    }
+}

Reply via email to