Re: [Patch] Bug 88521 - gcc 9.0 from r266355 miscompile x265 for mingw-w64 target

Mateusz Thu, 20 Dec 2018 00:32:02 -0800

W dniu 20.12.2018 o 09:12, Lokesh Janghel pisze:
> Hi Mateuszb,
> 
> I tested with your proposition patch and it is working right.
> I also added the patch with test case.
> Please let me know your thoughts/suggestions.
> 
> 
> Thanks
> Lokesh
 
Patch looks good to me, thanks!


On the 64-bit target we should be OK.

We could also take a look at the 32-bit target.
I've attached a patch that does the same for both 32- and 64-bit targets.
Sample result:
$ cat t.cpp
/* Returns a plain scalar float.  In the disassembly later in this message the
   value is loaded with movss into %xmm0 (pe-x86-64 build) and with flds
   (pe-i386 build).  */
float fun1(void)
{
    return 4.14f;
}

/* Single-member aggregate wrapping a float.  */
typedef struct {float x;} Float;

/* Returns the same constant as fun1, but wrapped in a struct.  In the
   disassembly later in this message the value is loaded with an integer mov
   into %eax in both the pe-x86-64 and the pe-i386 builds.  */
Float fun2(void)
{
    Float v;
    v.x = 4.14f;
    return v;
}

/* Returns a plain scalar double.  In the disassembly later in this message the
   value is loaded with movsd into %xmm0 (pe-x86-64 build) and with fldl
   (pe-i386 build).  */
double fun3(void)
{
    return 3.13;
}

/* Single-member aggregate wrapping a double.  */
typedef struct {double x;} Double;

/* Returns the same constant as fun3, but wrapped in a struct.  In the
   disassembly later in this message the value is loaded with an integer mov
   into %rax (pe-x86-64 build) and as two immediates into %eax and %edx
   (pe-i386 build).  */
Double fun4(void)
{
    Double v;
    v.x = 3.13;
    return v;
}
Mateusz@Mateusz-i7 /c/temp
$ g++ -c -Wall -O2 -o t.o t.cpp

Mateusz@Mateusz-i7 /c/temp
$ objdump -dr t.o

t.o:     file format pe-x86-64


Disassembly of section .text:

0000000000000000 <_Z4fun1v>:
   0:   f3 0f 10 05 00 00 00    movss  0x0(%rip),%xmm0        # 8 <_Z4fun1v+0x8>
   7:   00
                        4: R_X86_64_PC32        .rdata
   8:   c3                      retq
   9:   0f 1f 80 00 00 00 00    nopl   0x0(%rax)

0000000000000010 <_Z4fun2v>:
  10:   8b 05 00 00 00 00       mov    0x0(%rip),%eax        # 16 <_Z4fun2v+0x6>
                        12: R_X86_64_PC32       .rdata
  16:   c3                      retq
  17:   66 0f 1f 84 00 00 00    nopw   0x0(%rax,%rax,1)
  1e:   00 00

0000000000000020 <_Z4fun3v>:
  20:   f2 0f 10 05 08 00 00    movsd  0x8(%rip),%xmm0        # 30 <_Z4fun4v>
  27:   00
                        24: R_X86_64_PC32       .rdata
  28:   c3                      retq
  29:   0f 1f 80 00 00 00 00    nopl   0x0(%rax)

0000000000000030 <_Z4fun4v>:
  30:   48 8b 05 08 00 00 00    mov    0x8(%rip),%rax        # 3f <_Z4fun4v+0xf>
                        33: R_X86_64_PC32       .rdata
  37:   c3                      retq
  38:   90                      nop
  39:   90                      nop
  3a:   90                      nop
  3b:   90                      nop
  3c:   90                      nop
  3d:   90                      nop
  3e:   90                      nop
  3f:   90                      nop

Mateusz@Mateusz-i7 /c/temp
$ m32- 900

Mateusz@Mateusz-i7 /c/temp
$ g++ -c -Wall -O2 -o t32.o t.cpp

Mateusz@Mateusz-i7 /c/temp
$ objdump -dr t32.o

t32.o:     file format pe-i386


Disassembly of section .text:

00000000 <__Z4fun1v>:
   0:   d9 05 00 00 00 00       flds   0x0
                        2: dir32        .rdata
   6:   c3                      ret
   7:   8d b4 26 00 00 00 00    lea    0x0(%esi,%eiz,1),%esi
   e:   66 90                   xchg   %ax,%ax

00000010 <__Z4fun2v>:
  10:   a1 00 00 00 00          mov    0x0,%eax
                        11: dir32       .rdata
  15:   c3                      ret
  16:   8d b4 26 00 00 00 00    lea    0x0(%esi,%eiz,1),%esi
  1d:   8d 76 00                lea    0x0(%esi),%esi

00000020 <__Z4fun3v>:
  20:   dd 05 08 00 00 00       fldl   0x8
                        22: dir32       .rdata
  26:   c3                      ret
  27:   8d b4 26 00 00 00 00    lea    0x0(%esi,%eiz,1),%esi
  2e:   66 90                   xchg   %ax,%ax

00000030 <__Z4fun4v>:
  30:   b8 0a d7 a3 70          mov    $0x70a3d70a,%eax
  35:   ba 3d 0a 09 40          mov    $0x40090a3d,%edx
  3a:   c3                      ret
  3b:   90                      nop
  3c:   90                      nop
  3d:   90                      nop
  3e:   90                      nop
  3f:   90                      nop

Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c      (revision 267291)
+++ gcc/config/i386/i386.c      (working copy)
@@ -8990,6 +8990,66 @@
 }
 
 static rtx
+function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
+                     const_tree fntype, const_tree fn, const_tree valtype)
+{
+  unsigned int regno;
+
+  /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
+     we normally prevent this case when mmx is not available.  However
+     some ABIs may require the result to be returned like DImode.  */
+  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
+    regno = FIRST_MMX_REG;
+
+  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
+     we prevent this case when sse is not available.  However some ABIs
+     may require the result to be returned like integer TImode.  */
+  else if (mode == TImode
+          || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
+    regno = FIRST_SSE_REG;
+
+  /* 32-byte vector modes in %ymm0.   */
+  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
+    regno = FIRST_SSE_REG;
+
+  /* 64-byte vector modes in %zmm0.   */
+  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
+    regno = FIRST_SSE_REG;
+
+  /* Floating point return values in %st(0)
+     (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes).  */
+  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
+          && (GET_MODE_SIZE (mode) > 8
+              || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
+    regno = FIRST_FLOAT_REG;
+  else
+    /* Most things go in %eax.  */
+    regno = AX_REG;
+
+  /* Override FP return register with %xmm0 for local functions when
+     SSE math is enabled or for functions with sseregparm attribute.  */
+  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
+    {
+      int sse_level = ix86_function_sseregparm (fntype, fn, false);
+      if (sse_level == -1)
+       {
+         error ("calling %qD with SSE calling convention without "
+                "SSE/SSE2 enabled", fn);
+         sorry ("this is a GCC bug that can be worked around by adding "
+                "attribute used to function called");
+       }
+      else if ((sse_level >= 1 && mode == SFmode)
+              || (sse_level == 2 && mode == DFmode))
+       regno = FIRST_SSE_REG;
+    }
+
+  /* OImode shouldn't be used directly.  */
+  gcc_assert (mode != OImode);
+
+  return gen_rtx_REG (orig_mode, regno);
+}
+
+static rtx
 function_value_64 (machine_mode orig_mode, machine_mode mode,
                   const_tree valtype)
 {
@@ -9063,6 +9123,13 @@
              && !COMPLEX_MODE_P (mode))
            regno = FIRST_SSE_REG;
          break;
+       case 8:
+       case 4:
+         if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
+           break;
+         if (mode == SFmode || mode == DFmode)
+           regno = FIRST_SSE_REG;
+         break;
        default:
          break;
         }
@@ -9081,8 +9148,13 @@
     fn = fntype_or_decl;
   fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
 
-  if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
-    return function_value_ms_64 (orig_mode, mode, valtype);
+  if (ix86_function_type_abi (fntype) == MS_ABI)
+    {
+      if (TARGET_64BIT)
+       return function_value_ms_64 (orig_mode, mode, valtype);
+      else
+       return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
+    }
   else if (TARGET_64BIT)
     return function_value_64 (orig_mode, mode, valtype);
   else

Re: [Patch] Bug 88521 - gcc 9.0 from r266355 miscompile x265 for mingw-w64 target

Reply via email to