W dniu 20.12.2018 o 09:12, Lokesh Janghel pisze: > Hi Mateuszb, > > I tested with your proposition patch and it is working right. > I also added the patch with test case. > Please let me know your thoughts/suggestions. > > > Thanks > Lokesh Patch looks good to me, thanks!
On the 64-bit target we should be OK. We could take a look at the 32-bit target. I've attached a patch that does the same for 32- and 64-bit targets. Sample result: $ cat t.cpp float fun1(void) { return 4.14f; } typedef struct {float x;} Float; Float fun2(void) { Float v; v.x = 4.14f; return v; } double fun3(void) { return 3.13; } typedef struct {double x;} Double; Double fun4(void) { Double v; v.x = 3.13; return v; } Mateusz@Mateusz-i7 /c/temp $ g++ -c -Wall -O2 -o t.o t.cpp Mateusz@Mateusz-i7 /c/temp $ objdump -dr t.o t.o: file format pe-x86-64 Disassembly of section .text: 0000000000000000 <_Z4fun1v>: 0: f3 0f 10 05 00 00 00 movss 0x0(%rip),%xmm0 # 8 <_Z4fun1v+0x8> 7: 00 4: R_X86_64_PC32 .rdata 8: c3 retq 9: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 0000000000000010 <_Z4fun2v>: 10: 8b 05 00 00 00 00 mov 0x0(%rip),%eax # 16 <_Z4fun2v+0x6> 12: R_X86_64_PC32 .rdata 16: c3 retq 17: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1) 1e: 00 00 0000000000000020 <_Z4fun3v>: 20: f2 0f 10 05 08 00 00 movsd 0x8(%rip),%xmm0 # 30 <_Z4fun4v> 27: 00 24: R_X86_64_PC32 .rdata 28: c3 retq 29: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 0000000000000030 <_Z4fun4v>: 30: 48 8b 05 08 00 00 00 mov 0x8(%rip),%rax # 3f <_Z4fun4v+0xf> 33: R_X86_64_PC32 .rdata 37: c3 retq 38: 90 nop 39: 90 nop 3a: 90 nop 3b: 90 nop 3c: 90 nop 3d: 90 nop 3e: 90 nop 3f: 90 nop Mateusz@Mateusz-i7 /c/temp $ m32- 900 Mateusz@Mateusz-i7 /c/temp $ g++ -c -Wall -O2 -o t32.o t.cpp Mateusz@Mateusz-i7 /c/temp $ objdump -dr t32.o t32.o: file format pe-i386 Disassembly of section .text: 00000000 <__Z4fun1v>: 0: d9 05 00 00 00 00 flds 0x0 2: dir32 .rdata 6: c3 ret 7: 8d b4 26 00 00 00 00 lea 0x0(%esi,%eiz,1),%esi e: 66 90 xchg %ax,%ax 00000010 <__Z4fun2v>: 10: a1 00 00 00 00 mov 0x0,%eax 11: dir32 .rdata 15: c3 ret 16: 8d b4 26 00 00 00 00 lea 0x0(%esi,%eiz,1),%esi 1d: 8d 76 00 lea 0x0(%esi),%esi 00000020 <__Z4fun3v>: 20: dd 05 08 00 00 00 fldl 0x8 22: dir32 .rdata 26: c3 ret 27: 8d b4 26 00 00 00 00 lea 0x0(%esi,%eiz,1),%esi 2e: 66 90 xchg %ax,%ax 
00000030 <__Z4fun4v>: 30: b8 0a d7 a3 70 mov $0x70a3d70a,%eax 35: ba 3d 0a 09 40 mov $0x40090a3d,%edx 3a: c3 ret 3b: 90 nop 3c: 90 nop 3d: 90 nop 3e: 90 nop 3f: 90 nop
Index: gcc/config/i386/i386.c =================================================================== --- gcc/config/i386/i386.c (revision 267291) +++ gcc/config/i386/i386.c (working copy) @@ -8990,6 +8990,66 @@ } static rtx +function_value_ms_32 (machine_mode orig_mode, machine_mode mode, + const_tree fntype, const_tree fn, const_tree valtype) +{ + unsigned int regno; + + /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where + we normally prevent this case when mmx is not available. However + some ABIs may require the result to be returned like DImode. */ + if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8) + regno = FIRST_MMX_REG; + + /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where + we prevent this case when sse is not available. However some ABIs + may require the result to be returned like integer TImode. */ + else if (mode == TImode + || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16)) + regno = FIRST_SSE_REG; + + /* 32-byte vector modes in %ymm0. */ + else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32) + regno = FIRST_SSE_REG; + + /* 64-byte vector modes in %zmm0. */ + else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64) + regno = FIRST_SSE_REG; + + /* Floating point return values in %st(0) + (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */ + else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387 + && (GET_MODE_SIZE (mode) > 8 + || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype))) + regno = FIRST_FLOAT_REG; + else + /* Most things go in %eax. */ + regno = AX_REG; + + /* Override FP return register with %xmm0 for local functions when + SSE math is enabled or for functions with sseregparm attribute. 
*/ + if ((fn || fntype) && (mode == SFmode || mode == DFmode)) + { + int sse_level = ix86_function_sseregparm (fntype, fn, false); + if (sse_level == -1) + { + error ("calling %qD with SSE calling convention without " + "SSE/SSE2 enabled", fn); + sorry ("this is a GCC bug that can be worked around by adding " + "attribute used to function called"); + } + else if ((sse_level >= 1 && mode == SFmode) + || (sse_level == 2 && mode == DFmode)) + regno = FIRST_SSE_REG; + } + + /* OImode shouldn't be used directly. */ + gcc_assert (mode != OImode); + + return gen_rtx_REG (orig_mode, regno); +} + +static rtx function_value_64 (machine_mode orig_mode, machine_mode mode, const_tree valtype) { @@ -9063,6 +9123,13 @@ && !COMPLEX_MODE_P (mode)) regno = FIRST_SSE_REG; break; + case 8: + case 4: + if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype)) + break; + if (mode == SFmode || mode == DFmode) + regno = FIRST_SSE_REG; + break; default: break; } @@ -9081,8 +9148,13 @@ fn = fntype_or_decl; fntype = fn ? TREE_TYPE (fn) : fntype_or_decl; - if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI) - return function_value_ms_64 (orig_mode, mode, valtype); + if (ix86_function_type_abi (fntype) == MS_ABI) + { + if (TARGET_64BIT) + return function_value_ms_64 (orig_mode, mode, valtype); + else + return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype); + } else if (TARGET_64BIT) return function_value_64 (orig_mode, mode, valtype); else