Hi, this patch works around a quite nasty issue where we compile a function with the SSE calling convention because it is local, but it is called from a function that does not have SSE (by target attribute or LTO merging).
We used to produce silent wrong code here for years, but I added an error to block this. The error, however, turned out to be overly restrictive: it triggers during the build of VLC, where functions have implied SSE parameters that are all unused. The proper fix for the issue requires us to collect a list of all target optimization nodes of the callers of each local function and consider them when deciding on the SSE parms. I will work on that incrementally. Bootstrapped/regtested x86_64-linux, will commit it shortly. Honza PR target/pr66047.c * i386.c (ix86_function_sseregparm): Only return -1 if local function with implied regparm is called from -mno-sse function. (init_cumulative_args): Output error if ix86_function_sseregparm return -1 and SSE register would be needed. (function_arg_advance_32): Likewise. (function_arg_32): Likewise. * i386.h (ix86_args): Add decl field. * gcc.target/i386/pr66047.c: New testcase. Index: testsuite/gcc.target/i386/pr66047.c =================================================================== --- testsuite/gcc.target/i386/pr66047.c (revision 0) +++ testsuite/gcc.target/i386/pr66047.c (revision 0) @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mno-sse" } */ +/* { dg-require-effective-target ia32 } */ +__attribute__((target ("sse2"), noinline)) static void +foo (void) +{ + asm volatile ("" : : : "memory"); +} + +void +bar (void) +{ + foo (); +} + Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 223100) +++ config/i386/i386.c (working copy) @@ -5895,7 +5895,10 @@ ix86_function_regparm (const_tree type, /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and DFmode (2) arguments in SSE registers for a function with the indicated TYPE and DECL. DECL may be NULL when calling function - indirectly or considering a libcall. Otherwise return 0. */ + indirectly or considering a libcall. Return -1 if any FP parameter + should be rejected by error. 
This is used in siutation we imply SSE + calling convetion but the function is called from another function with + SSE disabled. Otherwise return 0. */ static int ix86_function_sseregparm (const_tree type, const_tree decl, bool warn) @@ -5944,14 +5947,13 @@ ix86_function_sseregparm (const_tree typ { /* Refuse to produce wrong code when local function with SSE enabled is called from SSE disabled function. - We may work hard to work out these scenarios but hopefully - it doesnot matter in practice. */ + FIXME: We need a way to detect these cases cross-ltrans partition + and avoid using SSE calling conventions on local functions called + from function with SSE disabled. For now at least delay the + warning until we know we are going to produce wrong code. + See PR66047 */ if (!TARGET_SSE && warn) - { - error ("calling %qD with SSE caling convention without " - "SSE/SSE2 enabled", decl); - return 0; - } + return -1; return TARGET_SSE2_P (target_opts_for_fn (target->decl) ->x_ix86_isa_flags) ? 
2 : 1; } @@ -6507,6 +6509,7 @@ init_cumulative_args (CUMULATIVE_ARGS *c cum->bnd_regno = FIRST_BND_REG; cum->bnds_in_bt = 0; cum->force_bnd_pass = 0; + cum->decl = fndecl; if (!TARGET_64BIT) { @@ -7452,6 +7455,7 @@ function_arg_advance_32 (CUMULATIVE_ARGS HOST_WIDE_INT words) { int res = 0; + bool error_p = NULL; switch (mode) { @@ -7484,9 +7488,13 @@ function_arg_advance_32 (CUMULATIVE_ARGS gcc_unreachable (); case DFmode: + if (cum->float_in_sse == -1) + error_p = 1; if (cum->float_in_sse < 2) break; case SFmode: + if (cum->float_in_sse == -1) + error_p = 1; if (cum->float_in_sse < 1) break; /* FALLTHRU */ @@ -7542,6 +7550,14 @@ function_arg_advance_32 (CUMULATIVE_ARGS } break; } + if (error_p) + { + cum->float_in_sse = 0; + error ("calling %qD with SSE calling convention without " + "SSE/SSE2 enabled", cum->decl); + sorry ("this is a GCC bug that can be worked around by adding " + "attribute used to function called"); + } return res; } @@ -7674,10 +7690,11 @@ ix86_function_arg_advance (cumulative_ar (otherwise it is an extra parameter matching an ellipsis). */ static rtx -function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode, +function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode, machine_mode orig_mode, const_tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words) { + bool error_p = false; /* Avoid the AL settings for the Unix64 ABI. 
*/ if (mode == VOIDmode) return constm1_rtx; @@ -7718,9 +7735,13 @@ function_arg_32 (const CUMULATIVE_ARGS * break; case DFmode: + if (cum->float_in_sse == -1) + error_p = 1; if (cum->float_in_sse < 2) break; case SFmode: + if (cum->float_in_sse == -1) + error_p = 1; if (cum->float_in_sse < 1) break; /* FALLTHRU */ @@ -7779,6 +7800,14 @@ function_arg_32 (const CUMULATIVE_ARGS * } break; } + if (error_p) + { + cum->float_in_sse = 0; + error ("calling %qD with SSE calling convention without " + "SSE/SSE2 enabled", cum->decl); + sorry ("this is a GCC bug that can be worked around by adding " + "attribute used to function called"); + } return NULL_RTX; } @@ -8258,8 +8287,15 @@ function_value_32 (machine_mode orig_mod if ((fn || fntype) && (mode == SFmode || mode == DFmode)) { int sse_level = ix86_function_sseregparm (fntype, fn, false); - if ((sse_level >= 1 && mode == SFmode) - || (sse_level == 2 && mode == DFmode)) + if (sse_level == -1) + { + error ("calling %qD with SSE caling convention without " + "SSE/SSE2 enabled", fn); + sorry ("this is a GCC bug that can be worked around by adding " + "attribute used to function called"); + } + else if ((sse_level >= 1 && mode == SFmode) + || (sse_level == 2 && mode == DFmode)) regno = FIRST_SSE_REG; } Index: config/i386/i386.h =================================================================== --- config/i386/i386.h (revision 223100) +++ config/i386/i386.h (working copy) @@ -1688,6 +1688,7 @@ typedef struct ix86_args { int stdarg; /* Set to 1 if function is stdarg. */ enum calling_abi call_abi; /* Set to SYSV_ABI for sysv abi. Otherwise MS_ABI for ms abi. */ + tree decl; /* Callee decl. */ } CUMULATIVE_ARGS; /* Initialize a variable CUM of type CUMULATIVE_ARGS