Hello, I'd like to have tighter control over the individual situations that -mrecip handles, and I think users might appreciate this too. Hence I've introduced four new target options: -mrecip-div, -mrecip-sqrt, -mrecip-vec-div and -mrecip-vec-sqrt. I've redefined -mrecip to be equivalent to using those four options together. In addition, one can selectively disable individual parts, for instance via -mrecip -mno-recip-vec-div.
I was of two minds about the approach: I could also have done it like rs6000 (-mrecip=<csv list>), with the disadvantage of having to write a custom parser, as our opt framework can't deal with comma-separated lists of masks. With the approach I chose, our opt framework gets most of the work done. I've decided not to use four new bits from target_flags, and instead created a new mask (recip_mask). Four bits would have fit in target_flags right now, but in the future we might want to add more specialization, like modes for which the reciprocals are active. What do you think? Ciao, Michael. * i386/i386.opt (recip_mask_explicit, x_recip_mask_explicit): New variable and cl_target member. (mrecip-div, mrecip-sqrt, mrecip-vec-div, mrecip-vec-sqrt): New options. * common/config/i386/i386-common.c (ix86_handle_option): Handle new options. * i386/i386.md (divsf3): Check OPTION_RECIP_DIV. (sqrt<mode>2): Check OPTION_RECIP_SQRT. * i386/sse.md (div<mode>3): Check OPTION_RECIP_VEC_DIV. (sqrt<mode>2): Check OPTION_RECIP_VEC_SQRT. * i386/i386.c (ix86_option_override_internal): Set recip_mask for -mrecip. (ix86_function_specific_save): Save recip_mask_explicit. (ix86_function_specific_restore): Restore recip_mask_explicit. * doc/invoke.texi (ix86 Options): Document the new options. 
Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 178101) +++ config/i386/i386.md (working copy) @@ -7050,7 +7050,9 @@ (define_expand "divsf3" "(TARGET_80387 && X87_ENABLE_ARITH (SFmode)) || TARGET_SSE_MATH" { - if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p () + if (TARGET_SSE_MATH + && OPTION_RECIP_DIV + && optimize_insn_for_speed_p () && flag_finite_math_only && !flag_trapping_math && flag_unsafe_math_optimizations) { @@ -13422,7 +13424,9 @@ (define_expand "sqrt<mode>2" || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" { if (<MODE>mode == SFmode - && TARGET_SSE_MATH && TARGET_RECIP && !optimize_function_for_size_p (cfun) + && TARGET_SSE_MATH + && OPTION_RECIP_SQRT + && !optimize_function_for_size_p (cfun) && flag_finite_math_only && !flag_trapping_math && flag_unsafe_math_optimizations) { Index: config/i386/sse.md =================================================================== --- config/i386/sse.md (revision 178101) +++ config/i386/sse.md (working copy) @@ -772,7 +772,9 @@ (define_expand "div<mode>3" { ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands); - if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p () + if (TARGET_SSE_MATH + && OPTION_RECIP_VEC_DIV + && !optimize_insn_for_size_p () && flag_finite_math_only && !flag_trapping_math && flag_unsafe_math_optimizations) { @@ -850,7 +852,9 @@ (define_expand "sqrt<mode>2" (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))] "TARGET_SSE" { - if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p () + if (TARGET_SSE_MATH + && OPTION_RECIP_VEC_SQRT + && !optimize_insn_for_size_p () && flag_finite_math_only && !flag_trapping_math && flag_unsafe_math_optimizations) { Index: config/i386/i386.opt =================================================================== --- config/i386/i386.opt (revision 178101) +++ config/i386/i386.opt (working copy) @@ -31,6 +31,9 @@ 
HOST_WIDE_INT ix86_isa_flags = TARGET_64 Variable HOST_WIDE_INT ix86_isa_flags_explicit +Variable +int recip_mask_explicit + ;; Definitions to add to the cl_target_option structure ;; -march= processor TargetSave @@ -56,6 +59,9 @@ HOST_WIDE_INT x_ix86_isa_flags_explicit TargetSave int ix86_target_flags_explicit +TargetSave +int x_recip_mask_explicit + ;; whether -mtune was not specified TargetSave unsigned char tune_defaulted @@ -373,6 +379,22 @@ mrecip Target Report Mask(RECIP) Save Generate reciprocals instead of divss and sqrtss. +mrecip-div +Target Mask(RECIP_DIV) Var(recip_mask) Save +Generate reciprocal estimations instead of scalar divisions. + +mrecip-sqrt +Target Mask(RECIP_SQRT) Var(recip_mask) Save +Generate reciprocal estimations instead of scalar sqrt. + +mrecip-vec-div +Target Mask(RECIP_VEC_DIV) Var(recip_mask) Save +Generate reciprocal estimations instead of vector divisions. + +mrecip-vec-sqrt +Target Mask(RECIP_VEC_SQRT) Var(recip_mask) Save +Generate reciprocal estimations instead of vector sqrt. + mcld Target Report Mask(CLD) Save Generate cld instruction in the function prologue. Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 178101) +++ config/i386/i386.c (working copy) @@ -3806,6 +3806,19 @@ ix86_option_override_internal (bool main target_flags &= ~MASK_VZEROUPPER; } + if (TARGET_RECIP) + { + recip_mask |= (OPTION_MASK_RECIP_DIV | OPTION_MASK_RECIP_SQRT + | OPTION_MASK_RECIP_VEC_DIV + | OPTION_MASK_RECIP_VEC_SQRT) & ~recip_mask_explicit; + } + else if (target_flags_explicit & MASK_RECIP) + { + recip_mask &= ~((OPTION_MASK_RECIP_DIV | OPTION_MASK_RECIP_SQRT + | OPTION_MASK_RECIP_VEC_DIV + | OPTION_MASK_RECIP_VEC_SQRT) & ~recip_mask_explicit); + } + /* Save the initial options in case the user does function specific options. 
*/ if (main_args_p) @@ -3938,6 +3951,7 @@ ix86_function_specific_save (struct cl_t ptr->arch_specified = ix86_arch_specified; ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit; ptr->ix86_target_flags_explicit = target_flags_explicit; + ptr->x_recip_mask_explicit = recip_mask_explicit; /* The fields are char but the variables are not; make sure the values fit in the fields. */ @@ -3965,6 +3979,7 @@ ix86_function_specific_restore (struct c ix86_arch_specified = ptr->arch_specified; ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit; target_flags_explicit = ptr->ix86_target_flags_explicit; + recip_mask_explicit = ptr->x_recip_mask_explicit; /* Recreate the arch feature tests if the arch changed */ if (old_arch != ix86_arch) Index: common/config/i386/i386-common.c =================================================================== --- common/config/i386/i386-common.c (revision 178101) +++ common/config/i386/i386-common.c (working copy) @@ -553,6 +553,58 @@ ix86_handle_option (struct gcc_options * } return true; + case OPT_mrecip_div: + if (value) + { + opts->x_recip_mask |= OPTION_MASK_RECIP_DIV; + opts->x_recip_mask_explicit |= OPTION_MASK_RECIP_DIV; + } + else + { + opts->x_recip_mask &= ~OPTION_MASK_RECIP_DIV; + opts->x_recip_mask_explicit |= OPTION_MASK_RECIP_DIV; + } + return true; + + case OPT_mrecip_sqrt: + if (value) + { + opts->x_recip_mask |= OPTION_MASK_RECIP_SQRT; + opts->x_recip_mask_explicit |= OPTION_MASK_RECIP_SQRT; + } + else + { + opts->x_recip_mask &= ~OPTION_MASK_RECIP_SQRT; + opts->x_recip_mask_explicit |= OPTION_MASK_RECIP_SQRT; + } + return true; + + case OPT_mrecip_vec_div: + if (value) + { + opts->x_recip_mask |= OPTION_MASK_RECIP_VEC_DIV; + opts->x_recip_mask_explicit |= OPTION_MASK_RECIP_VEC_DIV; + } + else + { + opts->x_recip_mask &= ~OPTION_MASK_RECIP_VEC_DIV; + opts->x_recip_mask_explicit |= OPTION_MASK_RECIP_VEC_DIV; + } + return true; + + case OPT_mrecip_vec_sqrt: + if (value) + { + opts->x_recip_mask |= 
OPTION_MASK_RECIP_VEC_SQRT; + opts->x_recip_mask_explicit |= OPTION_MASK_RECIP_VEC_SQRT; + } + else + { + opts->x_recip_mask &= ~OPTION_MASK_RECIP_VEC_SQRT; + opts->x_recip_mask_explicit |= OPTION_MASK_RECIP_VEC_SQRT; + } + return true; + /* Comes from final.c -- no real reason to change it. */ #define MAX_CODE_ALIGN 16 Index: doc/invoke.texi =================================================================== --- doc/invoke.texi (revision 178101) +++ doc/invoke.texi (working copy) @@ -604,7 +604,9 @@ Objective-C and Objective-C++ Dialects}. -mno-wide-multiply -mrtd -malign-double @gol -mpreferred-stack-boundary=@var{num} @gol -mincoming-stack-boundary=@var{num} @gol --mcld -mcx16 -msahf -mmovbe -mcrc32 -mrecip -mvzeroupper @gol +-mcld -mcx16 -msahf -mmovbe -mcrc32 @gol +-mrecip -mrecip-div -mrecip-sqrt -mrecip-vec-div -mrecip-vec-sqrt @gol +-mvzeroupper @gol -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol -mavx2 -maes -mpclmul -mfsgsbase -mrdrnd -mf16c -mfma @gol -msse4a -m3dnow -mpopcnt -mabm -mbmi -mtbm -mfma4 -mxop -mlzcnt @gol @@ -12804,6 +12806,31 @@ Note that GCC implements 1.0f/sqrtf(x) i already with @option{-ffast-math} (or the above option combination), and doesn't need @option{-mrecip}. +@item -mrecip-div +@itemx -mrecip-sqrt +@itemx -mrecip-vec-div +@itemx -mrecip-vec-sqrt +@itemx -mno-recip-div +@itemx -mno-recip-sqrt +@itemx -mno-recip-vec-div +@itemx -mno-recip-vec-sqrt +@opindex mrecip-div +@opindex mrecip-sqrt +@opindex mrecip-vec-div +@opindex mrecip-vec-sqrt +@opindex mno-recip-div +@opindex mno-recip-sqrt +@opindex mno-recip-vec-div +@opindex mno-recip-vec-sqrt +These options control the use of reciprocal estimate instructions +in detail. @option{-mrecip-div} controls scalar division, +@option{-mrecip-sqrt} scalar square root, @option{-mrecip-vec-div} +vectorized division and @option{-mrecip-vec-sqrt} vectorized +square root. @option{-mrecip} actually is equivalent to mentioning +all these four options, see also there. 
The corresponding negative +options like @option{-mno-recip-div} can be used to selectively +disable some variants. + @item -mveclibabi=@var{type} @opindex mveclibabi Specifies the ABI type to use for vectorizing intrinsics using an