On November 27, 2015 8:40:56 PM GMT+01:00, Jakub Jelinek <ja...@redhat.com> wrote: >Hi! > >The recent changes where vector sqrt is represented in the IL using >IFN_SQRT instead of target specific builtins broke the discovery >of vector rsqrt, as targetm.builtin_reciprocal is called only >on builtin functions (not internal functions). Furthermore, >for internal fns, not only the IFN_* is significant, but also the >types (modes actually) of the lhs and/or arguments. > >This patch adjusts the target hook, so that the backends can just >inspect >the call (builtin or internal function), whatever it is. > >Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK. Though the other option would be to add an optab with corresponding IFN. Richard. >2015-11-27 Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/68501 > * target.def (builtin_reciprocal): Replace the 3 arguments with > a gcall * one, adjust description. > * targhooks.h (default_builtin_reciprocal): Replace the 3 arguments > with a gcall * one. > * targhooks.c (default_builtin_reciprocal): Likewise. > * tree-ssa-math-opts.c (pass_cse_reciprocals::execute): Use > targetm.builtin_reciprocal even on internal functions, adjust > the arguments and allow replacing an internal function with normal > built-in. > * config/i386/i386.c (ix86_builtin_reciprocal): Replace the 3 >arguments > with a gcall * one. Handle internal fns too. > * config/rs6000/rs6000.c (rs6000_builtin_reciprocal): Likewise. > * config/aarch64/aarch64.c (aarch64_builtin_reciprocal): Likewise. > * doc/tm.texi (builtin_reciprocal): Document. > >--- gcc/target.def.jj 2015-11-18 11:19:19.000000000 +0100 >+++ gcc/target.def 2015-11-27 16:37:07.870823670 +0100 >@@ -2463,13 +2463,9 @@ identical versions.", > DEFHOOK > (builtin_reciprocal, >"This hook should return the DECL of a function that implements >reciprocal of\n\ >-the builtin function with builtin function code @var{fn}, or\n\ >-@code{NULL_TREE} if such a function is not available. @var{md_fn} is >true\n\ >-when @var{fn} is a code of a machine-dependent builtin function. 
>When\n\ >-@var{sqrt} is true, additional optimizations that apply only to the >reciprocal\n\ >-of a square root function are performed, and only reciprocals of >@code{sqrt}\n\ >-function are valid.", >- tree, (unsigned fn, bool md_fn, bool sqrt), >+the builtin or internal function call @var{call}, or\n\ >+@code{NULL_TREE} if such a function is not available.", >+ tree, (gcall *call), > default_builtin_reciprocal) > >/* For a vendor-specific TYPE, return a pointer to a >statically-allocated >--- gcc/targhooks.h.jj 2015-11-18 11:19:17.000000000 +0100 >+++ gcc/targhooks.h 2015-11-27 16:37:44.828301093 +0100 >@@ -90,7 +90,7 @@ extern tree default_builtin_vectorized_c > >extern int default_builtin_vectorization_cost (enum vect_cost_for_stmt, >tree, int); > >-extern tree default_builtin_reciprocal (unsigned int, bool, bool); >+extern tree default_builtin_reciprocal (gcall *); > > extern HOST_WIDE_INT default_vector_alignment (const_tree); > >--- gcc/targhooks.c.jj 2015-11-18 11:19:17.000000000 +0100 >+++ gcc/targhooks.c 2015-11-27 16:38:21.461783097 +0100 >@@ -600,9 +600,7 @@ default_builtin_vectorization_cost (enum > /* Reciprocal. 
*/ > > tree >-default_builtin_reciprocal (unsigned int fn ATTRIBUTE_UNUSED, >- bool md_fn ATTRIBUTE_UNUSED, >- bool sqrt ATTRIBUTE_UNUSED) >+default_builtin_reciprocal (gcall *) > { > return NULL_TREE; > } >--- gcc/tree-ssa-math-opts.c.jj 2015-11-25 09:57:47.000000000 +0100 >+++ gcc/tree-ssa-math-opts.c 2015-11-27 17:07:22.756162308 +0100 >@@ -601,19 +601,17 @@ pass_cse_reciprocals::execute (function > > if (is_gimple_call (stmt1) > && gimple_call_lhs (stmt1) >- && (fndecl = gimple_call_fndecl (stmt1)) >- && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL >- || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)) >+ && (gimple_call_internal_p (stmt1) >+ || ((fndecl = gimple_call_fndecl (stmt1)) >+ && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL >+ || (DECL_BUILT_IN_CLASS (fndecl) >+ == BUILT_IN_MD))))) > { >- enum built_in_function code; >- bool md_code, fail; >+ bool fail; > imm_use_iterator ui; > use_operand_p use_p; > >- code = DECL_FUNCTION_CODE (fndecl); >- md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD; >- >- fndecl = targetm.builtin_reciprocal (code, md_code, false); >+ fndecl = targetm.builtin_reciprocal (as_a <gcall *> (stmt1)); > if (!fndecl) > continue; > >@@ -639,8 +637,28 @@ pass_cse_reciprocals::execute (function > continue; > > gimple_replace_ssa_lhs (stmt1, arg1); >- gimple_call_set_fndecl (stmt1, fndecl); >- update_stmt (stmt1); >+ if (gimple_call_internal_p (stmt1)) >+ { >+ auto_vec<tree, 4> args; >+ for (unsigned int i = 0; >+ i < gimple_call_num_args (stmt1); i++) >+ args.safe_push (gimple_call_arg (stmt1, i)); >+ gcall *stmt2 = gimple_build_call_vec (fndecl, args); >+ gimple_call_set_lhs (stmt2, arg1); >+ if (gimple_vdef (stmt1)) >+ { >+ gimple_set_vdef (stmt2, gimple_vdef (stmt1)); >+ SSA_NAME_DEF_STMT (gimple_vdef (stmt2)) = stmt2; >+ } >+ gimple_set_vuse (stmt2, gimple_vuse (stmt1)); >+ gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt1); >+ gsi_replace (&gsi2, stmt2, true); >+ } >+ else >+ { >+ gimple_call_set_fndecl (stmt1, fndecl); >+ 
update_stmt (stmt1); >+ } > reciprocal_stats.rfuncs_inserted++; > > FOR_EACH_IMM_USE_STMT (stmt, ui, arg1) >--- gcc/config/i386/i386.c.jj 2015-11-25 09:49:57.000000000 +0100 >+++ gcc/config/i386/i386.c 2015-11-27 17:24:30.743625244 +0100 >@@ -42680,16 +42680,40 @@ ix86_vectorize_builtin_scatter (const_tr > reciprocal of the function, or NULL_TREE if not available. */ > > static tree >-ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool) >+ix86_builtin_reciprocal (gcall *call) > { > if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p () > && flag_finite_math_only && !flag_trapping_math > && flag_unsafe_math_optimizations)) > return NULL_TREE; > >- if (md_fn) >+ if (gimple_call_internal_p (call)) >+ switch (gimple_call_internal_fn (call)) >+ { >+ tree type; >+ case IFN_SQRT: >+ type = TREE_TYPE (gimple_call_lhs (call)); >+ switch (TYPE_MODE (type)) >+ { >+ /* Vectorized version of sqrt to rsqrt conversion. */ >+ case V4SFmode: >+ return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR); >+ >+ case V8SFmode: >+ return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256); >+ >+ default: >+ return NULL_TREE; >+ } >+ >+ default: >+ return NULL_TREE; >+ } >+ >+ tree fndecl = gimple_call_fndecl (call); >+ if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD) > /* Machine dependent builtins. */ >- switch (fn) >+ switch (DECL_FUNCTION_CODE (fndecl)) > { > /* Vectorized version of sqrt to rsqrt conversion. */ > case IX86_BUILTIN_SQRTPS_NR: >@@ -42703,7 +42727,7 @@ ix86_builtin_reciprocal (unsigned int fn > } > else > /* Normal builtins. */ >- switch (fn) >+ switch (DECL_FUNCTION_CODE (fndecl)) > { > /* Sqrt to rsqrt conversion. */ > case BUILT_IN_SQRTF: >--- gcc/config/rs6000/rs6000.c.jj 2015-11-26 10:41:04.000000000 +0100 >+++ gcc/config/rs6000/rs6000.c 2015-11-27 17:26:11.238203965 +0100 >@@ -32643,14 +32643,42 @@ rs6000_memory_move_cost (machine_mode mo > reciprocal of the function, or NULL_TREE if not available. 
*/ > > static tree >-rs6000_builtin_reciprocal (unsigned int fn, bool md_fn, >- bool sqrt ATTRIBUTE_UNUSED) >+rs6000_builtin_reciprocal (gcall *call) > { > if (optimize_insn_for_size_p ()) > return NULL_TREE; > >- if (md_fn) >- switch (fn) >+ if (gimple_call_internal_p (call)) >+ switch (gimple_call_internal_fn (call)) >+ { >+ tree type; >+ case IFN_SQRT: >+ type = TREE_TYPE (gimple_call_lhs (call)); >+ switch (TYPE_MODE (type)) >+ { >+ case V2DFmode: >+ if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode)) >+ return NULL_TREE; >+ >+ return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF]; >+ >+ case V4SFmode: >+ if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode)) >+ return NULL_TREE; >+ >+ return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF]; >+ >+ default: >+ return NULL_TREE; >+ } >+ >+ default: >+ return NULL_TREE; >+ } >+ >+ tree fndecl = gimple_call_fndecl (call); >+ if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD) >+ switch (DECL_FUNCTION_CODE (fndecl)) > { > case VSX_BUILTIN_XVSQRTDP: > if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode)) >@@ -32669,7 +32697,7 @@ rs6000_builtin_reciprocal (unsigned int > } > > else >- switch (fn) >+ switch (DECL_FUNCTION_CODE (fndecl)) > { > case BUILT_IN_SQRT: > if (!RS6000_RECIP_AUTO_RSQRTE_P (DFmode)) >--- gcc/config/aarch64/aarch64.c.jj 2015-11-23 17:13:34.000000000 +0100 >+++ gcc/config/aarch64/aarch64.c 2015-11-27 17:14:22.295230125 +0100 >@@ -7103,19 +7103,21 @@ aarch64_memory_move_cost (machine_mode m > reciprocal square root builtins. */ > > static tree >-aarch64_builtin_reciprocal (unsigned int fn, >- bool md_fn, >- bool) >+aarch64_builtin_reciprocal (gcall *call) > { > if (flag_trapping_math > || !flag_unsafe_math_optimizations > || optimize_size > || ! 
(aarch64_tune_params.extra_tuning_flags > & AARCH64_EXTRA_TUNE_RECIP_SQRT)) >- { > return NULL_TREE; >- } > >+ if (gimple_call_internal_p (call)) >+ return NULL_TREE; >+ >+ tree fndecl = gimple_call_fndecl (call); >+ enum built_in_function fn = DECL_FUNCTION_CODE (fndecl); >+ bool md_fn = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD; > return aarch64_builtin_rsqrt (fn, md_fn); > } > >--- gcc/doc/tm.texi.jj 2015-11-18 11:19:16.000000000 +0100 >+++ gcc/doc/tm.texi 2015-11-27 16:48:40.388031894 +0100 >@@ -5608,14 +5608,10 @@ be placed in an @code{object_block} stru > The default version returns true for all decls. > @end deftypefn > >-@deftypefn {Target Hook} tree TARGET_BUILTIN_RECIPROCAL (unsigned >@var{fn}, bool @var{md_fn}, bool @var{sqrt}) >+@deftypefn {Target Hook} tree TARGET_BUILTIN_RECIPROCAL (gcall >*@var{call}) >This hook should return the DECL of a function that implements >reciprocal of >-the builtin function with builtin function code @var{fn}, or >-@code{NULL_TREE} if such a function is not available. @var{md_fn} is >true >-when @var{fn} is a code of a machine-dependent builtin function. When >-@var{sqrt} is true, additional optimizations that apply only to the >reciprocal >-of a square root function are performed, and only reciprocals of >@code{sqrt} >-function are valid. >+the builtin or internal function call @var{call}, or >+@code{NULL_TREE} if such a function is not available. > @end deftypefn > >@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD >(void) > > Jakub