Hi! On Thu, 13 Nov 2014 19:09:49 +0100, Jakub Jelinek <ja...@redhat.com> wrote: > On Thu, Nov 13, 2014 at 05:59:11PM +0100, Thomas Schwinge wrote: > > --- gcc/builtins.c > > +++ gcc/builtins.c
> > +/* Expand OpenACC acc_on_device. > > + > > + This has to happen late (that is, not in early folding; > > expand_builtin_*, > > + rather than fold_builtin_*), as we have to act differently for host and > > + acceleration device (ACCEL_COMPILER conditional). */ > > + > > +static rtx > > +expand_builtin_acc_on_device (tree exp, rtx target ATTRIBUTE_UNUSED) > > +{ > > + if (!validate_arglist (exp, INTEGER_TYPE, VOID_TYPE)) > > + return NULL_RTX; > > + > > + tree arg, v1, v2, ret; > > + location_t loc; > > + > > + arg = CALL_EXPR_ARG (exp, 0); > > + arg = builtin_save_expr (arg); > > + loc = EXPR_LOCATION (exp); > > + > > + /* Build: (arg == v1 || arg == v2) ? 1 : 0. */ > > + > > +#ifdef ACCEL_COMPILER > > + v1 = build_int_cst (TREE_TYPE (arg), /* TODO: acc_device_not_host */ 3); > > + v2 = build_int_cst (TREE_TYPE (arg), ACCEL_COMPILER_acc_device); > > +#else > > + v1 = build_int_cst (TREE_TYPE (arg), /* TODO: acc_device_none */ 0); > > + v2 = build_int_cst (TREE_TYPE (arg), /* TODO: acc_device_host */ 2); > > +#endif > > + > > + v1 = fold_build2_loc (loc, EQ_EXPR, integer_type_node, arg, v1); > > + v2 = fold_build2_loc (loc, EQ_EXPR, integer_type_node, arg, v2); > > + > > + /* Can't use TRUTH_ORIF_EXPR, as that is not supported by > > + expand_expr_real*. */ > > + ret = fold_build3_loc (loc, COND_EXPR, integer_type_node, v1, v1, v2); > > + ret = fold_build3_loc (loc, COND_EXPR, integer_type_node, > > + ret, integer_one_node, integer_zero_node); > > + > > + return expand_normal (ret); > > If you can't fold it late (which is indeed a problem for -O0), > then I'd suggest to implement this more RTL-ish. > So, avoid the builtin_save_expr, instead > rtx op = expand_normal (arg); > Don't build v1/v2 as trees (and, please fix the TODOs), but rtxes, (acc_device_* TODOs already resolved earlier on.) > just > rtx v1 = GEN_INT (...); > rtx v2 = GEN_INT (...); > machine_mode mode = TYPE_MODE (TREE_TYPE (arg)); > rtx ret = gen_reg_rtx (TYPE_MODE (integer_type_node)); > emit_move_insn (ret, const0_rtx); > rtx_code_label *done_label = gen_label_rtx (); > emit_cmp_and_jump_insns (op, v1, NE, NULL_RTX, mode, > false, done_label, PROB_EVEN); > emit_cmp_and_jump_insns (op, v2, NE, NULL_RTX, mode, > false, done_label, PROB_EVEN); > emit_move_insn (ret, const1_rtx); > emit_label (done_label); > return ret; > or similar. Thanks for the review/suggestion/code! > Note, it would still be worthwhile to fold the builtin, at least > when optimizing, after IPA. Dunno if we have some property you can check, > and Richard B. could suggest where it would be most appropriate (if GIMPLE > guarded match.pd entry, or what), gimple_fold, etc. I'll make a note to have a look at that later on. > I bet I should handle omp_is_initial_device (); similarly. Yeah. Committed to gomp-4_0-branch in r218858: commit da5ad5aec1c0f9b230ecb2dc00620a5598de5066 Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4> Date: Thu Dec 18 10:42:30 2014 +0000 OpenACC acc_on_device: Make builtin expansion more RTXy. gcc/ * builtins.c (expand_builtin_acc_on_device): Make more RTXy. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@218858 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog.gomp | 5 +++++ gcc/builtins.c | 44 +++++++++++++++++++++----------------------- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp index b370616..a3650c5 100644 --- gcc/ChangeLog.gomp +++ gcc/ChangeLog.gomp @@ -1,3 +1,8 @@ +2014-12-18 Thomas Schwinge <tho...@codesourcery.com> + Jakub Jelinek <ja...@redhat.com> + + * builtins.c (expand_builtin_acc_on_device): Make more RTXy. + 2014-12-17 Thomas Schwinge <tho...@codesourcery.com> Bernd Schmidt <ber...@codesourcery.com> diff --git gcc/builtins.c gcc/builtins.c index fcf3f53..e946521 100644 --- gcc/builtins.c +++ gcc/builtins.c @@ -5889,38 +5889,36 @@ expand_stack_save (void) acceleration device (ACCEL_COMPILER conditional). */ static rtx -expand_builtin_acc_on_device (tree exp, rtx target ATTRIBUTE_UNUSED) +expand_builtin_acc_on_device (tree exp, rtx target) { if (!validate_arglist (exp, INTEGER_TYPE, VOID_TYPE)) return NULL_RTX; - tree arg, v1, v2, ret; - location_t loc; - - arg = CALL_EXPR_ARG (exp, 0); - arg = builtin_save_expr (arg); - loc = EXPR_LOCATION (exp); - - /* Build: (arg == v1 || arg == v2) ? 1 : 0. */ + tree arg = CALL_EXPR_ARG (exp, 0); + /* Return (arg == v1 || arg == v2) ? 1 : 0. */ + machine_mode v_mode = TYPE_MODE (TREE_TYPE (arg)); + rtx v = expand_normal (arg), v1, v2; #ifdef ACCEL_COMPILER - v1 = build_int_cst (TREE_TYPE (arg), GOMP_DEVICE_NOT_HOST); - v2 = build_int_cst (TREE_TYPE (arg), ACCEL_COMPILER_acc_device); + v1 = GEN_INT (GOMP_DEVICE_NOT_HOST); + v2 = GEN_INT (ACCEL_COMPILER_acc_device); #else - v1 = build_int_cst (TREE_TYPE (arg), GOMP_DEVICE_NONE); - v2 = build_int_cst (TREE_TYPE (arg), GOMP_DEVICE_HOST); + v1 = GEN_INT (GOMP_DEVICE_NONE); + v2 = GEN_INT (GOMP_DEVICE_HOST); #endif + machine_mode target_mode = TYPE_MODE (integer_type_node); + if (!REG_P (target) || GET_MODE (target) != target_mode) + target = gen_reg_rtx (target_mode); + emit_move_insn (target, const0_rtx); + rtx_code_label *done_label = gen_label_rtx (); + emit_cmp_and_jump_insns (v, v1, NE, NULL_RTX, v_mode, + false, done_label, PROB_EVEN); + emit_cmp_and_jump_insns (v, v2, NE, NULL_RTX, v_mode, + false, done_label, PROB_EVEN); + emit_move_insn (target, const1_rtx); + emit_label (done_label); - v1 = fold_build2_loc (loc, EQ_EXPR, integer_type_node, arg, v1); - v2 = fold_build2_loc (loc, EQ_EXPR, integer_type_node, arg, v2); - - /* Can't use TRUTH_ORIF_EXPR, as that is not supported by - expand_expr_real*. */ - ret = fold_build3_loc (loc, COND_EXPR, integer_type_node, v1, v1, v2); - ret = fold_build3_loc (loc, COND_EXPR, integer_type_node, - ret, integer_one_node, integer_zero_node); - - return expand_normal (ret); + return target; } Grüße, Thomas
pgpTD1xP58om4.pgp
Description: PGP signature