Hi Nathan! On Mon, 17 Aug 2015 15:30:16 -0400, Nathan Sidwell <nat...@acm.org> wrote: > I've committed this patch to add a new pair of internal functions. These will > be used in implementing reductions. > > They'll be emitted around reduction finalization, and implement the locking > required for the general case of combining reduction values. They may be > transformed in the oacc_xform pass, and the default behaviour is to delete them, > if there is no RTL expander. For PTX we delete them if they are at the vector > level. > > This avoids needing machine-specific builtins to expand to, and thus should > result in less backend code duplication.
With the __builtin_nvptx_lock and __builtin_nvptx_unlock builtins removed, should the gcc.target/nvptx/spinlock-1.c and gcc.target/nvptx/spinlock-2.c test cases then be removed, too, or should these be re-written differently? For reference: $ grep ^ gcc/testsuite/gcc.target/nvptx/spinlock-*.c gcc/testsuite/gcc.target/nvptx/spinlock-1.c:/* { dg-do compile } */ gcc/testsuite/gcc.target/nvptx/spinlock-1.c:void Foo () gcc/testsuite/gcc.target/nvptx/spinlock-1.c:{ gcc/testsuite/gcc.target/nvptx/spinlock-1.c: __builtin_nvptx_lock (0); gcc/testsuite/gcc.target/nvptx/spinlock-1.c: __builtin_nvptx_unlock (0); gcc/testsuite/gcc.target/nvptx/spinlock-1.c:} gcc/testsuite/gcc.target/nvptx/spinlock-1.c: gcc/testsuite/gcc.target/nvptx/spinlock-1.c: gcc/testsuite/gcc.target/nvptx/spinlock-1.c:/* { dg-final { scan-assembler-times ".atom.global.cas.b32" 2 } } */ gcc/testsuite/gcc.target/nvptx/spinlock-1.c:/* { dg-final { scan-assembler ".global .u32 __global_lock;" } } */ gcc/testsuite/gcc.target/nvptx/spinlock-1.c:/* { dg-final { scan-assembler-not ".shared .u32 __shared_lock;" } } */ gcc/testsuite/gcc.target/nvptx/spinlock-2.c:/* { dg-do compile } */ gcc/testsuite/gcc.target/nvptx/spinlock-2.c:void Foo () gcc/testsuite/gcc.target/nvptx/spinlock-2.c:{ gcc/testsuite/gcc.target/nvptx/spinlock-2.c: __builtin_nvptx_lock (1); gcc/testsuite/gcc.target/nvptx/spinlock-2.c: __builtin_nvptx_unlock (1); gcc/testsuite/gcc.target/nvptx/spinlock-2.c:} gcc/testsuite/gcc.target/nvptx/spinlock-2.c: gcc/testsuite/gcc.target/nvptx/spinlock-2.c:/* { dg-final { scan-assembler-times ".atom.shared.cas.b32" 2 } } */ gcc/testsuite/gcc.target/nvptx/spinlock-2.c:/* { dg-final { scan-assembler ".shared .u32 __shared_lock;" } } */ gcc/testsuite/gcc.target/nvptx/spinlock-2.c:/* { dg-final { scan-assembler-not ".global .u32 __global_lock;" } } */ > 2015-08-17 Nathan Sidwell <nat...@codesourcery.com> > > * target.def (lock_unlock): New GOACC hook. > * targhooks.h (default_goacc_lock_unlock): Declare. 
> * doc/tm.texi.in (TARGET_GOACC_LOCK_UNLOCK): Add. > * doc/tm.texi: Rebuilt. > * internal-fn.def (GOACC_LOCK, GOACC_UNLOCK): New. > * internal-fn.c (expand_GOACC_LOCK, expand_GOACC_UNLOCK): New. > * omp-low.c (execute_oacc_transform): Add lock/unlock handling. > (default_goacc_lock_unlock): New. > * config/nvptx/nvptx-protos.h (nvptx_expand_oacc_lock_unlock): Declare. > * config/nvptx/nvptx.md (UNSPECV_UNLOCK): Delete. > (oacc_lock, oacc_unlock): New expanders. > (nvptx_spinlock, nvptx_spinunlock): Use UNSPECV_LOCK. > * config/nvptx/nvptx.c (nvptx_expand_oacc_lock_unlock): New. > (nvptx_expand_lock_unlock): Delete. > (nvptx_expand_lock, nvptx_expand_unlock): Delete. > (nvptx_expand_work_red_addr): Fixup address generation. > (enum nvptx_types): Delete NT_VOID_UINT. > (builtins): Delete nvptx_lock and nvptx_unlock. > (nvptx_init_builtins): Adjust. > (nvptx_xform_lock_unlock): New. > (TARGET_GOACC_LOCK_UNLOCK): Override. > > Index: gcc/config/nvptx/nvptx-protos.h > =================================================================== > --- gcc/config/nvptx/nvptx-protos.h (revision 226951) > +++ gcc/config/nvptx/nvptx-protos.h (working copy) > @@ -34,6 +34,7 @@ extern const char *nvptx_section_for_dec > #ifdef RTX_CODE > extern void nvptx_expand_oacc_fork (rtx); > extern void nvptx_expand_oacc_join (rtx); > +extern void nvptx_expand_oacc_lock_unlock (rtx, bool); > extern void nvptx_expand_call (rtx, rtx); > extern rtx nvptx_expand_compare (rtx); > extern const char *nvptx_ptx_type_from_mode (machine_mode, bool); > Index: gcc/config/nvptx/nvptx.md > =================================================================== > --- gcc/config/nvptx/nvptx.md (revision 226951) > +++ gcc/config/nvptx/nvptx.md (working copy) > @@ -61,7 +61,6 @@ > > (define_c_enum "unspecv" [ > UNSPECV_LOCK > - UNSPECV_UNLOCK > UNSPECV_CAS > UNSPECV_XCHG > UNSPECV_BARSYNC > @@ -1366,6 +1365,26 @@ > return asms[INTVAL (operands[1])]; > }) > > +(define_expand "oacc_lock" > + [(unspec_volatile:SI 
[(match_operand:SI 0 "const_int_operand" "") > + (match_operand:SI 1 "const_int_operand" "")] > + UNSPECV_LOCK)] > + "" > +{ > + nvptx_expand_oacc_lock_unlock (operands[0], true); > + DONE; > +}) > + > +(define_expand "oacc_unlock" > + [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "") > + (match_operand:SI 1 "const_int_operand" "")] > + UNSPECV_LOCK)] > + "" > +{ > + nvptx_expand_oacc_lock_unlock (operands[0], false); > + DONE; > +}) > + > (define_insn "nvptx_fork" > [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")] > UNSPECV_FORK)] > @@ -1576,7 +1595,7 @@ > [(parallel > [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m") > (match_operand:SI 1 "const_int_operand" "i")] > - UNSPECV_UNLOCK) > + UNSPECV_LOCK) > (match_operand:SI 2 "register_operand" "=R") > (match_operand:BI 3 "register_operand" "=R") > (label_ref (match_operand 4 "" ""))])] > @@ -1586,7 +1605,7 @@ > (define_insn "nvptx_spinunlock" > [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m") > (match_operand:SI 1 "const_int_operand" "i")] > - UNSPECV_UNLOCK) > + UNSPECV_LOCK) > (match_operand:SI 2 "register_operand" "=R")] > "" > "atom%R1.exch.b32 %2,%0,0;") > Index: gcc/config/nvptx/nvptx.c > =================================================================== > --- gcc/config/nvptx/nvptx.c (revision 226951) > +++ gcc/config/nvptx/nvptx.c (working copy) > @@ -1164,6 +1164,39 @@ nvptx_expand_oacc_join (rtx mode) > emit_insn (gen_nvptx_joining (mode)); > } > > +/* Expander for reduction locking and unlocking. We expect SRC to be > + gang or worker level. */ > + > +void > +nvptx_expand_oacc_lock_unlock (rtx src, bool lock) > +{ > + unsigned HOST_WIDE_INT kind; > + rtx pat; > + > + kind = INTVAL (src) == GOMP_DIM_GANG ? 
LOCK_GLOBAL : LOCK_SHARED; > + lock_used[kind] = true; > + > + rtx mem = gen_rtx_MEM (SImode, lock_syms[kind]); > + rtx space = GEN_INT (lock_space[kind]); > + rtx barrier = gen_nvptx_membar (GEN_INT (lock_level[kind])); > + rtx tmp = gen_reg_rtx (SImode); > + > + if (!lock) > + emit_insn (barrier); > + if (lock) > + { > + rtx_code_label *label = gen_label_rtx (); > + > + LABEL_NUSES (label)++; > + pat = gen_nvptx_spinlock (mem, space, tmp, gen_reg_rtx (BImode), > label); > + } > + else > + pat = gen_nvptx_spinunlock (mem, space, tmp); > + emit_insn (pat); > + if (lock) > + emit_insn (barrier); > +} > + > /* Generate instruction(s) to unpack a 64 bit object into 2 32 bit > objects. */ > > @@ -3306,62 +3339,6 @@ nvptx_expand_shuffle_down (tree exp, rtx > return target; > } > > -/* Expander for locking and unlocking. */ > -static rtx > -nvptx_expand_lock_unlock (tree exp, bool lock) > -{ > - rtx src = expand_expr (CALL_EXPR_ARG (exp, 0), > - NULL_RTX, SImode, EXPAND_NORMAL); > - unsigned HOST_WIDE_INT kind; > - rtx pat; > - > - kind = GET_CODE (src) == CONST_INT ? INTVAL (src) : LOCK_MAX; > - if (kind >= LOCK_MAX) > - error ("builtin %D requires constant argument less than %u", > - get_callee_fndecl (exp), LOCK_MAX); > - lock_used[kind] = true; > - > - rtx mem = gen_rtx_MEM (SImode, lock_syms[kind]); > - rtx space = GEN_INT (lock_space[kind]); > - rtx barrier = gen_nvptx_membar (GEN_INT (lock_level[kind])); > - > - if (!lock) > - emit_insn (barrier); > - if (lock) > - { > - rtx_code_label *label = gen_label_rtx (); > - > - LABEL_NUSES (label)++; > - pat = gen_nvptx_spinlock (mem, space, > - gen_reg_rtx (SImode), gen_reg_rtx (BImode), > - label); > - } > - else > - pat = gen_nvptx_spinunlock (mem, space, gen_reg_rtx (SImode)); > - emit_insn (pat); > - if (lock) > - emit_insn (barrier); > - return const0_rtx; > -} > - > -/* Lock expander. 
*/ > - > -static rtx > -nvptx_expand_lock (tree exp, rtx ARG_UNUSED (target), > - machine_mode ARG_UNUSED (mode), int ARG_UNUSED (ignore)) > -{ > - return nvptx_expand_lock_unlock (exp, true); > -} > - > -/* Unlock expander. */ > - > -static rtx > -nvptx_expand_unlock (tree exp, rtx ARG_UNUSED (target), > - machine_mode ARG_UNUSED (mode), int ARG_UNUSED (ignore)) > -{ > - return nvptx_expand_lock_unlock (exp, false); > -} > - > /* Worker reduction address expander. */ > static rtx > nvptx_expand_work_red_addr (tree exp, rtx target, > @@ -3413,12 +3390,16 @@ nvptx_expand_work_red_addr (tree exp, rt > /* Return offset into worker reduction array. */ > unsigned offset = loop.vars[ix].second; > > - rtx addr = gen_reg_rtx (Pmode); > - emit_move_insn (addr, > - gen_rtx_PLUS (Pmode, worker_red_sym, GEN_INT (offset))); > + emit_insn (gen_rtx_SET (target, worker_red_sym)); > + > + if (offset) > + emit_insn (gen_rtx_SET (target, > + gen_rtx_PLUS (Pmode, target, GEN_INT (offset)))); > + > emit_insn (gen_rtx_SET (target, > - gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), > + gen_rtx_UNSPEC (Pmode, gen_rtvec (1, target), > UNSPEC_FROM_SHARED))); > + > return target; > } > > @@ -3428,7 +3409,6 @@ enum nvptx_types > NT_ULL_ULL_INT, > NT_FLT_FLT_INT, > NT_DBL_DBL_INT, > - NT_VOID_UINT, > NT_UINTPTR_UINT_UINT, > NT_ULLPTR_UINT_UINT, > NT_FLTPTR_UINT_UINT, > @@ -3446,8 +3426,6 @@ static const struct builtin_description > nvptx_expand_shuffle_down}, > {"__builtin_nvptx_shuffle_downd", NT_DBL_DBL_INT, > nvptx_expand_shuffle_down}, > - {"__builtin_nvptx_lock", NT_VOID_UINT, nvptx_expand_lock}, > - {"__builtin_nvptx_unlock", NT_VOID_UINT, nvptx_expand_unlock}, > {"__builtin_nvptx_work_red_addr", NT_UINTPTR_UINT_UINT, > nvptx_expand_work_red_addr}, > {"__builtin_nvptx_work_red_addrll", NT_ULLPTR_UINT_UINT, > @@ -3492,9 +3470,6 @@ nvptx_init_builtins (void) > types[NT_DBL_DBL_INT] > = build_function_type_list (double_type_node, double_type_node, > integer_type_node, NULL_TREE); > - 
types[NT_VOID_UINT] > - = build_function_type_list (void_type_node, unsigned_type_node, > NULL_TREE); > - > types[NT_UINTPTR_UINT_UINT] > = build_function_type_list (build_pointer_type (unsigned_type_node), > unsigned_type_node, unsigned_type_node, > @@ -3628,6 +3603,20 @@ nvptx_xform_fork_join (gimple_stmt_itera > > return false; > } > + > +/* Check lock & unlock. We only need the gang- & worker-level ones. > + */ > + > +static bool > +nvptx_xform_lock_unlock (gimple_stmt_iterator *ARG_UNUSED (gsi), > + gimple stmt, > + const int *ARG_UNUSED (dims), > + bool ARG_UNUSED (is_fork)) > +{ > + tree arg = gimple_call_arg (stmt, 0); > + > + return TREE_INT_CST_LOW (arg) > GOMP_DIM_WORKER; > +} > > #undef TARGET_OPTION_OVERRIDE > #define TARGET_OPTION_OVERRIDE nvptx_option_override > @@ -3732,6 +3721,9 @@ nvptx_xform_fork_join (gimple_stmt_itera > #undef TARGET_GOACC_FORK_JOIN > #define TARGET_GOACC_FORK_JOIN nvptx_xform_fork_join > > +#undef TARGET_GOACC_LOCK_UNLOCK > +#define TARGET_GOACC_LOCK_UNLOCK nvptx_xform_lock_unlock > + > struct gcc_target targetm = TARGET_INITIALIZER; > > #include "gt-nvptx.h" > Index: gcc/targhooks.h > =================================================================== > --- gcc/targhooks.h (revision 226951) > +++ gcc/targhooks.h (working copy) > @@ -111,6 +111,8 @@ extern bool default_goacc_validate_dims > extern unsigned default_goacc_dim_limit (unsigned); > extern bool default_goacc_fork_join (gimple_stmt_iterator *, gimple, > const int [], bool); > +extern bool default_goacc_lock_unlock (gimple_stmt_iterator *, gimple, > + const int [], bool); > > /* These are here, and not in hooks.[ch], because not all users of > hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS. 
*/ > Index: gcc/target.def > =================================================================== > --- gcc/target.def (revision 226951) > +++ gcc/target.def (working copy) > @@ -1670,6 +1670,15 @@ default hook returns true, if there is n > bool, (gimple_stmt_iterator *, gimple, const int[], bool), > default_goacc_fork_join) > > +DEFHOOK > +(lock_unlock, > +"This hook should convert IFN_GOACC_LOCK and IFN_GOACC_UNLOCK function\n\ > +calls to target-specific gimple. It is executed during the oacc_xform\n\ > +pass. It should return true, if the functions should be deleted. The\n\ > +default hook returns true, if there is no RTL expanders for them.", > +bool, (gimple_stmt_iterator *, gimple, const int[], bool), > +default_goacc_lock_unlock) > + > HOOK_VECTOR_END (goacc) > > /* Functions relating to vectorization. */ > Index: gcc/internal-fn.def > =================================================================== > --- gcc/internal-fn.def (revision 226951) > +++ gcc/internal-fn.def (working copy) > @@ -83,3 +83,9 @@ DEF_INTERNAL_FN (GOACC_JOIN, ECF_NOTHROW > single INTEGER_CST argument. */ > DEF_INTERNAL_FN (GOACC_DIM_SIZE, ECF_CONST | ECF_NOTHROW | ECF_LEAF, ".") > DEF_INTERNAL_FN (GOACC_DIM_POS, ECF_PURE | ECF_NOTHROW | ECF_LEAF, ".") > + > +/* LOCK and UNLOCK operate a mutex used for reductions. The first > + argument is the compute dimension of the reduction and the second > + argument is a loop identifer. 
*/ > +DEF_INTERNAL_FN (GOACC_LOCK, ECF_NOTHROW | ECF_LEAF, "..") > +DEF_INTERNAL_FN (GOACC_UNLOCK, ECF_NOTHROW | ECF_LEAF, "..") > Index: gcc/omp-low.c > =================================================================== > --- gcc/omp-low.c (revision 226951) > +++ gcc/omp-low.c (working copy) > @@ -14743,19 +14743,24 @@ execute_oacc_transform () > { > default: break; > > + case IFN_GOACC_DIM_POS: > case IFN_GOACC_DIM_SIZE: > - oacc_xform_dim (&gsi, stmt, dims, false); > + oacc_xform_dim (&gsi, stmt, dims, > + ifn_code == IFN_GOACC_DIM_POS); > break; > > - case IFN_GOACC_DIM_POS: > - oacc_xform_dim (&gsi, stmt, dims, true); > - break; > + case IFN_GOACC_LOCK: > + case IFN_GOACC_UNLOCK: > + if (targetm.goacc.lock_unlock > + (&gsi, stmt, dims, ifn_code == IFN_GOACC_LOCK)) > + goto remove; > > case IFN_GOACC_FORK: > case IFN_GOACC_JOIN: > if (targetm.goacc.fork_join > (&gsi, stmt, dims, ifn_code == IFN_GOACC_FORK)) > { > + remove: > replace_uses_by (gimple_vdef (stmt), > gimple_vuse (stmt)); > gsi_remove (&gsi, true); > @@ -14814,7 +14819,6 @@ default_goacc_fork_join (gimple_stmt_ite > gimple ARG_UNUSED (stmt), > const int *ARG_UNUSED (dims), bool is_fork) > { > - /* If there is no expander, we can delete the functions. */ > if (is_fork) > { > #ifndef HAVE_oacc_fork > @@ -14827,6 +14831,31 @@ default_goacc_fork_join (gimple_stmt_ite > return true; > #endif > } > + > + return false; > +} > + > +/* Default lock/unlock early expander. Delete the function calls if > + there is no RTL expander. 
*/ > + > +bool > +default_goacc_lock_unlock (gimple_stmt_iterator *ARG_UNUSED (gsi), > + gimple ARG_UNUSED (stmt), > + const int*ARG_UNUSED (dims), > + bool is_lock) > +{ > + if (is_lock) > + { > +#ifndef HAVE_oacc_lock > + return true; > +#endif > + } > + else > + { > +#ifndef HAVE_oacc_unlock > + return true; > +#endif > + } > > return false; > } > Index: gcc/internal-fn.c > =================================================================== > --- gcc/internal-fn.c (revision 226951) > +++ gcc/internal-fn.c (working copy) > @@ -2025,6 +2025,32 @@ expand_GOACC_DIM_POS (gcall *ARG_UNUSED > #endif > } > > +static void > +expand_GOACC_LOCK (gcall *ARG_UNUSED (stmt)) > +{ > +#ifdef HAVE_oacc_lock > + rtx dim = expand_normal (gimple_call_arg (stmt, 0)); > + rtx id = expand_normal (gimple_call_arg (stmt, 1)); > + > + emit_insn (gen_oacc_lock (dim, id)); > +#else > + gcc_unreachable (); > +#endif > +} > + > +static void > +expand_GOACC_UNLOCK (gcall *ARG_UNUSED (stmt)) > +{ > +#ifdef HAVE_oacc_unlock > + rtx dim = expand_normal (gimple_call_arg (stmt, 0)); > + rtx id = expand_normal (gimple_call_arg (stmt, 1)); > + > + emit_insn (gen_oacc_unlock (dim, id)); > +#else > + gcc_unreachable (); > +#endif > +} > + > /* Routines to expand each internal function, indexed by function number. > Each routine has the prototype: > > Index: gcc/doc/tm.texi > =================================================================== > --- gcc/doc/tm.texi (revision 226951) > +++ gcc/doc/tm.texi (working copy) > @@ -5760,6 +5760,13 @@ pass. It should return true, if the fun > default hook returns true, if there is no RTL expanders for them. > @end deftypefn > > +@deftypefn {Target Hook} bool TARGET_GOACC_LOCK_UNLOCK (gimple_stmt_iterator > *@var{}, @var{gimple}, const @var{int[]}, @var{bool}) > +This hook should convert IFN_GOACC_LOCK and IFN_GOACC_UNLOCK function > +calls to target-specific gimple. It is executed during the oacc_xform > +pass. 
It should return true, if the functions should be deleted. The > +default hook returns true, if there is no RTL expanders for them. > +@end deftypefn > + > @node Anchored Addresses > @section Anchored Addresses > @cindex anchored addresses > Index: gcc/doc/tm.texi.in > =================================================================== > --- gcc/doc/tm.texi.in (revision 226951) > +++ gcc/doc/tm.texi.in (working copy) > @@ -4251,6 +4251,8 @@ address; but often a machine-dependent > > @hook TARGET_GOACC_FORK_JOIN > > +@hook TARGET_GOACC_LOCK_UNLOCK > + > @node Anchored Addresses > @section Anchored Addresses > @cindex anchored addresses Grüße, Thomas
signature.asc
Description: PGP signature