Hi! On Wed, 24 Jan 2018 14:56:28 +0100, Tom de Vries <tom_devr...@mentor.com> wrote: > On 01/24/2018 12:03 PM, Jakub Jelinek wrote: > > On Wed, Jan 24, 2018 at 11:41:45AM +0100, Tom de Vries wrote: > >> +/* Insert a dummy ptx insn when encountering a branch to a label with no > >> ptx > >> + insn inbetween the branch and the label. This works around a JIT bug > >> + observed at driver version 384.111, at -O0 for sm_50. */
> [...] committed as attached. I pushed this to openacc-gcc-7-branch, including its later bug fix "[nvptx] Fix prevent_branch_around_nothing": commit 4cad9fa6b7a85e01da260d0b9e20de30d53f1881 Author: Tom de Vries <t...@codesourcery.com> Date: Wed Jan 24 13:52:12 2018 +0000 [nvptx, PR83589] Workaround for branch-around-nothing JIT bug gcc/ PR target/83589 * config/nvptx/nvptx.c (WORKAROUND_PTXJIT_BUG_2): Define to 1. (nvptx_pc_set, nvptx_condjump_label): New function. Copy from jump.c. Add strict parameter. (prevent_branch_around_nothing): Insert dummy insn between branch to label and label with no ptx insn inbetween. * config/nvptx/nvptx.md (define_insn "fake_nop"): New insn. PR target/84954 * config/nvptx/nvptx.c (prevent_branch_around_nothing): Also update seen_label if seen_label is already set. libgomp/ PR target/83589 * testsuite/libgomp.oacc-c-c++-common/pr83589.c: New test. (cherry picked from trunk r257016 and r258674) --- gcc/ChangeLog.openacc | 16 ++++ gcc/config/nvptx/nvptx.c | 93 ++++++++++++++++++++++ gcc/config/nvptx/nvptx.md | 9 +++ libgomp/ChangeLog.openacc | 5 ++ .../testsuite/libgomp.oacc-c-c++-common/pr83589.c | 21 +++++ 5 files changed, 144 insertions(+) diff --git gcc/ChangeLog.openacc gcc/ChangeLog.openacc index 172f1fc..4d13080 100644 --- gcc/ChangeLog.openacc +++ gcc/ChangeLog.openacc @@ -1,3 +1,19 @@ +2018-03-20 Tom de Vries <t...@codesourcery.com> + + PR target/84954 + * config/nvptx/nvptx.c (prevent_branch_around_nothing): Also update + seen_label if seen_label is already set. + +2018-01-24 Tom de Vries <t...@codesourcery.com> + + PR target/83589 + * config/nvptx/nvptx.c (WORKAROUND_PTXJIT_BUG_2): Define to 1. + (nvptx_pc_set, nvptx_condjump_label): New function. Copy from jump.c. + Add strict parameter. + (prevent_branch_around_nothing): Insert dummy insn between branch to + label and label with no ptx insn inbetween. + * config/nvptx/nvptx.md (define_insn "fake_nop"): New insn. + 2018-05-09 Tom de Vries <t...@codesourcery.com> backport from trunk: diff --git gcc/config/nvptx/nvptx.c gcc/config/nvptx/nvptx.c index d659ab4..f636d8d 100644 --- gcc/config/nvptx/nvptx.c +++ gcc/config/nvptx/nvptx.c @@ -77,6 +77,7 @@ #include "target-def.h" #define WORKAROUND_PTXJIT_BUG 1 +#define WORKAROUND_PTXJIT_BUG_2 1 #define WORKAROUND_PTXJIT_BUG_3 1 /* Define dimension sizes for known hardware. */ @@ -4636,6 +4637,94 @@ populate_offload_attrs (offload_attrs *oa) oa->max_workers = oa->num_workers; } +#if WORKAROUND_PTXJIT_BUG_2 +/* Variant of pc_set that only requires JUMP_P (INSN) if STRICT. This variant + is needed in the nvptx target because the branches generated for + parititioning are NONJUMP_INSN_P, not JUMP_P. */ + +static rtx +nvptx_pc_set (const rtx_insn *insn, bool strict = true) +{ + rtx pat; + if ((strict && !JUMP_P (insn)) + || (!strict && !INSN_P (insn))) + return NULL_RTX; + pat = PATTERN (insn); + + /* The set is allowed to appear either as the insn pattern or + the first set in a PARALLEL. */ + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + if (GET_CODE (pat) == SET && GET_CODE (SET_DEST (pat)) == PC) + return pat; + + return NULL_RTX; +} + +/* Variant of condjump_label that only requires JUMP_P (INSN) if STRICT. */ + +static rtx +nvptx_condjump_label (const rtx_insn *insn, bool strict = true) +{ + rtx x = nvptx_pc_set (insn, strict); + + if (!x) + return NULL_RTX; + x = SET_SRC (x); + if (GET_CODE (x) == LABEL_REF) + return x; + if (GET_CODE (x) != IF_THEN_ELSE) + return NULL_RTX; + if (XEXP (x, 2) == pc_rtx && GET_CODE (XEXP (x, 1)) == LABEL_REF) + return XEXP (x, 1); + if (XEXP (x, 1) == pc_rtx && GET_CODE (XEXP (x, 2)) == LABEL_REF) + return XEXP (x, 2); + return NULL_RTX; +} + +/* Insert a dummy ptx insn when encountering a branch to a label with no ptx + insn inbetween the branch and the label. This works around a JIT bug + observed at driver version 384.111, at -O0 for sm_50. */ + +static void +prevent_branch_around_nothing (void) +{ + rtx_insn *seen_label = NULL; + for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (INSN_P (insn) && condjump_p (insn)) + { + seen_label = label_ref_label (nvptx_condjump_label (insn, false)); + continue; + } + + if (seen_label == NULL) + continue; + + if (NOTE_P (insn) || DEBUG_INSN_P (insn)) + continue; + + if (INSN_P (insn)) + switch (recog_memoized (insn)) + { + case CODE_FOR_nvptx_fork: + case CODE_FOR_nvptx_forked: + case CODE_FOR_nvptx_joining: + case CODE_FOR_nvptx_join: + continue; + default: + seen_label = NULL; + continue; + } + + if (LABEL_P (insn) && insn == seen_label) + emit_insn_before (gen_fake_nop (), insn); + + seen_label = NULL; + } + } +#endif + #ifdef WORKAROUND_PTXJIT_BUG_3 /* Insert two membar.cta insns inbetween two subsequent bar.sync insns. This works around a hang observed at driver version 390.48 for sm_50. */ @@ -4754,6 +4843,10 @@ nvptx_reorg (void) if (TARGET_UNIFORM_SIMT) nvptx_reorg_uniform_simt (); +#if WORKAROUND_PTXJIT_BUG_2 + prevent_branch_around_nothing (); +#endif + #ifdef WORKAROUND_PTXJIT_BUG_3 workaround_barsyncs (); #endif diff --git gcc/config/nvptx/nvptx.md gcc/config/nvptx/nvptx.md index ab63cf8..9e3001c 100644 --- gcc/config/nvptx/nvptx.md +++ gcc/config/nvptx/nvptx.md @@ -981,6 +981,15 @@ "" "exit;") +(define_insn "fake_nop" + [(const_int 2)] + "" + "{ + .reg .u32 %%nop_src; + .reg .u32 %%nop_dst; + mov.u32 %%nop_dst, %%nop_src; + }") + (define_insn "return" [(return)] "" diff --git libgomp/ChangeLog.openacc libgomp/ChangeLog.openacc index d1cc107..add3b24 100644 --- libgomp/ChangeLog.openacc +++ libgomp/ChangeLog.openacc @@ -1,3 +1,8 @@ +2018-01-24 Tom de Vries <t...@codesourcery.com> + + PR target/83589 + * testsuite/libgomp.oacc-c-c++-common/pr83589.c: New test. + 2018-05-09 Cesar Philippidis <ce...@codesourcery.com> * libgomp.oacc-fortran/deviceptr-1.f90: Remove xfail for -O2 and -O3. diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/pr83589.c libgomp/testsuite/libgomp.oacc-c-c++-common/pr83589.c new file mode 100644 index 0000000..a6ed5cf --- /dev/null +++ libgomp/testsuite/libgomp.oacc-c-c++-common/pr83589.c @@ -0,0 +1,21 @@ +/* { dg-do run } */ +/* { dg-set-target-env-var GOMP_NVPTX_JIT "-O0" } */ + +#define n 32 + +int +main (void) +{ + int arr_a[n]; + +#pragma acc parallel copyout(arr_a) num_gangs(1) num_workers(1) vector_length(32) + { + #pragma acc loop vector + for (int m = 0; m < 32; m++) + ; + + #pragma acc loop vector + for (int m = 0; m < 32; m++) + arr_a[m] = 0; + } +} Grüße Thomas