On 06/26/2017 01:44 PM, Tom de Vries wrote:
On 06/26/2017 01:24 PM, Tom de Vries wrote:
Hi,
I've written a patch series to facilitate debugging libgomp OpenACC
test-case failures on the nvptx accelerator.
When running an OpenACC test-case on an nvptx accelerator, the
following happens:
- the plugin obtains the ptx assembly for the acceleration kernels
- it calls the cuda jit to compile and link the ptx into a module
- it loads the module
- it starts an acceleration kernel
The patch series adds these environment variables:
- GOMP_OPENACC_NVPTX_SAVE_TEMPS: a means to save the resulting module
such that it can be investigated using nvdisasm and cuobjdump.
- GOMP_OPENACC_NVPTX_DISASM: a means to see the resulting module in
the debug output, by writing it into a file and calling nvdisasm on
it
- GOMP_OPENACC_NVPTX_JIT: a means to set parameters of the
compilation/linking process, currently supporting:
* -O[0-4], mapping onto CU_JIT_OPTIMIZATION_LEVEL
* -ori, mapping onto CU_JIT_NEW_SM3X_OPT
The patch series consists of these patches:
4. Handle GOMP_OPENACC_NVPTX_JIT=-ori in libgomp nvptx plugin
This patch adds handling of GOMP_OPENACC_NVPTX_JIT=-ori.
Thanks,
- Tom
0004-Handle-GOMP_OPENACC_NVPTX_JIT-ori-in-libgomp-nvptx-plugin.patch
- CU_JIT_LOG_VERBOSE = 12
+ CU_JIT_LOG_VERBOSE = 12,
+ CU_JIT_NEW_SM3X_OPT = 15
} CUjit_option;
Adding the constant to plugin/cuda/cuda.h makes sure the constant is
available when not linking the plugin against CUDA.
But when linking against CUDA 7.5 or earlier, the build still fails
because that version's cuda.h does not define the constant yet. Fixed by
defining the constant in plugin-nvptx.c when CUDA_VERSION < 8000.
Thanks,
- Tom
Handle GOMP_OPENACC_NVPTX_JIT=-ori in libgomp nvptx plugin
2017-06-26 Tom de Vries <t...@codesourcery.com>
* plugin/cuda/cuda.h (enum CUjit_option): Add CU_JIT_NEW_SM3X_OPT.
* plugin/plugin-nvptx.c (process_GOMP_OPENACC_NVPTX_JIT): Add
gomp_openacc_nvptx_ori parameter. Handle -ori.
(link_ptx): Add CU_JIT_NEW_SM3X_OPT to opts.
---
libgomp/plugin/cuda/cuda.h | 3 ++-
libgomp/plugin/plugin-nvptx.c | 34 +++++++++++++++++++++++++++++-----
2 files changed, 31 insertions(+), 6 deletions(-)
diff --git a/libgomp/plugin/cuda/cuda.h b/libgomp/plugin/cuda/cuda.h
index 75dfe3d..4644870 100644
--- a/libgomp/plugin/cuda/cuda.h
+++ b/libgomp/plugin/cuda/cuda.h
@@ -89,7 +89,8 @@ typedef enum {
CU_JIT_ERROR_LOG_BUFFER = 5,
CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES = 6,
CU_JIT_OPTIMIZATION_LEVEL = 7,
- CU_JIT_LOG_VERBOSE = 12
+ CU_JIT_LOG_VERBOSE = 12,
+ CU_JIT_NEW_SM3X_OPT = 15
} CUjit_option;
typedef enum {
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 594ca39..41ecfec 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -143,6 +143,10 @@ init_cuda_lib (void)
#include "secure_getenv.h"
+#if CUDA_VERSION < 8000
+#define CU_JIT_NEW_SM3X_OPT 15
+#endif
+
/* Convenience macros for the frequently used CUDA library call and
error handling sequence as well as CUDA library calls that
do the error checking themselves or don't do it at all. */
@@ -980,13 +984,15 @@ debug_linkout (void *linkout, size_t linkoutsize)
}
static void
-process_GOMP_OPENACC_NVPTX_JIT (intptr_t *gomp_openacc_nvptx_o)
+process_GOMP_OPENACC_NVPTX_JIT (intptr_t *gomp_openacc_nvptx_o,
+ intptr_t *gomp_openacc_nvptx_ori)
{
const char *var_name = "GOMP_OPENACC_NVPTX_JIT";
const char *env_var = getenv (var_name);
notify_var (var_name, env_var);
*gomp_openacc_nvptx_o = 4;
+ *gomp_openacc_nvptx_ori = 0;
if (env_var == NULL)
return;
@@ -1005,6 +1011,14 @@ process_GOMP_OPENACC_NVPTX_JIT (intptr_t *gomp_openacc_nvptx_o)
continue;
}
+ if (c[0] == '-' && c[1] == 'o' && c[2] == 'r' && c[3] == 'i'
+ && (c[4] == '\0' || c[4] == ' '))
+ {
+ *gomp_openacc_nvptx_ori = 1;
+ c += 4;
+ continue;
+ }
+
GOMP_PLUGIN_error ("Error parsing %s", var_name);
break;
}
@@ -1014,8 +1028,8 @@ static bool
link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs,
unsigned num_objs)
{
- CUjit_option opts[7];
- void *optvals[7];
+ CUjit_option opts[8];
+ void *optvals[8];
float elapsed = 0.0;
char elog[1024];
char ilog[16384];
@@ -1043,13 +1057,23 @@ link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs,
optvals[5] = (void *) 1;
static intptr_t gomp_openacc_nvptx_o = -1;
+ static intptr_t gomp_openacc_nvptx_ori = -1;
if (gomp_openacc_nvptx_o == -1)
- process_GOMP_OPENACC_NVPTX_JIT (&gomp_openacc_nvptx_o);
+ process_GOMP_OPENACC_NVPTX_JIT (&gomp_openacc_nvptx_o,
+ &gomp_openacc_nvptx_ori);
opts[6] = CU_JIT_OPTIMIZATION_LEVEL;
optvals[6] = (void *) gomp_openacc_nvptx_o;
- CUDA_CALL (cuLinkCreate, 7, opts, optvals, &linkstate);
+ int nopts = 7;
+ if (gomp_openacc_nvptx_ori)
+ {
+ opts[nopts] = CU_JIT_NEW_SM3X_OPT;
+ optvals[nopts] = (void *) gomp_openacc_nvptx_ori;
+ nopts++;
+ }
+
+ CUDA_CALL (cuLinkCreate, nopts, opts, optvals, &linkstate);
for (; num_objs--; ptx_objs++)
{