Hi,
Jakub Jelinek wrote:
+ "#if defined(__STDC_EMBED_FOUND__) && __has_embed (\"%s\") ==
__STDC_EMBED_FOUND__\n"
If this was an attempt to deal gracefully with no #embed support, then
the above would be wrong and should have been
#if defined(__STDC_EMBED_FOUND__) && defined(__has_embed)
#if __has_embed ("whatever") == __STDC_EMBED_FOUND__
I was kind of both – assuming that #embed is available (as it should be
compiled by the accompanied compiler) but handle the case that it is not.
However, as '#embed' is well diagnosed if unsupported, that part is not
really needed.
Now, if all you want is an error if the file doesn't exist, then
#embed "whatever"
will do that too […]
If you want an error not just when it doesn't exist, but also when it
is empty, then you could do
#embed "whatever" if_empty (%%%)
The idea was to also error out if the file is empty – as that shouldn't
happen here: if offloading code was found, the code gen should be done.
However, using an invalid expression seems to be a good idea as that's
really a special case that shouldn't happen.
* * *
I have additionally replaced the #include by __UINTPTR_TYPE__ and
__SIZE_TYPE__ to avoid including 3 header files; this doesn't have a
large effect, but still.
Updated patch attached.
OK for mainline, once Jakub's #embed is committed?
* * *
BTW: Testing shows for a hello world program (w/o #embed patch)
For -foffload=...: 'disable' 0.04s, 'nvptx-none' 0.15s, 'amdgcn-amdhsa'
1.2s.
With a simple #embed (this patch plus Jakub's first patch), the
performance is unchanged. I then applied Jakub's follow up patches, but
I then get an ICE (Jakub will have a look).
But compiling it with 'g++' (→ COLLECT_GCC is g++) works; result: takes
0.2s (~6× faster) and compiling for both nvptx and gcn takes 0.3s,
nearly 5× faster.
Tobias
gcn/mkoffload.cc: Use #embed for including the generated ELF file
gcc/ChangeLog:
* config/gcn/mkoffload.cc (read_file): Remove.
(process_asm): Do not add '#include' to generated C file.
(process_obj): Generate C file that uses #embed and use
__SIZE_TYPE__ and __UINTPTR_TYPE__ instead the #include-defined
size_t and uintptr.
(main): Update call to it; remove no longer needed file I/O.
gcc/config/gcn/mkoffload.cc | 79 +++++++--------------------------------------
1 file changed, 12 insertions(+), 67 deletions(-)
diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc
index 810298a799b..c3c998639ff 100644
--- a/gcc/config/gcn/mkoffload.cc
+++ b/gcc/config/gcn/mkoffload.cc
@@ -182,44 +182,6 @@ xputenv (const char *string)
putenv (CONST_CAST (char *, string));
}
-/* Read the whole input file. It will be NUL terminated (but
- remember, there could be a NUL in the file itself. */
-
-static const char *
-read_file (FILE *stream, size_t *plen)
-{
- size_t alloc = 16384;
- size_t base = 0;
- char *buffer;
-
- if (!fseek (stream, 0, SEEK_END))
- {
- /* Get the file size. */
- long s = ftell (stream);
- if (s >= 0)
- alloc = s + 100;
- fseek (stream, 0, SEEK_SET);
- }
- buffer = XNEWVEC (char, alloc);
-
- for (;;)
- {
- size_t n = fread (buffer + base, 1, alloc - base - 1, stream);
-
- if (!n)
- break;
- base += n;
- if (base + 1 == alloc)
- {
- alloc *= 2;
- buffer = XRESIZEVEC (char, buffer, alloc);
- }
- }
- buffer[base] = 0;
- *plen = base;
- return buffer;
-}
-
/* Parse STR, saving found tokens into PVALUES and return their number.
Tokens are assumed to be delimited by ':'. */
@@ -657,10 +619,6 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
struct oaccdims *dims = XOBFINISH (&dims_os, struct oaccdims *);
struct regcount *regcounts = XOBFINISH (®counts_os, struct regcount *);
- fprintf (cfile, "#include <stdlib.h>\n");
- fprintf (cfile, "#include <stdint.h>\n");
- fprintf (cfile, "#include <stdbool.h>\n\n");
-
fprintf (cfile, "static const int gcn_num_vars = %d;\n\n", var_count);
fprintf (cfile, "static const int gcn_num_ind_funcs = %d;\n\n", ind_fn_count);
@@ -725,35 +683,28 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
/* Embed an object file into a C source file. */
static void
-process_obj (FILE *in, FILE *cfile, uint32_t omp_requires)
+process_obj (const char *fname_in, FILE *cfile, uint32_t omp_requires)
{
- size_t len = 0;
- const char *input = read_file (in, &len);
-
/* Dump out an array containing the binary.
- FIXME: do this with objcopy. */
- fprintf (cfile, "static unsigned char gcn_code[] = {");
- for (size_t i = 0; i < len; i += 17)
- {
- fprintf (cfile, "\n\t");
- for (size_t j = i; j < i + 17 && j < len; j++)
- fprintf (cfile, "%3u,", (unsigned char) input[j]);
- }
- fprintf (cfile, "\n};\n\n");
+ If the file is empty, a parse error is shown as the argument to is_empty
+ is an undeclared identifier. */
+ fprintf (cfile,
+ "static unsigned char gcn_code[] = {\n"
+ "#embed \"%s\" if_empty (error_file_is_empty)\n"
+ "};\n\n", fname_in);
fprintf (cfile,
"static const struct gcn_image {\n"
- " size_t size;\n"
+ " __SIZE_TYPE__ size;\n"
" void *image;\n"
"} gcn_image = {\n"
- " %zu,\n"
+ " sizeof(gcn_code),\n"
" gcn_code\n"
- "};\n\n",
- len);
+ "};\n\n");
fprintf (cfile,
"static const struct gcn_data {\n"
- " uintptr_t omp_requires_mask;\n"
+ " __UINTPTR_TYPE__ omp_requires_mask;\n"
" const struct gcn_image *gcn_image;\n"
" unsigned kernel_count;\n"
" const struct hsa_kernel_description *kernel_infos;\n"
@@ -1312,13 +1263,7 @@ main (int argc, char **argv)
fork_execute (ld_argv[0], CONST_CAST (char **, ld_argv), true, ".ld_args");
obstack_free (&ld_argv_obstack, NULL);
- in = fopen (gcn_o_name, "r");
- if (!in)
- fatal_error (input_location, "cannot open intermediate gcn obj file");
-
- process_obj (in, cfile, omp_requires);
-
- fclose (in);
+ process_obj (gcn_o_name, cfile, omp_requires);
xputenv (concat ("GCC_EXEC_PREFIX=", execpath, NULL));
xputenv (concat ("COMPILER_PATH=", cpath, NULL));