Andrew Stubbs wrote:
I'm going to push the base patch shortly.
… which happened in commit r15-4540-ga6b26e5ea09779.
Updated patch attached.
Further testing revealed an issue with the builtin defines; this has
been fixed, and the set of defines has been extended as well.
In particular, the patch now also defines a macro for the architecture
family, e.g. __GFX11__.
Tobias
PS: I find it surprising which out-of-bounds writes remain undetected on
one machine and show up prominently on the other.
PPS: If more macros are desired, clang also defines
__AMDGCN_WAVEFRONT_SIZE__ (i.e. 32 or 64) and __AMDGCN_CUMODE__ (1 = CU
mode, 0 = WGP).
GCN: Initial generic-target handling, add more GCN macro defines
Newer llvm-mc assemblers support the gfx*-generic targets, which permit
generating code that runs on all GPUs belonging to the same generation,
even if that code is not optimal. This requires LLVM 19.
This patch adds the compiler-side support for generic gfx and also adds
-march=gfx10-3-generic and -march=gfx11-generic. However, those -march=
values are neither documented nor used anywhere yet.
Disclaimer: Not tested (as my ROCm does not support it); additionally,
libgomp/plugin/plugin-gcn.c has to be updated before it becomes useful.
For better compatibility with LLVM's Clang, this commit additionally adds
the macro definitions __GFX<9|10|11>__ for the architecture family,
__AMDGPU__ besides the existing __AMDGCN__ and the two string-valued
macros __amdgcn_processor__ and __amdgcn_target_id__, where the former has
'-' replaced by '_' but otherwise both contain the lower case name. For the
new generic targets, the same happens, yielding, e.g., __gfx10_3_generic__.
gcc/ChangeLog:
* config/gcn/gcn-devices.def: Add generic version/flag as additional
value and architecture family entry; update; add gfx10-3-generic
and gfx11-generic.
* config/gcn/gcn-hsa.h (ABI_VERSION_SPEC): Remove.
(ASM_SPEC): Use generated ABI_VERSION_OPT instead.
* config/gcn/gcn-tables.opt: Regenerate.
* config/gcn/gcn.h (gcn_device_def): Add generic_version and
arch_family members.
(TARGET_CPU_CPP_BUILTINS): Fix allocation bug, handle '-' in the
name and add additional macro defines.
* config/gcn/gcn.cc (gcn_devices): Handle it.
* config/gcn/gen-gcn-device-macros.awk: Likewise; use ELF name
for the macro name; generate ABI_VERSION_OPT.
* config/gcn/mkoffload.cc (ELFABIVERSION_AMDGPU_HSA_V6,
EF_AMDGPU_GENERIC_VERSION_V, EF_AMDGPU_GENERIC_VERSION_OFFSET,
GET_GENERIC_VERSION, SET_GENERIC_VERSION): Define.
(main): Call SET_GENERIC_VERSION on elf_flags.
(copy_early_debug_info): If the arch sets the generic version,
use ELFABIVERSION_AMDGPU_HSA_V6.
gcc/config/gcn/gcn-devices.def | 71 ++++++++++++++++++++++++++++----
gcc/config/gcn/gcn-hsa.h | 9 +---
gcc/config/gcn/gcn-tables.opt | 6 +++
gcc/config/gcn/gcn.cc | 5 ++-
gcc/config/gcn/gcn.h | 25 ++++++++++-
gcc/config/gcn/gen-gcn-device-macros.awk | 39 +++++++++++++-----
gcc/config/gcn/mkoffload.cc | 24 ++++++++++-
7 files changed, 147 insertions(+), 32 deletions(-)
diff --git a/gcc/config/gcn/gcn-devices.def b/gcc/config/gcn/gcn-devices.def
index f2dbe18b6ae..1305e0f8817 100644
--- a/gcc/config/gcn/gcn-devices.def
+++ b/gcc/config/gcn/gcn-devices.def
@@ -62,18 +62,31 @@
purposes of calculating maximum occupancy. Some devices have AVGPRs
in the same register file, some have more registers than are
addressable from a single kernel. Used by libgomp's plugin-gcn.c.
+ 9 "Generic Processor Version" (unsigned, external)
+ Used as version field for generic processor support. For non-generic
+ code it is 0; otherwise, between 1 and 255. Initially, it is 1 for
+ each generic device, but incremented (for a given generic device) if
+ a new device of that series requires a code change;
+ cf. EF_AMDGPU_GENERIC_VERSION_V. The version shall be the same as
+ generated by the used llvm-mc assembler.
+ 10 "Architecture Family Name" (string, external)
+ Used to #define '__GFX<...>__'.
Fields marked "external", above, have values defined elsewhere (HSA, ROCM,
LLVM, ELF, etc.) and must have matching definitions here. Fields marked
"internal" are defined and used only in GCC (although some may have
user-visible effects) and may be refactored as needed. */
+/* GCN GFX9 (Vega) */
+
GCN_DEVICE(gfx900, GFX900, 0x2c, ISA_GCN5,
/* XNACK default */ HSACO_ATTR_OFF,
/* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
/* WAVE64 mode */ HSACO_ATTR_UNSUPPORTED,
/* CU mode */ HSACO_ATTR_UNSUPPORTED,
- /* Max ISA VGPRs */ 256
+ /* Max ISA VGPRs */ 256,
+ /* Generic code obj version */ 0, /* non-generic */
+ /* Architecture Family */ GFX9
)
GCN_DEVICE(gfx906, GFX906, 0x2f, ISA_GCN5,
@@ -81,7 +94,9 @@ GCN_DEVICE(gfx906, GFX906, 0x2f, ISA_GCN5,
/* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
/* WAVE64 mode */ HSACO_ATTR_UNSUPPORTED,
/* CU mode */ HSACO_ATTR_UNSUPPORTED,
- /* Max ISA VGPRs */ 256
+ /* Max ISA VGPRs */ 256,
+ /* Generic code obj version */ 0, /* non-generic */
+ /* Architecture Family */ GFX9
)
GCN_DEVICE(gfx908, GFX908, 0x30, ISA_CDNA1,
@@ -89,7 +104,9 @@ GCN_DEVICE(gfx908, GFX908, 0x30, ISA_CDNA1,
/* SRAM_ECC default */ HSACO_ATTR_ANY,
/* WAVE64 mode */ HSACO_ATTR_UNSUPPORTED,
/* CU mode */ HSACO_ATTR_UNSUPPORTED,
- /* Max ISA VGPRs */ 256
+ /* Max ISA VGPRs */ 256,
+ /* Generic code obj version */ 0, /* non-generic */
+ /* Architecture Family */ GFX9
)
GCN_DEVICE(gfx90a, GFX90A, 0x3f, ISA_CDNA2,
@@ -97,7 +114,9 @@ GCN_DEVICE(gfx90a, GFX90A, 0x3f, ISA_CDNA2,
/* SRAM_ECC default */ HSACO_ATTR_ANY,
/* WAVE64 mode */ HSACO_ATTR_UNSUPPORTED,
/* CU mode */ HSACO_ATTR_UNSUPPORTED,
- /* Max ISA VGPRs */ 512
+ /* Max ISA VGPRs */ 512,
+ /* Generic code obj version */ 0, /* non-generic */
+ /* Architecture Family */ GFX9
)
GCN_DEVICE(gfx90c, GFX90C, 0x32, ISA_GCN5,
@@ -105,15 +124,21 @@ GCN_DEVICE(gfx90c, GFX90C, 0x32, ISA_GCN5,
/* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
/* WAVE64 mode */ HSACO_ATTR_UNSUPPORTED,
/* CU mode */ HSACO_ATTR_UNSUPPORTED,
- /* Max ISA VGPRs */ 256
+ /* Max ISA VGPRs */ 256,
+ /* Generic code obj version */ 0, /* non-generic */
+ /* Architecture Family */ GFX9
)
+/* GCN GFX10.3 (RDNA 2) */
+
GCN_DEVICE(gfx1030, GFX1030, 0x36, ISA_RDNA2,
/* XNACK default */ HSACO_ATTR_UNSUPPORTED,
/* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
/* WAVE64 mode */ HSACO_ATTR_ON,
/* CU mode */ HSACO_ATTR_ON,
- /* Max ISA VGPRs */ 512 /* 512 SIMD32 = 256 wavefrontsize64. */
+ /* Max ISA VGPRs */ 512, /* 512 SIMD32 = 256 wavefrontsize64. */
+ /* Generic code obj version */ 0, /* non-generic */
+ /* Architecture Family */ GFX10
)
GCN_DEVICE(gfx1036, GFX1036, 0x45, ISA_RDNA2,
@@ -121,15 +146,31 @@ GCN_DEVICE(gfx1036, GFX1036, 0x45, ISA_RDNA2,
/* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
/* WAVE64 mode */ HSACO_ATTR_ON,
/* CU mode */ HSACO_ATTR_ON,
- /* Max ISA VGPRs */ 512 /* 512 SIMD32 = 256 wavefrontsize64. */
+ /* Max ISA VGPRs */ 512, /* 512 SIMD32 = 256 wavefrontsize64. */
+ /* Generic code obj version */ 0, /* non-generic */
+ /* Architecture Family */ GFX10
+ )
+
+GCN_DEVICE(gfx10-3-generic, GFX10_3_GENERIC, 0x053, ISA_RDNA2,
+ /* XNACK default */ HSACO_ATTR_UNSUPPORTED,
+ /* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
+ /* WAVE64 mode */ HSACO_ATTR_ON,
+ /* CU mode */ HSACO_ATTR_ON,
+ /* Max ISA VGPRs */ 512, /* 512 SIMD32 = 256 wavefrontsize64. */
+ /* Generic code obj version */ 1,
+ /* Architecture Family */ GFX10
)
+/* GCN GFX11 (RDNA 3) */
+
GCN_DEVICE(gfx1100, GFX1100, 0x41, ISA_RDNA3,
/* XNACK default */ HSACO_ATTR_UNSUPPORTED,
/* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
/* WAVE64 mode */ HSACO_ATTR_ON,
/* CU mode */ HSACO_ATTR_ON,
- /* Max ISA VGPRs */ 1536 /* 1536 SIMD32 = 768 wavefrontsize64. */
+ /* Max ISA VGPRs */ 1536, /* 1536 SIMD32 = 768 wavefrontsize64. */
+ /* Generic code obj version */ 0, /* non-generic */
+ /* Architecture Family */ GFX11
)
GCN_DEVICE(gfx1103, GFX1103, 0x44, ISA_RDNA3,
@@ -137,7 +178,19 @@ GCN_DEVICE(gfx1103, GFX1103, 0x44, ISA_RDNA3,
/* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
/* WAVE64 mode */ HSACO_ATTR_ON,
/* CU mode */ HSACO_ATTR_ON,
- /* Max ISA VGPRs */ 1536
+ /* Max ISA VGPRs */ 1536,
+ /* Generic code obj version */ 0, /* non-generic */
+ /* Architecture Family */ GFX11
+ )
+
+GCN_DEVICE(gfx11-generic, GFX11_GENERIC, 0x054, ISA_RDNA3,
+ /* XNACK default */ HSACO_ATTR_UNSUPPORTED,
+ /* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
+ /* WAVE64 mode */ HSACO_ATTR_ON,
+ /* CU mode */ HSACO_ATTR_ON,
+ /* Max ISA VGPRs */ 1536,
+ /* Generic code obj version */ 1,
+ /* Architecture Family */ GFX11
)
#undef GCN_DEVICE
diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h
index 7665e4f3158..d87d2fa143f 100644
--- a/gcc/config/gcn/gcn-hsa.h
+++ b/gcc/config/gcn/gcn-hsa.h
@@ -75,19 +75,12 @@ extern unsigned int gcn_local_sym_hash (const char *name);
supported for gcn. */
#define GOMP_SELF_SPECS ""
-/* Explicitly set the ABI version; in principle, we could use just the
- default; however, when debugging symbols are turned on, mkoffload.cc
- writes a new AMD GPU object file and the ABI version needs to be the
- same. - LLVM <= 17 defaults to 4 while LLVM >= 18 defaults to 5.
- GCC supports LLVM >= 13.0.1 and only LLVM >= 14 supports version 5. */
-#define ABI_VERSION_SPEC "--amdhsa-code-object-version=4"
-
#include "gcn-device-macros.h"
/* Use LLVM assembler and linker options. */
#define ASM_SPEC "-triple=amdgcn--amdhsa " \
"%{march=*:-mcpu=%*} " \
- ABI_VERSION_SPEC " " \
+ ABI_VERSION_OPT \
XNACKOPT \
SRAMOPT \
WAVE64OPT \
diff --git a/gcc/config/gcn/gcn-tables.opt b/gcc/config/gcn/gcn-tables.opt
index 140316f7250..bb71089ff66 100644
--- a/gcc/config/gcn/gcn-tables.opt
+++ b/gcc/config/gcn/gcn-tables.opt
@@ -45,8 +45,14 @@ Enum(gpu_type) String(gfx1030) Value(PROCESSOR_GFX1030)
EnumValue
Enum(gpu_type) String(gfx1036) Value(PROCESSOR_GFX1036)
+EnumValue
+Enum(gpu_type) String(gfx10-3-generic) Value(PROCESSOR_GFX10_3_GENERIC)
+
EnumValue
Enum(gpu_type) String(gfx1100) Value(PROCESSOR_GFX1100)
EnumValue
Enum(gpu_type) String(gfx1103) Value(PROCESSOR_GFX1103)
+
+EnumValue
+Enum(gpu_type) String(gfx11-generic) Value(PROCESSOR_GFX11_GENERIC)
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 3dc6acfa950..736ebeb2ffd 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -101,8 +101,9 @@ static hash_map<tree, int> lds_allocs;
/* Import all the data from gcn-devices.def.
The PROCESSOR_GFXnnn should be indices for this table. */
const struct gcn_device_def gcn_devices[] = {
-#define GCN_DEVICE(name, NAME, ELF, ISA, XNACK, SRAMECC, WAVE64, CU, VGPRS) \
- {PROCESSOR_ ## NAME, #name, #NAME, ISA, XNACK, SRAMECC, WAVE64, CU, VGPRS},
+#define GCN_DEVICE(name, NAME, ELF, ISA, XNACK, SRAMECC, WAVE64, CU, VGPRS, GEN_VER,ARCH_FAM) \
+ {PROCESSOR_ ## NAME, #name, #NAME, ISA, XNACK, SRAMECC, WAVE64, CU, VGPRS, \
+ GEN_VER, #ARCH_FAM},
#include "gcn-devices.def"
};
diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h
index ff508406ff1..30a144b154c 100644
--- a/gcc/config/gcn/gcn.h
+++ b/gcc/config/gcn/gcn.h
@@ -28,13 +28,16 @@ extern const struct gcn_device_def {
enum hsaco_attr_type wave64_default;
enum hsaco_attr_type cumode_default;
int max_isa_vgprs;
+ unsigned generic_version;
+ const char *arch_family;
} gcn_devices[];
#define TARGET_CPU_CPP_BUILTINS() \
do \
{ \
+ builtin_define ("__AMDGPU__"); \
builtin_define ("__AMDGCN__"); \
- if (TARGET_GCN5) \
+ if (TARGET_GCN5) \
builtin_define ("__GCN5__"); \
else if (TARGET_CDNA1) \
builtin_define ("__CDNA1__"); \
@@ -46,8 +49,26 @@ extern const struct gcn_device_def {
builtin_define ("__RDNA3__"); \
else \
gcc_unreachable (); \
- char *name = (char *)xmalloc (sizeof (gcn_devices[gcn_arch].name) + 5); \
+ char *name = (char *)xmalloc (strlen (gcn_devices[gcn_arch].name) + 5); \
sprintf (name, "__%s__", gcn_devices[gcn_arch].name); \
+ char *p; \
+ if (gcn_devices[gcn_arch].generic_version) \
+ while ((p = strchr(name, '-'))) \
+ *p = '_'; \
+ builtin_define (name); \
+ name = (char *)xmalloc (strlen (gcn_devices[gcn_arch].arch_family) + 5); \
+ sprintf (name, "__%s__", gcn_devices[gcn_arch].arch_family); \
+ builtin_define (name); \
+ name = (char *)xmalloc (strlen ("__amdgcn_target_id__") + \
+ strlen (gcn_devices[gcn_arch].name) + 4); \
+ sprintf (name, "__amdgcn_target_id__=\"%s\"", gcn_devices[gcn_arch].name); \
+ builtin_define (name); \
+ name = (char *)xmalloc (strlen ("__amdgcn_processor__") + \
+ strlen (gcn_devices[gcn_arch].name) + 4); \
+ sprintf (name, "__amdgcn_processor__=\"%s\"", gcn_devices[gcn_arch].name); \
+ if (gcn_devices[gcn_arch].generic_version) \
+ while ((p = strchr(name, '-'))) \
+ *p = '_'; \
builtin_define (name); \
} while (0)
diff --git a/gcc/config/gcn/gen-gcn-device-macros.awk b/gcc/config/gcn/gen-gcn-device-macros.awk
index 6352fa5fa3e..5ecc5c403b7 100644
--- a/gcc/config/gcn/gen-gcn-device-macros.awk
+++ b/gcc/config/gcn/gen-gcn-device-macros.awk
@@ -25,28 +25,30 @@ BEGIN {
print " Do not edit. */"
list=""
+ generic_list=""
}
/^GCN_DEVICE\(/ {
gfx=$2
- list=(list " OPT_" gfx)
+ NAME=$3
+ list=(list " OPT_" NAME)
print ""
next
}
/XNACK default.*HSACO_ATTR_UNSUPPORTED/ {
- printf "\n#define XNACK_%s \"march=%s:;\"", gfx, gfx
+ printf "\n#define XNACK_%s \"march=%s:;\"", NAME, gfx
next
}
/XNACK default.*HSACO_ATTR_OFF/ {
- printf "\n#define XNACK_%s \"march=%s:%{!mxnack*|mxnack=default|mxnack=off:-mattr=-xnack;mxnack=on:-mattr=+xnack};\"", gfx, gfx
+ printf "\n#define XNACK_%s \"march=%s:%{!mxnack*|mxnack=default|mxnack=off:-mattr=-xnack;mxnack=on:-mattr=+xnack};\"", NAME, gfx
next
}
/XNACK default.*HSACO_ATTR_ANY/ {
- printf "\n#define XNACK_%s \"march=%s:%{mxnack=off:-mattr=-xnack;mxnack=on:-mattr=+xnack};\"", gfx, gfx
+ printf "\n#define XNACK_%s \"march=%s:%{mxnack=off:-mattr=-xnack;mxnack=on:-mattr=+xnack};\"", NAME, gfx
next
}
@@ -56,12 +58,12 @@ BEGIN {
}
/SRAM_ECC default.*HSACO_ATTR_UNSUPPORTED/ {
- printf "\n#define SRAM_%s \"march=%s:;\"", gfx, gfx
+ printf "\n#define SRAM_%s \"march=%s:;\"", NAME, gfx
next
}
/SRAM_ECC default.*HSACO_ATTR_ANY/ {
- printf "\n#define SRAM_%s \"march=%s:%{msram-ecc=on:-mattr=+sramecc;msram-ecc=off:-mattr=-sramecc};\"", gfx, gfx
+ printf "\n#define SRAM_%s \"march=%s:%{msram-ecc=on:-mattr=+sramecc;msram-ecc=off:-mattr=-sramecc};\"", NAME, gfx
next
}
@@ -71,12 +73,12 @@ BEGIN {
}
/WAVE64 mode.*HSACO_ATTR_UNSUPPORTED/ {
- printf "\n#define WAVE64_%s \"march=%s:;\"", gfx, gfx
+ printf "\n#define WAVE64_%s \"march=%s:;\"", NAME, gfx
next
}
/WAVE64 mode.*HSACO_ATTR_ON/ {
- printf "\n#define WAVE64_%s \"march=%s:-mattr=+wavefrontsize64;\"", gfx, gfx
+ printf "\n#define WAVE64_%s \"march=%s:-mattr=+wavefrontsize64;\"", NAME, gfx
next
}
@@ -86,12 +88,12 @@ BEGIN {
}
/CU mode.*HSACO_ATTR_UNSUPPORTED/ {
- printf "\n#define CU_%s \"march=%s:;\"", gfx, gfx
+ printf "\n#define CU_%s \"march=%s:;\"", NAME, gfx
next
}
/CU mode.*HSACO_ATTR_ON/ {
- printf "\n#define CU_%s \"march=%s:-mattr=+cumode;\"", gfx, gfx
+ printf "\n#define CU_%s \"march=%s:-mattr=+cumode;\"", NAME, gfx
next
}
@@ -100,9 +102,26 @@ BEGIN {
exit 1
}
+/Generic code obj version/ {
+ match($0,/Generic code obj version[^\/]*\/[\t ]*([0-9]+)/,m)
+ if (m[1] > 0) {
+ printf "\n#define GENERIC_%s \"march=%s:--amdhsa-code-object-version=6;\"", NAME, gfx
+ generic_list=(generic_list " GENERIC_" NAME)
+ }
+ next
+}
+
+# ABI Version: In principle, the LLVM default would work. However,
+# when debugging symbols are turned on, mkoffload.cc
+# writes a new AMD GPU object file and the ABI version needs to be the
+# same. - LLVM <= 17 defaults to 4 while LLVM >= 18 defaults to 5.
+# GCC supports LLVM >= 13.0.1 and only LLVM >= 14 supports version 5.
+# Code object V6 is supported since LLVM 19.
+
END {
print ""
print ""
+ printf "#define ABI_VERSION_OPT \"%%{\"%s \"!march=*|march=*:--amdhsa-code-object-version=4} \"\n", generic_list
printf "#define XNACKOPT \"%%{\"%s \":%%eexpected march\\n} \"\n", gensub (/OPT/, "XNACK", "g", list)
printf "#define SRAMOPT \"%%{\"%s \":%%eexpected march\\n} \"\n", gensub (/OPT/, "SRAM", "g", list)
printf "#define WAVE64OPT \"%%{\"%s \":%%eexpected march\\n} \"\n", gensub (/OPT/, "WAVE64", "g", list)
diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc
index cebb9e506fb..c1d80aae59c 100644
--- a/gcc/config/gcn/mkoffload.cc
+++ b/gcc/config/gcn/mkoffload.cc
@@ -48,6 +48,8 @@
#define ELFABIVERSION_AMDGPU_HSA_V3 1
#undef ELFABIVERSION_AMDGPU_HSA_V4
#define ELFABIVERSION_AMDGPU_HSA_V4 2
+#undef ELFABIVERSION_AMDGPU_HSA_V6
+#define ELFABIVERSION_AMDGPU_HSA_V6 4
/* Extract the EF_AMDGPU_MACH_AMDGCN_GFXnnn from the def file. */
enum elf_arch_code {
@@ -69,6 +71,9 @@ enum elf_arch_code {
#define EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 0x800
#define EF_AMDGPU_FEATURE_SRAMECC_ON_V4 0xc00
+#define EF_AMDGPU_GENERIC_VERSION_V 0xff000000 /* Mask. */
+#define EF_AMDGPU_GENERIC_VERSION_OFFSET 24
+
#define SET_XNACK_ON(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_XNACK_V4) \
| EF_AMDGPU_FEATURE_XNACK_ON_V4)
#define SET_XNACK_ANY(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_XNACK_V4) \
@@ -100,6 +105,12 @@ enum elf_arch_code {
== EF_AMDGPU_FEATURE_SRAMECC_ON_V4)
#define TEST_SRAM_ECC_UNSET(VAR) ((VAR & EF_AMDGPU_FEATURE_SRAMECC_V4) == 0)
+#define GET_GENERIC_VERSION(VAR) ((VAR & EF_AMDGPU_GENERIC_VERSION_V) \
+ >> EF_AMDGPU_GENERIC_VERSION_OFFSET)
+#define SET_GENERIC_VERSION(VAR,GEN_VER) \
+ VAR = ((VAR & ~EF_AMDGPU_GENERIC_VERSION_V) \
+ | (GEN_VER << EF_AMDGPU_GENERIC_VERSION_OFFSET))
+
#ifndef R_AMDGPU_NONE
#define R_AMDGPU_NONE 0
#define R_AMDGPU_ABS32_LO 1 /* (S + A) & 0xFFFFFFFF */
@@ -305,7 +316,9 @@ copy_early_debug_info (const char *infile, const char *outfile)
/* Patch the correct elf architecture flag into the file. */
ehdr.e_ident[7] = ELFOSABI_AMDGPU_HSA;
- ehdr.e_ident[8] = ELFABIVERSION_AMDGPU_HSA_V4;
+ ehdr.e_ident[8] = (GET_GENERIC_VERSION (elf_flags)
+ ? ELFABIVERSION_AMDGPU_HSA_V6
+ : ELFABIVERSION_AMDGPU_HSA_V4);
ehdr.e_type = ET_REL;
ehdr.e_machine = EM_AMDGPU;
ehdr.e_flags = elf_arch | elf_flags;
@@ -1036,6 +1049,15 @@ main (int argc, char **argv)
fatal_error (input_location, "unhandled architecture");
}
+ /* Set the generic version. */
+ switch (elf_arch)
+ {
+#define GCN_DEVICE(name, NAME, ELF, ISA, XNACK, SRAMECC, WAVE64, CU, VGPRS, GEN_VER, ...) \
+ case ELF: if (GEN_VER) SET_GENERIC_VERSION (elf_flags, GEN_VER); break;
+#include "gcn-devices.def"
+#undef GCN_DEVICE
+ }
+
/* Build arguments for compiler pass. */
struct obstack cc_argv_obstack;
obstack_init (&cc_argv_obstack);