Hi Andrew,
this patch adds support for gfx90c GCN5 APU integrated graphics devices.
The LLVM AMDGPU documentation (https://llvm.org/docs/AMDGPUUsage.html)
lists those devices as unsupported by rocm-amdhsa.
As we have discussed elsewhere, I have tested the patch on my AMD Ryzen
5 5500U (also with different xnack settings), and it passes most
libgomp offloading tests.
Although those APUs are very constrained compared to dGPUs, I think
they might be interesting for learning, experimentation, and testing.


Can I commit the patch to the master branch?

Best regards,
Frederik
From 809e2a0248e6fad1e8336b4a883a729017cc62e5 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <frede...@harwath.name>
Date: Wed, 24 Apr 2024 20:29:14 +0200
Subject: [PATCH] amdgcn: Add gfx90c target

Add support for gfx90c GCN5 APU integrated graphics devices.
The LLVM AMDGPU documentation does not list those devices as supported
by rocm-amdhsa, but they pass most libgomp offloading tests.
Although they are constrained compared to dGPUs, they might be
interesting for learning, experimentation, and testing.

gcc/ChangeLog:

	* config.gcc: Add gfx90c.
	* config/gcn/gcn-hsa.h (NO_SRAM_ECC): Likewise.
	* config/gcn/gcn-opts.h (enum processor_type): Likewise.
	(TARGET_GFX90c): New macro.
	* config/gcn/gcn.cc (gcn_option_override): Handle gfx90c.
	(gcn_omp_device_kind_arch_isa): Likewise.
	(output_file_start): Likewise.
	* config/gcn/gcn.h: Add gfx90c.
	* config/gcn/gcn.opt: Likewise.
	* config/gcn/mkoffload.cc (EF_AMDGPU_MACH_AMDGCN_GFX90c): New macro.
	(get_arch): Handle gfx90c.
	(main): Handle EF_AMDGPU_MACH_AMDGCN_GFX90c.
	* config/gcn/t-omp-device: Add gfx90c.
	* doc/install.texi: Likewise.
	* doc/invoke.texi: Likewise.

libgomp/ChangeLog:

	* plugin/plugin-gcn.c (isa_hsa_name): Handle EF_AMDGPU_MACH_AMDGCN_GFX90c.
	(isa_code): Handle gfx90c.
	(max_isa_vgprs): Handle EF_AMDGPU_MACH_AMDGCN_GFX90c.

Signed-off-by: Frederik Harwath <frede...@harwath.name>
---
 gcc/config.gcc              | 4 ++--
 gcc/config/gcn/gcn-hsa.h    | 2 +-
 gcc/config/gcn/gcn-opts.h   | 2 ++
 gcc/config/gcn/gcn.cc       | 8 ++++++++
 gcc/config/gcn/gcn.h        | 2 ++
 gcc/config/gcn/gcn.opt      | 3 +++
 gcc/config/gcn/mkoffload.cc | 9 +++++++++
 gcc/config/gcn/t-omp-device | 2 +-
 gcc/doc/install.texi        | 4 ++--
 gcc/doc/invoke.texi         | 3 +++
 libgomp/plugin/plugin-gcn.c | 9 +++++++++
 11 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 5df3c52f8e9..1bf07b6eece 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4569,7 +4569,7 @@ case "${target}" in
 		for which in arch tune; do
 			eval "val=\$with_$which"
 			case ${val} in
-			"" | fiji | gfx900 | gfx906 | gfx908 | gfx90a | gfx1030 | gfx1036 | gfx1100 | gfx1103)
+			"" | fiji | gfx900 | gfx906 | gfx908 | gfx90a | gfx90c | gfx1030 | gfx1036 | gfx1100 | gfx1103)
 				# OK
 				;;
 			*)
@@ -4585,7 +4585,7 @@ case "${target}" in
 			TM_MULTILIB_CONFIG=
 			;;
 		xdefault | xyes)
-			TM_MULTILIB_CONFIG=`echo "gfx900,gfx906,gfx908,gfx90a,gfx1030,gfx1036,gfx1100,gfx1103" | sed "s/${with_arch},\?//;s/,$//"`
+			TM_MULTILIB_CONFIG=`echo "gfx900,gfx906,gfx908,gfx90a,gfx90c,gfx1030,gfx1036,gfx1100,gfx1103" | sed "s/${with_arch},\?//;s/,$//"`
 			;;
 		*)
 			TM_MULTILIB_CONFIG="${with_multilib_list}"
diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h
index 7d6e3141cea..4611bc55392 100644
--- a/gcc/config/gcn/gcn-hsa.h
+++ b/gcc/config/gcn/gcn-hsa.h
@@ -93,7 +93,7 @@ extern unsigned int gcn_local_sym_hash (const char *name);
 #define NO_XNACK "march=fiji:;march=gfx1030:;march=gfx1036:;march=gfx1100:;march=gfx1103:;" \
     /* These match the defaults set in gcn.cc.  */ \
     "!mxnack*|mxnack=default:%{march=gfx900|march=gfx906|march=gfx908:-mattr=-xnack};"
-#define NO_SRAM_ECC "!march=*:;march=fiji:;march=gfx900:;march=gfx906:;"
+#define NO_SRAM_ECC "!march=*:;march=fiji:;march=gfx900:;march=gfx906:;march=gfx90c:;"
 
 /* In HSACOv4 no attribute setting means the binary supports "any" hardware
    configuration.  The name of the attribute also changed.  */
diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h
index 49099bad7e7..1091035a69a 100644
--- a/gcc/config/gcn/gcn-opts.h
+++ b/gcc/config/gcn/gcn-opts.h
@@ -25,6 +25,7 @@ enum processor_type
   PROCESSOR_VEGA20,  // gfx906
   PROCESSOR_GFX908,
   PROCESSOR_GFX90a,
+  PROCESSOR_GFX90c,
   PROCESSOR_GFX1030,
   PROCESSOR_GFX1036,
   PROCESSOR_GFX1100,
@@ -36,6 +37,7 @@ enum processor_type
 #define TARGET_VEGA20 (gcn_arch == PROCESSOR_VEGA20)
 #define TARGET_GFX908 (gcn_arch == PROCESSOR_GFX908)
 #define TARGET_GFX90a (gcn_arch == PROCESSOR_GFX90a)
+#define TARGET_GFX90c (gcn_arch == PROCESSOR_GFX90c)
 #define TARGET_GFX1030 (gcn_arch == PROCESSOR_GFX1030)
 #define TARGET_GFX1036 (gcn_arch == PROCESSOR_GFX1036)
 #define TARGET_GFX1100 (gcn_arch == PROCESSOR_GFX1100)
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 9f91d4f9ebd..d6531f55190 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -138,6 +138,7 @@ gcn_option_override (void)
       : gcn_arch == PROCESSOR_VEGA20 ? ISA_GCN5
       : gcn_arch == PROCESSOR_GFX908 ? ISA_CDNA1
       : gcn_arch == PROCESSOR_GFX90a ? ISA_CDNA2
+      : gcn_arch == PROCESSOR_GFX90c ? ISA_GCN5
       : gcn_arch == PROCESSOR_GFX1030 ? ISA_RDNA2
       : gcn_arch == PROCESSOR_GFX1036 ? ISA_RDNA2
       : gcn_arch == PROCESSOR_GFX1100 ? ISA_RDNA3
@@ -196,6 +197,7 @@ gcn_option_override (void)
 	flag_xnack = HSACO_ATTR_OFF;
 	break;
       case PROCESSOR_GFX90a:
+      case PROCESSOR_GFX90c:
 	flag_xnack = HSACO_ATTR_ANY;
 	break;
       default:
@@ -3050,6 +3052,8 @@ gcn_omp_device_kind_arch_isa (enum omp_device_kind_arch_isa trait,
 	return gcn_arch == PROCESSOR_GFX908;
       if (strcmp (name, "gfx90a") == 0)
 	return gcn_arch == PROCESSOR_GFX90a;
+      if (strcmp (name, "gfx90c") == 0)
+	return gcn_arch == PROCESSOR_GFX90c;
       if (strcmp (name, "gfx1030") == 0)
 	return gcn_arch == PROCESSOR_GFX1030;
       if (strcmp (name, "gfx1036") == 0)
@@ -6596,6 +6600,10 @@ output_file_start (void)
     case PROCESSOR_GFX90a:
       cpu = "gfx90a";
       break;
+    case PROCESSOR_GFX90c:
+      cpu = "gfx90c";
+      sram_ecc = "";
+      break;
     case PROCESSOR_GFX1030:
       cpu = "gfx1030";
       xnack = "";
diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h
index 4148ceaf582..afa615320ca 100644
--- a/gcc/config/gcn/gcn.h
+++ b/gcc/config/gcn/gcn.h
@@ -47,6 +47,8 @@
 	builtin_define ("__gfx908__");                                         \
       else if (TARGET_GFX90a)                                                  \
 	builtin_define ("__gfx90a__");                                         \
+      else if (TARGET_GFX90c)                                                  \
+	builtin_define ("__gfx90c__");                                         \
       else if (TARGET_GFX1030)                                                 \
 	builtin_define ("__gfx1030__");                                        \
       else if (TARGET_GFX1036)                                                 \
diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt
index 42bb5f75a42..3317c492507 100644
--- a/gcc/config/gcn/gcn.opt
+++ b/gcc/config/gcn/gcn.opt
@@ -40,6 +40,9 @@ Enum(gpu_type) String(gfx908) Value(PROCESSOR_GFX908)
 EnumValue
 Enum(gpu_type) String(gfx90a) Value(PROCESSOR_GFX90a)
 
+EnumValue
+Enum(gpu_type) String(gfx90c) Value(PROCESSOR_GFX90c)
+
 EnumValue
 Enum(gpu_type) String(gfx1030) Value(PROCESSOR_GFX1030)
 
diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc
index 9a438de331a..810298a799b 100644
--- a/gcc/config/gcn/mkoffload.cc
+++ b/gcc/config/gcn/mkoffload.cc
@@ -59,6 +59,8 @@
 #define EF_AMDGPU_MACH_AMDGCN_GFX908 0x30
 #undef  EF_AMDGPU_MACH_AMDGCN_GFX90a
 #define EF_AMDGPU_MACH_AMDGCN_GFX90a 0x3f
+#undef  EF_AMDGPU_MACH_AMDGCN_GFX90c
+#define EF_AMDGPU_MACH_AMDGCN_GFX90c 0x32
 #undef  EF_AMDGPU_MACH_AMDGCN_GFX1030
 #define EF_AMDGPU_MACH_AMDGCN_GFX1030 0x36
 #undef  EF_AMDGPU_MACH_AMDGCN_GFX1036
@@ -861,6 +863,8 @@ get_arch (const char *str, const char *with_arch_str)
     return EF_AMDGPU_MACH_AMDGCN_GFX908;
   else if (strcmp (str, "gfx90a") == 0)
     return EF_AMDGPU_MACH_AMDGCN_GFX90a;
+  else if (strcmp (str, "gfx90c") == 0)
+    return EF_AMDGPU_MACH_AMDGCN_GFX90c;
   else if (strcmp (str, "gfx1030") == 0)
     return EF_AMDGPU_MACH_AMDGCN_GFX1030;
   else if (strcmp (str, "gfx1036") == 0)
@@ -1099,6 +1103,11 @@ main (int argc, char **argv)
       if (TEST_SRAM_ECC_UNSET (elf_flags))
 	SET_SRAM_ECC_ANY (elf_flags);
       break;
+    case EF_AMDGPU_MACH_AMDGCN_GFX90c:
+      if (TEST_XNACK_UNSET (elf_flags))
+	SET_XNACK_ANY (elf_flags);
+      SET_SRAM_ECC_UNSET (elf_flags);
+      break;
     default:
       fatal_error (input_location, "unhandled architecture");
     }
diff --git a/gcc/config/gcn/t-omp-device b/gcc/config/gcn/t-omp-device
index 7bcf910cbd0..b92e19bb6d6 100644
--- a/gcc/config/gcn/t-omp-device
+++ b/gcc/config/gcn/t-omp-device
@@ -1,4 +1,4 @@
 omp-device-properties-gcn: $(srcdir)/config/gcn/gcn.cc
 	echo kind: gpu > $@
 	echo arch: amdgcn gcn >> $@
-	echo isa: fiji gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1036 gfx1100 gfx1103 >> $@
+	echo isa: fiji gfx803 gfx900 gfx906 gfx908 gfx90a gfx90c gfx1030 gfx1036 gfx1100 gfx1103 >> $@
diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index 1c58dc334ab..afafaee526d 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -1268,8 +1268,8 @@ default set of libraries is selected based on the value of
 
 @item amdgcn*-*-*
 @var{list} is a comma separated list of ISA names (allowed values: @code{fiji},
-@code{gfx900}, @code{gfx906}, @code{gfx908}, @code{gfx90a}, @code{gfx1030},
-@code{gfx1036}, @code{gfx1100}, @code{gfx1103}).
+@code{gfx900}, @code{gfx906}, @code{gfx908}, @code{gfx90a}, @code{gfx90c}, 
+@code{gfx1030}, @code{gfx1036}, @code{gfx1100}, @code{gfx1103}).
 It ought not include the name of the default
 ISA, specified via @option{--with-arch}.  If @var{list} is empty, then there
 will be no multilibs and only the default run-time library will be built.  If
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index e3285587e4e..ede2a2db04b 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -21846,6 +21846,9 @@ Compile for CDNA1 Instinct MI100 series devices (gfx908).
 @item gfx90a
 Compile for CDNA2 Instinct MI200 series devices (gfx90a).
 
+@item gfx90c
+Compile for GCN5 Vega 7 devices (gfx90c).
+
 @item gfx1030
 Compile for RDNA2 gfx1030 devices (GFX10 series).
 
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 27947801ccd..3cdc7ba929f 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -390,6 +390,7 @@ typedef enum {
   EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
   EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030,
   EF_AMDGPU_MACH_AMDGCN_GFX90a = 0x03f,
+  EF_AMDGPU_MACH_AMDGCN_GFX90c = 0x032,
   EF_AMDGPU_MACH_AMDGCN_GFX1030 = 0x036,
   EF_AMDGPU_MACH_AMDGCN_GFX1036 = 0x045,
   EF_AMDGPU_MACH_AMDGCN_GFX1100 = 0x041,
@@ -1679,6 +1680,7 @@ const static char *gcn_gfx900_s = "gfx900";
 const static char *gcn_gfx906_s = "gfx906";
 const static char *gcn_gfx908_s = "gfx908";
 const static char *gcn_gfx90a_s = "gfx90a";
+const static char *gcn_gfx90c_s = "gfx90c";
 const static char *gcn_gfx1030_s = "gfx1030";
 const static char *gcn_gfx1036_s = "gfx1036";
 const static char *gcn_gfx1100_s = "gfx1100";
@@ -1702,6 +1704,8 @@ isa_hsa_name (int isa) {
       return gcn_gfx908_s;
     case EF_AMDGPU_MACH_AMDGCN_GFX90a:
       return gcn_gfx90a_s;
+    case EF_AMDGPU_MACH_AMDGCN_GFX90c:
+      return gcn_gfx90c_s;
     case EF_AMDGPU_MACH_AMDGCN_GFX1030:
       return gcn_gfx1030_s;
     case EF_AMDGPU_MACH_AMDGCN_GFX1036:
@@ -1749,6 +1753,9 @@ isa_code(const char *isa) {
   if (!strncmp (isa, gcn_gfx90a_s, gcn_isa_name_len))
     return EF_AMDGPU_MACH_AMDGCN_GFX90a;
 
+  if (!strncmp (isa, gcn_gfx90c_s, gcn_isa_name_len))
+    return EF_AMDGPU_MACH_AMDGCN_GFX90c;
+
   if (!strncmp (isa, gcn_gfx1030_s, gcn_isa_name_len))
     return EF_AMDGPU_MACH_AMDGCN_GFX1030;
 
@@ -1778,6 +1785,8 @@ max_isa_vgprs (int isa)
       return 256;
     case EF_AMDGPU_MACH_AMDGCN_GFX90a:
       return 512;
+    case EF_AMDGPU_MACH_AMDGCN_GFX90c:
+      return 256;
     case EF_AMDGPU_MACH_AMDGCN_GFX1030:
     case EF_AMDGPU_MACH_AMDGCN_GFX1036:
       return 512;  /* 512 SIMD32 = 256 wavefrontsize64.  */
-- 
2.34.1

Reply via email to