Add support for additional per-process flags, starting with an option to
configure MFMA precision on gfx 9.5.

v2: Rename the flag to KFD_PROC_FLAG_MFMA_HIGH_PRECISION
    Remove the unused else branch
v3: Bump the KFD API version
v4: Add the previously missing SH_MEM_CONFIG__PRECISION_MODE__SHIFT define

Signed-off-by: Harish Kasiviswanathan <harish.kasiviswanat...@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehl...@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c              |  3 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c |  6 ++++--
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h |  6 ++++--
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c |  6 ++++--
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c |  6 ++++--
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c |  6 ++++--
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c |  6 ++++--
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c  | 11 +++++++++--
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c  |  6 ++++--
 .../drm/amd/include/asic_reg/gc/gc_9_4_3_sh_mask.h    |  2 ++
 include/uapi/linux/kfd_ioctl.h                        |  8 ++++++--
 11 files changed, 47 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 8c2e92378b49..1e9dd00620bf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -606,7 +606,8 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
                                default_policy,
                                alternate_policy,
                                (void __user *)args->alternate_aperture_base,
-                               args->alternate_aperture_size))
+                               args->alternate_aperture_size,
+                               args->misc_process_flag))
                err = -EINVAL;
 
 out:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index d23c6a358d34..2afcc1b4856a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2596,7 +2596,8 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
                                   enum cache_policy default_policy,
                                   enum cache_policy alternate_policy,
                                   void __user *alternate_aperture_base,
-                                  uint64_t alternate_aperture_size)
+                                  uint64_t alternate_aperture_size,
+                                  u32 misc_process_properties)
 {
        bool retval = true;
 
@@ -2611,7 +2612,8 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
                        default_policy,
                        alternate_policy,
                        alternate_aperture_base,
-                       alternate_aperture_size);
+                       alternate_aperture_size,
+                       misc_process_properties);
 
        if (retval)
                goto out;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 7146e227e2c1..122eb745e9c4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -174,7 +174,8 @@ struct device_queue_manager_ops {
                                           enum cache_policy default_policy,
                                           enum cache_policy alternate_policy,
                                           void __user *alternate_aperture_base,
-                                          uint64_t alternate_aperture_size);
+                                          uint64_t alternate_aperture_size,
+                                          u32 misc_process_properties);
 
        int (*process_termination)(struct device_queue_manager *dqm,
                        struct qcm_process_device *qpd);
@@ -210,7 +211,8 @@ struct device_queue_manager_asic_ops {
                                           enum cache_policy default_policy,
                                           enum cache_policy alternate_policy,
                                           void __user *alternate_aperture_base,
-                                          uint64_t alternate_aperture_size);
+                                          uint64_t alternate_aperture_size,
+                                          u32 misc_process_properties);
        void    (*init_sdma_vm)(struct device_queue_manager *dqm,
                                struct queue *q,
                                struct qcm_process_device *qpd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index 32bedef912b3..0508ef5a41d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -40,7 +40,8 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
                                   enum cache_policy default_policy,
                                   enum cache_policy alternate_policy,
                                   void __user *alternate_aperture_base,
-                                  uint64_t alternate_aperture_size);
+                                  uint64_t alternate_aperture_size,
+                                  u32 misc_process_properties);
 static int update_qpd_cik(struct device_queue_manager *dqm,
                          struct qcm_process_device *qpd);
 static void init_sdma_vm(struct device_queue_manager *dqm,
@@ -88,7 +89,8 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
                                   enum cache_policy default_policy,
                                   enum cache_policy alternate_policy,
                                   void __user *alternate_aperture_base,
-                                  uint64_t alternate_aperture_size)
+                                  uint64_t alternate_aperture_size,
+                                  u32 misc_process_properties)
 {
        uint32_t default_mtype;
        uint32_t ape1_mtype;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
index b5f5f141353b..ba6e3d747ccd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
@@ -36,7 +36,8 @@ static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm,
                                   enum cache_policy default_policy,
                                   enum cache_policy alternate_policy,
                                   void __user *alternate_aperture_base,
-                                  uint64_t alternate_aperture_size);
+                                  uint64_t alternate_aperture_size,
+                                  u32 misc_process_properties);
 
 void device_queue_manager_init_v10(
        struct device_queue_manager_asic_ops *asic_ops)
@@ -61,7 +62,8 @@ static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm,
                                   enum cache_policy default_policy,
                                   enum cache_policy alternate_policy,
                                   void __user *alternate_aperture_base,
-                                  uint64_t alternate_aperture_size)
+                                  uint64_t alternate_aperture_size,
+                                  u32 misc_process_properties)
 {
        qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
                              SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
index f436878d0d62..8b447d04558f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
@@ -35,7 +35,8 @@ static bool set_cache_memory_policy_v11(struct device_queue_manager *dqm,
                                   enum cache_policy default_policy,
                                   enum cache_policy alternate_policy,
                                   void __user *alternate_aperture_base,
-                                  uint64_t alternate_aperture_size);
+                                  uint64_t alternate_aperture_size,
+                                  u32 misc_process_properties);
 
 void device_queue_manager_init_v11(
        struct device_queue_manager_asic_ops *asic_ops)
@@ -60,7 +61,8 @@ static bool set_cache_memory_policy_v11(struct device_queue_manager *dqm,
                                   enum cache_policy default_policy,
                                   enum cache_policy alternate_policy,
                                   void __user *alternate_aperture_base,
-                                  uint64_t alternate_aperture_size)
+                                  uint64_t alternate_aperture_size,
+                                  u32 misc_process_properties)
 {
        qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
                              SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c
index 62ca1c8fcbaf..3550da3a46f9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c
@@ -35,7 +35,8 @@ static bool set_cache_memory_policy_v12(struct device_queue_manager *dqm,
                                   enum cache_policy default_policy,
                                   enum cache_policy alternate_policy,
                                   void __user *alternate_aperture_base,
-                                  uint64_t alternate_aperture_size);
+                                  uint64_t alternate_aperture_size,
+                                  u32 misc_process_properties);
 
 void device_queue_manager_init_v12(
        struct device_queue_manager_asic_ops *asic_ops)
@@ -60,7 +61,8 @@ static bool set_cache_memory_policy_v12(struct device_queue_manager *dqm,
                                   enum cache_policy default_policy,
                                   enum cache_policy alternate_policy,
                                   void __user *alternate_aperture_base,
-                                  uint64_t alternate_aperture_size)
+                                  uint64_t alternate_aperture_size,
+                                  u32 misc_process_properties)
 {
        qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
                              SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
index c734eb9b505f..4635077aa905 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -35,7 +35,8 @@ static bool set_cache_memory_policy_v9(struct device_queue_manager *dqm,
                                   enum cache_policy default_policy,
                                   enum cache_policy alternate_policy,
                                   void __user *alternate_aperture_base,
-                                  uint64_t alternate_aperture_size);
+                                  uint64_t alternate_aperture_size,
+                                  u32 misc_process_properties);
 
 void device_queue_manager_init_v9(
        struct device_queue_manager_asic_ops *asic_ops)
@@ -60,7 +61,8 @@ static bool set_cache_memory_policy_v9(struct device_queue_manager *dqm,
                                   enum cache_policy default_policy,
                                   enum cache_policy alternate_policy,
                                   void __user *alternate_aperture_base,
-                                  uint64_t alternate_aperture_size)
+                                  uint64_t alternate_aperture_size,
+                                  u32 misc_process_properties)
 {
        qpd->sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
                                SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
@@ -73,6 +75,11 @@ static bool set_cache_memory_policy_v9(struct device_queue_manager *dqm,
                KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 5, 0))
                qpd->sh_mem_config |= (1 << SH_MEM_CONFIG__F8_MODE__SHIFT);
 
+       if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 5, 0)) {
+               if (misc_process_properties & KFD_PROC_FLAG_MFMA_HIGH_PRECISION)
+                       qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__PRECISION_MODE__SHIFT;
+       }
+
        qpd->sh_mem_ape1_limit = 0;
        qpd->sh_mem_ape1_base = 0;
        qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
index 320518f41890..dad83356e976 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -40,7 +40,8 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
                                       enum cache_policy default_policy,
                                       enum cache_policy alternate_policy,
                                       void __user *alternate_aperture_base,
-                                      uint64_t alternate_aperture_size);
+                                      uint64_t alternate_aperture_size,
+                                      u32 misc_process_properties);
 static int update_qpd_vi(struct device_queue_manager *dqm,
                         struct qcm_process_device *qpd);
 static void init_sdma_vm(struct device_queue_manager *dqm,
@@ -89,7 +90,8 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
                enum cache_policy default_policy,
                enum cache_policy alternate_policy,
                void __user *alternate_aperture_base,
-               uint64_t alternate_aperture_size)
+               uint64_t alternate_aperture_size,
+               u32 misc_process_properties)
 {
        uint32_t default_mtype;
        uint32_t ape1_mtype;
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_3_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_3_sh_mask.h
index 2bd9f3f1026f..0122a21c50cf 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_3_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_3_sh_mask.h
@@ -2261,11 +2261,13 @@
 #define SH_MEM_CONFIG__ADDRESS_MODE__SHIFT                                     
                               0x0
 #define SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT                                   
                               0x3
 #define SH_MEM_CONFIG__F8_MODE__SHIFT                                          
                               0x8
+#define SH_MEM_CONFIG__PRECISION_MODE__SHIFT                                   
                               0x9
 #define SH_MEM_CONFIG__RETRY_DISABLE__SHIFT                                    
                               0xc
 #define SH_MEM_CONFIG__PRIVATE_NV__SHIFT                                       
                               0xd
 #define SH_MEM_CONFIG__ADDRESS_MODE_MASK                                       
                               0x00000001L
 #define SH_MEM_CONFIG__ALIGNMENT_MODE_MASK                                     
                               0x00000018L
 #define SH_MEM_CONFIG__F8_MODE_MASK                                            
                               0x00000100L
+#define SH_MEM_CONFIG__PRECISION_MODE_MASK                                     
                               0x00000200L
 #define SH_MEM_CONFIG__RETRY_DISABLE_MASK                                      
                               0x00001000L
 #define SH_MEM_CONFIG__PRIVATE_NV_MASK                                         
                               0x00002000L
 //SP_MFMA_PORTD_RD_CONFIG
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index b0160b09987c..1e59344c5673 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -43,9 +43,10 @@
  * - 1.15 - Enable managing mappings in compute VMs with GEM_VA ioctl
  * - 1.16 - Add contiguous VRAM allocation flag
  * - 1.17 - Add SDMA queue creation with target SDMA engine ID
+ * - 1.18 - Rename pad in set_memory_policy_args to misc_process_flag
  */
 #define KFD_IOCTL_MAJOR_VERSION 1
-#define KFD_IOCTL_MINOR_VERSION 17
+#define KFD_IOCTL_MINOR_VERSION 18
 
 struct kfd_ioctl_get_version_args {
        __u32 major_version;    /* from KFD */
@@ -150,6 +151,9 @@ struct kfd_dbg_device_info_entry {
 #define KFD_IOC_CACHE_POLICY_COHERENT 0
 #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
 
+/* Misc. per process flags */
+#define KFD_PROC_FLAG_MFMA_HIGH_PRECISION (1 << 0)
+
 struct kfd_ioctl_set_memory_policy_args {
        __u64 alternate_aperture_base;  /* to KFD */
        __u64 alternate_aperture_size;  /* to KFD */
@@ -157,7 +161,7 @@ struct kfd_ioctl_set_memory_policy_args {
        __u32 gpu_id;                   /* to KFD */
        __u32 default_policy;           /* to KFD */
        __u32 alternate_policy;         /* to KFD */
-       __u32 pad;
+       __u32 misc_process_flag;        /* to KFD */
 };
 
 /*
-- 
2.34.1

Reply via email to