Switch from a bool to an enum and allow more options
for enforce isolation.  There are now 3 modes of operation:
- Disabled (0)
- Enabled (serialization and cleaner shader) (1)
- Enabled in legacy mode (no serialization or cleaner shader) (2)
This provides better flexibility for more use cases.

Signed-off-by: Alex Deucher <alexander.deuc...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           | 11 +++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        | 16 +++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    | 22 ++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c       | 12 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c       | 39 ++++++++++++++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c       |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h       |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c       |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |  3 +-
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c        |  2 +-
 drivers/gpu/drm/amd/amdgpu/mes_v12_0.c        |  2 +-
 .../drm/amd/amdkfd/kfd_packet_manager_v9.c    | 11 +++---
 12 files changed, 93 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 804d377037095..468c6ce09e3ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -230,7 +230,7 @@ extern int amdgpu_force_asic_type;
 extern int amdgpu_smartshift_bias;
 extern int amdgpu_use_xgmi_p2p;
 extern int amdgpu_mtype_local;
-extern bool enforce_isolation;
+extern int amdgpu_enforce_isolation;
 #ifdef CONFIG_HSA_AMD
 extern int sched_policy;
 extern bool debug_evictions;
@@ -873,6 +873,13 @@ struct amdgpu_init_level {
 struct amdgpu_reset_domain;
 struct amdgpu_fru_info;
 
+enum amdgpu_enforce_isolation_mode {
+       AMDGPU_ENFORCE_ISOLATION_DISABLE = 0,
+       AMDGPU_ENFORCE_ISOLATION_ENABLE = 1,
+       AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY = 2,
+};
+
+
 /*
  * Non-zero (true) if the GPU has VRAM. Zero (false) otherwise.
  */
@@ -1224,7 +1231,7 @@ struct amdgpu_device {
 
        /* Protection for the following isolation structure */
        struct mutex                    enforce_isolation_mutex;
-       bool                            enforce_isolation[MAX_XCP];
+       enum amdgpu_enforce_isolation_mode      enforce_isolation[MAX_XCP];
        struct amdgpu_isolation {
                void                    *owner;
                struct dma_fence        *spearhead;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 4a5cd7a111fc4..34a688fc5bf35 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -296,7 +296,21 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
                                       num_ibs[i], &p->jobs[i]);
                if (ret)
                        goto free_all_kdata;
-               p->jobs[i]->enforce_isolation = 
p->adev->enforce_isolation[fpriv->xcp_id];
+               switch (p->adev->enforce_isolation[fpriv->xcp_id]) {
+               case AMDGPU_ENFORCE_ISOLATION_DISABLE:
+               default:
+                       p->jobs[i]->enforce_isolation = false;
+                       p->jobs[i]->run_cleaner_shader = false;
+                       break;
+               case AMDGPU_ENFORCE_ISOLATION_ENABLE:
+                       p->jobs[i]->enforce_isolation = true;
+                       p->jobs[i]->run_cleaner_shader = true;
+                       break;
+               case AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY:
+                       p->jobs[i]->enforce_isolation = true;
+                       p->jobs[i]->run_cleaner_shader = false;
+                       break;
+               }
        }
        p->gang_leader = p->jobs[p->gang_leader_idx];
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 8b43f350447a9..700304bbe3982 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2115,8 +2115,26 @@ static int amdgpu_device_check_arguments(struct 
amdgpu_device *adev)
 
        adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, 
amdgpu_fw_load_type);
 
-       for (i = 0; i < MAX_XCP; i++)
-               adev->enforce_isolation[i] = !!enforce_isolation;
+       for (i = 0; i < MAX_XCP; i++) {
+               switch (amdgpu_enforce_isolation) {
+               case -1:
+               case 0:
+               default:
+                       /* disable */
+                       adev->enforce_isolation[i] = 
AMDGPU_ENFORCE_ISOLATION_DISABLE;
+                       break;
+               case 1:
+                       /* enable */
+                       adev->enforce_isolation[i] =
+                               AMDGPU_ENFORCE_ISOLATION_ENABLE;
+                       break;
+               case 2:
+                       /* enable legacy mode */
+                       adev->enforce_isolation[i] =
+                               AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
+                       break;
+               }
+       }
 
        return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index d7b27b7a0d519..5132003d85a29 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -179,7 +179,7 @@ uint amdgpu_pg_mask = 0xffffffff;
 uint amdgpu_sdma_phase_quantum = 32;
 char *amdgpu_disable_cu;
 char *amdgpu_virtual_display;
-bool enforce_isolation;
+int amdgpu_enforce_isolation = -1;
 int amdgpu_modeset = -1;
 
 /* Specifies the default granularity for SVM, used in buffer
@@ -1038,11 +1038,13 @@ module_param_named(user_partt_mode, 
amdgpu_user_partt_mode, uint, 0444);
 
 
 /**
- * DOC: enforce_isolation (bool)
- * enforce process isolation between graphics and compute via using the same 
reserved vmid.
+ * DOC: enforce_isolation (int)
+ * enforce process isolation between graphics and compute.
+ * (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode)
  */
-module_param(enforce_isolation, bool, 0444);
-MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between 
graphics and compute . enforce_isolation = on");
+module_param_named(enforce_isolation, amdgpu_enforce_isolation, int, 0444);
+MODULE_PARM_DESC(enforce_isolation,
+"enforce process isolation between graphics and compute. (-1 = auto, 0 = 
disable, 1 = enable, 2 = enable legacy mode)");
 
 /**
  * DOC: modeset (int)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index ff53401aae5a4..9b4272fbc470c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -1460,6 +1460,8 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct 
amdgpu_ring *ring)
                goto err;
 
        job->enforce_isolation = true;
+       /* always run the cleaner shader */
+       job->run_cleaner_shader = true;
 
        ib = &job->ibs[0];
        for (i = 0; i <= ring->funcs->align_mask; ++i)
@@ -1591,7 +1593,7 @@ static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct 
device *dev,
  * Provides the sysfs read interface to get the current settings of the 
'enforce_isolation'
  * feature for each GPU partition. Reading from the 'enforce_isolation'
  * sysfs file returns the isolation settings for all partitions, where '0'
- * indicates disabled and '1' indicates enabled.
+ * indicates disabled, '1' indicates enabled, and '2' indicates enabled in 
legacy mode.
  *
  * Return: The number of bytes read from the sysfs file.
  */
@@ -1626,9 +1628,10 @@ static ssize_t amdgpu_gfx_get_enforce_isolation(struct 
device *dev,
  * @count: The size of the input data
  *
  * This function allows control over the 'enforce_isolation' feature, which
- * serializes access to the graphics engine. Writing '1' or '0' to the
- * 'enforce_isolation' sysfs file enables or disables process isolation for
- * each partition. The input should specify the setting for all partitions.
+ * serializes access to the graphics engine. Writing '1', '2', or '0' to the
+ * 'enforce_isolation' sysfs file enables (full or legacy) or disables process
+ * isolation for each partition. The input should specify the setting for all
+ * partitions.
  *
  * Return: The number of bytes written to the sysfs file.
  */
@@ -1665,13 +1668,29 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct 
device *dev,
                return -EINVAL;
 
        for (i = 0; i < num_partitions; i++) {
-               if (partition_values[i] != 0 && partition_values[i] != 1)
+               if (partition_values[i] != 0 &&
+                   partition_values[i] != 1 &&
+                   partition_values[i] != 2)
                        return -EINVAL;
        }
 
        mutex_lock(&adev->enforce_isolation_mutex);
-       for (i = 0; i < num_partitions; i++)
-               adev->enforce_isolation[i] = partition_values[i];
+       for (i = 0; i < num_partitions; i++) {
+               switch (partition_values[i]) {
+               case 0:
+               default:
+                       adev->enforce_isolation[i] = 
AMDGPU_ENFORCE_ISOLATION_DISABLE;
+                       break;
+               case 1:
+                       adev->enforce_isolation[i] =
+                               AMDGPU_ENFORCE_ISOLATION_ENABLE;
+                       break;
+               case 2:
+                       adev->enforce_isolation[i] =
+                               AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
+                       break;
+               }
+       }
        mutex_unlock(&adev->enforce_isolation_mutex);
 
        amdgpu_mes_update_enforce_isolation(adev);
@@ -2026,7 +2045,7 @@ amdgpu_gfx_enforce_isolation_wait_for_kfd(struct 
amdgpu_device *adev,
        bool wait = false;
 
        mutex_lock(&adev->enforce_isolation_mutex);
-       if (adev->enforce_isolation[idx]) {
+       if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) {
                /* set the initial values if nothing is set */
                if (!adev->gfx.enforce_isolation_jiffies[idx]) {
                        adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
@@ -2093,7 +2112,7 @@ void amdgpu_gfx_enforce_isolation_ring_begin_use(struct 
amdgpu_ring *ring)
        amdgpu_gfx_enforce_isolation_wait_for_kfd(adev, idx);
 
        mutex_lock(&adev->enforce_isolation_mutex);
-       if (adev->enforce_isolation[idx]) {
+       if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) {
                if (adev->kfd.init_complete)
                        sched_work = true;
        }
@@ -2130,7 +2149,7 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct 
amdgpu_ring *ring)
                return;
 
        mutex_lock(&adev->enforce_isolation_mutex);
-       if (adev->enforce_isolation[idx]) {
+       if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) {
                if (adev->kfd.init_complete)
                        sched_work = true;
        }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 4c4e087230ac5..359c19de9a5b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -588,7 +588,7 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
        }
        /* alloc a default reserved vmid to enforce isolation */
        for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
-               if (adev->enforce_isolation[i])
+               if (adev->enforce_isolation[i] != 
AMDGPU_ENFORCE_ISOLATION_DISABLE)
                        amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i));
        }
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index ce6b9ba967fff..f2c049129661f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -78,6 +78,7 @@ struct amdgpu_job {
 
        /* enforce isolation */
        bool                    enforce_isolation;
+       bool                    run_cleaner_shader;
 
        uint32_t                num_ibs;
        struct amdgpu_ib        ibs[];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 36f2e87161264..38ea64d87a0ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -768,7 +768,7 @@ int amdgpu_mes_update_enforce_isolation(struct 
amdgpu_device *adev)
        if (adev->enable_mes && adev->gfx.enable_cleaner_shader) {
                mutex_lock(&adev->enforce_isolation_mutex);
                for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); 
i++) {
-                       if (adev->enforce_isolation[i])
+                       if (adev->enforce_isolation[i] == 
AMDGPU_ENFORCE_ISOLATION_ENABLE)
                                r |= amdgpu_mes_set_enforce_isolation(adev, i, 
true);
                        else
                                r |= amdgpu_mes_set_enforce_isolation(adev, i, 
false);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b5ddfcbbc9fc9..dadf6715b98be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -676,7 +676,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct 
amdgpu_job *job,
        pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
                ring->funcs->emit_wreg;
 
-       cleaner_shader_needed = adev->gfx.enable_cleaner_shader &&
+       cleaner_shader_needed = job->run_cleaner_shader &&
+               adev->gfx.enable_cleaner_shader &&
                ring->funcs->emit_cleaner_shader && job->base.s_fence &&
                &job->base.s_fence->scheduled == isolation->spearhead;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 344d32268c3cd..f7aa45775eadb 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -724,7 +724,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes 
*mes)
                                        mes->event_log_gpu_addr;
        }
 
-       if (adev->enforce_isolation[0])
+       if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
                mes_set_hw_res_pkt.limit_single_process = 1;
 
        return mes_v11_0_submit_pkt_and_poll_completion(mes,
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
index be43e19b7b7fa..b0e042a4cea19 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -762,7 +762,7 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes 
*mes, int pipe)
                                pipe * (AMDGPU_MES_LOG_BUFFER_SIZE + 
AMDGPU_MES_MSCRATCH_SIZE);
        }
 
-       if (adev->enforce_isolation[0])
+       if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
                mes_set_hw_res_pkt.limit_single_process = 1;
 
        return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index 2893fd5e5d003..fa28c57692b86 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -43,7 +43,7 @@ static int pm_map_process_v9(struct packet_manager *pm,
        memset(buffer, 0, sizeof(struct pm4_mes_map_process));
        packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
                                        sizeof(struct pm4_mes_map_process));
-       if (adev->enforce_isolation[kfd->node_id])
+       if (adev->enforce_isolation[kfd->node_id] == 
AMDGPU_ENFORCE_ISOLATION_ENABLE)
                packet->bitfields2.exec_cleaner_shader = 1;
        packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
        packet->bitfields2.process_quantum = 10;
@@ -102,7 +102,8 @@ static int pm_map_process_aldebaran(struct packet_manager 
*pm,
        memset(buffer, 0, sizeof(struct pm4_mes_map_process_aldebaran));
        packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
                        sizeof(struct pm4_mes_map_process_aldebaran));
-       if (adev->enforce_isolation[knode->node_id])
+       if (adev->enforce_isolation[knode->node_id] ==
+           AMDGPU_ENFORCE_ISOLATION_ENABLE)
                packet->bitfields2.exec_cleaner_shader = 1;
        packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
        packet->bitfields2.process_quantum = 10;
@@ -165,9 +166,9 @@ static int pm_runlist_v9(struct packet_manager *pm, 
uint32_t *buffer,
         * hws_max_conc_proc has been done in
         * kgd2kfd_device_init().
         */
-       concurrent_proc_cnt = adev->enforce_isolation[kfd->node_id] ?
-                       1 : min(pm->dqm->processes_count,
-                       kfd->max_proc_per_quantum);
+       concurrent_proc_cnt = (adev->enforce_isolation[kfd->node_id] ==
+                              AMDGPU_ENFORCE_ISOLATION_ENABLE) ?
+               1 : min(pm->dqm->processes_count, kfd->max_proc_per_quantum);
 
        packet = (struct pm4_mes_runlist *)buffer;
 
-- 
2.49.0

Reply via email to