RE: [PATCH] drm/amdgpu: Drop redundant unsigned >=0 comparison 'amdgpu_gfx_rlc_init_microcode()'

2023-12-21 Thread Zhou1, Tao
[AMD Official Use Only - General]

Reviewed-by: Tao Zhou 

> -Original Message-
> From: amd-gfx  On Behalf Of Srinivasan
> Shanmugam
> Sent: Wednesday, December 20, 2023 10:40 PM
> To: Deucher, Alexander ; Koenig, Christian
> 
> Cc: SHANMUGAM, SRINIVASAN ; amd-
> g...@lists.freedesktop.org
> Subject: [PATCH] drm/amdgpu: Drop redundant unsigned >=0 comparison
> 'amdgpu_gfx_rlc_init_microcode()'
>
> unsigned int "version_minor" is always >= 0
>
> Fixes the below:
> drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c:534
> amdgpu_gfx_rlc_init_microcode() warn: always true condition '(version_minor >=
> 0) => (0-u16max >= 0)'
>
> Cc: Christian König 
> Cc: Alex Deucher 
> Signed-off-by: Srinivasan Shanmugam 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c | 11 +--
>  1 file changed, 5 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
> index 35e0ae9acadc..2c3675d91614 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
> @@ -531,13 +531,12 @@ int amdgpu_gfx_rlc_init_microcode(struct
> amdgpu_device *adev,
>   if (version_major == 2 && version_minor == 1)
>   adev->gfx.rlc.is_rlc_v2_1 = true;
>
> - if (version_minor >= 0) {
> - err = amdgpu_gfx_rlc_init_microcode_v2_0(adev);
> - if (err) {
> - dev_err(adev->dev, "fail to init rlc v2_0 microcode\n");
> - return err;
> - }
> + err = amdgpu_gfx_rlc_init_microcode_v2_0(adev);
> + if (err) {
> + dev_err(adev->dev, "fail to init rlc v2_0 microcode\n");
> + return err;
>   }
> +
>   if (version_minor >= 1)
>   amdgpu_gfx_rlc_init_microcode_v2_1(adev);
>   if (version_minor >= 2)
> --
> 2.34.1

<>

RE: [PATCH Review V3 1/1] drm/amdgpu: Fix ecc irq enable/disable unpaired

2023-12-21 Thread Zhou1, Tao
[AMD Official Use Only - General]

Reviewed-by: Tao Zhou 

> -Original Message-
> From: amd-gfx  On Behalf Of
> Stanley.Yang
> Sent: Thursday, December 21, 2023 2:05 PM
> To: amd-gfx@lists.freedesktop.org; Zhang, Hawking 
> Cc: Yang, Stanley 
> Subject: [PATCH Review V3 1/1] drm/amdgpu: Fix ecc irq enable/disable unpaired
>
> The ecc_irq is disabled during the GPU mode2 reset suspend process, but is not
> re-enabled during the GPU mode2 reset resume process.
>
> Changed from V1:
>   only do sdma/gfx ras_late_init in aldebaran_mode2_restore_ip
>   delete amdgpu_ras_late_resume function
>
> Changed from V2:
>   check umc ras supported before put ecc_irq
>
> Signed-off-by: Stanley.Yang 
> ---
>  drivers/gpu/drm/amd/amdgpu/aldebaran.c | 28 +-
> drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c |  4 
> drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c |  5 +
> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  4 
>  4 files changed, 40 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
> b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
> index 02f4c6f9d4f6..b60a3c1bd0f2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
> +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
> @@ -330,6 +330,7 @@ aldebaran_mode2_restore_hwcontext(struct
> amdgpu_reset_control *reset_ctl,  {
>   struct list_head *reset_device_list = reset_context->reset_device_list;
>   struct amdgpu_device *tmp_adev = NULL;
> + struct amdgpu_ras *con;
>   int r;
>
>   if (reset_device_list == NULL)
> @@ -355,7 +356,32 @@ aldebaran_mode2_restore_hwcontext(struct
> amdgpu_reset_control *reset_ctl,
>*/
>   amdgpu_register_gpu_instance(tmp_adev);
>
> - /* Resume RAS */
> + /* Resume RAS, ecc_irq */
> + con = amdgpu_ras_get_context(tmp_adev);
> + if (!amdgpu_sriov_vf(tmp_adev) && con) {
> + if (tmp_adev->sdma.ras &&
> + amdgpu_ras_is_supported(tmp_adev,
> AMDGPU_RAS_BLOCK__SDMA) &&
> + tmp_adev->sdma.ras->ras_block.ras_late_init) {
> + r = tmp_adev->sdma.ras-
> >ras_block.ras_late_init(tmp_adev,
> + &tmp_adev->sdma.ras-
> >ras_block.ras_comm);
> + if (r) {
> + dev_err(tmp_adev->dev, "SDMA failed
> to execute ras_late_init! ret:%d\n", r);
> + goto end;
> + }
> + }
> +
> + if (tmp_adev->gfx.ras &&
> + amdgpu_ras_is_supported(tmp_adev,
> AMDGPU_RAS_BLOCK__GFX) &&
> + tmp_adev->gfx.ras->ras_block.ras_late_init) {
> + r = tmp_adev->gfx.ras-
> >ras_block.ras_late_init(tmp_adev,
> + &tmp_adev->gfx.ras-
> >ras_block.ras_comm);
> + if (r) {
> + dev_err(tmp_adev->dev, "GFX failed to
> execute ras_late_init! ret:%d\n", r);
> + goto end;
> + }
> + }
> + }
> +
>   amdgpu_ras_resume(tmp_adev);
>
>   /* Update PSP FW topology after reset */ diff --git
> a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 09cbca596bb5..4048539205cb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -1043,6 +1043,10 @@ static int gmc_v10_0_hw_fini(void *handle)
>
>   amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
>
> + if (adev->gmc.ecc_irq.funcs &&
> + amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
> + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
> +
>   return 0;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> index 416f3e4f0438..e1ca5a599971 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> @@ -941,6 +941,11 @@ static int gmc_v11_0_hw_fini(void *handle)
>   }
>
>   amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
> +
> + if (adev->gmc.ecc_irq.funcs &&
> + amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
> + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
> +
>   gmc_v11_0_gart_disable(adev);
>
>   return 0;
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 205db28a9803..f00e5c8c79b0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -2388,6 +2388,10 @@ static int gmc_v9_0_hw_fini(void *handle)
>
>   amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
>
> + if (adev->gmc.ecc_irq.funcs &&
> + amdgpu_ras_is_supported(adev, AMDGPU_RAS_

RE: [PATCH Review V3 1/1] drm/amdgpu: Fix ecc irq enable/disable unpaired

2023-12-21 Thread Zhang, Hawking
[AMD Official Use Only - General]

Feel free to drop the check as below since amdgpu_xxx_ras_late_init applies the 
check for interrupt enablement.
+   amdgpu_ras_is_supported(tmp_adev, 
AMDGPU_RAS_BLOCK__SDMA)
+   amdgpu_ras_is_supported(tmp_adev, 
AMDGPU_RAS_BLOCK__GFX)

Apart from that, the change is

Reviewed-by: Hawking Zhang 

Regards,
Hawking
-Original Message-
From: amd-gfx  On Behalf Of Stanley.Yang
Sent: Thursday, December 21, 2023 14:05
To: amd-gfx@lists.freedesktop.org; Zhang, Hawking 
Cc: Yang, Stanley 
Subject: [PATCH Review V3 1/1] drm/amdgpu: Fix ecc irq enable/disable unpaired

The ecc_irq is disabled during the GPU mode2 reset suspend process, but is not 
re-enabled during the GPU mode2 reset resume process.

Changed from V1:
only do sdma/gfx ras_late_init in aldebaran_mode2_restore_ip
delete amdgpu_ras_late_resume function

Changed from V2:
check umc ras supported before put ecc_irq

Signed-off-by: Stanley.Yang 
---
 drivers/gpu/drm/amd/amdgpu/aldebaran.c | 28 +-  
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c |  4   
drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c |  5 +  
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  4 
 4 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c 
b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
index 02f4c6f9d4f6..b60a3c1bd0f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
@@ -330,6 +330,7 @@ aldebaran_mode2_restore_hwcontext(struct 
amdgpu_reset_control *reset_ctl,  {
struct list_head *reset_device_list = reset_context->reset_device_list;
struct amdgpu_device *tmp_adev = NULL;
+   struct amdgpu_ras *con;
int r;

if (reset_device_list == NULL)
@@ -355,7 +356,32 @@ aldebaran_mode2_restore_hwcontext(struct 
amdgpu_reset_control *reset_ctl,
 */
amdgpu_register_gpu_instance(tmp_adev);

-   /* Resume RAS */
+   /* Resume RAS, ecc_irq */
+   con = amdgpu_ras_get_context(tmp_adev);
+   if (!amdgpu_sriov_vf(tmp_adev) && con) {
+   if (tmp_adev->sdma.ras &&
+   amdgpu_ras_is_supported(tmp_adev, 
AMDGPU_RAS_BLOCK__SDMA) &&
+   tmp_adev->sdma.ras->ras_block.ras_late_init) {
+   r = 
tmp_adev->sdma.ras->ras_block.ras_late_init(tmp_adev,
+   
&tmp_adev->sdma.ras->ras_block.ras_comm);
+   if (r) {
+   dev_err(tmp_adev->dev, "SDMA failed to 
execute ras_late_init! ret:%d\n", r);
+   goto end;
+   }
+   }
+
+   if (tmp_adev->gfx.ras &&
+   amdgpu_ras_is_supported(tmp_adev, 
AMDGPU_RAS_BLOCK__GFX) &&
+   tmp_adev->gfx.ras->ras_block.ras_late_init) {
+   r = 
tmp_adev->gfx.ras->ras_block.ras_late_init(tmp_adev,
+   
&tmp_adev->gfx.ras->ras_block.ras_comm);
+   if (r) {
+   dev_err(tmp_adev->dev, "GFX failed to 
execute ras_late_init! ret:%d\n", r);
+   goto end;
+   }
+   }
+   }
+
amdgpu_ras_resume(tmp_adev);

/* Update PSP FW topology after reset */ diff --git 
a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 09cbca596bb5..4048539205cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -1043,6 +1043,10 @@ static int gmc_v10_0_hw_fini(void *handle)

amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);

+   if (adev->gmc.ecc_irq.funcs &&
+   amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+   amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+
return 0;
 }

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 416f3e4f0438..e1ca5a599971 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -941,6 +941,11 @@ static int gmc_v11_0_hw_fini(void *handle)
}

amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+
+   if (adev->gmc.ecc_irq.funcs &&
+   amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+   amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+
gmc_v11_0_gart_disable(adev);

return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 205db28a9803..f00e5c8c79b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amd

[PATCH v2 0/1] Implementation of resource_query_layout

2023-12-21 Thread Julia Zhang
Hi all,

Sorry for the late reply. This is v2 of the implementation of
resource_query_layout. This adds a new ioctl to let guest query information
of a host resource; the original implementation is from Daniel Stone. We add some
changes to support querying the correct stride of a host resource before it is
created, in order to support blitting data from the dGPU to the virtio iGPU for
the dGPU prime feature. 

Changes from v1 to v2:
-Squash two patches to a single patch. 
-A small modification of VIRTIO_GPU_F_RESOURCE_QUERY_LAYOUT


Below is description of v1:
This add implementation of resource_query_layout to get the information of
how the host has actually allocated the buffer. This function is now used
to query the stride for guest linear resource for dGPU prime on guest VMs.

v1 of kernel side:
 https:
//lore.kernel.org/xen-devel/20231110074027.24862-1-julia.zh...@amd.com/T/#t

v1 of qemu side:
https:
//lore.kernel.org/qemu-devel/20231110074027.24862-1-julia.zh...@amd.com/T/#t

Daniel Stone (1):
  drm/virtio: Implement RESOURCE_GET_LAYOUT ioctl

 drivers/gpu/drm/virtio/virtgpu_drv.c   |  1 +
 drivers/gpu/drm/virtio/virtgpu_drv.h   | 22 -
 drivers/gpu/drm/virtio/virtgpu_ioctl.c | 66 ++
 drivers/gpu/drm/virtio/virtgpu_kms.c   |  8 +++-
 drivers/gpu/drm/virtio/virtgpu_vq.c| 63 
 include/uapi/drm/virtgpu_drm.h | 21 
 include/uapi/linux/virtio_gpu.h| 30 
 7 files changed, 208 insertions(+), 3 deletions(-)

-- 
2.34.1



[PATCH 1/1] drm/virtio: Implement RESOURCE_GET_LAYOUT ioctl

2023-12-21 Thread Julia Zhang
From: Daniel Stone 

Add a new ioctl to allow the guest VM to discover how the host
actually allocated the underlying buffer, which allows buffers to
be used for GL<->Vulkan interop and through standard window systems.
It's also a step towards properly supporting modifiers in the guest.

Signed-off-by: Daniel Stone 
Co-developed-by: Julia Zhang  # support query
stride before it's created
Signed-off-by: Julia Zhang 
---
 drivers/gpu/drm/virtio/virtgpu_drv.c   |  1 +
 drivers/gpu/drm/virtio/virtgpu_drv.h   | 22 -
 drivers/gpu/drm/virtio/virtgpu_ioctl.c | 66 ++
 drivers/gpu/drm/virtio/virtgpu_kms.c   |  8 +++-
 drivers/gpu/drm/virtio/virtgpu_vq.c| 63 
 include/uapi/drm/virtgpu_drm.h | 21 
 include/uapi/linux/virtio_gpu.h| 30 
 7 files changed, 208 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c 
b/drivers/gpu/drm/virtio/virtgpu_drv.c
index 4334c7608408..98061b714b98 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -148,6 +148,7 @@ static unsigned int features[] = {
VIRTIO_GPU_F_RESOURCE_UUID,
VIRTIO_GPU_F_RESOURCE_BLOB,
VIRTIO_GPU_F_CONTEXT_INIT,
+   VIRTIO_GPU_F_RESOURCE_QUERY_LAYOUT,
 };
 static struct virtio_driver virtio_gpu_driver = {
.feature_table = features,
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h 
b/drivers/gpu/drm/virtio/virtgpu_drv.h
index 96365a772f77..bb5edcfeda54 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -214,6 +214,16 @@ struct virtio_gpu_drv_cap_cache {
atomic_t is_valid;
 };
 
+struct virtio_gpu_query_info {
+   uint32_t num_planes;
+   uint64_t modifier;
+   struct {
+   uint64_t offset;
+   uint32_t stride;
+   } planes[VIRTIO_GPU_MAX_RESOURCE_PLANES];
+   atomic_t is_valid;
+};
+
 struct virtio_gpu_device {
struct drm_device *ddev;
 
@@ -246,6 +256,7 @@ struct virtio_gpu_device {
bool has_resource_blob;
bool has_host_visible;
bool has_context_init;
+   bool has_resource_query_layout;
struct virtio_shm_region host_visible_region;
struct drm_mm host_visible_mm;
 
@@ -277,7 +288,7 @@ struct virtio_gpu_fpriv {
 };
 
 /* virtgpu_ioctl.c */
-#define DRM_VIRTIO_NUM_IOCTLS 12
+#define DRM_VIRTIO_NUM_IOCTLS 13
 extern struct drm_ioctl_desc virtio_gpu_ioctls[DRM_VIRTIO_NUM_IOCTLS];
 void virtio_gpu_create_context(struct drm_device *dev, struct drm_file *file);
 
@@ -420,6 +431,15 @@ virtio_gpu_cmd_set_scanout_blob(struct virtio_gpu_device 
*vgdev,
uint32_t width, uint32_t height,
uint32_t x, uint32_t y);
 
+int
+virtio_gpu_cmd_get_resource_layout(struct virtio_gpu_device *vgdev,
+  struct virtio_gpu_query_info *bo_info,
+  uint32_t width,
+  uint32_t height,
+  uint32_t format,
+  uint32_t bind,
+  uint32_t hw_res_handle);
+
 /* virtgpu_display.c */
 int virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev);
 void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev);
diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c 
b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index b24b11f25197..216c04314177 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -107,6 +107,9 @@ static int virtio_gpu_getparam_ioctl(struct drm_device 
*dev, void *data,
case VIRTGPU_PARAM_SUPPORTED_CAPSET_IDs:
value = vgdev->capset_id_mask;
break;
+   case VIRTGPU_PARAM_RESOURCE_QUERY_LAYOUT:
+   value = vgdev->has_resource_query_layout ? 1 : 0;
+   break;
default:
return -EINVAL;
}
@@ -668,6 +671,65 @@ static int virtio_gpu_context_init_ioctl(struct drm_device 
*dev,
return ret;
 }
 
+static int virtio_gpu_resource_query_layout_ioctl(struct drm_device *dev,
+ void *data,
+ struct drm_file *file)
+{
+   struct drm_virtgpu_resource_query_layout *args = data;
+   struct virtio_gpu_device *vgdev = dev->dev_private;
+   struct drm_gem_object *obj = NULL;
+   struct virtio_gpu_object *bo = NULL;
+   struct virtio_gpu_query_info bo_info = {0};
+   int ret = 0;
+   int i;
+
+   if (!vgdev->has_resource_query_layout) {
+   DRM_ERROR("failing: no RQL on host\n");
+   return -EINVAL;
+   }
+
+   if (args->handle > 0) {
+   obj = drm_gem_object_lookup(file, args->handle);
+   if (obj == NULL) {
+   DRM_ERROR("invalid handle 0x%x\n", args->handle);
+   

[PATCH v2] drm/amdgpu/gfx11: need acquire mutex before access CP_VMID_RESET v2

2023-12-21 Thread Jack Xiao
It's required to take the gfx mutex before access to CP_VMID_RESET,
for there is a race condition with CP firmware to write the register.

v2: add extra code to ensure the mutex releasing is successful.

Signed-off-by: Jack Xiao 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 48 +-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index bdcf96df69e6..44f5b3135931 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -4474,11 +4474,43 @@ static int gfx_v11_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
 }
 
+static int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev,
+int req)
+{
+   u32 i, tmp, val;
+
+   for (i = 0; i < adev->usec_timeout; i++) {
+   /* Request with MeId=2, PipeId=0 */
+   tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req);
+   tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
+   WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);
+
+   val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX);
+   if (req) {
+   if (val == tmp)
+   break;
+   } else {
+   tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX,
+   REQUEST, 1);
+
+   /* unlocked or locked by firmware */
+   if (val != tmp)
+   break;
+   }
+   udelay(1);
+   }
+
+   if (i >= adev->usec_timeout)
+   return -EINVAL;
+
+   return 0;
+}
+
 static int gfx_v11_0_soft_reset(void *handle)
 {
u32 grbm_soft_reset = 0;
u32 tmp;
-   int i, j, k;
+   int r, i, j, k;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
@@ -4518,6 +4550,13 @@ static int gfx_v11_0_soft_reset(void *handle)
}
}
 
+   /* Try to acquire the gfx mutex before access to CP_VMID_RESET */
+   r = gfx_v11_0_request_gfx_index_mutex(adev, 1);
+   if (r) {
+   printk("Failed to acquire the gfx mutex during soft reset\n");
+   return r;
+   }
+
WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffe);
 
// Read CP_VMID_RESET register three times.
@@ -4526,6 +4565,13 @@ static int gfx_v11_0_soft_reset(void *handle)
RREG32_SOC15(GC, 0, regCP_VMID_RESET);
RREG32_SOC15(GC, 0, regCP_VMID_RESET);
 
+   /* release the gfx mutex */
+   r = gfx_v11_0_request_gfx_index_mutex(adev, 0);
+   if (r) {
+   printk("Failed to release the gfx mutex during soft reset\n");
+   return r;
+   }
+
for (i = 0; i < adev->usec_timeout; i++) {
if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
!RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
-- 
2.41.0



Re: [PATCH v3 1/2] drm/buddy: Implement tracking clear page feature

2023-12-21 Thread Arunpravin Paneer Selvam

Hi Matthew,

On 12/21/2023 12:51 AM, Matthew Auld wrote:

Hi,

On 14/12/2023 13:42, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the other hand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
   but fallback to uncleared if we can't find the cleared blocks.
   when driver requests uncleared memory we try to use uncleared but
   fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as 
cleared,

   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.


I was not involved, but it looks like we have also tried enabling the 
clear-on-free idea for VRAM in i915 and then also tracking that in the 
allocator, however that work unfortunately is not upstream. The code 
is open source though: 
https://github.com/intel-gpu/intel-gpu-i915-backports/blob/backport/main/drivers/gpu/drm/i915/i915_buddy.c#L300


It looks like some of the design differences there are having two 
separate free lists, so mm->clean and mm->dirty (sounds reasonable to 
me). And also the inclusion of a de-fragmentation routine, since buddy 
blocks are now not always merged back, we might choose to run the 
defrag in some cases, which also sounds reasonable. IIRC in amdgpu 
userspace can control the page-size for an allocation, so perhaps you 
would want to run it first if the allocation fails, before trying to 
evict stuff?

Thanks, I will check the code.

Regards,
Arun.




v1: (Christian)
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the list.

   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)

Signed-off-by: Arunpravin Paneer Selvam 


Suggested-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 169 +++---
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c    |  10 +-
  include/drm/drm_buddy.h   |  18 +-
  5 files changed, 168 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 08916538a615..d0e199cc8f17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -556,7 +556,7 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  return 0;
    error_free_blocks:
-    drm_buddy_free_list(mm, &vres->blocks);
+    drm_buddy_free_list(mm, &vres->blocks, 0);
  mutex_unlock(&mgr->lock);
  error_fini:
  ttm_resource_fini(man, &vres->base);
@@ -589,7 +589,7 @@ static void amdgpu_vram_mgr_del(struct 
ttm_resource_manager *man,

    amdgpu_vram_mgr_do_reserve(man);
  -    drm_buddy_free_list(mm, &vres->blocks);
+    drm_buddy_free_list(mm, &vres->blocks, 0);
  mutex_unlock(&mgr->lock);
    atomic64_sub(vis_usage, &mgr->vis_usage);
@@ -897,7 +897,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device 
*adev)

  kfree(rsv);
    list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, 
blocks) {

-    drm_buddy_free_list(&mgr->mm, &rsv->allocated);
+    drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
  kfree(rsv);
  }
  if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..d44172f23f05 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
  __list_add(&block->link, node->link.prev, &node->link);
  }
  +static void clear_reset(struct drm_buddy_block *block)
+{
+    block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+    block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
  static void mark_allocated(struct drm_buddy_block *block)
  {
  block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
  mark_free(mm, block->left);
  mark_free(mm, block->right);
  +    if (drm_buddy_block_is_clear(block)) {
+    mark_cleared(block->left);
+    mark_cleared(block->right);
+    clear_reset(block);
+    }
+
  mark_split(block);
    return 0;
@@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
  if (!drm_buddy_block_is_free(buddy))
  break;
  +    if (drm_buddy_block_is_clear(block) !=
+    drm_buddy_block_is_clear(buddy))
+    break;
+
+    if (drm_buddy_block_is_clear(block))
+    mark_cleared(parent);
+
  list_del(&buddy->li

[PATCH] drm/amdgpu: Release 'adev->pm.fw' before return in 'amdgpu_device_need_post()'

2023-12-21 Thread Srinivasan Shanmugam
In function 'amdgpu_device_need_post(struct amdgpu_device *adev)' -
'adev->pm.fw' may not be released before return.

Using the function release_firmware() to release adev->pm.fw.

Thus fixing the below:
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:1571 amdgpu_device_need_post() warn: 
'adev->pm.fw' from request_firmware() not released on lines: 1554.

Cc: Monk Liu 
Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Srinivasan Shanmugam 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 4b694696930e..669e6261b707 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1544,6 +1544,9 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
return true;
 
fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
+
+   release_firmware(adev->pm.fw);
+
if (fw_ver < 0x00160e00)
return true;
}
-- 
2.34.1



Re: [PATCH] drm/amdkfd: Use logical AND operator (&&) instead of bitwise AND (&) with a bool operand

2023-12-21 Thread Lazar, Lijo

On 12/21/2023 8:12 AM, Srinivasan Shanmugam wrote:

Doing a bitwise AND between a bool and an int is generally not a good
idea.  The bool will be promoted to an int with value 0 or 1, the int is
generally regarded as true with a non-zero value, thus ANDing them using
bitwise has the potential to yield an undesired result.

Thus fixes the below:

drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_packet_manager_v9.c:117 pm_map_process_aldebaran() 
warn: maybe use && instead of &

Cc: Felix Kuehling 
Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Srinivasan Shanmugam 
---
  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index 8ee2bedd301a..da83deee45a7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -114,7 +114,7 @@ static int pm_map_process_aldebaran(struct packet_manager 
*pm,
packet->tcp_watch_cntl[i] = pdd->watch_points[i];
  
  		packet->bitfields2.single_memops =

-   !!(pdd->process->dbg_flags & 
KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP);
+   !!(pdd->process->dbg_flags && 
KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP);


Logical AND of bool with a known constant doesn't make sense. dbg_flags 
looks to be defined in the wrong way; to process multiple debug flag 
options, better to define as u32.


Thanks,
Lijo


}
  
  	packet->sh_mem_config = qpd->sh_mem_config;





Re: [PATCH] drm/amdgpu: Release 'adev->pm.fw' before return in 'amdgpu_device_need_post()'

2023-12-21 Thread SRINIVASAN SHANMUGAM

Sorry, there seems to be a problem in the logic; I will send a v2 on top of this.

On 12/21/2023 6:15 PM, Srinivasan Shanmugam wrote:

In function 'amdgpu_device_need_post(struct amdgpu_device *adev)' -
'adev->pm.fw' may not be released before return.

Using the function release_firmware() to release adev->pm.fw.

Thus fixing the below:
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:1571 amdgpu_device_need_post() warn: 
'adev->pm.fw' from request_firmware() not released on lines: 1554.

Cc: Monk Liu 
Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Srinivasan Shanmugam 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 4b694696930e..669e6261b707 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1544,6 +1544,9 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
return true;
  
  			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);

+
+   release_firmware(adev->pm.fw);
+
if (fw_ver < 0x00160e00)
return true;
}


RE: [PATCH v2] drm/amdgpu/gfx11: need acquire mutex before access CP_VMID_RESET v2

2023-12-21 Thread Zhang, Hawking
[AMD Official Use Only - General]

Better to use dev_err or DRM_ERROR. Apart from that, the patch is

Reviewed-by: Hawking Zhang 

Regards,
Hawking

-Original Message-
From: Xiao, Jack 
Sent: Thursday, December 21, 2023 18:29
To: amd-gfx@lists.freedesktop.org; Deucher, Alexander 
; Zhang, Hawking 
Cc: Xiao, Jack 
Subject: [PATCH v2] drm/amdgpu/gfx11: need acquire mutex before access 
CP_VMID_RESET v2

It's required to take the gfx mutex before access to CP_VMID_RESET, for there 
is a race condition with CP firmware to write the register.

v2: add extra code to ensure the mutex releasing is successful.

Signed-off-by: Jack Xiao 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 48 +-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index bdcf96df69e6..44f5b3135931 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -4474,11 +4474,43 @@ static int gfx_v11_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
 }

+static int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev,
+int req)
+{
+   u32 i, tmp, val;
+
+   for (i = 0; i < adev->usec_timeout; i++) {
+   /* Request with MeId=2, PipeId=0 */
+   tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req);
+   tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
+   WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);
+
+   val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX);
+   if (req) {
+   if (val == tmp)
+   break;
+   } else {
+   tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX,
+   REQUEST, 1);
+
+   /* unlocked or locked by firmware */
+   if (val != tmp)
+   break;
+   }
+   udelay(1);
+   }
+
+   if (i >= adev->usec_timeout)
+   return -EINVAL;
+
+   return 0;
+}
+
 static int gfx_v11_0_soft_reset(void *handle)  {
u32 grbm_soft_reset = 0;
u32 tmp;
-   int i, j, k;
+   int r, i, j, k;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;

tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); @@ -4518,6 +4550,13 @@ 
static int gfx_v11_0_soft_reset(void *handle)
}
}

+   /* Try to acquire the gfx mutex before access to CP_VMID_RESET */
+   r = gfx_v11_0_request_gfx_index_mutex(adev, 1);
+   if (r) {
+   printk("Failed to acquire the gfx mutex during soft reset\n");
+   return r;
+   }
+
WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffe);

// Read CP_VMID_RESET register three times.
@@ -4526,6 +4565,13 @@ static int gfx_v11_0_soft_reset(void *handle)
RREG32_SOC15(GC, 0, regCP_VMID_RESET);
RREG32_SOC15(GC, 0, regCP_VMID_RESET);

+   /* release the gfx mutex */
+   r = gfx_v11_0_request_gfx_index_mutex(adev, 0);
+   if (r) {
+   printk("Failed to release the gfx mutex during soft reset\n");
+   return r;
+   }
+
for (i = 0; i < adev->usec_timeout; i++) {
if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
!RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
--
2.41.0



[PATCH v2] drm/amdgpu: Release 'adev->pm.fw' before return in 'amdgpu_device_need_post()'

2023-12-21 Thread Srinivasan Shanmugam
In function 'amdgpu_device_need_post(struct amdgpu_device *adev)' -
'adev->pm.fw' may not be released before return.

Using the function release_firmware() to release adev->pm.fw.

Thus fixing the below:
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:1571 amdgpu_device_need_post() warn: 
'adev->pm.fw' from request_firmware() not released on lines: 1554.

Cc: Monk Liu 
Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Srinivasan Shanmugam 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 4b694696930e..f2c3d7ebb218 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1541,11 +1541,11 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
err = request_firmware(&adev->pm.fw, 
"amdgpu/fiji_smc.bin", adev->dev);
/* force vPost if error occured */
if (err)
-   return true;
+   goto fw_release;
 
fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
if (fw_ver < 0x00160e00)
-   return true;
+   goto fw_release;
}
}
 
@@ -1568,6 +1568,8 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
if ((reg != 0) && (reg != 0x))
return false;
 
+fw_release:
+   release_firmware(adev->pm.fw);
return true;
 }
 
-- 
2.34.1



[PATCH v2] drm/amd/display: Address function parameter 'context' not described in 'dc_state_rem_all_planes_for_stream' & 'populate_subvp_cmd_drr_info'

2023-12-21 Thread Srinivasan Shanmugam
Fixes the following gcc with W=1:

drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_state.c:524: warning: Function 
parameter or member 'state' not described in 
'dc_state_rem_all_planes_for_stream'
drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_state.c:524: warning: Excess 
function parameter 'context' description in 'dc_state_rem_all_planes_for_stream'
drivers/gpu/drm/amd/amdgpu/../display/dc/dc_dmub_srv.c:540: warning: Function 
parameter or member 'context' not described in 'populate_subvp_cmd_drr_info'

Cc: Dillon Varone 
Cc: Jun Lei 
Cc: Hamza Mahfooz 
Cc: Aurabindo Pillai 
Cc: Rodrigo Siqueira 
Cc: Alex Deucher 
Cc: Srinath Rao 
Signed-off-by: Srinivasan Shanmugam 
---
 drivers/gpu/drm/amd/display/dc/core/dc_state.c | 2 +-
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c   | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
index dd52cab7ecdf..460a8010c79f 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
@@ -511,7 +511,7 @@ bool dc_state_remove_plane(
  *
  * @dc: Current dc state.
  * @stream: Target stream, which we want to remove the attached plans.
- * @context: New context.
+ * @state: context from which the planes are to be removed.
  *
  * Return:
  * Return true if DC was able to remove all planes from the target
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c 
b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 1d315f7cdce3..a59b982e99bf 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -519,10 +519,11 @@ void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc 
*dc, struct pipe_ctx *pi
 /**
  * populate_subvp_cmd_drr_info - Helper to populate DRR pipe info for the 
DMCUB subvp command
  *
- * @dc: [in] current dc state
+ * @dc: [in] pointer to dc object
  * @subvp_pipe: [in] pipe_ctx for the SubVP pipe
  * @vblank_pipe: [in] pipe_ctx for the DRR pipe
  * @pipe_data: [in] Pipe data which stores the VBLANK/DRR info
+ * @context: [in] DC state for access to phantom stream
  *
  * Populate the DMCUB SubVP command with DRR pipe info. All the information
  * required for calculating the SubVP + DRR microschedule is populated here.
-- 
2.34.1



Re: [PATCH v2] drm/amd/display: Address function parameter 'context' not described in 'dc_state_rem_all_planes_for_stream' & 'populate_subvp_cmd_drr_info'

2023-12-21 Thread Rodrigo Siqueira Jordao




On 12/21/23 09:28, Srinivasan Shanmugam wrote:

Fixes the following gcc with W=1:

drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_state.c:524: warning: Function 
parameter or member 'state' not described in 
'dc_state_rem_all_planes_for_stream'
drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_state.c:524: warning: Excess 
function parameter 'context' description in 'dc_state_rem_all_planes_for_stream'
drivers/gpu/drm/amd/amdgpu/../display/dc/dc_dmub_srv.c:540: warning: Function 
parameter or member 'context' not described in 'populate_subvp_cmd_drr_info'

Cc: Dillon Varone 
Cc: Jun Lei 
Cc: Hamza Mahfooz 
Cc: Aurabindo Pillai 
Cc: Rodrigo Siqueira 
Cc: Alex Deucher 
Cc: Srinath Rao 
Signed-off-by: Srinivasan Shanmugam 
---
  drivers/gpu/drm/amd/display/dc/core/dc_state.c | 2 +-
  drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c   | 3 ++-
  2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
index dd52cab7ecdf..460a8010c79f 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
@@ -511,7 +511,7 @@ bool dc_state_remove_plane(
   *
   * @dc: Current dc state.
   * @stream: Target stream, which we want to remove the attached plans.
- * @context: New context.
+ * @state: context from which the planes are to be removed.
   *
   * Return:
   * Return true if DC was able to remove all planes from the target
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c 
b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 1d315f7cdce3..a59b982e99bf 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -519,10 +519,11 @@ void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc 
*dc, struct pipe_ctx *pi
  /**
   * populate_subvp_cmd_drr_info - Helper to populate DRR pipe info for the 
DMCUB subvp command
   *
- * @dc: [in] current dc state
+ * @dc: [in] pointer to dc object
   * @subvp_pipe: [in] pipe_ctx for the SubVP pipe
   * @vblank_pipe: [in] pipe_ctx for the DRR pipe
   * @pipe_data: [in] Pipe data which stores the VBLANK/DRR info
+ * @context: [in] DC state for access to phantom stream
   *
   * Populate the DMCUB SubVP command with DRR pipe info. All the information
   * required for calculating the SubVP + DRR microschedule is populated here.


Reviewed-by: Rodrigo Siqueira 



Re: [PATCH] drm/amd/display: Adjust kdoc for 'dcn35_hw_block_power_down' & 'dcn35_hw_block_power_up'

2023-12-21 Thread Rodrigo Siqueira Jordao




On 12/19/23 00:37, Srinivasan Shanmugam wrote:

Fixes the following gcc with W=1:

drivers/gpu/drm/amd/amdgpu/../display/dc/hwss/dcn35/dcn35_hwseq.c:1124: 
warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer 
Documentation/doc-guide/kernel-doc.rst

Cc: Charlene Liu 
Cc: Muhammad Ahmed 
Cc: Hamza Mahfooz 
Cc: Rodrigo Siqueira 
Cc: Aurabindo Pillai 
Cc: Alex Deucher 
Cc: Srinath Rao 
Signed-off-by: Srinivasan Shanmugam 
---
  .../amd/display/dc/hwss/dcn35/dcn35_hwseq.c   | 66 +++
  1 file changed, 39 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index ad710b4036de..782c26faf276 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -1120,21 +1120,27 @@ void dcn35_calc_blocks_to_ungate(struct dc *dc, struct 
dc_state *context,
update_state->pg_res_update[PG_HPO] = true;
  
  }

+
  /**
-* power down sequence
-* ONO Region 3, DCPG 25: hpo - SKIPPED
-* ONO Region 4, DCPG 0: dchubp0, dpp0
-* ONO Region 6, DCPG 1: dchubp1, dpp1
-* ONO Region 8, DCPG 2: dchubp2, dpp2
-* ONO Region 10, DCPG 3: dchubp3, dpp3
-* ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW 
will pwr dwn at IPS2 entry
-* ONO Region 5, DCPG 16: dsc0
-* ONO Region 7, DCPG 17: dsc1
-* ONO Region 9, DCPG 18: dsc2
-* ONO Region 11, DCPG 19: dsc3
-* ONO Region 2, DCPG 24: mpc opp optc dwb
-* ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED. will be pwr dwn 
after lono timer is armed
-*/
+ * dcn35_hw_block_power_down() - power down sequence


Maybe add a simple explanation phrase like this:

  The following sequence describes the ON-OFF (ONO) sequence:

Also, could you create another patch that adds ONO in our display 
glossary at `Documentation/gpu/amdgpu/display/dc-glossary.rst`?



+ * ONO Region 3, DCPG 25: hpo - SKIPPED
+ * ONO Region 4, DCPG 0: dchubp0, dpp0
+ * ONO Region 6, DCPG 1: dchubp1, dpp1
+ * ONO Region 8, DCPG 2: dchubp2, dpp2
+ * ONO Region 10, DCPG 3: dchubp3, dpp3
+ * ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will 
pwr dwn at IPS2 entry
+ * ONO Region 5, DCPG 16: dsc0
+ * ONO Region 7, DCPG 17: dsc1
+ * ONO Region 9, DCPG 18: dsc2
+ * ONO Region 11, DCPG 19: dsc3
+ * ONO Region 2, DCPG 24: mpc opp optc dwb
+ * ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED. will be pwr dwn after 
lono timer is armed
+ *
+ * @dc: Current DC state
+ * @update_state: update PG sequence states for HW block
+ *
+ * Return: void.


I don't think you need to document return void.

Thanks
Siqueira


+ */
  void dcn35_hw_block_power_down(struct dc *dc,
struct pg_block_update *update_state)
  {
@@ -1172,20 +1178,26 @@ void dcn35_hw_block_power_down(struct dc *dc,
//domain22, 23, 25 currently always on.
  
  }

+
  /**
-* power up sequence
-* ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED
-* ONO Region 2, DCPG 24: mpc opp optc dwb
-* ONO Region 5, DCPG 16: dsc0
-* ONO Region 7, DCPG 17: dsc1
-* ONO Region 9, DCPG 18: dsc2
-* ONO Region 11, DCPG 19: dsc3
-* ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW 
will power up at IPS2 exit
-* ONO Region 4, DCPG 0: dchubp0, dpp0
-* ONO Region 6, DCPG 1: dchubp1, dpp1
-* ONO Region 8, DCPG 2: dchubp2, dpp2
-* ONO Region 10, DCPG 3: dchubp3, dpp3
-* ONO Region 3, DCPG 25: hpo - SKIPPED
+ * dcn35_hw_block_power_up - power up sequence
+ * ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED
+ * ONO Region 2, DCPG 24: mpc opp optc dwb
+ * ONO Region 5, DCPG 16: dsc0
+ * ONO Region 7, DCPG 17: dsc1
+ * ONO Region 9, DCPG 18: dsc2
+ * ONO Region 11, DCPG 19: dsc3
+ * ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will 
power up at IPS2 exit
+ * ONO Region 4, DCPG 0: dchubp0, dpp0
+ * ONO Region 6, DCPG 1: dchubp1, dpp1
+ * ONO Region 8, DCPG 2: dchubp2, dpp2
+ * ONO Region 10, DCPG 3: dchubp3, dpp3
+ * ONO Region 3, DCPG 25: hpo - SKIPPED
+ *
+ * @dc: Current DC state
+ * @update_state: update PG sequence states for HW block
+ *
+ * Return: void.
   */
  void dcn35_hw_block_power_up(struct dc *dc,
struct pg_block_update *update_state)




Re: [PATCH v2] drm/amd/display: Address function parameter 'context' not described in 'dc_state_rem_all_planes_for_stream' & 'populate_subvp_cmd_drr_info'

2023-12-21 Thread Aurabindo Pillai




On 2023-12-21 11:28, Srinivasan Shanmugam wrote:

Fixes the following gcc with W=1:

drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_state.c:524: warning: Function 
parameter or member 'state' not described in 
'dc_state_rem_all_planes_for_stream'
drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_state.c:524: warning: Excess 
function parameter 'context' description in 'dc_state_rem_all_planes_for_stream'
drivers/gpu/drm/amd/amdgpu/../display/dc/dc_dmub_srv.c:540: warning: Function 
parameter or member 'context' not described in 'populate_subvp_cmd_drr_info'

Cc: Dillon Varone 
Cc: Jun Lei 
Cc: Hamza Mahfooz 
Cc: Aurabindo Pillai 
Cc: Rodrigo Siqueira 
Cc: Alex Deucher 
Cc: Srinath Rao 
Signed-off-by: Srinivasan Shanmugam 
---
  drivers/gpu/drm/amd/display/dc/core/dc_state.c | 2 +-
  drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c   | 3 ++-
  2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
index dd52cab7ecdf..460a8010c79f 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
@@ -511,7 +511,7 @@ bool dc_state_remove_plane(
   *
   * @dc: Current dc state.
   * @stream: Target stream, which we want to remove the attached plans.
- * @context: New context.
+ * @state: context from which the planes are to be removed.
   *
   * Return:
   * Return true if DC was able to remove all planes from the target
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c 
b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 1d315f7cdce3..a59b982e99bf 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -519,10 +519,11 @@ void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc 
*dc, struct pipe_ctx *pi
  /**
   * populate_subvp_cmd_drr_info - Helper to populate DRR pipe info for the 
DMCUB subvp command
   *
- * @dc: [in] current dc state
+ * @dc: [in] pointer to dc object
   * @subvp_pipe: [in] pipe_ctx for the SubVP pipe
   * @vblank_pipe: [in] pipe_ctx for the DRR pipe
   * @pipe_data: [in] Pipe data which stores the VBLANK/DRR info
+ * @context: [in] DC state for access to phantom stream
   *
   * Populate the DMCUB SubVP command with DRR pipe info. All the information
   * required for calculating the SubVP + DRR microschedule is populated here.


Reviewed-by: Aurabindo Pillai 


[PATCH v2] drm/amd/display: Adjust kdoc for 'dcn35_hw_block_power_down' & 'dcn35_hw_block_power_up'

2023-12-21 Thread Srinivasan Shanmugam
Fixes the following gcc with W=1:

drivers/gpu/drm/amd/amdgpu/../display/dc/hwss/dcn35/dcn35_hwseq.c:1124: 
warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer 
Documentation/doc-guide/kernel-doc.rst

Cc: Charlene Liu 
Cc: Muhammad Ahmed 
Cc: Hamza Mahfooz 
Cc: Rodrigo Siqueira 
Cc: Aurabindo Pillai 
Cc: Alex Deucher 
Cc: Srinath Rao 
Signed-off-by: Srinivasan Shanmugam 
---

v2:
 - Added explanation for power down & power up sequence (Rodrigo)
 - Removed documenting return void. (Rodrigo)
 
 .../amd/display/dc/hwss/dcn35/dcn35_hwseq.c   | 68 +++
 1 file changed, 41 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index ad710b4036de..1cb61c46d911 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -1120,21 +1120,28 @@ void dcn35_calc_blocks_to_ungate(struct dc *dc, struct 
dc_state *context,
update_state->pg_res_update[PG_HPO] = true;
 
 }
+
 /**
-* power down sequence
-* ONO Region 3, DCPG 25: hpo - SKIPPED
-* ONO Region 4, DCPG 0: dchubp0, dpp0
-* ONO Region 6, DCPG 1: dchubp1, dpp1
-* ONO Region 8, DCPG 2: dchubp2, dpp2
-* ONO Region 10, DCPG 3: dchubp3, dpp3
-* ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW 
will pwr dwn at IPS2 entry
-* ONO Region 5, DCPG 16: dsc0
-* ONO Region 7, DCPG 17: dsc1
-* ONO Region 9, DCPG 18: dsc2
-* ONO Region 11, DCPG 19: dsc3
-* ONO Region 2, DCPG 24: mpc opp optc dwb
-* ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED. will be pwr dwn 
after lono timer is armed
-*/
+ * dcn35_hw_block_power_down() - power down sequence
+ *
+ * The following sequence describes the ON-OFF (ONO) for power down:
+ *
+ * ONO Region 3, DCPG 25: hpo - SKIPPED
+ * ONO Region 4, DCPG 0: dchubp0, dpp0
+ * ONO Region 6, DCPG 1: dchubp1, dpp1
+ * ONO Region 8, DCPG 2: dchubp2, dpp2
+ * ONO Region 10, DCPG 3: dchubp3, dpp3
+ * ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will 
pwr dwn at IPS2 entry
+ * ONO Region 5, DCPG 16: dsc0
+ * ONO Region 7, DCPG 17: dsc1
+ * ONO Region 9, DCPG 18: dsc2
+ * ONO Region 11, DCPG 19: dsc3
+ * ONO Region 2, DCPG 24: mpc opp optc dwb
+ * ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED. will be pwr dwn after 
lono timer is armed
+ *
+ * @dc: Current DC state
+ * @update_state: update PG sequence states for HW block
+ */
 void dcn35_hw_block_power_down(struct dc *dc,
struct pg_block_update *update_state)
 {
@@ -1172,20 +1179,27 @@ void dcn35_hw_block_power_down(struct dc *dc,
//domain22, 23, 25 currently always on.
 
 }
+
 /**
-* power up sequence
-* ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED
-* ONO Region 2, DCPG 24: mpc opp optc dwb
-* ONO Region 5, DCPG 16: dsc0
-* ONO Region 7, DCPG 17: dsc1
-* ONO Region 9, DCPG 18: dsc2
-* ONO Region 11, DCPG 19: dsc3
-* ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW 
will power up at IPS2 exit
-* ONO Region 4, DCPG 0: dchubp0, dpp0
-* ONO Region 6, DCPG 1: dchubp1, dpp1
-* ONO Region 8, DCPG 2: dchubp2, dpp2
-* ONO Region 10, DCPG 3: dchubp3, dpp3
-* ONO Region 3, DCPG 25: hpo - SKIPPED
+ * dcn35_hw_block_power_up() - power up sequence
+ *
+ * The following sequence describes the ON-OFF (ONO) for power up:
+ *
+ * ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED
+ * ONO Region 2, DCPG 24: mpc opp optc dwb
+ * ONO Region 5, DCPG 16: dsc0
+ * ONO Region 7, DCPG 17: dsc1
+ * ONO Region 9, DCPG 18: dsc2
+ * ONO Region 11, DCPG 19: dsc3
+ * ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will 
power up at IPS2 exit
+ * ONO Region 4, DCPG 0: dchubp0, dpp0
+ * ONO Region 6, DCPG 1: dchubp1, dpp1
+ * ONO Region 8, DCPG 2: dchubp2, dpp2
+ * ONO Region 10, DCPG 3: dchubp3, dpp3
+ * ONO Region 3, DCPG 25: hpo - SKIPPED
+ *
+ * @dc: Current DC state
+ * @update_state: update PG sequence states for HW block
  */
 void dcn35_hw_block_power_up(struct dc *dc,
struct pg_block_update *update_state)
-- 
2.34.1



Re: [PATCH v2] drm/amd/display: Adjust kdoc for 'dcn35_hw_block_power_down' & 'dcn35_hw_block_power_up'

2023-12-21 Thread Rodrigo Siqueira Jordao




On 12/21/23 10:13, Srinivasan Shanmugam wrote:

Fixes the following gcc with W=1:

drivers/gpu/drm/amd/amdgpu/../display/dc/hwss/dcn35/dcn35_hwseq.c:1124: 
warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer 
Documentation/doc-guide/kernel-doc.rst

Cc: Charlene Liu 
Cc: Muhammad Ahmed 
Cc: Hamza Mahfooz 
Cc: Rodrigo Siqueira 
Cc: Aurabindo Pillai 
Cc: Alex Deucher 
Cc: Srinath Rao 
Signed-off-by: Srinivasan Shanmugam 
---

v2:
  - Added explanation for power down & power up sequence (Rodrigo)
  - Removed documenting return void. (Rodrigo)
  
  .../amd/display/dc/hwss/dcn35/dcn35_hwseq.c   | 68 +++

  1 file changed, 41 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index ad710b4036de..1cb61c46d911 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -1120,21 +1120,28 @@ void dcn35_calc_blocks_to_ungate(struct dc *dc, struct 
dc_state *context,
update_state->pg_res_update[PG_HPO] = true;
  
  }

+
  /**
-* power down sequence
-* ONO Region 3, DCPG 25: hpo - SKIPPED
-* ONO Region 4, DCPG 0: dchubp0, dpp0
-* ONO Region 6, DCPG 1: dchubp1, dpp1
-* ONO Region 8, DCPG 2: dchubp2, dpp2
-* ONO Region 10, DCPG 3: dchubp3, dpp3
-* ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW 
will pwr dwn at IPS2 entry
-* ONO Region 5, DCPG 16: dsc0
-* ONO Region 7, DCPG 17: dsc1
-* ONO Region 9, DCPG 18: dsc2
-* ONO Region 11, DCPG 19: dsc3
-* ONO Region 2, DCPG 24: mpc opp optc dwb
-* ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED. will be pwr dwn 
after lono timer is armed
-*/
+ * dcn35_hw_block_power_down() - power down sequence
+ *
+ * The following sequence describes the ON-OFF (ONO) for power down:
+ *
+ * ONO Region 3, DCPG 25: hpo - SKIPPED
+ * ONO Region 4, DCPG 0: dchubp0, dpp0
+ * ONO Region 6, DCPG 1: dchubp1, dpp1
+ * ONO Region 8, DCPG 2: dchubp2, dpp2
+ * ONO Region 10, DCPG 3: dchubp3, dpp3
+ * ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will 
pwr dwn at IPS2 entry
+ * ONO Region 5, DCPG 16: dsc0
+ * ONO Region 7, DCPG 17: dsc1
+ * ONO Region 9, DCPG 18: dsc2
+ * ONO Region 11, DCPG 19: dsc3
+ * ONO Region 2, DCPG 24: mpc opp optc dwb
+ * ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED. will be pwr dwn after 
lono timer is armed
+ *
+ * @dc: Current DC state
+ * @update_state: update PG sequence states for HW block
+ */
  void dcn35_hw_block_power_down(struct dc *dc,
struct pg_block_update *update_state)
  {
@@ -1172,20 +1179,27 @@ void dcn35_hw_block_power_down(struct dc *dc,
//domain22, 23, 25 currently always on.
  
  }

+
  /**
-* power up sequence
-* ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED
-* ONO Region 2, DCPG 24: mpc opp optc dwb
-* ONO Region 5, DCPG 16: dsc0
-* ONO Region 7, DCPG 17: dsc1
-* ONO Region 9, DCPG 18: dsc2
-* ONO Region 11, DCPG 19: dsc3
-* ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW 
will power up at IPS2 exit
-* ONO Region 4, DCPG 0: dchubp0, dpp0
-* ONO Region 6, DCPG 1: dchubp1, dpp1
-* ONO Region 8, DCPG 2: dchubp2, dpp2
-* ONO Region 10, DCPG 3: dchubp3, dpp3
-* ONO Region 3, DCPG 25: hpo - SKIPPED
+ * dcn35_hw_block_power_up() - power up sequence
+ *
+ * The following sequence describes the ON-OFF (ONO) for power up:
+ *
+ * ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED
+ * ONO Region 2, DCPG 24: mpc opp optc dwb
+ * ONO Region 5, DCPG 16: dsc0
+ * ONO Region 7, DCPG 17: dsc1
+ * ONO Region 9, DCPG 18: dsc2
+ * ONO Region 11, DCPG 19: dsc3
+ * ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will 
power up at IPS2 exit
+ * ONO Region 4, DCPG 0: dchubp0, dpp0
+ * ONO Region 6, DCPG 1: dchubp1, dpp1
+ * ONO Region 8, DCPG 2: dchubp2, dpp2
+ * ONO Region 10, DCPG 3: dchubp3, dpp3
+ * ONO Region 3, DCPG 25: hpo - SKIPPED
+ *
+ * @dc: Current DC state
+ * @update_state: update PG sequence states for HW block
   */
  void dcn35_hw_block_power_up(struct dc *dc,
struct pg_block_update *update_state)


Reviewed-by: Rodrigo Siqueira 


[PATCH] drm/amdkfd: Fix type of 'dbg_flags' in 'struct kfd_process'

2023-12-21 Thread Srinivasan Shanmugam
dbg_flags looks to be defined with an incorrect data type; to process
multiple debug flag options, dbg_flags is hence defined as u32.

Fixes the below:

drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_packet_manager_v9.c:117 
pm_map_process_aldebaran() warn: maybe use && instead of &

Suggested-by: Lijo Lazar 
Cc: Felix Kuehling 
Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Srinivasan Shanmugam 
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 45366b4ca976..745024b31340 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -970,7 +970,7 @@ struct kfd_process {
struct work_struct debug_event_workarea;
 
/* Tracks debug per-vmid request for debug flags */
-   bool dbg_flags;
+   u32 dbg_flags;
 
atomic_t poison;
/* Queues are in paused stated because we are in the process of doing a 
CRIU checkpoint */
-- 
2.34.1



Re: [PATCH] drm/amdkfd: Fix type of 'dbg_flags' in 'struct kfd_process'

2023-12-21 Thread Felix Kuehling

On 2023-12-21 12:39, Srinivasan Shanmugam wrote:

dbg_flags looks to be defined with an incorrect data type; to process
multiple debug flag options, dbg_flags is hence defined as u32.

Fixes the below:

drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_packet_manager_v9.c:117 pm_map_process_aldebaran() 
warn: maybe use && instead of &


Please add a Fixes-tag:

Fixes: 0de4ec9a0353 ("drm/amdgpu: prepare map process for multi-process 
debug devices")





Suggested-by: Lijo Lazar 
Cc: Felix Kuehling 
Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Srinivasan Shanmugam 
---
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 45366b4ca976..745024b31340 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -970,7 +970,7 @@ struct kfd_process {
struct work_struct debug_event_workarea;
  
  	/* Tracks debug per-vmid request for debug flags */

-   bool dbg_flags;
+   u32 dbg_flags;


For consistency with the rest of this header file, I'd prefer we use 
uint32_t here. Other than that, the patch is


Reviewed-by: Felix Kuehling 


  
  	atomic_t poison;

/* Queues are in paused stated because we are in the process of doing a 
CRIU checkpoint */


[PATCH 1/2] drm/amd/display: add nv12 bounding box

2023-12-21 Thread Alex Deucher
This was included in gpu_info firmware, move it into the
driver for consistency with other nv1x parts.

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2318
Signed-off-by: Alex Deucher 
---
 .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c  | 110 +-
 1 file changed, 109 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index 1ec8ca1fdb4f..38ab9ad60ef8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -441,7 +441,115 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv14_soc = {
.use_urgent_burst_bw = 0
 };
 
-struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv12_soc = { 0 };
+struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv12_soc = {
+   .clock_limits = {
+   {
+   .state = 0,
+   .dcfclk_mhz = 560.0,
+   .fabricclk_mhz = 560.0,
+   .dispclk_mhz = 513.0,
+   .dppclk_mhz = 513.0,
+   .phyclk_mhz = 540.0,
+   .socclk_mhz = 560.0,
+   .dscclk_mhz = 171.0,
+   .dram_speed_mts = 1069.0,
+   },
+   {
+   .state = 1,
+   .dcfclk_mhz = 694.0,
+   .fabricclk_mhz = 694.0,
+   .dispclk_mhz = 642.0,
+   .dppclk_mhz = 642.0,
+   .phyclk_mhz = 600.0,
+   .socclk_mhz = 694.0,
+   .dscclk_mhz = 214.0,
+   .dram_speed_mts = 1324.0,
+   },
+   {
+   .state = 2,
+   .dcfclk_mhz = 875.0,
+   .fabricclk_mhz = 875.0,
+   .dispclk_mhz = 734.0,
+   .dppclk_mhz = 734.0,
+   .phyclk_mhz = 810.0,
+   .socclk_mhz = 875.0,
+   .dscclk_mhz = 245.0,
+   .dram_speed_mts = 1670.0,
+   },
+   {
+   .state = 3,
+   .dcfclk_mhz = 1000.0,
+   .fabricclk_mhz = 1000.0,
+   .dispclk_mhz = 1100.0,
+   .dppclk_mhz = 1100.0,
+   .phyclk_mhz = 810.0,
+   .socclk_mhz = 1000.0,
+   .dscclk_mhz = 367.0,
+   .dram_speed_mts = 2000.0,
+   },
+   {
+   .state = 4,
+   .dcfclk_mhz = 1200.0,
+   .fabricclk_mhz = 1200.0,
+   .dispclk_mhz = 1284.0,
+   .dppclk_mhz = 1284.0,
+   .phyclk_mhz = 810.0,
+   .socclk_mhz = 1200.0,
+   .dscclk_mhz = 428.0,
+   .dram_speed_mts = 2000.0,
+   },
+   {
+   .state = 5,
+   .dcfclk_mhz = 1200.0,
+   .fabricclk_mhz = 1200.0,
+   .dispclk_mhz = 1284.0,
+   .dppclk_mhz = 1284.0,
+   .phyclk_mhz = 810.0,
+   .socclk_mhz = 1200.0,
+   .dscclk_mhz = 428.0,
+   .dram_speed_mts = 2000.0,
+   },
+   },
+
+   .num_states = 5,
+   .sr_exit_time_us = 1.9,
+   .sr_enter_plus_exit_time_us = 4.4,
+   .urgent_latency_us = 3.0,
+   .urgent_latency_pixel_data_only_us = 4.0,
+   .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+   .urgent_latency_vm_data_only_us = 4.0,
+   .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+   .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+   .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+   .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 40.0,
+   .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 40.0,
+   .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
+   .max_avg_sdp_bw_use_normal_percent = 40.0,
+   .max_avg_dram_bw_use_normal_percent = 40.0,
+   .writeback_latency_us = 12.0,
+   .ideal_dram_bw_after_urgent_percent = 40.0,
+   .max_request_size_bytes = 256,
+   .dram_channel_width_bytes = 16,
+   .fabric_datapath_to_dcn_data_return_bytes = 64,
+   .dcn_downspread_percent = 0.5,
+   .downspread_percent = 0.5,
+   .dram_page_open_time_ns = 50.0,
+   .dram_rw_turnaround_time_ns = 17.5,
+   .dram_return_buffer_per_channel_bytes = 8192,
+   .round_trip_ping_latency_dcfclk_cycles = 131,
+   .urgent_out_of_order_return_per_channel_bytes = 4096,
+   .channel_interleave_bytes = 256,
+   .num_banks = 8,
+   .num_chans = 16,
+   .vmm_pag

[PATCH 2/2] drm/amdgpu: skip gpu_info fw loading on navi12

2023-12-21 Thread Alex Deucher
It's no longer required.

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2318
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++-
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9c1ff893c03c..71e8fe2144b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2251,15 +2251,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct 
amdgpu_device *adev)
 
adev->firmware.gpu_info_fw = NULL;
 
-   if (adev->mman.discovery_bin) {
-   /*
-* FIXME: The bounding box is still needed by Navi12, so
-* temporarily read it from gpu_info firmware. Should be dropped
-* when DAL no longer needs it.
-*/
-   if (adev->asic_type != CHIP_NAVI12)
-   return 0;
-   }
+   if (adev->mman.discovery_bin)
+   return 0;
 
switch (adev->asic_type) {
default:
-- 
2.42.0



Re: [PATCH] drm/amdkfd: Drop redundant NULL pointer check in kfd_topology.c

2023-12-21 Thread Felix Kuehling

On 2023-12-20 21:00, Srinivasan Shanmugam wrote:

The list has a head node; the NULL pointer check is therefore
superfluous, and has hence been removed.

Fixes the below:
drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_topology.c:1347 
kfd_create_indirect_link_prop() warn: can 'gpu_link' even be NULL?
drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_topology.c:1428 kfd_add_peer_prop() 
warn: can 'iolink1' even be NULL?
drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_topology.c:1433 kfd_add_peer_prop() 
warn: can 'iolink2' even be NULL?

Cc: Felix Kuehling 
Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Srinivasan Shanmugam 
---
  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 6 --
  1 file changed, 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index dc7c8312e8c7..141a8b3273c8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1344,8 +1344,6 @@ static int kfd_create_indirect_link_prop(struct 
kfd_topology_device *kdev, int g
  
  	gpu_link = list_first_entry(&kdev->io_link_props,

struct kfd_iolink_properties, list);
-   if (!gpu_link)
-   return -ENOMEM;


I think the problem here is that list_first_entry expects the list to 
not be empty. So the correct solution would be to add a !list_empty 
check before using list_first_entry.


Regards,
  Felix


  
  	for (i = 0; i < num_cpu; i++) {

/* CPU <--> GPU */
@@ -1425,13 +1423,9 @@ static int kfd_add_peer_prop(struct kfd_topology_device 
*kdev,
  
  	iolink1 = list_first_entry(&kdev->io_link_props,

struct 
kfd_iolink_properties, list);
-   if (!iolink1)
-   return -ENOMEM;
  
  	iolink2 = list_first_entry(&peer->io_link_props,

struct 
kfd_iolink_properties, list);
-   if (!iolink2)
-   return -ENOMEM;
  
  	props = kfd_alloc_struct(props);

if (!props)


[PATCH] drm/amdkfd: Fix lock dependency warning

2023-12-21 Thread Felix Kuehling
==
WARNING: possible circular locking dependency detected
6.5.0-kfd-fkuehlin #276 Not tainted
--
kworker/8:2/2676 is trying to acquire lock:
9435aae95c88 ((work_completion)(&svm_bo->eviction_work)){+.+.}-{0:0}, at: 
__flush_work+0x52/0x550

but task is already holding lock:
9435cd8e1720 (&svms->lock){+.+.}-{3:3}, at: 
svm_range_deferred_list_work+0xe8/0x340 [amdgpu]

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #2 (&svms->lock){+.+.}-{3:3}:
   __mutex_lock+0x97/0xd30
   kfd_ioctl_alloc_memory_of_gpu+0x6d/0x3c0 [amdgpu]
   kfd_ioctl+0x1b2/0x5d0 [amdgpu]
   __x64_sys_ioctl+0x86/0xc0
   do_syscall_64+0x39/0x80
   entry_SYSCALL_64_after_hwframe+0x63/0xcd

-> #1 (&mm->mmap_lock){}-{3:3}:
   down_read+0x42/0x160
   svm_range_evict_svm_bo_worker+0x8b/0x340 [amdgpu]
   process_one_work+0x27a/0x540
   worker_thread+0x53/0x3e0
   kthread+0xeb/0x120
   ret_from_fork+0x31/0x50
   ret_from_fork_asm+0x11/0x20

-> #0 ((work_completion)(&svm_bo->eviction_work)){+.+.}-{0:0}:
   __lock_acquire+0x1426/0x2200
   lock_acquire+0xc1/0x2b0
   __flush_work+0x80/0x550
   __cancel_work_timer+0x109/0x190
   svm_range_bo_release+0xdc/0x1c0 [amdgpu]
   svm_range_free+0x175/0x180 [amdgpu]
   svm_range_deferred_list_work+0x15d/0x340 [amdgpu]
   process_one_work+0x27a/0x540
   worker_thread+0x53/0x3e0
   kthread+0xeb/0x120
   ret_from_fork+0x31/0x50
   ret_from_fork_asm+0x11/0x20

other info that might help us debug this:

Chain exists of:
  (work_completion)(&svm_bo->eviction_work) --> &mm->mmap_lock --> &svms->lock

 Possible unsafe locking scenario:

   CPU0CPU1
   
  lock(&svms->lock);
   lock(&mm->mmap_lock);
   lock(&svms->lock);
  lock((work_completion)(&svm_bo->eviction_work));

I believe this cannot really lead to a deadlock in practice, because
svm_range_evict_svm_bo_worker only takes the mmap_read_lock if the BO
refcount is non-0. That means it's impossible that svm_range_bo_release
is running concurrently. However, there is no good way to annotate this.

To avoid the problem, take a BO reference in
svm_range_schedule_evict_svm_bo instead of in the worker. That way it's
impossible for a BO to get freed while eviction work is pending and the
cancel_work_sync call in svm_range_bo_release can be eliminated.

Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 13 -
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index afd98aace065..7683c96f0cbd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -404,14 +404,9 @@ static void svm_range_bo_release(struct kref *kref)
spin_lock(&svm_bo->list_lock);
}
spin_unlock(&svm_bo->list_lock);
-   if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) {
-   /* We're not in the eviction worker.
-* Signal the fence and synchronize with any
-* pending eviction work.
-*/
+   if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
+   /* We're not in the eviction worker. Signal the fence. */
dma_fence_signal(&svm_bo->eviction_fence->base);
-   cancel_work_sync(&svm_bo->eviction_work);
-   }
dma_fence_put(&svm_bo->eviction_fence->base);
amdgpu_bo_unref(&svm_bo->bo);
kfree(svm_bo);
@@ -3444,6 +3439,8 @@ int svm_range_schedule_evict_svm_bo(struct 
amdgpu_amdkfd_fence *fence)
return 0;
 
if (fence->svm_bo) {
+   /* Reference is dropped in svm_range_evict_svm_bo_worker */
+   kref_get(&fence->svm_bo->kref);
WRITE_ONCE(fence->svm_bo->evicting, 1);
schedule_work(&fence->svm_bo->eviction_work);
}
@@ -3458,8 +3455,6 @@ static void svm_range_evict_svm_bo_worker(struct 
work_struct *work)
int r = 0;
 
svm_bo = container_of(work, struct svm_range_bo, eviction_work);
-   if (!svm_bo_ref_unless_zero(svm_bo))
-   return; /* svm_bo was freed while eviction was pending */
 
if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
mm = svm_bo->eviction_fence->mm;
-- 
2.34.1



[PATCH 2/2] drm/amd/display: Move fixpt_from_s3132 to amdgpu_dm

2023-12-21 Thread Harry Wentland
Other environments don't like the unary minus operator on
an unsigned value.

Signed-off-by: Harry Wentland 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_color.c| 18 +++---
 .../gpu/drm/amd/display/include/fixed31_32.h   | 12 
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 36bf65a8cd6e..9b527bffe11a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -85,6 +85,18 @@ void amdgpu_dm_init_color_mod(void)
setup_x_points_distribution();
 }
 
+static inline struct fixed31_32 amdgpu_dm_fixpt_from_s3132(__u64 x)
+{
+   struct fixed31_32 val;
+
+   /* If negative, convert to 2's complement. */
+   if (x & (1ULL << 63))
+   x = -(x & ~(1ULL << 63));
+
+   val.value = x;
+   return val;
+}
+
 #ifdef AMD_PRIVATE_COLOR
 /* Pre-defined Transfer Functions (TF)
  *
@@ -430,7 +442,7 @@ static void __drm_ctm_to_dc_matrix(const struct 
drm_color_ctm *ctm,
}
 
/* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */
-   matrix[i] = dc_fixpt_from_s3132(ctm->matrix[i - (i / 4)]);
+   matrix[i] = amdgpu_dm_fixpt_from_s3132(ctm->matrix[i - (i / 
4)]);
}
 }
 
@@ -452,7 +464,7 @@ static void __drm_ctm_3x4_to_dc_matrix(const struct 
drm_color_ctm_3x4 *ctm,
 */
for (i = 0; i < 12; i++) {
/* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */
-   matrix[i] = dc_fixpt_from_s3132(ctm->matrix[i]);
+   matrix[i] = amdgpu_dm_fixpt_from_s3132(ctm->matrix[i]);
}
 }
 
@@ -1136,7 +1148,7 @@ amdgpu_dm_plane_set_color_properties(struct 
drm_plane_state *plane_state,
uint32_t shaper_size, lut3d_size, blend_size;
int ret;
 
-   dc_plane_state->hdr_mult = 
dc_fixpt_from_s3132(dm_plane_state->hdr_mult);
+   dc_plane_state->hdr_mult = 
amdgpu_dm_fixpt_from_s3132(dm_plane_state->hdr_mult);
 
shaper_lut = __extract_blob_lut(dm_plane_state->shaper_lut, 
&shaper_size);
shaper_size = shaper_lut != NULL ? shaper_size : 0;
diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h 
b/drivers/gpu/drm/amd/display/include/fixed31_32.h
index 84da1dd34efd..d4cf7ead1d87 100644
--- a/drivers/gpu/drm/amd/display/include/fixed31_32.h
+++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h
@@ -69,18 +69,6 @@ static const struct fixed31_32 dc_fixpt_epsilon = { 1LL };
 static const struct fixed31_32 dc_fixpt_half = { 0x8000LL };
 static const struct fixed31_32 dc_fixpt_one = { 0x1LL };
 
-static inline struct fixed31_32 dc_fixpt_from_s3132(__u64 x)
-{
-   struct fixed31_32 val;
-
-   /* If negative, convert to 2's complement. */
-   if (x & (1ULL << 63))
-   x = -(x & ~(1ULL << 63));
-
-   val.value = x;
-   return val;
-}
-
 /*
  * @brief
  * Initialization routines
-- 
2.43.0



[PATCH 1/2] drm/amd/display: Fix recent checkpatch errors in amdgpu_dm

2023-12-21 Thread Harry Wentland
 - Use tabs, not spaces.
 - Brace and parentheses placement

Signed-off-by: Harry Wentland 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h   | 4 ++--
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 5 ++---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c  | 2 +-
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 2d5af83d40b5..9c1871b866cc 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -747,7 +747,7 @@ enum amdgpu_transfer_function {
AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF,
AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF,
AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF,
-AMDGPU_TRANSFER_FUNCTION_COUNT
+   AMDGPU_TRANSFER_FUNCTION_COUNT
 };
 
 struct dm_plane_state {
@@ -844,7 +844,7 @@ struct dm_crtc_state {
 
int abm_level;
 
-/**
+   /**
 * @regamma_tf:
 *
 * Pre-defined transfer function for converting internal FB -> wire
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index c6ed0d854b01..36bf65a8cd6e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -630,8 +630,7 @@ static int __set_input_tf(struct dc_color_caps *caps, 
struct dc_transfer_func *f
 static enum dc_transfer_func_predefined
 amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf)
 {
-   switch (tf)
-   {
+   switch (tf) {
default:
case AMDGPU_TRANSFER_FUNCTION_DEFAULT:
case AMDGPU_TRANSFER_FUNCTION_IDENTITY:
@@ -1225,7 +1224,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct 
dm_crtc_state *crtc,
 * plane and CRTC degamma at the same time. Explicitly reject atomic
 * updates when userspace sets both plane and CRTC degamma properties.
 */
-   if (has_crtc_cm_degamma && ret != -EINVAL){
+   if (has_crtc_cm_degamma && ret != -EINVAL) {
drm_dbg_kms(crtc->base.crtc->dev,
"doesn't support plane and CRTC degamma at the same 
time\n");
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
index 4439e5a27362..6e715ef3a556 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
@@ -305,7 +305,7 @@ dm_crtc_additional_color_mgmt(struct drm_crtc *crtc)
 {
struct amdgpu_device *adev = drm_to_adev(crtc->dev);
 
-   if(adev->dm.dc->caps.color.mpc.ogam_ram)
+   if (adev->dm.dc->caps.color.mpc.ogam_ram)
drm_object_attach_property(&crtc->base,
   adev->mode_info.regamma_tf_property,
   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
-- 
2.43.0



RE: [PATCH] drm/amdgpu/gfx11: need acquire mutex before access CP_VMID_RESET

2023-12-21 Thread Xiao, Jack
[AMD Official Use Only - General]

I can see that the soft reset can be called from amdgpu_device_gpu_recover and the
PCI error handler; both have called amdgpu_device_lock_reset_domain to acquire a
reset lock before the soft reset.

Regards,
Jack

-Original Message-
From: Christian König 
Sent: Wednesday, December 20, 2023 10:06 PM
To: Xiao, Jack ; Alex Deucher 
Cc: Deucher, Alexander ; 
amd-gfx@lists.freedesktop.org; Zhang, Hawking 
Subject: Re: [PATCH] drm/amdgpu/gfx11: need acquire mutex before access 
CP_VMID_RESET

Well not the reset lock, but there should only be a single reset queue which 
this runs on.

Regards,
Christian.

Am 20.12.23 um 10:49 schrieb Xiao, Jack:
> [AMD Official Use Only - General]
>
> It's already protected by the reset lock. In my understanding, soft reset 
> should not run in parallel.
>
> Regards,
> Jack
>
> -Original Message-
> From: Alex Deucher 
> Sent: Wednesday, December 20, 2023 1:04 AM
> To: Xiao, Jack 
> Cc: amd-gfx@lists.freedesktop.org; Deucher, Alexander
> ; Zhang, Hawking 
> Subject: Re: [PATCH] drm/amdgpu/gfx11: need acquire mutex before
> access CP_VMID_RESET
>
> On Tue, Dec 19, 2023 at 4:30 AM Jack Xiao  wrote:
>> It's required to take the gfx mutex before access to CP_VMID_RESET,
>> for there is a race condition with CP firmware to write the register.
>>
>> Signed-off-by: Jack Xiao 
>> ---
>>   drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 20 
>>   1 file changed, 20 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
>> index bdcf96df69e6..ae3370d34d11 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
>> @@ -4518,6 +4518,22 @@ static int gfx_v11_0_soft_reset(void *handle)
>>  }
>>  }
>>
> We probably want a CPU mutex or spinlock to protect this as well unless this 
> is already protected by the reset lock.
>
> Alex
>
>> +   /* Try to require the gfx mutex before access to CP_VMID_RESET */
>> +   for (i = 0; i < adev->usec_timeout; i++) {
>> +   /* Request with MeId=2, PipeId=0 */
>> +   tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, 1);
>> +   tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
>> +   WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);
>> +   if (RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX) == tmp)
>> +   break;
>> +   udelay(1);
>> +   }
>> +
>> +   if (i >= adev->usec_timeout) {
>> +   printk("Failed to require the gfx mutex during soft 
>> reset\n");
>> +   return -EINVAL;
>> +   }
>> +
>>  WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffe);
>>
>>  // Read CP_VMID_RESET register three times.
>> @@ -4526,6 +4542,10 @@ static int gfx_v11_0_soft_reset(void *handle)
>>  RREG32_SOC15(GC, 0, regCP_VMID_RESET);
>>  RREG32_SOC15(GC, 0, regCP_VMID_RESET);
>>
>> +   /* release the gfx mutex */
>> +   tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, REQUEST, 0);
>> +   WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);
>> +
>>  for (i = 0; i < adev->usec_timeout; i++) {
>>  if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
>>  !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
>> --
>> 2.41.0
>>



[PATCH] drm/amdkfd: Confirm list is non-empty before utilizing list_first_entry in kfd_topology.c

2023-12-21 Thread Srinivasan Shanmugam
Before using list_first_entry, make sure to check that the list is not
empty.

Fixes the below:
drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_topology.c:1347 
kfd_create_indirect_link_prop() warn: can 'gpu_link' even be NULL?
drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_topology.c:1428 kfd_add_peer_prop() 
warn: can 'iolink1' even be NULL?
drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_topology.c:1433 kfd_add_peer_prop() 
warn: can 'iolink2' even be NULL?

'Fixes: 0f28cca87e9a ("drm/amdkfd: Extend KFD device topology to surface
peer-to-peer links")'
Suggested-by: Felix Kuehling 
Cc: Felix Kuehling 
Cc: Christian König 
Cc: Alex Deucher 
Signed-off-by: Srinivasan Shanmugam 
---
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 21 +
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index dc7c8312e8c7..a4cc88f08df2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1342,10 +1342,9 @@ static int kfd_create_indirect_link_prop(struct 
kfd_topology_device *kdev, int g
num_cpu++;
}
 
-   gpu_link = list_first_entry(&kdev->io_link_props,
-   struct kfd_iolink_properties, list);
-   if (!gpu_link)
-   return -ENOMEM;
+   if (!list_empty(&kdev->io_link_props))
+   gpu_link = list_first_entry(&kdev->io_link_props,
+   struct kfd_iolink_properties, list);
 
for (i = 0; i < num_cpu; i++) {
/* CPU <--> GPU */
@@ -1423,15 +1422,13 @@ static int kfd_add_peer_prop(struct kfd_topology_device 
*kdev,
peer->gpu->adev))
return ret;
 
-   iolink1 = list_first_entry(&kdev->io_link_props,
-   struct 
kfd_iolink_properties, list);
-   if (!iolink1)
-   return -ENOMEM;
+   if (!list_empty(&kdev->io_link_props))
+   iolink1 = list_first_entry(&kdev->io_link_props,
+  struct kfd_iolink_properties, list);
 
-   iolink2 = list_first_entry(&peer->io_link_props,
-   struct 
kfd_iolink_properties, list);
-   if (!iolink2)
-   return -ENOMEM;
+   if (!list_empty(&peer->io_link_props))
+   iolink2 = list_first_entry(&peer->io_link_props,
+  struct kfd_iolink_properties, list);
 
props = kfd_alloc_struct(props);
if (!props)
-- 
2.34.1



[PATCH Review 1/1] drm/amdgpu: Fix ineffective ras_mask settings

2023-12-21 Thread Stanley . Yang
For a special ASIC with mem ECC enabled but SRAM ECC not
enabled: even if the RAS block is not marked as supported in
.ras_enabled, the RAS block can be considered to support the
RAS function as long as the ASIC supports poison mode, the
RAS block has a RAS configuration, and SRAM ECC is not
enabled; otherwise the modprobe argument RAS feature mask
is ineffective.

Signed-off-by: Stanley.Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 1d5c9d84f51d..8448e11d3e20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -3405,7 +3405,8 @@ int amdgpu_ras_is_supported(struct amdgpu_device *adev,
 block == AMDGPU_RAS_BLOCK__VCN ||
 block == AMDGPU_RAS_BLOCK__JPEG) &&
amdgpu_ras_is_poison_mode_supported(adev) &&
-   amdgpu_ras_get_ras_block(adev, block, 0))
+   amdgpu_ras_get_ras_block(adev, block, 0) &&
+   !amdgpu_atomfirmware_sram_ecc_supported(adev))
ret = 1;
 
return ret;
-- 
2.25.1