RE: [PATCH v2 3/5] drm/amdgpu: Add ras helper to query boot errors v2

2024-01-03 Thread Zhang, Morris
[AMD Official Use Only - General]

--Brs,
Morris Zhang
MLSE Linux  ML SRDC
Ext. 25147

> -Original Message-
> From: amd-gfx  On Behalf Of Hawking
> Zhang
> Sent: Tuesday, January 2, 2024 10:08 PM
> To: amd-gfx@lists.freedesktop.org; Zhou1, Tao ; Yang,
> Stanley ; Wang, Yang(Kevin)
> ; Chai, Thomas ; Li,
> Candice 
> Cc: Deucher, Alexander ; Ma, Le
> ; Lazar, Lijo ; Zhang, Hawking
> 
> Subject: [PATCH v2 3/5] drm/amdgpu: Add ras helper to query boot errors v2
>
> Add ras helper function to query boot time gpu errors.
> v2: use aqua_vanjaram smn addressing pattern
>
> Signed-off-by: Hawking Zhang 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 95
> +  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h |
> 15 +++-
>  3 files changed, 110 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 616b6c911767..cd91533d641c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1328,6 +1328,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
>  #define WREG32_FIELD_OFFSET(reg, offset, field, val) \
>   WREG32(mm##reg + offset, (RREG32(mm##reg + offset) &
> ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
>
> +#define AMDGPU_GET_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >>
> +(l))
>  /*
>   * BIOS helpers.
>   */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index fc42fb6ee191..a901b00d4949 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -3763,3 +3763,98 @@ int amdgpu_ras_error_statistic_ce_count(struct
> ras_err_data *err_data,
>
>   return 0;
>  }
> +
> +#define mmMP0_SMN_C2PMSG_92  0x1609C
> +#define mmMP0_SMN_C2PMSG_126 0x160BE
> +static void amdgpu_ras_boot_time_error_reporting(struct amdgpu_device
> *adev,
> +  u32 instance, u32 boot_error)
> +{
> + u32 socket_id, aid_id, hbm_id;
> + u32 reg_data;
> + u64 reg_addr;
> +
> + socket_id = AMDGPU_RAS_GPU_ERR_SOCKET_ID(boot_error);
> + aid_id = AMDGPU_RAS_GPU_ERR_AID_ID(boot_error);
> + hbm_id = AMDGPU_RAS_GPU_ERR_HBM_ID(boot_error);
> +
> + /* The pattern for smn addressing in other SOC could be different from
> +  * the one for aqua_vanjaram. We should revisit the code if the pattern
> +  * is changed. In such case, replace the aqua_vanjaram implementation
> +  * with more common helper */
> + reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) +
> +aqua_vanjaram_encode_ext_smn_addressing(instance);
> +
> + reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
> + dev_err(adev->dev, "socket: %d, aid: %d, firmware boot failed, fw status
> is 0x%x\n",
> + socket_id, aid_id, reg_data);
> +
> + if (AMDGPU_RAS_GPU_ERR_MEM_TRAINING(boot_error))
> + dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, memory
> training failed\n",
> +  socket_id, aid_id, hbm_id);
> +
> + if (AMDGPU_RAS_GPU_ERR_FW_LOAD(boot_error))
> + dev_info(adev->dev, "socket: %d, aid: %d, firmware load failed 
> at
> boot time\n",
> +  socket_id, aid_id);
> +
> + if (AMDGPU_RAS_GPU_ERR_WAFL_LINK_TRAINING(boot_error))
> + dev_info(adev->dev, "socket: %d, aid: %d, wafl link training
> failed\n",
> +  socket_id, aid_id);
> +
> + if (AMDGPU_RAS_GPU_ERR_XGMI_LINK_TRAINING(boot_error))
> + dev_info(adev->dev, "socket: %d, aid: %d, xgmi link training
> failed\n",
> +  socket_id, aid_id);
> +
> + if (AMDGPU_RAS_GPU_ERR_USR_CP_LINK_TRAINING(boot_error))
> + dev_info(adev->dev, "socket: %d, aid: %d, usr cp link training
> failed\n",
> +  socket_id, aid_id);
> +
> + if (AMDGPU_RAS_GPU_ERR_USR_DP_LINK_TRAINING(boot_error))
> + dev_info(adev->dev, "socket: %d, aid: %d, usr dp link training
> failed\n",
> +  socket_id, aid_id);
> +
> + if (AMDGPU_RAS_GPU_ERR_HBM_MEM_TEST(boot_error))
> + dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm
> memory test failed\n",
> +  socket_id, aid_id, hbm_id);
> +
> + if (AMDGPU_RAS_GPU_ERR_HBM_BIST_TEST(boot_error))
> + dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm bist
> test failed\n",
> +  socket_id, aid_id, hbm_id);
> +}
> +
> +static int amdgpu_ras_wait_for_boot_complete(struct amdgpu_device *adev,
> +  u32 instance, u32 *boot_error) {
> + u32 reg_addr;
> + u32 reg_data;
> + int retry_loop;
> +
> + /* The pattern for smn addressing in other SOC could be different from
> +  * the one for aqua_vanjaram. We should revisit the code if the pattern
> +  * is changed. In 

RE: [PATCH] drm/amdgpu: fix metadata_size for ubo ioctl queries

2021-05-25 Thread Zhang, Morris
[AMD Official Use Only - Internal Distribution Only]

Thanks for the comments! I'll separate it into two patches.

--Brs,
Morris Zhang
MLSE Linux  ML SRDC
Ext. 25147

-Original Message-
From: Das, Nirmoy  
Sent: Tuesday, May 25, 2021 8:15 PM
To: Zhang, Morris ; amd-gfx@lists.freedesktop.org; Das, 
Nirmoy 
Subject: Re: [PATCH] drm/amdgpu: fix metadata_size for ubo ioctl queries


On 5/24/21 1:52 PM, Shiwu Zhang wrote:
> Although the kfd_ioctl_get_dmabuf_info() still fail it will indicate 
> the caller right metadat_size useful for the same kfd ioctl next time.
>
> v2: free the metadata buffer for sg type when to destroy BOs.
>
> Signed-off-by: Shiwu Zhang 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 7 ---
>   1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 3f85ba8222ef..e9f8701fd046 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -95,7 +95,7 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
>   }
>   amdgpu_bo_unref(&bo->parent);
>   
> - if (bo->tbo.type == ttm_bo_type_device) {
> + if (bo->tbo.type != ttm_bo_type_kernel) {
>   ubo = to_amdgpu_bo_user(bo);
>   kfree(ubo->metadata);
>   }


This should be a separate patch, it is unrelated to the below hunk.


> @@ -1213,6 +1213,9 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, 
> void *buffer,
>   
>   BUG_ON(bo->tbo.type == ttm_bo_type_kernel);
>   ubo = to_amdgpu_bo_user(bo);
> + if (metadata_size)
> + *metadata_size = ubo->metadata_size;
> +
>   if (buffer) {
>   if (buffer_size < ubo->metadata_size)
>   return -EINVAL;
> @@ -1221,8 +1224,6 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void 
> *buffer,
>   memcpy(buffer, ubo->metadata, ubo->metadata_size);
>   }
>   
> - if (metadata_size)
> - *metadata_size = ubo->metadata_size;
>   if (flags)
>   *flags = ubo->metadata_flags;
>   
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] free the metadata buffer for sg type BOs as well

2021-05-27 Thread Zhang, Morris
[AMD Official Use Only - Internal Distribution Only]

Hi Das&Christian,

Merged.
Thanks you both for the comments.

--Brs,
Morris Zhang
MLSE Linux  ML SRDC
Ext. 25147

-Original Message-
From: Christian König  
Sent: Wednesday, May 26, 2021 8:44 PM
To: Zhang, Morris ; amd-gfx@lists.freedesktop.org; Das, 
Nirmoy 
Subject: Re: [PATCH] free the metadata buffer for sg type BOs as well

You need a commit message.

Am 26.05.21 um 05:46 schrieb Shiwu Zhang:
> Signed-off-by: Shiwu Zhang 

With that fixed the patch is Reviewed-by: Christian König 


> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +-
>   1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 2d876e1eaa7c..e9f8701fd046 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -95,7 +95,7 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
>   }
>   amdgpu_bo_unref(&bo->parent);
>   
> - if (bo->tbo.type == ttm_bo_type_device) {
> + if (bo->tbo.type != ttm_bo_type_kernel) {
>   ubo = to_amdgpu_bo_user(bo);
>   kfree(ubo->metadata);
>   }
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: add the accelerator pcie class

2023-05-22 Thread Zhang, Morris
[AMD Official Use Only - General]

Hi Bjorn,

Can we merge the below change through amdgpu tree ?  Thanks!

--Brs,
Morris Zhang
MLSE Linux  ML SRDC
Ext. 25147

> -Original Message-
> From: amd-gfx  On Behalf Of Shiwu
> Zhang
> Sent: Tuesday, May 23, 2023 12:03 PM
> To: amd-gfx@lists.freedesktop.org; linux-...@vger.kernel.org;
> bhelg...@google.com
> Subject: [PATCH] drm/amdgpu: add the accelerator pcie class
>
> v2: add the base class id for accelerator (lijo)
>
> Signed-off-by: Shiwu Zhang 
> Acked-by: Lijo Lazar 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 +
>  include/linux/pci_ids.h | 3 +++
>  2 files changed, 8 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 3d91e123f9bd..5d652e6f0b1e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -2042,6 +2042,11 @@ static const struct pci_device_id pciidlist[] = {
> .class_mask = 0xff,
> .driver_data = CHIP_IP_DISCOVERY },
>
> + { PCI_DEVICE(0x1002, PCI_ANY_ID),
> +   .class = PCI_CLASS_ACCELERATOR_PROCESSING << 8,
> +   .class_mask = 0xff,
> +   .driver_data = CHIP_IP_DISCOVERY },
> +
>   {0, 0, 0}
>  };
>
> diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index
> b362d90eb9b0..4918ff26a987 100644
> --- a/include/linux/pci_ids.h
> +++ b/include/linux/pci_ids.h
> @@ -151,6 +151,9 @@
>  #define PCI_CLASS_SP_DPIO0x1100
>  #define PCI_CLASS_SP_OTHER   0x1180
>
> +#define PCI_BASE_CLASS_ACCELERATOR   0x12
> +#define PCI_CLASS_ACCELERATOR_PROCESSING 0x1200
> +
>  #define PCI_CLASS_OTHERS 0xff
>
>  /* Vendors and devices.  Sort key: vendor first, device next. */
> --
> 2.17.1



RE: [PATCH] drm/amdgpu: add debugfs for spirom IFWI dump

2025-05-08 Thread Zhang, Morris
[AMD Official Use Only - AMD Internal Distribution Only]

Ping. Thanks!

--Brs,
Morris Zhang
MLSE Linux  ML SRDC
Ext. 25147

> -Original Message-
> From: amd-gfx  On Behalf Of Shiwu
> Zhang
> Sent: Wednesday, May 7, 2025 12:42 PM
> To: Zhang, Hawking ; Gao, Likun
> ; amd-gfx@lists.freedesktop.org
> Subject: [PATCH] drm/amdgpu: add debugfs for spirom IFWI dump
>
> Expose the debugfs file node for user space to dump the IFWI image on spirom.
>
> For one transaction between PSP and host, it will read out the images on both 
> active
> and inactive partitions so a buffer with two times the size of maximum IFWI 
> image
> (currently 16MByte) is needed.
>
> Signed-off-by: Shiwu Zhang 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |   1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 104 
>  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h |  17 
>  drivers/gpu/drm/amd/amdgpu/psp_v13_0.c  |  40 +++-
>  4 files changed, 161 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index 4835123c99f3..bfa3b1519d4c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -2113,6 +2113,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
>   amdgpu_rap_debugfs_init(adev);
>   amdgpu_securedisplay_debugfs_init(adev);
>   amdgpu_fw_attestation_debugfs_init(adev);
> + amdgpu_psp_debugfs_init(adev);
>
>   debugfs_create_file("amdgpu_evict_vram", 0400, root, adev,
>   &amdgpu_evict_vram_fops);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> index 6f9bcffda875..210a7bdda332 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> @@ -4185,6 +4185,110 @@ const struct attribute_group amdgpu_flash_attr_group
> = {
>   .is_visible = amdgpu_flash_attr_is_visible,  };
>
> +#if defined(CONFIG_DEBUG_FS)
> +static int psp_read_spirom_debugfs_open(struct inode *inode, struct
> +file *filp) {
> + struct amdgpu_device *adev = filp->f_inode->i_private;
> + struct bo_address_triplet *bo_triplet;
> + int ret;
> +
> + /* serialize the open() file calling */
> + if (!mutex_trylock(&adev->psp.mutex))
> + return -EBUSY;
> +
> + /*
> +  * make sure only one userpace process is alive for dumping so that
> +  * only one memory buffer of AMD_VBIOS_FILE_MAX_SIZE * 2 is
> consumed.
> +  * let's say the case where one process try opening the file while
> +  * another one has proceeded to read or release. In this way, eliminate
> +  * the use of mutex for read() or release() callback as well.
> +  */
> + if (adev->psp.spirom_dump_trip) {
> + mutex_unlock(&adev->psp.mutex);
> + return -EBUSY;
> + }
> +
> + bo_triplet = kzalloc(sizeof(struct bo_address_triplet), GFP_KERNEL);
> + if (!bo_triplet) {
> + mutex_unlock(&adev->psp.mutex);
> + return -ENOMEM;
> + }
> +
> + ret = amdgpu_bo_create_kernel(adev, AMD_VBIOS_FILE_MAX_SIZE_B * 2,
> + AMDGPU_GPU_PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT,
> + &bo_triplet->bo,
> + &bo_triplet->mc_addr,
> + &bo_triplet->cpu_addr);
> + if (ret)
> + goto rel_trip;
> +
> + ret = psp_dump_spirom(&adev->psp, bo_triplet->mc_addr);
> + if (ret)
> + goto rel_bo;
> +
> + adev->psp.spirom_dump_trip = bo_triplet;
> + mutex_unlock(&adev->psp.mutex);
> + return 0;
> +rel_bo:
> + amdgpu_bo_free_kernel(&bo_triplet->bo, &bo_triplet->mc_addr,
> +   &bo_triplet->cpu_addr);
> +rel_trip:
> + kfree(bo_triplet);
> + mutex_unlock(&adev->psp.mutex);
> + dev_err(adev->dev, "Trying IFWI dump fails, err = %d\n", ret);
> + return ret;
> +}
> +
> +static ssize_t psp_read_spirom_debugfs_read(struct file *filp, char __user 
> *buf,
> size_t size,
> + loff_t *pos)
> +{
> + struct amdgpu_device *adev = filp->f_inode->i_private;
> + struct bo_address_triplet *bo_triplet = adev->psp.spirom_dump_trip;
> +
> + if (!bo_triplet)
> + return -EINVAL;
> +
> + return simple_read_from_buffer(buf,
> +size,
> +pos, bo_triplet->cpu_addr,
> +AMD_VBIOS_FILE_MAX_SIZE_B * 2); }
> +
> +static int psp_read_spirom_debugfs_release(struct inode *inode, struct
> +file *filp) {
> + struct amdgpu_device *adev = filp->f_inode->i_private;
> + struct bo_address_triplet *bo_triplet = adev->psp.spirom_dump_trip;
> +
> + if (bo_triplet) {
> + amdgpu_bo_free_kernel(&bo_triplet

RE: [PATCH 2/2] drm/amdgpu: fix the gb_addr_config_fields init value mismatch

2025-03-05 Thread Zhang, Morris
[AMD Official Use Only - AMD Internal Distribution Only]


Thanks @Lazar, Lijo<mailto:lijo.la...@amd.com> for the review.

And initializing the gb_addr_config_fields is part of sw_init although the 
callback has the name of early_init in it.

Will remove the part of " Fix it temporarily by using the golden value in 
sw_init as well."  in comment.



> -Original Message-

> From: Lazar, Lijo 

> Sent: Wednesday, March 5, 2025 4:02 PM

> To: Zhang, Morris ; Ma, Le ; Zhang,

> Hawking ; amd-gfx@lists.freedesktop.org

> Subject: Re: [PATCH 2/2] drm/amdgpu: fix the gb_addr_config_fields init value

> mismatch

>

>

>

> On 3/5/2025 12:14 PM, Shiwu Zhang wrote:

> > For gfx_v9_4_3 specifically, before regGB_ADDR_CONFIG is overwritten

> > in gfx hw_init it is read out to popluate the gb_addr_config_fields in

> > the sw_init stage, which causes mismatch.

> >

> > Fix it temporarily by using the golden value in sw_init as well.

>

> sw_init => early_init

>

> > The final fix should be by vBIOS/IFWI.

>

> This is supposed to be a driver-set golden reg. Most likely will remain this 
> way.

>

> Series is -

>

>  Reviewed-by: Lijo Lazar 
> mailto:lijo.la...@amd.com>>

>

> Thanks,

> Lijo

>

> >

> > Signed-off-by: Shiwu Zhang mailto:shiwu.zh...@amd.com>>

> > ---

> >  drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 6 +-

> >  1 file changed, 1 insertion(+), 5 deletions(-)

> >

> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c

> > b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c

> > index 2705f0cdd6da..af9b784eb78d 100644

> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c

> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c

> > @@ -918,8 +918,6 @@ static const struct aca_info gfx_v9_4_3_aca_info =

> > {

> >

> >  static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev)  {

> > -u32 gb_addr_config;

> > -

> >   adev->gfx.funcs = &gfx_v9_4_3_gfx_funcs;

> >   adev->gfx.ras = &gfx_v9_4_3_ras;

> >

> > @@ -928,9 +926,7 @@ static int gfx_v9_4_3_gpu_early_init(struct

> amdgpu_device *adev)

> >   adev->gfx.config.sc_prim_fifo_size_backend = 0x100;

> >   adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;

> >   adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;

> > -gb_addr_config = RREG32_SOC15(GC, GET_INST(GC, 0),

> regGB_ADDR_CONFIG);

> > -

> > -adev->gfx.config.gb_addr_config = gb_addr_config;

> > +   adev->gfx.config.gb_addr_config = GOLDEN_GB_ADDR_CONFIG;

> >

> >   adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<

> >   REG_GET_FIELD(




RE: [PATCH] drm/amdgpu: add debugfs for spirom IFWI dump

2025-05-11 Thread Zhang, Morris
[AMD Official Use Only - AMD Internal Distribution Only]

Hi hawking,
thanks for taking your time reviewing. simple_read_from_buffer() is just a 
linux wrapper around copy_to_user() and it handles the reading count and buffer 
position checking well so it can clean up the code a little bit. FYI.

And the one-time handshake with psp is put into the .open() for 1. Normally 
user will call syscall open() only once but can call the read() multi times and 
2. The mutex operations include buffer allocation and handshake can be put into 
one place so that the .read() and .release() can eliminate the need to request 
the mutex. Thanks!


> -Original Message-
> From: Zhang, Hawking 
> Sent: Friday, May 9, 2025 2:20 PM
> To: Zhang, Morris ; Gao, Likun ;
> amd-gfx@lists.freedesktop.org
> Subject: RE: [PATCH] drm/amdgpu: add debugfs for spirom IFWI dump
>
> [AMD Official Use Only - AMD Internal Distribution Only]
>
> Hi Morris,
>
> I will review the change later today. At first glance, it seems that some
> implementations are not included in the patch. For example, I couldn't find 
> the
> implementation of simple_read_from_buffer. Did I miss something?
>
> +   return simple_read_from_buffer(buf,
> +  size,
> +  pos, bo_triplet->cpu_addr,
> +  AMD_VBIOS_FILE_MAX_SIZE_B * 2); }
>
> Regards,
> Hawking
> -Original Message-
> From: Zhang, Morris 
> Sent: Thursday, May 8, 2025 17:34
> To: Zhang, Morris ; Zhang, Hawking
> ; Gao, Likun ; amd-
> g...@lists.freedesktop.org
> Subject: RE: [PATCH] drm/amdgpu: add debugfs for spirom IFWI dump
>
> [AMD Official Use Only - AMD Internal Distribution Only]
>
> Ping. Thanks!
>
> --Brs,
> Morris Zhang
> MLSE Linux  ML SRDC
> Ext. 25147
>
> > -Original Message-
> > From: amd-gfx  On Behalf Of
> > Shiwu Zhang
> > Sent: Wednesday, May 7, 2025 12:42 PM
> > To: Zhang, Hawking ; Gao, Likun
> > ; amd-gfx@lists.freedesktop.org
> > Subject: [PATCH] drm/amdgpu: add debugfs for spirom IFWI dump
> >
> > Expose the debugfs file node for user space to dump the IFWI image on 
> > spirom.
> >
> > For one transaction between PSP and host, it will read out the images
> > on both active and inactive partitions so a buffer with two times the
> > size of maximum IFWI image (currently 16MByte) is needed.
> >
> > Signed-off-by: Shiwu Zhang 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |   1 +
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 104 
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h |  17 
> >  drivers/gpu/drm/amd/amdgpu/psp_v13_0.c  |  40 +++-
> >  4 files changed, 161 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > index 4835123c99f3..bfa3b1519d4c 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > @@ -2113,6 +2113,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
> >   amdgpu_rap_debugfs_init(adev);
> >   amdgpu_securedisplay_debugfs_init(adev);
> >   amdgpu_fw_attestation_debugfs_init(adev);
> > + amdgpu_psp_debugfs_init(adev);
> >
> >   debugfs_create_file("amdgpu_evict_vram", 0400, root, adev,
> >   &amdgpu_evict_vram_fops); diff --git
> > a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> > index 6f9bcffda875..210a7bdda332 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> > @@ -4185,6 +4185,110 @@ const struct attribute_group
> > amdgpu_flash_attr_group = {
> >   .is_visible = amdgpu_flash_attr_is_visible,  };
> >
> > +#if defined(CONFIG_DEBUG_FS)
> > +static int psp_read_spirom_debugfs_open(struct inode *inode, struct
> > +file *filp) {
> > + struct amdgpu_device *adev = filp->f_inode->i_private;
> > + struct bo_address_triplet *bo_triplet;
> > + int ret;
> > +
> > + /* serialize the open() file calling */
> > + if (!mutex_trylock(&adev->psp.mutex))
> > + return -EBUSY;
> > +
> > + /*
> > +  * make sure only one userpace process is alive for dumping so that
> > +  * only one memory buffer of AMD_VBIOS_FILE_MAX_SIZE * 2 is
> > consumed.
> > +  * let's say the case where one process try opening the file while
> > +

RE: [PATCH] drm/amdgpu: add debugfs for spirom IFWI dump

2025-05-13 Thread Zhang, Morris
[AMD Official Use Only - AMD Internal Distribution Only]

[public]

> -Original Message-
> From: Zhang, Hawking 
> Sent: Monday, May 12, 2025 10:25 PM
> To: Zhang, Morris ; Gao, Likun ;
> amd-gfx@lists.freedesktop.org
> Subject: RE: [PATCH] drm/amdgpu: add debugfs for spirom IFWI dump
>
> [AMD Official Use Only - AMD Internal Distribution Only]
>
> I see. Thanks for clarifying.
>
> #define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO 0x2 #define
> C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI 0x3 #define
> C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE 0x4
> +#define C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_LO 0xf #define
> +C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_HI 0x10 #define
> +C2PMSG_CMD_SPI_GET_FLASH_IMAGE 0x11
>
> [Hawking] I suggest putting above definition in amdgpu_psp.h, including
> SPI_GET_ROM_IMAGE and SPI_UPDATE_ROM_IMAGE. These command, and
> their related definitions are common across psp_v13 and psp_v14. There is no 
> need
> to duplicate them for every PSP IP generation.
>
[Morris] Will do it in v2. Thanks!

>
> +#if defined(CONFIG_DEBUG_FS)
> +struct bo_address_triplet {
> +   struct amdgpu_bo *bo;
> +   uint64_t mc_addr;
> +   void *cpu_addr;
> +};
> +#endif
>
> [Hawking] bo_address_triplet is specifically used for dumping the spirom 
> image. I
> would suggest renaming it to spirom_bo, instead of more general name like
> bo_address.
[Morris] Will do it in v2. Thanks!

>
> +   debugfs_create_file_size("psp_spirom_dump", 0444, minor->debugfs_root,
> +   adev, &psp_dump_spirom_debugfs_ops,
> + AMD_VBIOS_FILE_MAX_SIZE_B * 2);
>
> [Hawking] does the caller need to access the entire amdgpu_device? Or shall we
> consider exposing spirom_bo for this specific use case.

[Morris] yes, the pointer to adev is needed for the 
psp_read_spirom_debugfs_open() especially as it will do the psp handshake.
This data parameter is for kernel usage only and no risk to leak to user space. 
And you can find the similar usage in debugfs.
>
> Regards,
> Hawking
>
> -Original Message-
> From: Zhang, Morris 
> Sent: Monday, May 12, 2025 14:39
> To: Zhang, Hawking ; Gao, Likun
> ; amd-gfx@lists.freedesktop.org
> Subject: RE: [PATCH] drm/amdgpu: add debugfs for spirom IFWI dump
>
> [AMD Official Use Only - AMD Internal Distribution Only]
>
> Hi hawking,
> thanks for taking your time reviewing. simple_read_from_buffer() is just a 
> linux
> wrapper around copy_to_user() and it handles the reading count and buffer 
> position
> checking well so it can clean up the code a little bit. FYI.
>
> And the one-time handshake with psp is put into the .open() for 1. Normally 
> user will
> call syscall open() only once but can call the read() multi times and 2. The 
> mutex
> operations include buffer allocation and handshake can be put into one place 
> so that
> the .read() and .release() can eliminate the need to request the mutex. 
> Thanks!
>
>
> > -Original Message-
> > From: Zhang, Hawking 
> > Sent: Friday, May 9, 2025 2:20 PM
> > To: Zhang, Morris ; Gao, Likun
> > ; amd-gfx@lists.freedesktop.org
> > Subject: RE: [PATCH] drm/amdgpu: add debugfs for spirom IFWI dump
> >
> > [AMD Official Use Only - AMD Internal Distribution Only]
> >
> > Hi Morris,
> >
> > I will review the change later today. At first glance, it seems that
> > some implementations are not included in the patch. For example, I
> > couldn't find the implementation of simple_read_from_buffer. Did I miss
> something?
> >
> > +   return simple_read_from_buffer(buf,
> > +  size,
> > +  pos, bo_triplet->cpu_addr,
> > +  AMD_VBIOS_FILE_MAX_SIZE_B * 2);
> > + }
> >
> > Regards,
> > Hawking
> > -Original Message-
> > From: Zhang, Morris 
> > Sent: Thursday, May 8, 2025 17:34
> > To: Zhang, Morris ; Zhang, Hawking
> > ; Gao, Likun ; amd-
> > g...@lists.freedesktop.org
> > Subject: RE: [PATCH] drm/amdgpu: add debugfs for spirom IFWI dump
> >
> > [AMD Official Use Only - AMD Internal Distribution Only]
> >
> > Ping. Thanks!
> >
> > --Brs,
> > Morris Zhang
> > MLSE Linux  ML SRDC
> > Ext. 25147
> >
> > > -Original Message-
> > > From: amd-gfx  On Behalf Of
> > > Shiwu Zhang
> > > Sent: Wednesday, May 7, 2025 12:42 PM
> > > To: Zhang, Hawking ; Gao, Likun
> > > ; amd-gfx@lists.freedesktop.org
> > > Subject: [PATCH] drm/amdgpu: add debugfs for spirom IFWI dump
&

RE: [PATCH v2] drm/amdgpu: add debugfs for spirom IFWI dump

2025-05-13 Thread Zhang, Morris
[AMD Official Use Only - AMD Internal Distribution Only]

[public]

> -Original Message-
> From: Lazar, Lijo 
> Sent: Tuesday, May 13, 2025 8:26 PM
> To: Zhang, Morris ; Zhang, Hawking
> ; Gao, Likun ; amd-
> g...@lists.freedesktop.org
> Subject: Re: [PATCH v2] drm/amdgpu: add debugfs for spirom IFWI dump
>
>
>
> On 5/13/2025 2:43 PM, Shiwu Zhang wrote:
> > Expose the debugfs file node for user space to dump the IFWI image on
> > spirom.
> >
> > For one transaction between PSP and host, it will read out the images
> > on both active and inactive partitions so a buffer with two times the
> > size of maximum IFWI image (currently 16MByte) is needed.
> >
> > v2: move the vbios gfl macros to the common header and rename the
> > bo triplet struct to spirom_bo for this specfic usage (Hawking)
> >
> > Signed-off-by: Shiwu Zhang 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |   1 +
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 104 
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h |  29 ++
> >  drivers/gpu/drm/amd/amdgpu/psp_v13_0.c  |  46 +++--
> >  4 files changed, 170 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > index 4835123c99f3..bfa3b1519d4c 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > @@ -2113,6 +2113,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
> > amdgpu_rap_debugfs_init(adev);
> > amdgpu_securedisplay_debugfs_init(adev);
> > amdgpu_fw_attestation_debugfs_init(adev);
> > +   amdgpu_psp_debugfs_init(adev);
> >
> > debugfs_create_file("amdgpu_evict_vram", 0400, root, adev,
> > &amdgpu_evict_vram_fops);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> > index 6f9bcffda875..3a27ce75f80c 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> > @@ -4185,6 +4185,110 @@ const struct attribute_group
> amdgpu_flash_attr_group = {
> > .is_visible = amdgpu_flash_attr_is_visible,  };
> >
> > +#if defined(CONFIG_DEBUG_FS)
> > +static int psp_read_spirom_debugfs_open(struct inode *inode, struct
> > +file *filp) {
> > +   struct amdgpu_device *adev = filp->f_inode->i_private;
> > +   struct spirom_bo *bo_triplet;
> > +   int ret;
> > +
> > +   /* serialize the open() file calling */
> > +   if (!mutex_trylock(&adev->psp.mutex))
> > +   return -EBUSY;
> > +
> > +   /*
> > +* make sure only one userpace process is alive for dumping so that
> > +* only one memory buffer of AMD_VBIOS_FILE_MAX_SIZE * 2 is
> consumed.
> > +* let's say the case where one process try opening the file while
> > +* another one has proceeded to read or release. In this way, eliminate
> > +* the use of mutex for read() or release() callback as well.
> > +*/
> > +   if (adev->psp.spirom_dump_trip) {
> > +   mutex_unlock(&adev->psp.mutex);
> > +   return -EBUSY;
> > +   }
> > +
> > +   bo_triplet = kzalloc(sizeof(struct spirom_bo), GFP_KERNEL);
> > +   if (!bo_triplet) {
> > +   mutex_unlock(&adev->psp.mutex);
> > +   return -ENOMEM;
> > +   }
> > +
> > +   ret = amdgpu_bo_create_kernel(adev, AMD_VBIOS_FILE_MAX_SIZE_B * 2,
> > +   AMDGPU_GPU_PAGE_SIZE,
> > +   AMDGPU_GEM_DOMAIN_GTT,
> > +   &bo_triplet->bo,
> > +   &bo_triplet->mc_addr,
> > +   &bo_triplet->cpu_addr);
> > +   if (ret)
> > +   goto rel_trip;
> > +
> > +   ret = psp_dump_spirom(&adev->psp, bo_triplet->mc_addr);
> > +   if (ret)
> > +   goto rel_bo;
> > +
>
> Is it really needed to allocate and send the command each time on file open? 
> Can't
> this be cached, or is it because of the 32MB size needed?
>
[Morris]  And yes, I want to free the 32MB ASAP since it is one time operation 
and should not impact the memory usage as that much.
And from the user space point of view, they need the latest ifwi image as there 
could be ifwi flashing before reading. So caching is NOT a good idea in that it 
will give the obsolete ifwi and also occ