Re: [PATCH 2/2] drm/amdgpu: Add show_fdinfo() interface

2021-04-20 Thread Nieto, David M
[AMD Official Use Only - Internal Distribution Only]

I think we should probably add the pci domain to the bdf to match the format in 
the kernel

+   seq_printf(m, "pdev:\t%02x:%02x.%d\npasid:\t%u\n", bus, dev, fn,
+   fpriv->vm.pasid);

you can get it with

pci_domain_nr<https://elixir.bootlin.com/linux/latest/C/ident/pci_domain_nr>(pdev->bus<https://elixir.bootlin.com/linux/latest/C/ident/bus>)

David


From: Roy Sun 
Sent: Tuesday, April 20, 2021 8:46 PM
To: amd-gfx@lists.freedesktop.org 
Cc: Sun, Roy ; Nieto, David M 
Subject: [PATCH 2/2] drm/amdgpu: Add show_fdinfo() interface

Tracking devices, process info and fence info using
/proc/pid/fdinfo

Signed-off-by: David M Nieto 
Signed-off-by: Roy Sun 
---
 drivers/gpu/drm/amd/amdgpu/Makefile|  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c| 61 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h|  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c | 92 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h | 43 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  1 +
 8 files changed, 208 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index ee85e8aba636..d216b7ecb5d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -58,6 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
 amdgpu_fw_attestation.o amdgpu_securedisplay.o

+amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
+
 amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o

 # add asic specific block
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 125b25a5ce5b..3365feae15e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -107,6 +107,7 @@
 #include "amdgpu_gfxhub.h"
 #include "amdgpu_df.h"
 #include "amdgpu_smuio.h"
+#include "amdgpu_fdinfo.h"

 #define MAX_GPU_INSTANCE16

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 0350205c4897..01fe60fedcbe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -651,3 +651,64 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
 idr_destroy(&mgr->ctx_handles);
 mutex_destroy(&mgr->lock);
 }
+
+void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx, struct amdgpu_ctx_entity 
*centity,
+   ktime_t *total, ktime_t *max)
+{
+   ktime_t now, t1;
+   uint32_t i;
+
+   now = ktime_get();
+   for (i = 0; i < amdgpu_sched_jobs; i++) {
+   struct dma_fence *fence;
+   struct drm_sched_fence *s_fence;
+
+   spin_lock(&ctx->ring_lock);
+   fence = dma_fence_get(centity->fences[i]);
+   spin_unlock(&ctx->ring_lock);
+   if (!fence)
+   continue;
+   s_fence = to_drm_sched_fence(fence);
+   if (!dma_fence_is_signaled(&s_fence->scheduled))
+   continue;
+   t1 = s_fence->scheduled.timestamp;
+   if (t1 >= now)
+   continue;
+   if (dma_fence_is_signaled(&s_fence->finished) &&
+   s_fence->finished.timestamp < now)
+   *total += ktime_sub(s_fence->finished.timestamp, t1);
+   else
+   *total += ktime_sub(now, t1);
+   t1 = ktime_sub(now, t1);
+   dma_fence_put(fence);
+   *max = max(t1, *max);
+   }
+}
+
+ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
+   uint32_t idx, uint64_t *elapsed)
+{
+   struct idr *idp;
+   struct amdgpu_ctx *ctx;
+   uint32_t id;
+   struct amdgpu_ctx_entity *centity;
+   ktime_t total = 0, max = 0;
+
+   if (idx >= AMDGPU_MAX_ENTITY_NUM)
+   return 0;
+   idp = &mgr->ctx_handles;
+   mutex_lock(&mgr->lock);
+   idr_for_each_entry(idp, ctx, id) {
+   if (!ctx->entities[hwip][idx])
+   continue;
+
+   centity = ctx->entities[hwip][idx];
+   amdgpu_ctx_fence_time(ctx, centity, &total, &max);
+   }
+
+   mutex_unlock(&mgr->lock);
+   if (elapsed)
+   *elapsed = max;
+
+   return total;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/a

Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface

2021-04-27 Thread Nieto, David M
Besides system management, it provides parseable details on the VBIOS version 
and information. This is useful renderer information as the GPU behavior 
depends not only on the driver version but also on the firmwares running on the 
GPU.

The AMDGPU_INFO ioctl contains a method to get the versions of the ucode for 
all the IPs, but for VBIOS, only a way to dump the entire image is provided. 
While it Is possible to implement the whole logic of VBIOS parsing on 
userspace, it requires the application to have details on the header and table 
formats to parse the data. Moreover there is no guarantee that the format and 
header locations will remain constant across ASIC generations, so the 
maintainance cost and complexity seems unreasonable.

Providing a simple, and stable interface to the application seems to us like a 
sensible choice.

Thanks,
David

From: amd-gfx  on behalf of "Gu, JiaWei 
(Will)" 
Date: Thursday, April 22, 2021 at 8:25 PM
To: Christian König , 
"amd-gfx@lists.freedesktop.org" 
Cc: "Deucher, Alexander" , "StDenis, Tom" 
, "Nieto, David M" 
Subject: RE: [PATCH] drm/amdgpu: Add vbios info ioctl interface


[AMD Official Use Only - Internal Distribution Only]

Hi Christian,
It will be used by a user space SMI-lib for GPU status query.

Hi David, please feel free to correct my statement since I’m not sure I have a 
view of whole picture.

Thanks,
Jiawei

From: Christian König 
Sent: Thursday, April 22, 2021 9:27 PM
To: Gu, JiaWei (Will) ; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; StDenis, Tom 

Subject: Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface

Is that useful to Vulkan/OpenGL/other clients in any way?

Christian.
Am 22.04.21 um 15:18 schrieb Gu, JiaWei (Will):



CC Tom.



On Apr 22, 2021, at 21:09, Gu, JiaWei (Will) 
<mailto:jiawei...@amd.com> wrote:



<[v2][umr]add-vbios-info-query.patch>

UMR patch which calls this new IOCTL attached.



Best regards,

Jiawei



On Apr 22, 2021, at 10:34, Jiawei Gu 
<mailto:jiawei...@amd.com> wrote:



Add AMDGPU_INFO_VBIOS_INFO subquery id for detailed vbios info.



Signed-off-by: Jiawei Gu <mailto:jiawei...@amd.com>

---

drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  19 +++

drivers/gpu/drm/amd/amdgpu/atom.c  | 158 +

drivers/gpu/drm/amd/amdgpu/atom.h  |  11 ++

drivers/gpu/drm/amd/include/atomfirmware.h |  16 ++-

include/uapi/drm/amdgpu_drm.h  |  15 ++

5 files changed, 213 insertions(+), 6 deletions(-)



diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

index 39ee88d29cca..a20b016b05ab 100644

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

@@ -861,6 +861,25 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)

   min((size_t)size, (size_t)(bios_size - 
bios_offset)))

   ? -EFAULT : 0;

 }

+case AMDGPU_INFO_VBIOS_INFO: {

+   struct drm_amdgpu_info_vbios vbios_info = {};

+   struct atom_context *atom_context;

+

+   atom_context = adev->mode_info.atom_context;

+ memcpy(vbios_info.name, atom_context->name, 
sizeof(atom_context->name));

+ vbios_info.dbdf = PCI_DEVID(adev->pdev->bus->number, 
adev->pdev->devfn);

+ memcpy(vbios_info.vbios_pn, atom_context->vbios_pn, 
sizeof(atom_context->vbios_pn));

+ vbios_info.version = atom_context->version;

+ memcpy(vbios_info.date, atom_context->date, 
sizeof(atom_context->date));

+ memcpy(vbios_info.serial, adev->serial, sizeof(adev->serial));

+ vbios_info.dev_id = adev->pdev->device;

+ vbios_info.rev_id = adev->pdev->revision;

+ vbios_info.sub_dev_id = atom_context->sub_dev_id;

+ vbios_info.sub_ved_id = atom_context->sub_ved_id;

+

+   return copy_to_user(out, &vbios_info,

+  min((size_t)size, sizeof(vbios_info))) ? 
-EFAULT : 0;

+}

 default:

  DRM_DEBUG_KMS("Invalid request %d\n",

info->vbios_info.type);

diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c 
b/drivers/gpu/drm/amd/amdgpu/atom.c

index 3dcb8b32f48b..0e2f0ea13b40 100644

--- a/drivers/gpu/drm/amd/amdgpu/atom.c

+++ b/drivers/gpu/drm/amd/amdgpu/atom.c

@@ -31,6 +31,7 @@



#define ATOM_DEBUG



+#include "atomfirmware.h"

#include "atom.h"

#include "atom-names.h"

#include "atom-bits.h"

@@ -1299,12 +1300,153 @@ static void atom_index_iio(struct atom_context *ctx, 
int base)

 }

}



+static void atom_get_vbios_name(struct atom_c

Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface

2021-04-28 Thread Nieto, David M
I think this change may be orthogonal to that. Here we want to provide a way 
for the user application to get the VBIOS information without having to parse 
the binary…

And I agree that we should not have strong dependencies unless the encounter 
buggy VBIOS on the field, but I still think it is useful for the user to be 
able to display in a simple way the VBIOS version in their system if they 
happen to encounter an issue.

Regards,
David

From: Christian König 
Date: Wednesday, April 28, 2021 at 12:15 AM
To: "Nieto, David M" , "Gu, JiaWei (Will)" 
, "amd-gfx@lists.freedesktop.org" 

Cc: "Deucher, Alexander" , "StDenis, Tom" 

Subject: Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface

Well displaying the VBIOS information along with the other firmware in 
userspace is certainly useful.

We should just avoid making strong dependencies on that.

Firmware and VBIOS must always be backward compatible and the driver must 
always work with any released firmware and VBIOS combination.

What we can do is to display a warning when we detect and/or old/buggy firmware.

Regards,
Christian.
Am 28.04.21 um 08:47 schrieb Nieto, David M:
Besides system management, it provides parseable details on the VBIOS version 
and information. This is useful renderer information as the GPU behavior 
depends not only on the driver version but also on the firmwares running on the 
GPU.

The AMDGPU_INFO ioctl contains a method to get the versions of the ucode for 
all the IPs, but for VBIOS, only a way to dump the entire image is provided. 
While it Is possible to implement the whole logic of VBIOS parsing on 
userspace, it requires the application to have details on the header and table 
formats to parse the data. Moreover there is no guarantee that the format and 
header locations will remain constant across ASIC generations, so the 
maintainance cost and complexity seems unreasonable.

Providing a simple, and stable interface to the application seems to us like a 
sensible choice.

Thanks,
David

From: amd-gfx 
<mailto:amd-gfx-boun...@lists.freedesktop.org>
 on behalf of "Gu, JiaWei (Will)" <mailto:jiawei...@amd.com>
Date: Thursday, April 22, 2021 at 8:25 PM
To: Christian König 
<mailto:ckoenig.leichtzumer...@gmail.com>, 
"amd-gfx@lists.freedesktop.org"<mailto:amd-gfx@lists.freedesktop.org> 
<mailto:amd-gfx@lists.freedesktop.org>
Cc: "Deucher, Alexander" 
<mailto:alexander.deuc...@amd.com>, "StDenis, Tom" 
<mailto:tom.stde...@amd.com>, "Nieto, David M" 
<mailto:david.ni...@amd.com>
Subject: RE: [PATCH] drm/amdgpu: Add vbios info ioctl interface


[AMD Official Use Only - Internal Distribution Only]

Hi Christian,
It will be used by a user space SMI-lib for GPU status query.

Hi David, please feel free to correct my statement since I’m not sure I have a 
view of whole picture.

Thanks,
Jiawei

From: Christian König 
<mailto:ckoenig.leichtzumer...@gmail.com>
Sent: Thursday, April 22, 2021 9:27 PM
To: Gu, JiaWei (Will) <mailto:jiawei...@amd.com>; 
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Cc: Deucher, Alexander 
<mailto:alexander.deuc...@amd.com>; StDenis, Tom 
<mailto:tom.stde...@amd.com>
Subject: Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface

Is that useful to Vulkan/OpenGL/other clients in any way?

Christian.
Am 22.04.21 um 15:18 schrieb Gu, JiaWei (Will):



CC Tom.



On Apr 22, 2021, at 21:09, Gu, JiaWei (Will) 
<mailto:jiawei...@amd.com> wrote:



<[v2][umr]add-vbios-info-query.patch>

UMR patch which calls this new IOCTL attached.



Best regards,

Jiawei



On Apr 22, 2021, at 10:34, Jiawei Gu 
<mailto:jiawei...@amd.com> wrote:



Add AMDGPU_INFO_VBIOS_INFO subquery id for detailed vbios info.



Signed-off-by: Jiawei Gu <mailto:jiawei...@amd.com>

---

drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  19 +++

drivers/gpu/drm/amd/amdgpu/atom.c  | 158 +

drivers/gpu/drm/amd/amdgpu/atom.h  |  11 ++

drivers/gpu/drm/amd/include/atomfirmware.h |  16 ++-

include/uapi/drm/amdgpu_drm.h  |  15 ++

5 files changed, 213 insertions(+), 6 deletions(-)



diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

index 39ee88d29cca..a20b016b05ab 100644

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

@@ -861,6 +861,25 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)

   min((size_t)size, (size_t)(bios_size - 
bios_offset)))

   ? -EFAULT : 0;

 }

+case AMDGPU_INFO_VBIOS_INFO: {

+   struct drm_amdgpu_info_vbios vbios_info = {};

+   struct atom_context *atom_context;

+

+   atom_context = adev

Re: [PATCH 1/2] drm/scheduler: Change scheduled fence track

2021-05-05 Thread Nieto, David M
[AMD Official Use Only - Internal Distribution Only]

Thanks,

Do we know when those changes will make it back to amd-staging-drm-next ?

From: Christian König 
Sent: Wednesday, May 5, 2021 12:27 AM
To: Alex Deucher 
Cc: Deng, Emily ; Deucher, Alexander 
; amd-gfx list ; Sun, 
Roy ; Nieto, David M 
Subject: Re: [PATCH 1/2] drm/scheduler: Change scheduled fence track

I had to rebase them and was on sick leave last week.

Changed a few things on patch #1 and pushed the result a minute ago.

Christian.

Am 04.05.21 um 22:23 schrieb Alex Deucher:
> Did you push this yet?  I don't see it in drm-misc.
>
> Thanks,
>
> Alex
>
> On Wed, Apr 28, 2021 at 5:06 AM Christian König
>  wrote:
>> Well none. As I said I will push this upstream through drm-misc-next.
>>
>> Christian.
>>
>> Am 28.04.21 um 10:32 schrieb Deng, Emily:
>>
>> [AMD Official Use Only - Internal Distribution Only]
>>
>>
>> Hi Alex and Christian,
>>
>> What extra work Roy need to do about this patch? And fdinfo?
>>
>>
>>
>> Best wishes
>>
>> Emily Deng
>>
>> From: amd-gfx  On Behalf Of Deucher, 
>> Alexander
>> Sent: Tuesday, April 27, 2021 3:52 AM
>> To: Christian König 
>> Cc: Sun, Roy ; amd-gfx list 
>> ; Nieto, David M 
>> Subject: Re: [PATCH 1/2] drm/scheduler: Change scheduled fence track
>>
>>
>>
>> [AMD Official Use Only - Internal Distribution Only]
>>
>>
>>
>> [AMD Official Use Only - Internal Distribution Only]
>>
>>
>>
>> Fair point.  Either way works for me.
>>
>>
>>
>> Alex
>>
>> 
>>
>> From: Christian König 
>> Sent: Monday, April 26, 2021 3:48 PM
>> To: Deucher, Alexander 
>> Cc: amd-gfx list ; Sun, Roy 
>> ; Nieto, David M 
>> Subject: Re: [PATCH 1/2] drm/scheduler: Change scheduled fence track
>>
>>
>>
>> My concern is more to get this tested from more people than just AMD.
>>
>> Christian.
>>
>> Am 26.04.21 um 21:40 schrieb Deucher, Alexander:
>>
>> [AMD Official Use Only - Internal Distribution Only]
>>
>>
>>
>> That said, it would be easier for me to merge through the AMD tree since a 
>> relatively big AMD feature depends on it.  Not sure how much conflict 
>> potential there is if this goes through the AMD tree.
>>
>>
>>
>> Alex
>>
>>
>>
>> 
>>
>> From: amd-gfx  on behalf of Deucher, 
>> Alexander 
>> Sent: Monday, April 26, 2021 3:24 PM
>> To: Christian König 
>> Cc: amd-gfx list ; Sun, Roy 
>> ; Nieto, David M 
>> Subject: Re: [PATCH 1/2] drm/scheduler: Change scheduled fence track
>>
>>
>>
>> [AMD Official Use Only - Internal Distribution Only]
>>
>>
>>
>> [AMD Official Use Only - Internal Distribution Only]
>>
>>
>>
>> No objections from me.
>>
>>
>>
>> Thanks!
>>
>>
>>
>> Alex
>>
>>
>>
>> 
>>
>> From: Christian König 
>> Sent: Monday, April 26, 2021 2:49 AM
>> To: Deucher, Alexander 
>> Cc: Nieto, David M ; Sun, Roy ; 
>> amd-gfx list 
>> Subject: Re: [PATCH 1/2] drm/scheduler: Change scheduled fence track
>>
>>
>>
>> Hey Alex,
>>
>> any objections that we merge those two patches through drm-misc-next?
>>
>> Thanks,
>> Christian.
>>
>> Am 26.04.21 um 08:27 schrieb Roy Sun:
>>> Update the timestamp of scheduled fence on HW
>>> completion of the previous fences
>>>
>>> This allow more accurate tracking of the fence
>>> execution in HW
>>>
>>> Signed-off-by: David M Nieto 
>>> Signed-off-by: Roy Sun 
>>> ---
>>>drivers/gpu/drm/scheduler/sched_main.c | 12 ++--
>>>1 file changed, 10 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
>>> b/drivers/gpu/drm/scheduler/sched_main.c
>>> index 92d8de24d0a1..f8e39ab0c41b 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>> @@ -515,7 +515,7 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler 
>>> *sched)
>>>EXPORT_SYMBOL(drm_sched_resubmit_jobs);
>>>
>>>/**
>>> - * drm_sched_resubmit_jobs_ext - helper to relunch certain number of jobs 
>>> from mirror ring lis

Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface

2021-05-08 Thread Nieto, David M
[AMD Public Use]

The full vbios versioning information consists of the version (numeric), build 
date and the name... I this this interface exposes only the name.

Should we expose those on different sysfs files or just combine all of them in 
a single file?

David

From: Kees Cook 
Sent: Saturday, May 8, 2021 2:51 AM
To: Gu, JiaWei (Will) 
Cc: Deucher, Alexander ; StDenis, Tom 
; Christian König ; 
amd-gfx@lists.freedesktop.org ; Nieto, David M 
; linux-n...@vger.kernel.org 
Subject: Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface

On Sat, May 08, 2021 at 06:01:23AM +, Gu, JiaWei (Will) wrote:
> [AMD Official Use Only - Internal Distribution Only]
>
> Thanks for this catching Kees.
>
> Yes it should be 20, not 16. I was not aware that serial size had been 
> changed from 16 to 20 in struct amdgpu_device.
> Will submit a fix soon.

You might want to add a BUILD_BUG_ON() to keep those in sync, especially
since it's about to be UAPI.

-Kees

>
> Best regards,
> Jiawei
>
>
> -Original Message-
> From: Kees Cook 
> Sent: Saturday, May 8, 2021 12:28 PM
> To: Gu, JiaWei (Will) ; Deucher, Alexander 
> 
> Cc: StDenis, Tom ; Deucher, Alexander 
> ; Christian König 
> ; Gu, JiaWei (Will) ; 
> amd-gfx@lists.freedesktop.org; Nieto, David M ; 
> linux-n...@vger.kernel.org
> Subject: Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface
>
> Hi!
>
> This patch needs some fixing.
>
> On Thu, Apr 22, 2021 at 10:34:48AM +0800, Jiawei Gu wrote:
> > +   case AMDGPU_INFO_VBIOS_INFO: {
> > +   struct drm_amdgpu_info_vbios vbios_info = {};
> > +   struct atom_context *atom_context;
> > +
> > +   atom_context = adev->mode_info.atom_context;
> > +   memcpy(vbios_info.name, atom_context->name, 
> > sizeof(atom_context->name));
> > +   vbios_info.dbdf = PCI_DEVID(adev->pdev->bus->number, 
> > adev->pdev->devfn);
> > +   memcpy(vbios_info.vbios_pn, atom_context->vbios_pn, 
> > sizeof(atom_context->vbios_pn));
> > +   vbios_info.version = atom_context->version;
> > +   memcpy(vbios_info.date, atom_context->date, 
> > sizeof(atom_context->date));
> > +   memcpy(vbios_info.serial, adev->serial, 
> > sizeof(adev->serial));
>
> This writes beyond the end of vbios_info.serial.
>
> > +   vbios_info.dev_id = adev->pdev->device;
> > +   vbios_info.rev_id = adev->pdev->revision;
> > +   vbios_info.sub_dev_id = atom_context->sub_dev_id;
> > +   vbios_info.sub_ved_id = atom_context->sub_ved_id;
>
> Though it gets "repaired" by these writes.
>
> > +
> > +   return copy_to_user(out, &vbios_info,
> > +   min((size_t)size, 
> > sizeof(vbios_info))) ? -EFAULT : 0;
> > +   }
>
> sizeof(adev->serial) != sizeof(vbios_info.serial)
>
> adev is struct amdgpu_device:
>
> struct amdgpu_device {
> ...
> charserial[20];
>
>
> > +struct drm_amdgpu_info_vbios {
> > [...]
> > +   __u8 serial[16];
> > +   __u32 dev_id;
> > +   __u32 rev_id;
> > +   __u32 sub_dev_id;
> > +   __u32 sub_ved_id;
> > +};
>
> Is there a truncation issue (20 vs 16) and is this intended to be a 
> NUL-terminated string?
>
> --
> Kees Cook

--
Kees Cook
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios

2021-05-09 Thread Nieto, David M
No, this structure contains all the details of the vbios: date, serial number, 
name, etc.

The sysfs node only contains the vbios name string

> On May 9, 2021, at 23:33, Gu, JiaWei (Will)  wrote:
> 
> [AMD Official Use Only - Internal Distribution Only]
> 
> With a second thought, 
> __u8 serial[16] in drm_amdgpu_info_vbios is a bit redundant, sysfs 
> serial_number already exposes it.
> 
> Is it fine to abandon it from drm_amdgpu_info_vbios struct? @Alex Deucher 
> @Nieto, David M
> 
> Best regards,
> Jiawei
> 
> -Original Message-
> From: Alex Deucher  
> Sent: Sunday, May 9, 2021 11:59 PM
> To: Gu, JiaWei (Will) 
> Cc: amd-gfx list ; Kees Cook 
> 
> Subject: Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios
> 
>> On Sat, May 8, 2021 at 2:48 AM Jiawei Gu  wrote:
>> 
>> 20 should be serial char size now instead of 16.
>> 
>> Signed-off-by: Jiawei Gu 
> 
> Please make sure this keeps proper 64 bit alignment in the structure.
> 
> Alex
> 
> 
>> ---
>> include/uapi/drm/amdgpu_drm.h | 2 +-
>> 1 file changed, 1 insertion(+), 1 deletion(-)
>> 
>> diff --git a/include/uapi/drm/amdgpu_drm.h
>> b/include/uapi/drm/amdgpu_drm.h index 2b487a8d2727..1c20721f90da 
>> 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -957,7 +957,7 @@ struct drm_amdgpu_info_vbios {
>>__u8 vbios_pn[64];
>>__u32 version;
>>__u8 date[32];
>> -   __u8 serial[16];
>> +   __u8 serial[20];
>>__u32 dev_id;
>>__u32 rev_id;
>>__u32 sub_dev_id;
>> --
>> 2.17.1
>> 
>> ___
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flist
>> s.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=04%7C01%7CJi
>> awei.Gu%40amd.com%7Ccea31833184c41e8574508d9130360cc%7C3dd8961fe4884e6
>> 08e11a82d994e183d%7C0%7C0%7C637561727523880356%7CUnknown%7CTWFpbGZsb3d
>> 8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C
>> 1000&sdata=kAJiC6WoJUTeExwk6ftrLfMoY2OTAwg9X7mGgJT3kLk%3D&rese
>> rved=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios

2021-05-10 Thread Nieto, David M
Then the application would need to issue the ioctl and then open a sysfs file 
to get all the information it needs. It makes little sense from a programming 
perspective to add an incomplete interface in my opinion


From: Gu, JiaWei (Will) 
Sent: Monday, May 10, 2021 12:13:07 AM
To: Nieto, David M 
Cc: Alex Deucher ; amd-gfx list 
; Kees Cook ; Deng, Emily 

Subject: RE: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios

[AMD Official Use Only - Internal Distribution Only]

Hi David,

What I meant is to ONLY delete the serial[16] from drm_amdgpu_info_vbios, not 
the whole struct.

struct drm_amdgpu_info_vbios {
__u8 name[64];
__u32 dbdf;
__u8 vbios_pn[64];
__u32 version;
__u8 date[32];
__u8 serial[16]; // jiawei: shall we delete this
__u32 dev_id;
__u32 rev_id;
__u32 sub_dev_id;
__u32 sub_ved_id;
};

serial[16] in drm_amdgpu_info_vbios  copied from adev->serial, but there's 
already a sysfs named serial_number, which exposes it already.

static ssize_t amdgpu_device_get_serial_number(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;

return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
}

Thanks,
Jiawei


-Original Message-----
From: Nieto, David M 
Sent: Monday, May 10, 2021 2:53 PM
To: Gu, JiaWei (Will) 
Cc: Alex Deucher ; amd-gfx list 
; Kees Cook ; Deng, Emily 

Subject: Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios

No, this structure contains all the details of the vbios: date, serial number, 
name, etc.

The sysfs node only contains the vbios name string

> On May 9, 2021, at 23:33, Gu, JiaWei (Will)  wrote:
>
> [AMD Official Use Only - Internal Distribution Only]
>
> With a second thought,
> __u8 serial[16] in drm_amdgpu_info_vbios is a bit redundant, sysfs 
> serial_number already exposes it.
>
> Is it fine to abandon it from drm_amdgpu_info_vbios struct? @Alex
> Deucher @Nieto, David M
>
> Best regards,
> Jiawei
>
> -Original Message-
> From: Alex Deucher 
> Sent: Sunday, May 9, 2021 11:59 PM
> To: Gu, JiaWei (Will) 
> Cc: amd-gfx list ; Kees Cook
> 
> Subject: Re: [PATCH] drm/amdgpu: Align serial size in
> drm_amdgpu_info_vbios
>
>> On Sat, May 8, 2021 at 2:48 AM Jiawei Gu  wrote:
>>
>> 20 should be serial char size now instead of 16.
>>
>> Signed-off-by: Jiawei Gu 
>
> Please make sure this keeps proper 64 bit alignment in the structure.
>
> Alex
>
>
>> ---
>> include/uapi/drm/amdgpu_drm.h | 2 +-
>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/include/uapi/drm/amdgpu_drm.h
>> b/include/uapi/drm/amdgpu_drm.h index 2b487a8d2727..1c20721f90da
>> 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -957,7 +957,7 @@ struct drm_amdgpu_info_vbios {
>>__u8 vbios_pn[64];
>>__u32 version;
>>__u8 date[32];
>> -   __u8 serial[16];
>> +   __u8 serial[20];
>>__u32 dev_id;
>>__u32 rev_id;
>>__u32 sub_dev_id;
>> --
>> 2.17.1
>>
>> ___
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flis
>> t
>> s.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=04%7C01%7CJ
>> i
>> awei.Gu%40amd.com%7Ccea31833184c41e8574508d9130360cc%7C3dd8961fe4884e
>> 6
>> 08e11a82d994e183d%7C0%7C0%7C637561727523880356%7CUnknown%7CTWFpbGZsb3
>> d
>> 8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7
>> C
>> 1000&sdata=kAJiC6WoJUTeExwk6ftrLfMoY2OTAwg9X7mGgJT3kLk%3D&res
>> e
>> rved=0
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios

2021-05-10 Thread Nieto, David M
One of the primary usecases is to add this information to the renderer string, 
I am not sure if there are other cases of UMD drivers accessing sysfs nodes, 
but I think if we think permissions, if a client is authenticated and opens the 
render device then it can use the IOCTL, it is unclear to me we can make a such 
an assumption for sysfs nodes…

I think there is value in having both tbh.

Regards,
David

From: Christian König 
Date: Monday, May 10, 2021 at 6:48 AM
To: "Nieto, David M" , "Gu, JiaWei (Will)" 

Cc: Alex Deucher , "Deng, Emily" , 
Kees Cook , amd-gfx list 
Subject: Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios

Well we could add both as sysfs file(s).

Question here is rather what is the primary use case of this and if the 
application has the necessary access permissions to the sysfs files?

Regards,
Christian.
Am 10.05.21 um 15:42 schrieb Nieto, David M:
Then the application would need to issue the ioctl and then open a sysfs file 
to get all the information it needs. It makes little sense from a programming 
perspective to add an incomplete interface in my opinion


From: Gu, JiaWei (Will) <mailto:jiawei...@amd.com>
Sent: Monday, May 10, 2021 12:13:07 AM
To: Nieto, David M <mailto:david.ni...@amd.com>
Cc: Alex Deucher <mailto:alexdeuc...@gmail.com>; amd-gfx 
list <mailto:amd-gfx@lists.freedesktop.org>; 
Kees Cook <mailto:keesc...@chromium.org>; Deng, Emily 
<mailto:emily.d...@amd.com>
Subject: RE: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios

[AMD Official Use Only - Internal Distribution Only]

Hi David,

What I meant is to ONLY delete the serial[16] from drm_amdgpu_info_vbios, not 
the whole struct.

struct drm_amdgpu_info_vbios {
__u8 name[64];
__u32 dbdf;
__u8 vbios_pn[64];
__u32 version;
__u8 date[32];
__u8 serial[16]; // jiawei: shall we delete this
__u32 dev_id;
__u32 rev_id;
__u32 sub_dev_id;
__u32 sub_ved_id;
};

serial[16] in drm_amdgpu_info_vbios  copied from adev->serial, but there's 
already a sysfs named serial_number, which exposes it already.

static ssize_t amdgpu_device_get_serial_number(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;

return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
}

Thanks,
Jiawei


-Original Message-
From: Nieto, David M <mailto:david.ni...@amd.com>
Sent: Monday, May 10, 2021 2:53 PM
To: Gu, JiaWei (Will) <mailto:jiawei...@amd.com>
Cc: Alex Deucher <mailto:alexdeuc...@gmail.com>; amd-gfx 
list <mailto:amd-gfx@lists.freedesktop.org>; 
Kees Cook <mailto:keesc...@chromium.org>; Deng, Emily 
<mailto:emily.d...@amd.com>
Subject: Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios

No, this structure contains all the details of the vbios: date, serial number, 
name, etc.

The sysfs node only contains the vbios name string

> On May 9, 2021, at 23:33, Gu, JiaWei (Will) 
> <mailto:jiawei...@amd.com> wrote:
>
> [AMD Official Use Only - Internal Distribution Only]
>
> With a second thought,
> __u8 serial[16] in drm_amdgpu_info_vbios is a bit redundant, sysfs 
> serial_number already exposes it.
>
> Is it fine to abandon it from drm_amdgpu_info_vbios struct? @Alex
> Deucher @Nieto, David M
>
> Best regards,
> Jiawei
>
> -Original Message-
> From: Alex Deucher <mailto:alexdeuc...@gmail.com>
> Sent: Sunday, May 9, 2021 11:59 PM
> To: Gu, JiaWei (Will) <mailto:jiawei...@amd.com>
> Cc: amd-gfx list 
> <mailto:amd-gfx@lists.freedesktop.org>; Kees 
> Cook
> <mailto:keesc...@chromium.org>
> Subject: Re: [PATCH] drm/amdgpu: Align serial size in
> drm_amdgpu_info_vbios
>
>> On Sat, May 8, 2021 at 2:48 AM Jiawei Gu 
>> <mailto:jiawei...@amd.com> wrote:
>>
>> 20 should be serial char size now instead of 16.
>>
>> Signed-off-by: Jiawei Gu <mailto:jiawei...@amd.com>
>
> Please make sure this keeps proper 64 bit alignment in the structure.
>
> Alex
>
>
>> ---
>> include/uapi/drm/amdgpu_drm.h | 2 +-
>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/include/uapi/drm/amdgpu_drm.h
>> b/include/uapi/drm/amdgpu_drm.h index 2b487a8d2727..1c20721f90da
>> 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -957,7 +957,7 @@ struct drm_amdgpu_info_vbios {
>>__u8 vbios_pn[64];
>>__u32 version;
>>__u8 date[32];
>> -   __u8 serial[16];
>> +   __u8 serial[20];
>> 

Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios

2021-05-10 Thread Nieto, David M
[AMD Official Use Only - Internal Distribution Only]

I agree that the serial number should be on number form, but I think we are 
still missing one field, which is the vbios name, which is located after the 
P/N, ASIC, PCI and memory type strings (skiping 0xD 0xA

David

From: Gu, JiaWei (Will) 
Sent: Monday, May 10, 2021 7:23 PM
To: Nieto, David M ; Christian König 

Cc: Alex Deucher ; Deng, Emily ; 
Kees Cook ; amd-gfx list 
Subject: RE: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios


[AMD Official Use Only - Internal Distribution Only]



Got it. Let’s keep them both.



Another idea about drm_amdgpu_info_vbios is

Does it make more sense to fill the “serial info” with uint64_t 
adev->unique_id, instead of the current char[] in adev->serial?



Sorry about that I was not aware of adev->unique_id exists when I defined 
drm_amdgpu_info_vbios.

I think it’s clearer to use original numeric variable than a string to expose 
serial.



How about that?



>> struct drm_amdgpu_info_vbios {
>>__u8 vbios_pn[64];
>>__u32 version;
>>__u8 date[32];
>> -   __u8 serial[16];
>> +   __u64 serial;
>>__u32 dev_id;
>>__u32 rev_id;
>>    __u32 sub_dev_id;
>> --



Best regards,

Jiawei





From: Nieto, David M 
Sent: Tuesday, May 11, 2021 4:20 AM
To: Christian König ; Gu, JiaWei (Will) 

Cc: Alex Deucher ; Deng, Emily ; 
Kees Cook ; amd-gfx list 
Subject: Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios



One of the primary usecases is to add this information to the renderer string, 
I am not sure if there are other cases of UMD drivers accessing sysfs nodes, 
but I think if we think permissions, if a client is authenticated and opens the 
render device then it can use the IOCTL, it is unclear to me we can make a such 
an assumption for sysfs nodes…



I think there is value in having both tbh.



Regards,

David



From: Christian König 
mailto:ckoenig.leichtzumer...@gmail.com>>
Date: Monday, May 10, 2021 at 6:48 AM
To: "Nieto, David M" mailto:david.ni...@amd.com>>, "Gu, 
JiaWei (Will)" mailto:jiawei...@amd.com>>
Cc: Alex Deucher mailto:alexdeuc...@gmail.com>>, "Deng, 
Emily" mailto:emily.d...@amd.com>>, Kees Cook 
mailto:keesc...@chromium.org>>, amd-gfx list 
mailto:amd-gfx@lists.freedesktop.org>>
Subject: Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios



Well we could add both as sysfs file(s).

Question here is rather what is the primary use case of this and if the 
application has the necessary access permissions to the sysfs files?

Regards,
Christian.

Am 10.05.21 um 15:42 schrieb Nieto, David M:

Then the application would need to issue the ioctl and then open a sysfs file 
to get all the information it needs. It makes little sense from a programming 
perspective to add an incomplete interface in my opinion



____

From: Gu, JiaWei (Will) <mailto:jiawei...@amd.com>
Sent: Monday, May 10, 2021 12:13:07 AM
To: Nieto, David M <mailto:david.ni...@amd.com>
Cc: Alex Deucher <mailto:alexdeuc...@gmail.com>; amd-gfx 
list <mailto:amd-gfx@lists.freedesktop.org>; 
Kees Cook <mailto:keesc...@chromium.org>; Deng, Emily 
<mailto:emily.d...@amd.com>
Subject: RE: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios



[AMD Official Use Only - Internal Distribution Only]

Hi David,

What I meant is to ONLY delete the serial[16] from drm_amdgpu_info_vbios, not 
the whole struct.

struct drm_amdgpu_info_vbios {
__u8 name[64];
__u32 dbdf;
__u8 vbios_pn[64];
__u32 version;
__u8 date[32];
__u8 serial[16]; // jiawei: shall we delete this
__u32 dev_id;
__u32 rev_id;
__u32 sub_dev_id;
__u32 sub_ved_id;
};

serial[16] in drm_amdgpu_info_vbios  copied from adev->serial, but there's 
already a sysfs named serial_number, which exposes it already.

static ssize_t amdgpu_device_get_serial_number(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
    struct amdgpu_device *adev = ddev->dev_private;

return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
}

Thanks,
Jiawei


-Original Message-
From: Nieto, David M <mailto:david.ni...@amd.com>
Sent: Monday, May 10, 2021 2:53 PM
To: Gu, JiaWei (Will) <mailto:jiawei...@amd.com>
Cc: Alex Deucher <mailto:alexdeuc...@gmail.com>; amd-gfx 
list <mailto:amd-gfx@lists.freedesktop.org>; 
Kees Cook <mailto:keesc...@chromium.org>; Deng, Emily 
<mailto:emily.d...@amd.com>
Subject: Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios

No, this structure contains all the details of the vbios: date, serial n

Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios

2021-05-11 Thread Nieto, David M
[AMD Public Use]

The point of having the device ID in the structure is because we are reading it 
from the VBIOS header, not the asic registers. They should match, but an user 
may flash a VBIOS for a different devid and they may not match.

Regarding sysfs vs ioctl I see value in providing it in both ways, Mesa uses 
IOCTL and other DRM based tools may benefit as well. I recently went through 
the trouble of having to add sysfs string parsing for some data not available 
in ioctl, and while not very complicated, it is a programming inconvenience.

I understand that being uapi, changing it is not easy, but this is information 
extracted from a VBIOS header, something that has been kept stable for many 
years.

David

From: Christian König 
Sent: Tuesday, May 11, 2021 7:07 AM
To: Deucher, Alexander ; Marek Olšák 

Cc: Kees Cook ; Gu, JiaWei (Will) ; 
amd-gfx list ; Deng, Emily ; 
Alex Deucher ; Nieto, David M 
Subject: Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios

Yeah, but umr is making strong use of sysfs as well.

The only justification of this interface would be if we want to use it in Mesa.

And I agree with Marek that looks redundant with the device structure to me as 
well.

Christian.

Am 11.05.21 um 16:04 schrieb Deucher, Alexander:

[AMD Public Use]

It's being used by umr and some other smi tools to provide vbios information 
for debugging.

Alex


From: amd-gfx 
<mailto:amd-gfx-boun...@lists.freedesktop.org>
 on behalf of Marek Olšák <mailto:mar...@gmail.com>
Sent: Tuesday, May 11, 2021 4:18 AM
To: Christian König 
<mailto:ckoenig.leichtzumer...@gmail.com>
Cc: Kees Cook <mailto:keesc...@chromium.org>; Gu, JiaWei 
(Will) <mailto:jiawei...@amd.com>; amd-gfx list 
<mailto:amd-gfx@lists.freedesktop.org>; Deng, 
Emily <mailto:emily.d...@amd.com>; Alex Deucher 
<mailto:alexdeuc...@gmail.com>; Nieto, David M 
<mailto:david.ni...@amd.com>
Subject: Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios

Mesa doesn't use sysfs.

Note that this is a uapi, meaning that once it's in the kernel, it can't be 
changed like that.

What's the use case for this new interface? Isn't it partially redundant with 
the current device info structure, which seems to have the equivalent of dev_id 
and rev_id?

Marek

On Tue, May 11, 2021 at 3:51 AM Christian König 
mailto:ckoenig.leichtzumer...@gmail.com>> 
wrote:
Marek and other userspace folks need to decide that.

Basic question here is if Mesa is already accessing sysfs nodes for OpenGL or 
RADV. If that is the case then we should probably expose the information there 
as well.

If that isn't the case (which I think it is) then we should implement it as 
IOCTL.

Regards,
Christian.

Am 10.05.21 um 22:19 schrieb Nieto, David M:

One of the primary usecases is to add this information to the renderer string, 
I am not sure if there are other cases of UMD drivers accessing sysfs nodes, 
but I think if we think permissions, if a client is authenticated and opens the 
render device then it can use the IOCTL, it is unclear to me we can make a such 
an assumption for sysfs nodes…



I think there is value in having both tbh.



Regards,

David



From: Christian König 
<mailto:ckoenig.leichtzumer...@gmail.com>
Date: Monday, May 10, 2021 at 6:48 AM
To: "Nieto, David M" <mailto:david.ni...@amd.com>, "Gu, 
JiaWei (Will)" <mailto:jiawei...@amd.com>
Cc: Alex Deucher <mailto:alexdeuc...@gmail.com>, "Deng, 
Emily" <mailto:emily.d...@amd.com>, Kees Cook 
<mailto:keesc...@chromium.org>, amd-gfx list 
<mailto:amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios



Well we could add both as sysfs file(s).

Question here is rather what is the primary use case of this and if the 
application has the necessary access permissions to the sysfs files?

Regards,
Christian.

Am 10.05.21 um 15:42 schrieb Nieto, David M:

Then the application would need to issue the ioctl and then open a sysfs file 
to get all the information it needs. It makes little sense from a programming 
perspective to add an incomplete interface in my opinion





From: Gu, JiaWei (Will) <mailto:jiawei...@amd.com>
Sent: Monday, May 10, 2021 12:13:07 AM
To: Nieto, David M <mailto:david.ni...@amd.com>
Cc: Alex Deucher <mailto:alexdeuc...@gmail.com>; amd-gfx 
list <mailto:amd-gfx@lists.freedesktop.org>; 
Kees Cook <mailto:keesc...@chromium.org>; Deng, Emily 
<mailto:emily.d...@amd.com>
Subject: RE: [PATCH] drm/amdgpu: Align serial size in drm_amdgpu_info_vbios



[AMD Official Use Only - Internal Distribution Only]

Hi David,

What I meant is to ONLY delete the serial[16] from drm_amdgpu_info_vbios, not 
the whole struct.

struct drm_amdgpu_inf

Re: [PATCH 2/2] drm/amdgpu: fix fence calculation

2021-05-11 Thread Nieto, David M
[AMD Official Use Only - Internal Distribution Only]

The local variables need to be initialized to zero, since amdgpu_ctx_fence_time 
accumulates and does not initialize

David

From: Christian König 
Sent: Tuesday, May 11, 2021 12:53 AM
To: Nieto, David M ; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH 2/2] drm/amdgpu: fix fence calculation

Am 10.05.21 um 22:29 schrieb David M Nieto:
> The proper metric for fence utilization over several
> contexts is an harmonic mean, but such calculation is
> prohibitive in kernel space, so the code approximates it.
>
> Because the approximation diverges when one context has a
> very small ratio compared with the other context, this change
> filter out ratios smaller that 0.01%
>
> Signed-off-by: David M Nieto 
> Change-Id: I5b6e0ce5f489a5f55855d35354a6a3653e9d613b
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 13 -
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  1 +
>   2 files changed, 13 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index 9036c93b4a0c..89ee464b9424 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -698,16 +698,27 @@ ktime_t amdgpu_ctx_mgr_fence_usage(struct 
> amdgpu_ctx_mgr *mgr, uint32_t hwip,
>struct amdgpu_ctx_entity *centity;
>ktime_t total = 0, max = 0;
>
> +

Unrelated white space change.

>if (idx >= AMDGPU_MAX_ENTITY_NUM)
>return 0;
>idp = &mgr->ctx_handles;
>mutex_lock(&mgr->lock);
>idr_for_each_entry(idp, ctx, id) {
> + ktime_t ttotal = tmax = ktime_set(0, 0);

There should be a blank line between decleration and code and please
don't initialize local variables if it isn't necessary.

Christian.

>if (!ctx->entities[hwip][idx])
>continue;
>
>centity = ctx->entities[hwip][idx];
> - amdgpu_ctx_fence_time(ctx, centity, &total, &max);
> + amdgpu_ctx_fence_time(ctx, centity, &ttotal, &tmax);
> +
> + /* Harmonic mean approximation diverges for very small
> +  * values. If ratio < 0.01% ignore
> +  */
> + if (AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(tmax, ttotal))
> + continue;
> +
> + total = ktime_add(total, ttotal);
> + max = ktime_after(tmax, max) ? tmax : max;
>}
>
>mutex_unlock(&mgr->lock);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> index 10dcf59a5c6b..3541dfb059ec 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> @@ -30,6 +30,7 @@ struct drm_file;
>   struct amdgpu_fpriv;
>
>   #define AMDGPU_MAX_ENTITY_NUM 4
> +#define AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(max, total) (max > 16384ULL*total)
>
>   struct amdgpu_ctx_entity {
>uint64_tsequence;

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu: fix fence calculation

2021-05-12 Thread Nieto, David M
[AMD Official Use Only - Internal Distribution Only]

yep, you are right...

I'll make those changes.

David

From: Christian König 
Sent: Tuesday, May 11, 2021 11:56 PM
To: Nieto, David M ; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH 2/2] drm/amdgpu: fix fence calculation

And BTW amdgpu_ctx_fence_time() should probably be static.

Christian.

Am 12.05.21 um 08:55 schrieb Christian König:
In this case amdgpu_ctx_fence_time should probably be changed to initialize the 
variable itself.

That is really bad coding style otherwise.

Christian.

Am 11.05.21 um 20:14 schrieb Nieto, David M:

[AMD Official Use Only - Internal Distribution Only]

The local variables need to be initialized to zero, since amdgpu_ctx_fence_time 
accumulates and does not initialize

David

From: Christian König 
<mailto:ckoenig.leichtzumer...@gmail.com>
Sent: Tuesday, May 11, 2021 12:53 AM
To: Nieto, David M <mailto:david.ni...@amd.com>; 
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> 
<mailto:amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/2] drm/amdgpu: fix fence calculation

Am 10.05.21 um 22:29 schrieb David M Nieto:
> The proper metric for fence utilization over several
> contexts is an harmonic mean, but such calculation is
> prohibitive in kernel space, so the code approximates it.
>
> Because the approximation diverges when one context has a
> very small ratio compared with the other context, this change
> filter out ratios smaller that 0.01%
>
> Signed-off-by: David M Nieto <mailto:david.ni...@amd.com>
> Change-Id: I5b6e0ce5f489a5f55855d35354a6a3653e9d613b
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 13 -
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  1 +
>   2 files changed, 13 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index 9036c93b4a0c..89ee464b9424 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -698,16 +698,27 @@ ktime_t amdgpu_ctx_mgr_fence_usage(struct 
> amdgpu_ctx_mgr *mgr, uint32_t hwip,
>struct amdgpu_ctx_entity *centity;
>ktime_t total = 0, max = 0;
>
> +

Unrelated white space change.

>if (idx >= AMDGPU_MAX_ENTITY_NUM)
>return 0;
>idp = &mgr->ctx_handles;
>mutex_lock(&mgr->lock);
>idr_for_each_entry(idp, ctx, id) {
> + ktime_t ttotal = tmax = ktime_set(0, 0);

There should be a blank line between decleration and code and please
don't initialize local variables if it isn't necessary.

Christian.

>if (!ctx->entities[hwip][idx])
>continue;
>
>centity = ctx->entities[hwip][idx];
> - amdgpu_ctx_fence_time(ctx, centity, &total, &max);
> + amdgpu_ctx_fence_time(ctx, centity, &ttotal, &tmax);
> +
> + /* Harmonic mean approximation diverges for very small
> +  * values. If ratio < 0.01% ignore
> +  */
> + if (AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(tmax, ttotal))
> + continue;
> +
> + total = ktime_add(total, ttotal);
> + max = ktime_after(tmax, max) ? tmax : max;
>}
>
>mutex_unlock(&mgr->lock);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> index 10dcf59a5c6b..3541dfb059ec 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> @@ -30,6 +30,7 @@ struct drm_file;
>   struct amdgpu_fpriv;
>
>   #define AMDGPU_MAX_ENTITY_NUM 4
> +#define AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(max, total) (max > 16384ULL*total)
>
>   struct amdgpu_ctx_entity {
>uint64_tsequence;



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 2/2] drm/amdgpu: fix fence calculation

2021-05-13 Thread Nieto, David M
[AMD Official Use Only - Internal Distribution Only]

DOS line endings? That is weird

I corrected the issues that showed in checkpatch.pl (the > 80 lines)

I'll re-upload

From: Christian König 
Sent: Thursday, May 13, 2021 1:11 AM
To: Nieto, David M ; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH 2/2] drm/amdgpu: fix fence calculation

Am 12.05.21 um 21:45 schrieb David M Nieto:
> The proper metric for fence utilization over several
> contexts is an harmonic mean, but such calculation is
> prohibitive in kernel space, so the code approximates it.
>
> Because the approximation diverges when one context has a
> very small ratio compared with the other context, this change
> filter out ratios smaller that 0.01%
>
> Signed-off-by: David M Nieto 
> Change-Id: I5b6e0ce5f489a5f55855d35354a6a3653e9d613b

Looks good to me now, but the automated tools complain about "DOS line
endings" plus a couple of other nit picks and won't let me push it :)

Please use the checkpatch.pl script found in the Linux kernel to fix
those errors and resend.

Thanks,
Christian.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 17 +++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  1 +
>   2 files changed, 16 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index 9036c93b4a0c..b919615e6644 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -652,12 +652,14 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
>mutex_destroy(&mgr->lock);
>   }
>
> -void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx, struct amdgpu_ctx_entity 
> *centity,
> +static void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx, struct 
> amdgpu_ctx_entity *centity,
>ktime_t *total, ktime_t *max)
>   {
>ktime_t now, t1;
>uint32_t i;
>
> + *total = *max = 0;
> +
>now = ktime_get();
>for (i = 0; i < amdgpu_sched_jobs; i++) {
>struct dma_fence *fence;
> @@ -703,11 +705,22 @@ ktime_t amdgpu_ctx_mgr_fence_usage(struct 
> amdgpu_ctx_mgr *mgr, uint32_t hwip,
>idp = &mgr->ctx_handles;
>mutex_lock(&mgr->lock);
>idr_for_each_entry(idp, ctx, id) {
> + ktime_t ttotal, tmax;
> +
>if (!ctx->entities[hwip][idx])
>continue;
>
>centity = ctx->entities[hwip][idx];
> - amdgpu_ctx_fence_time(ctx, centity, &total, &max);
> + amdgpu_ctx_fence_time(ctx, centity, &ttotal, &tmax);
> +
> + /* Harmonic mean approximation diverges for very small
> +  * values. If ratio < 0.01% ignore
> +  */
> + if (AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(tmax, ttotal))
> + continue;
> +
> + total = ktime_add(total, ttotal);
> + max = ktime_after(tmax, max) ? tmax : max;
>}
>
>mutex_unlock(&mgr->lock);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> index 10dcf59a5c6b..3541dfb059ec 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> @@ -30,6 +30,7 @@ struct drm_file;
>   struct amdgpu_fpriv;
>
>   #define AMDGPU_MAX_ENTITY_NUM 4
> +#define AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(max, total) (max > 16384ULL*total)
>
>   struct amdgpu_ctx_entity {
>uint64_tsequence;

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 4/5] drm/amdgpu: Initialize Aldebaran RLC function pointers

2021-12-15 Thread Nieto, David M
[AMD Official Use Only]

 case IP_VERSION(9, 4, 1):
 case IP_VERSION(9, 4, 2):
 amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
+   if (amdgpu_sriov_vf(adev) && adev->ip_versions[GC_HWIP][0] == 
IP_VERSION(9, 4, 2))
+   gfx_v9_0_set_rlc_funcs(adev);
 break;
 case IP_VERSION(10, 1, 10):

I think for the above, it would be more clear just to separate them:

case IP_VERSION(9, 4, 1):
amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
break;
case IP_VERSION(9, 4, 2):
   amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
   if (amdgpu_sriov_vf(adev))
   gfx_v9_0_set_rlc_funcs(adev);
   break;

From: Skvortsov, Victor 
Sent: Wednesday, December 15, 2021 10:55 AM
To: amd-gfx@lists.freedesktop.org ; Deng, Emily 
; Liu, Monk ; Ming, Davis 
; Liu, Shaoyun ; Zhou, Peng Ju 
; Chen, JingWen ; Chen, Horace 
; Nieto, David M 
Cc: Skvortsov, Victor 
Subject: [PATCH 4/5] drm/amdgpu: Initialize Aldebaran RLC function pointers

In SRIOV, RLC function pointers must be initialized early as
we rely on the RLCG interface for all GC register access.

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 +--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h | 2 ++
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 65e1f6cc59dd..1bc92a38d124 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -844,6 +844,8 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct 
amdgpu_device *adev)
 case IP_VERSION(9, 4, 1):
 case IP_VERSION(9, 4, 2):
 amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
+   if (amdgpu_sriov_vf(adev) && adev->ip_versions[GC_HWIP][0] == 
IP_VERSION(9, 4, 2))
+   gfx_v9_0_set_rlc_funcs(adev);
 break;
 case IP_VERSION(10, 1, 10):
 case IP_VERSION(10, 1, 2):
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index edb3e3b08eed..d252b06efa43 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -816,7 +816,6 @@ static void gfx_v9_0_sriov_wreg(struct amdgpu_device *adev, 
u32 offset,
 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
-static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
 struct amdgpu_cu_info *cu_info);
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
@@ -7066,7 +7065,7 @@ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device 
*adev)
 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
 }

-static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
+void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
 {
 switch (adev->ip_versions[GC_HWIP][0]) {
 case IP_VERSION(9, 0, 1):
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h
index dfe8d4841f58..1817e252354f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h
@@ -29,4 +29,6 @@ extern const struct amdgpu_ip_block_version gfx_v9_0_ip_block;
 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
u32 instance);

+void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
+
 #endif
--
2.25.1



Re: [PATCH 5/5] drm/amdgpu: Modify indirect register access for gfx9 sriov

2021-12-15 Thread Nieto, David M
[AMD Official Use Only]

 scratch_reg0 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
 scratch_reg1 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
-   scratch_reg2 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
-   scratch_reg3 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
+   scratch_reg2 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG2_BASE_IDX] + mmSCRATCH_REG2)*4;
+   scratch_reg3 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG3_BASE_IDX] + mmSCRATCH_REG3)*4;
 spare_int = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;

the definition of scratch_reg2 and 3 has here will this be backwards 
compatible? Was it a bug in the definition?

From: Skvortsov, Victor 
Sent: Wednesday, December 15, 2021 10:55 AM
To: amd-gfx@lists.freedesktop.org ; Deng, Emily 
; Liu, Monk ; Ming, Davis 
; Liu, Shaoyun ; Zhou, Peng Ju 
; Chen, JingWen ; Chen, Horace 
; Nieto, David M 
Cc: Skvortsov, Victor 
Subject: [PATCH 5/5] drm/amdgpu: Modify indirect register access for gfx9 sriov

Expand RLCG interface for new GC read & write commands.
New interface will only be used if the PF enables the flag in pf2vf msg.

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 111 +++---
 1 file changed, 83 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index d252b06efa43..bce6ab52cae0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -63,6 +63,13 @@
 #define mmGCEA_PROBE_MAP0x070c
 #define mmGCEA_PROBE_MAP_BASE_IDX   0

+#define GFX9_RLCG_GC_WRITE_OLD (0x8 << 28)
+#define GFX9_RLCG_GC_WRITE (0x0 << 28)
+#define GFX9_RLCG_GC_READ  (0x1 << 28)
+#define GFX9_RLCG_VFGATE_DISABLED  0x400
+#define GFX9_RLCG_WRONG_OPERATION_TYPE 0x200
+#define GFX9_RLCG_NOT_IN_RANGE 0x100
+
 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
@@ -739,7 +746,7 @@ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
 mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 };

-static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, u32 offset, u32 v, u32 
flag)
+static u32 gfx_v9_0_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, 
uint32_t flag)
 {
 static void *scratch_reg0;
 static void *scratch_reg1;
@@ -748,21 +755,20 @@ static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, 
u32 offset, u32 v, u32 f
 static void *spare_int;
 static uint32_t grbm_cntl;
 static uint32_t grbm_idx;
+   uint32_t i = 0;
+   uint32_t retries = 5;
+   u32 ret = 0;
+   u32 tmp;

 scratch_reg0 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
 scratch_reg1 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
-   scratch_reg2 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
-   scratch_reg3 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
+   scratch_reg2 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG2_BASE_IDX] + mmSCRATCH_REG2)*4;
+   scratch_reg3 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG3_BASE_IDX] + mmSCRATCH_REG3)*4;
 spare_int = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;

 grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + 
mmGRBM_GFX_CNTL;
 grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + 
mmGRBM_GFX_INDEX;

-   if (amdgpu_sriov_runtime(adev)) {
-   pr_err("shouldn't call rlcg write register during runtime\n");
-   return;
-   }
-
 if (offset == grbm_cntl || offset == grbm_idx) {
 if (offset  == grbm_cntl)
 writel(v, scratch_reg2);
@@ -771,41 +777,89 @@ static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, 
u32 offset, u32 v, u32 f

 writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
 } else {
-   uint32_t i = 0;
-   uint32_t retries = 5;
-
 writel(v, scratch_reg0);
-   writel(offset | 0x8000, scratch_reg1);
+  

Re: [PATCH 2/5] drm/amdgpu: Modify indirect register access for gmc_v9_0 sriov

2021-12-15 Thread Nieto, David M
[AMD Official Use Only]

I don't know what others may think, but this coding while correct:

-   WREG32_NO_KIQ(hub->vm_inv_eng0_req +
- hub->eng_distance * eng, inv_req);
+   (vmhub == AMDGPU_GFXHUB_0) ?
+   WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req) :
+   WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req);

if is bit difficult to read. I wouldn't mind if the results of those function 
calls were stored in a variable, but here, you are using it as an if/else or a 
switch statement. It is better to do it like that:

switch(vmhub) {
case AMDGPU_GFXHUB_0):
//yadayada

or

if (vmhub == AMDGPU_GFXHUB_0)
 //yadayada
else {
 // yada du
}

From: Skvortsov, Victor 
Sent: Wednesday, December 15, 2021 10:55 AM
To: amd-gfx@lists.freedesktop.org ; Deng, Emily 
; Liu, Monk ; Ming, Davis 
; Liu, Shaoyun ; Zhou, Peng Ju 
; Chen, JingWen ; Chen, Horace 
; Nieto, David M 
Cc: Skvortsov, Victor 
Subject: [PATCH 2/5] drm/amdgpu: Modify indirect register access for gmc_v9_0 
sriov

Modify GC register access from MMIO to RLCG if the
indirect flag is set

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 45 +++
 1 file changed, 32 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index db2ec84f7237..345ce7fc6463 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -478,9 +478,16 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct 
amdgpu_device *adev,
 hub = &adev->vmhub[j];
 for (i = 0; i < 16; i++) {
 reg = hub->vm_context0_cntl + i;
-   tmp = RREG32(reg);
+
+   tmp = (j == AMDGPU_GFXHUB_0) ?
+   RREG32_SOC15_IP(GC, reg) :
+   RREG32_SOC15_IP(MMHUB, reg);
+
 tmp &= ~bits;
-   WREG32(reg, tmp);
+
+   (j == AMDGPU_GFXHUB_0) ?
+   WREG32_SOC15_IP(GC, reg, tmp) :
+   WREG32_SOC15_IP(MMHUB, reg, tmp);
 }
 }
 break;
@@ -489,9 +496,16 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct 
amdgpu_device *adev,
 hub = &adev->vmhub[j];
 for (i = 0; i < 16; i++) {
 reg = hub->vm_context0_cntl + i;
-   tmp = RREG32(reg);
+
+   tmp = (j == AMDGPU_GFXHUB_0) ?
+   RREG32_SOC15_IP(GC, reg) :
+   RREG32_SOC15_IP(MMHUB, reg);
+
 tmp |= bits;
-   WREG32(reg, tmp);
+
+   (j == AMDGPU_GFXHUB_0) ?
+   WREG32_SOC15_IP(GC, reg, tmp) :
+   WREG32_SOC15_IP(MMHUB, reg, tmp);
 }
 }
 break;
@@ -789,8 +803,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 if (use_semaphore) {
 for (j = 0; j < adev->usec_timeout; j++) {
 /* a read return value of 1 means semaphore acuqire */
-   tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem +
-   hub->eng_distance * eng);
+   tmp = (vmhub == AMDGPU_GFXHUB_0) ?
+   RREG32_SOC15_IP_NO_KIQ(GC, 
hub->vm_inv_eng0_sem + hub->eng_distance * eng) :
+   RREG32_SOC15_IP_NO_KIQ(MMHUB, 
hub->vm_inv_eng0_sem + hub->eng_distance * eng);
 if (tmp & 0x1)
 break;
 udelay(1);
@@ -801,8 +816,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 }

 do {
-   WREG32_NO_KIQ(hub->vm_inv_eng0_req +
- hub->eng_distance * eng, inv_req);
+   (vmhub == AMDGPU_GFXHUB_0) ?
+   WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req) :
+   WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req);

 /*
  * Issue a dummy read to wait for the ACK register to
@@ -815,8 +831,9 @@ static void gmc_v9_0_flush_gpu_tlb(st

Re: [PATCH 1/5] drm/amdgpu: Add *_SOC15_IP_NO_KIQ() macro definitions

2021-12-15 Thread Nieto, David M
[AMD Official Use Only]

Reviewed-by: David Nieto 

From: Skvortsov, Victor 
Sent: Wednesday, December 15, 2021 10:55 AM
To: amd-gfx@lists.freedesktop.org ; Deng, Emily 
; Liu, Monk ; Ming, Davis 
; Liu, Shaoyun ; Zhou, Peng Ju 
; Chen, JingWen ; Chen, Horace 
; Nieto, David M 
Cc: Skvortsov, Victor 
Subject: [PATCH 1/5] drm/amdgpu: Add *_SOC15_IP_NO_KIQ() macro definitions

Add helper macros to change register access
from direct to indirect.

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/soc15_common.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h 
b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 8a9ca87d8663..473767e03676 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -51,6 +51,8 @@

 #define RREG32_SOC15_IP(ip, reg) __RREG32_SOC15_RLC__(reg, 0, ip##_HWIP)

+#define RREG32_SOC15_IP_NO_KIQ(ip, reg) __RREG32_SOC15_RLC__(reg, 
AMDGPU_REGS_NO_KIQ, ip##_HWIP)
+
 #define RREG32_SOC15_NO_KIQ(ip, inst, reg) \
 __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] 
+ reg, \
  AMDGPU_REGS_NO_KIQ, ip##_HWIP)
@@ -65,6 +67,9 @@
 #define WREG32_SOC15_IP(ip, reg, value) \
  __WREG32_SOC15_RLC__(reg, value, 0, ip##_HWIP)

+#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value) \
+__WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP)
+
 #define WREG32_SOC15_NO_KIQ(ip, inst, reg, value) \
 __WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] 
+ reg, \
  value, AMDGPU_REGS_NO_KIQ, ip##_HWIP)
--
2.25.1



Re: [PATCH 3/5] drm/amdgpu: Modify indirect register access for amdkfd_gfx_v9 sriov

2021-12-15 Thread Nieto, David M
[AMD Official Use Only]

Reviewed-by: David Nieto 

From: Skvortsov, Victor 
Sent: Wednesday, December 15, 2021 10:55 AM
To: amd-gfx@lists.freedesktop.org ; Deng, Emily 
; Liu, Monk ; Ming, Davis 
; Liu, Shaoyun ; Zhou, Peng Ju 
; Chen, JingWen ; Chen, Horace 
; Nieto, David M 
Cc: Skvortsov, Victor 
Subject: [PATCH 3/5] drm/amdgpu: Modify indirect register access for 
amdkfd_gfx_v9 sriov

Modify GC register access from MMIO to RLCG if the indirect
flag is set

Signed-off-by: Victor Skvortsov 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 27 +--
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index ddfe7aff919d..1abf662a0e91 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -166,7 +166,7 @@ int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, 
uint32_t pipe_id)

 lock_srbm(adev, mec, pipe, 0, 0);

-   WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
+   WREG32_SOC15(GC, 0, mmCPC_INT_CNTL,
 CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
 CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

@@ -279,7 +279,7 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void 
*mqd,
lower_32_bits((uintptr_t)wptr));
 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, 
mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
upper_32_bits((uintptr_t)wptr));
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
+   WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1,
(uint32_t)get_queue_mask(adev, pipe_id, queue_id));
 }

@@ -488,13 +488,13 @@ bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device 
*adev,
 uint32_t low, high;

 acquire_queue(adev, pipe_id, queue_id);
-   act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+   act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
 if (act) {
 low = lower_32_bits(queue_address >> 8);
 high = upper_32_bits(queue_address >> 8);

-   if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
-  high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
+   if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) &&
+  high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI))
 retval = true;
 }
 release_queue(adev);
@@ -556,7 +556,7 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void 
*mqd,

 end_jiffies = (utimeout * HZ / 1000) + jiffies;
 while (true) {
-   temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+   temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
 if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
 break;
 if (time_after(jiffies, end_jiffies)) {
@@ -645,7 +645,7 @@ int kgd_gfx_v9_wave_control_execute(struct amdgpu_device 
*adev,
 mutex_lock(&adev->grbm_idx_mutex);

 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
+   WREG32_SOC15(GC, 0, mmSQ_CMD, sq_cmd);

 data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
 INSTANCE_BROADCAST_WRITES, 1);
@@ -722,7 +722,7 @@ static void get_wave_count(struct amdgpu_device *adev, int 
queue_idx,
 pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
 queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
 soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0);
-   reg_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
+   reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, 0, 
mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
  queue_slot);
 *wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
 if (*wave_cnt != 0)
@@ -809,8 +809,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device 
*adev, int pasid,
 for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {

 gfx_v9_0_select_se_sh(adev, se_idx, sh_idx, 
0x);
-   queue_map = RREG32(SOC15_REG_OFFSET(GC, 0,
-  mmSPI_CSQ_WF_ACTIVE_STATUS));
+   queue_map = RREG32_SOC15(GC, 0, 
mmSPI_CSQ_WF_ACTIVE_STATUS);

 /*
  * Assumption: queue map encodes following schema: four
@@ -860,17 +859,17 @@ void kgd_gfx_v9_program_trap_handler_settings(struct 
amdgpu_device *adev,
 /*
  * Program TBA registers
  */
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
+   WREG32_SOC15(GC, 0, mmSQ_SHADER_TBA_LO,
 lower_32_bits(tb

Re: [PATCH 5/5] drm/amdgpu: Modify indirect register access for gfx9 sriov

2021-12-15 Thread Nieto, David M
[Public]

Gotcha,

Can you add prior to the implementation a small description on how the 
interface and the different scratch registers work? It may be easier to review 
with a clear idea of the operation. I know the earlier implementation did not 
include it, but now that we are modifying it, it would be good to document it.



From: Skvortsov, Victor 
Sent: Wednesday, December 15, 2021 11:18 AM
To: Nieto, David M ; amd-gfx@lists.freedesktop.org 
; Deng, Emily ; Liu, Monk 
; Ming, Davis ; Liu, Shaoyun 
; Zhou, Peng Ju ; Chen, JingWen 
; Chen, Horace 
Subject: RE: [PATCH 5/5] drm/amdgpu: Modify indirect register access for gfx9 
sriov


[AMD Official Use Only]



This was a bug in the original definition, but it functionally it makes no 
difference (in both cases the macros resolve to the same value).



From: Nieto, David M 
Sent: Wednesday, December 15, 2021 2:16 PM
To: Skvortsov, Victor ; 
amd-gfx@lists.freedesktop.org; Deng, Emily ; Liu, Monk 
; Ming, Davis ; Liu, Shaoyun 
; Zhou, Peng Ju ; Chen, JingWen 
; Chen, Horace 
Subject: Re: [PATCH 5/5] drm/amdgpu: Modify indirect register access for gfx9 
sriov



[AMD Official Use Only]



 scratch_reg0 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
 scratch_reg1 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
-   scratch_reg2 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
-   scratch_reg3 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
+   scratch_reg2 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG2_BASE_IDX] + mmSCRATCH_REG2)*4;
+   scratch_reg3 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG3_BASE_IDX] + mmSCRATCH_REG3)*4;
 spare_int = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;



the definition of scratch_reg2 and 3 has here will this be backwards 
compatible? Was it a bug in the definition?



From: Skvortsov, Victor 
mailto:victor.skvort...@amd.com>>
Sent: Wednesday, December 15, 2021 10:55 AM
To: amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> 
mailto:amd-gfx@lists.freedesktop.org>>; Deng, 
Emily mailto:emily.d...@amd.com>>; Liu, Monk 
mailto:monk@amd.com>>; Ming, Davis 
mailto:davis.m...@amd.com>>; Liu, Shaoyun 
mailto:shaoyun@amd.com>>; Zhou, Peng Ju 
mailto:pengju.z...@amd.com>>; Chen, JingWen 
mailto:jingwen.ch...@amd.com>>; Chen, Horace 
mailto:horace.c...@amd.com>>; Nieto, David M 
mailto:david.ni...@amd.com>>
Cc: Skvortsov, Victor 
mailto:victor.skvort...@amd.com>>
Subject: [PATCH 5/5] drm/amdgpu: Modify indirect register access for gfx9 sriov



Expand RLCG interface for new GC read & write commands.
New interface will only be used if the PF enables the flag in pf2vf msg.

Signed-off-by: Victor Skvortsov 
mailto:victor.skvort...@amd.com>>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 111 +++---
 1 file changed, 83 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index d252b06efa43..bce6ab52cae0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -63,6 +63,13 @@
 #define mmGCEA_PROBE_MAP0x070c
 #define mmGCEA_PROBE_MAP_BASE_IDX   0

+#define GFX9_RLCG_GC_WRITE_OLD (0x8 << 28)
+#define GFX9_RLCG_GC_WRITE (0x0 << 28)
+#define GFX9_RLCG_GC_READ  (0x1 << 28)
+#define GFX9_RLCG_VFGATE_DISABLED  0x400
+#define GFX9_RLCG_WRONG_OPERATION_TYPE 0x200
+#define GFX9_RLCG_NOT_IN_RANGE 0x100
+
 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
@@ -739,7 +746,7 @@ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
 mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 };

-static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, u32 offset, u32 v, u32 
flag)
+static u32 gfx_v9_0_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, 
uint32_t flag)
 {
 static void *scratch_reg0;
 static void *scratch_reg1;
@@ -748,21 +755,20 @@ static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, 
u32 offset, u32 v, u32 f
 static void *spare_int;
 static uint32_t grbm_cntl;
 static uint32_t grbm_idx;
+   uint32_t i = 0;
+   uint32_t retries = 5;
+   u32 ret = 0;
+   u32 tmp;

 scratch_reg0 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_R

Re: [PATCH v3 1/5] drm/amdgpu: Add *_SOC15_IP_NO_KIQ() macro definitions

2021-12-16 Thread Nieto, David M
[AMD Official Use Only]

Reviewed-by: David Nieto 

From: Skvortsov, Victor 
Sent: Thursday, December 16, 2021 11:42 AM
To: amd-gfx@lists.freedesktop.org ; Deng, Emily 
; Liu, Monk ; Ming, Davis 
; Liu, Shaoyun ; Zhou, Peng Ju 
; Chen, JingWen ; Chen, Horace 
; Nieto, David M 
Cc: Skvortsov, Victor 
Subject: [PATCH v3 1/5] drm/amdgpu: Add *_SOC15_IP_NO_KIQ() macro definitions

Add helper macros to change register access
from direct to indirect.

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/soc15_common.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h 
b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 8a9ca87d8663..473767e03676 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -51,6 +51,8 @@

 #define RREG32_SOC15_IP(ip, reg) __RREG32_SOC15_RLC__(reg, 0, ip##_HWIP)

+#define RREG32_SOC15_IP_NO_KIQ(ip, reg) __RREG32_SOC15_RLC__(reg, 
AMDGPU_REGS_NO_KIQ, ip##_HWIP)
+
 #define RREG32_SOC15_NO_KIQ(ip, inst, reg) \
 __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] 
+ reg, \
  AMDGPU_REGS_NO_KIQ, ip##_HWIP)
@@ -65,6 +67,9 @@
 #define WREG32_SOC15_IP(ip, reg, value) \
  __WREG32_SOC15_RLC__(reg, value, 0, ip##_HWIP)

+#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value) \
+__WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP)
+
 #define WREG32_SOC15_NO_KIQ(ip, inst, reg, value) \
 __WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] 
+ reg, \
  value, AMDGPU_REGS_NO_KIQ, ip##_HWIP)
--
2.25.1



Re: [PATCH v3 2/5] drm/amdgpu: Modify indirect register access for gmc_v9_0 sriov

2021-12-16 Thread Nieto, David M
[AMD Official Use Only]

Reviewed-by: David Nieto 

From: Skvortsov, Victor 
Sent: Thursday, December 16, 2021 11:42 AM
To: amd-gfx@lists.freedesktop.org ; Deng, Emily 
; Liu, Monk ; Ming, Davis 
; Liu, Shaoyun ; Zhou, Peng Ju 
; Chen, JingWen ; Chen, Horace 
; Nieto, David M 
Cc: Skvortsov, Victor 
Subject: [PATCH v3 2/5] drm/amdgpu: Modify indirect register access for 
gmc_v9_0 sriov

Modify GC register access from MMIO to RLCG if the
indirect flag is set

v2: Replaced ternary operator with if-else for better
readability

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 57 ---
 1 file changed, 43 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index a5471923b3f6..2b86c63b032a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -478,9 +478,18 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct 
amdgpu_device *adev,
 hub = &adev->vmhub[j];
 for (i = 0; i < 16; i++) {
 reg = hub->vm_context0_cntl + i;
-   tmp = RREG32(reg);
+
+   if (j == AMDGPU_GFXHUB_0)
+   tmp = RREG32_SOC15_IP(GC, reg);
+   else
+   tmp = RREG32_SOC15_IP(MMHUB, reg);
+
 tmp &= ~bits;
-   WREG32(reg, tmp);
+
+   if (j == AMDGPU_GFXHUB_0)
+   WREG32_SOC15_IP(GC, reg, tmp);
+   else
+   WREG32_SOC15_IP(MMHUB, reg, tmp);
 }
 }
 break;
@@ -489,9 +498,18 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct 
amdgpu_device *adev,
 hub = &adev->vmhub[j];
 for (i = 0; i < 16; i++) {
 reg = hub->vm_context0_cntl + i;
-   tmp = RREG32(reg);
+
+   if (j == AMDGPU_GFXHUB_0)
+   tmp = RREG32_SOC15_IP(GC, reg);
+   else
+   tmp = RREG32_SOC15_IP(MMHUB, reg);
+
 tmp |= bits;
-   WREG32(reg, tmp);
+
+   if (j == AMDGPU_GFXHUB_0)
+   WREG32_SOC15_IP(GC, reg, tmp);
+   else
+   WREG32_SOC15_IP(MMHUB, reg, tmp);
 }
 }
 break;
@@ -788,9 +806,12 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 /* TODO: It needs to continue working on debugging with semaphore for 
GFXHUB as well. */
 if (use_semaphore) {
 for (j = 0; j < adev->usec_timeout; j++) {
-   /* a read return value of 1 means semaphore acuqire */
-   tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem +
-   hub->eng_distance * eng);
+   /* a read return value of 1 means semaphore acquire */
+   if (vmhub == AMDGPU_GFXHUB_0)
+   tmp = RREG32_SOC15_IP_NO_KIQ(GC, 
hub->vm_inv_eng0_sem + hub->eng_distance * eng);
+   else
+   tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, 
hub->vm_inv_eng0_sem + hub->eng_distance * eng);
+
 if (tmp & 0x1)
 break;
 udelay(1);
@@ -801,8 +822,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
 }

 do {
-   WREG32_NO_KIQ(hub->vm_inv_eng0_req +
- hub->eng_distance * eng, inv_req);
+   if (vmhub == AMDGPU_GFXHUB_0)
+   WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req);
+   else
+   WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + 
hub->eng_distance * eng, inv_req);

 /*
  * Issue a dummy read to wait for the ACK register to
@@ -815,8 +838,11 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
   hub->eng_distance * eng);

 for (j = 0; j < adev->usec_timeout; j++) {
-   tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack +
-   hub->eng_distance * eng);
+  

Re: [PATCH v3 3/5] drm/amdgpu: Modify indirect register access for amdkfd_gfx_v9 sriov

2021-12-16 Thread Nieto, David M
[AMD Official Use Only]

Reviewed-by: David Nieto 

From: Skvortsov, Victor 
Sent: Thursday, December 16, 2021 11:42 AM
To: amd-gfx@lists.freedesktop.org ; Deng, Emily 
; Liu, Monk ; Ming, Davis 
; Liu, Shaoyun ; Zhou, Peng Ju 
; Chen, JingWen ; Chen, Horace 
; Nieto, David M 
Cc: Skvortsov, Victor 
Subject: [PATCH v3 3/5] drm/amdgpu: Modify indirect register access for 
amdkfd_gfx_v9 sriov

Modify GC register access from MMIO to RLCG if the indirect
flag is set

Signed-off-by: Victor Skvortsov 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 27 +--
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index ddfe7aff919d..1abf662a0e91 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -166,7 +166,7 @@ int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, 
uint32_t pipe_id)

 lock_srbm(adev, mec, pipe, 0, 0);

-   WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
+   WREG32_SOC15(GC, 0, mmCPC_INT_CNTL,
 CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
 CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

@@ -279,7 +279,7 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void 
*mqd,
lower_32_bits((uintptr_t)wptr));
 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, 
mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
upper_32_bits((uintptr_t)wptr));
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
+   WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1,
(uint32_t)get_queue_mask(adev, pipe_id, queue_id));
 }

@@ -488,13 +488,13 @@ bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device 
*adev,
 uint32_t low, high;

 acquire_queue(adev, pipe_id, queue_id);
-   act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+   act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
 if (act) {
 low = lower_32_bits(queue_address >> 8);
 high = upper_32_bits(queue_address >> 8);

-   if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
-  high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
+   if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) &&
+  high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI))
 retval = true;
 }
 release_queue(adev);
@@ -556,7 +556,7 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void 
*mqd,

 end_jiffies = (utimeout * HZ / 1000) + jiffies;
 while (true) {
-   temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+   temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
 if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
 break;
 if (time_after(jiffies, end_jiffies)) {
@@ -645,7 +645,7 @@ int kgd_gfx_v9_wave_control_execute(struct amdgpu_device 
*adev,
 mutex_lock(&adev->grbm_idx_mutex);

 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
+   WREG32_SOC15(GC, 0, mmSQ_CMD, sq_cmd);

 data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
 INSTANCE_BROADCAST_WRITES, 1);
@@ -722,7 +722,7 @@ static void get_wave_count(struct amdgpu_device *adev, int 
queue_idx,
 pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
 queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
 soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0);
-   reg_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
+   reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, 0, 
mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
  queue_slot);
 *wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
 if (*wave_cnt != 0)
@@ -809,8 +809,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device 
*adev, int pasid,
 for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {

 gfx_v9_0_select_se_sh(adev, se_idx, sh_idx, 
0x);
-   queue_map = RREG32(SOC15_REG_OFFSET(GC, 0,
-  mmSPI_CSQ_WF_ACTIVE_STATUS));
+   queue_map = RREG32_SOC15(GC, 0, 
mmSPI_CSQ_WF_ACTIVE_STATUS);

 /*
  * Assumption: queue map encodes following schema: four
@@ -860,17 +859,17 @@ void kgd_gfx_v9_program_trap_handler_settings(struct 
amdgpu_device *adev,
 /*
  * Program TBA registers
  */
-   WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
+   WREG32_SOC15(GC, 0, mmSQ_SHADER_TBA_LO,
 lower_32_bits

Re: [PATCH v3 4/5] drm/amdgpu: get xgmi info before ip_init

2021-12-16 Thread Nieto, David M
[AMD Official Use Only]

Reviewed-by: David Nieto 

From: Skvortsov, Victor 
Sent: Thursday, December 16, 2021 11:42 AM
To: amd-gfx@lists.freedesktop.org ; Deng, Emily 
; Liu, Monk ; Ming, Davis 
; Liu, Shaoyun ; Zhou, Peng Ju 
; Chen, JingWen ; Chen, Horace 
; Nieto, David M 
Cc: Skvortsov, Victor 
Subject: [PATCH v3 4/5] drm/amdgpu: get xgmi info before ip_init

Driver needs to call get_xgmi_info() before ip_init
to determine whether it needs to handle a pending hive reset.

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +++
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 6 --
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 6 --
 3 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5bd785cfc5ca..4fd370016834 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3576,6 +3576,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 if (r)
 return r;

+   /* Need to get xgmi info early to decide the reset behavior*/
+   if (adev->gmc.xgmi.supported) {
+   r = adev->gfxhub.funcs->get_xgmi_info(adev);
+   if (r)
+   return r;
+   }
+
 /* enable PCIE atomic ops */
 if (amdgpu_sriov_vf(adev))
 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info 
*)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index ae46eb35b3d7..3d5d47a799e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -914,12 +914,6 @@ static int gmc_v10_0_sw_init(void *handle)
 return r;
 }

-   if (adev->gmc.xgmi.supported) {
-   r = adev->gfxhub.funcs->get_xgmi_info(adev);
-   if (r)
-   return r;
-   }
-
 r = gmc_v10_0_mc_init(adev);
 if (r)
 return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 2b86c63b032a..57f2729a7bd0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1628,12 +1628,6 @@ static int gmc_v9_0_sw_init(void *handle)
 }
 adev->need_swiotlb = drm_need_swiotlb(44);

-   if (adev->gmc.xgmi.supported) {
-   r = adev->gfxhub.funcs->get_xgmi_info(adev);
-   if (r)
-   return r;
-   }
-
 r = gmc_v9_0_mc_init(adev);
 if (r)
 return r;
--
2.25.1



Re: [PATCH v3 5/5] drm/amdgpu: Modify indirect register access for gfx9 sriov

2021-12-16 Thread Nieto, David M
[AMD Official Use Only]

Reviewed-by: David Nieto 

From: Skvortsov, Victor 
Sent: Thursday, December 16, 2021 11:42 AM
To: amd-gfx@lists.freedesktop.org ; Deng, Emily 
; Liu, Monk ; Ming, Davis 
; Liu, Shaoyun ; Zhou, Peng Ju 
; Chen, JingWen ; Chen, Horace 
; Nieto, David M 
Cc: Skvortsov, Victor 
Subject: [PATCH v3 5/5] drm/amdgpu: Modify indirect register access for gfx9 
sriov

Expand RLCG interface for new GC read & write commands.
New interface will only be used if the PF enables the flag in pf2vf msg.

v2: Added a description for the scratch registers

Signed-off-by: Victor Skvortsov 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 117 --
 1 file changed, 89 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index edb3e3b08eed..9189fb85a4dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -63,6 +63,13 @@
 #define mmGCEA_PROBE_MAP0x070c
 #define mmGCEA_PROBE_MAP_BASE_IDX   0

+#define GFX9_RLCG_GC_WRITE_OLD (0x8 << 28)
+#define GFX9_RLCG_GC_WRITE (0x0 << 28)
+#define GFX9_RLCG_GC_READ  (0x1 << 28)
+#define GFX9_RLCG_VFGATE_DISABLED  0x400
+#define GFX9_RLCG_WRONG_OPERATION_TYPE 0x200
+#define GFX9_RLCG_NOT_IN_RANGE 0x100
+
 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
@@ -739,7 +746,7 @@ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
 mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 };

-static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, u32 offset, u32 v, u32 
flag)
+static u32 gfx_v9_0_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, 
uint32_t flag)
 {
 static void *scratch_reg0;
 static void *scratch_reg1;
@@ -748,21 +755,20 @@ static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, 
u32 offset, u32 v, u32 f
 static void *spare_int;
 static uint32_t grbm_cntl;
 static uint32_t grbm_idx;
+   uint32_t i = 0;
+   uint32_t retries = 5;
+   u32 ret = 0;
+   u32 tmp;

 scratch_reg0 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
 scratch_reg1 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
-   scratch_reg2 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
-   scratch_reg3 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
+   scratch_reg2 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG2_BASE_IDX] + mmSCRATCH_REG2)*4;
+   scratch_reg3 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG3_BASE_IDX] + mmSCRATCH_REG3)*4;
 spare_int = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;

 grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + 
mmGRBM_GFX_CNTL;
 grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + 
mmGRBM_GFX_INDEX;

-   if (amdgpu_sriov_runtime(adev)) {
-   pr_err("shouldn't call rlcg write register during runtime\n");
-   return;
-   }
-
 if (offset == grbm_cntl || offset == grbm_idx) {
 if (offset  == grbm_cntl)
 writel(v, scratch_reg2);
@@ -771,41 +777,95 @@ static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, 
u32 offset, u32 v, u32 f

 writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
 } else {
-   uint32_t i = 0;
-   uint32_t retries = 5;
-
+   /*
+* SCRATCH_REG0   = read/write value
+* SCRATCH_REG1[30:28]   = command
+* SCRATCH_REG1[19:0]= address in dword
+* SCRATCH_REG1[26:24]   = Error reporting
+*/
 writel(v, scratch_reg0);
-   writel(offset | 0x8000, scratch_reg1);
+   writel(offset | flag, scratch_reg1);
 writel(1, spare_int);
-   for (i = 0; i < retries; i++) {
-   u32 tmp;

+   for (i = 0; i < retries; i++) {
 tmp = readl(scratch_reg1);
-   if (!(tmp & 0x8000))
+   if (!(tmp & flag))
 break;

 udelay(10);
 }
-   if (i >= retries)
-   pr_err("timeout: rlcg program reg:0x%05x failed !\n", 
offset);
+
+   if (i &g

Re: [PATCH 2/2] drm/amdgpu/pm: add new fields for Navi1x

2021-05-17 Thread Nieto, David M
[AMD Public Use]

I dont think the pp_nodes expose the vclk dclk nodes, but it might be better to 
rework this patch to expose those instead, and just add the voltages...

From: Lazar, Lijo 
Sent: Sunday, May 16, 2021 11:28 PM
To: Nieto, David M ; amd-gfx@lists.freedesktop.org 

Cc: Nieto, David M 
Subject: RE: [PATCH 2/2] drm/amdgpu/pm: add new fields for Navi1x

[AMD Public Use]

Metrics table carries dynamic state information of the ASIC. There are other 
pp_* nodes which carry static information about min/max and levels supported 
and that is a one-time query. Why there is a need to put everything in metrics 
data?

Thanks,
Lijo

-Original Message-
From: amd-gfx  On Behalf Of David M Nieto
Sent: Saturday, May 15, 2021 2:32 AM
To: amd-gfx@lists.freedesktop.org
Cc: Nieto, David M 
Subject: [PATCH 2/2] drm/amdgpu/pm: add new fields for Navi1x

Fill voltage and frequency ranges fields

Signed-off-by: David M Nieto 
Change-Id: I07f926dea46e80a96e1c972ba9dbc804b812d503
---
 .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   | 434 +-
 1 file changed, 417 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index ac13042672ea..a412fa9a95ec 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -505,7 +505,7 @@ static int navi10_tables_init(struct smu_context *smu)
 goto err0_out;
 smu_table->metrics_time = 0;

-   smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_1);
+   smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_3);
 smu_table->gpu_metrics_table = 
kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
 if (!smu_table->gpu_metrics_table)
 goto err1_out;
@@ -2627,10 +2627,11 @@ static ssize_t navi10_get_legacy_gpu_metrics(struct 
smu_context *smu,
  void **table)
 {
 struct smu_table_context *smu_table = &smu->smu_table;
-   struct gpu_metrics_v1_1 *gpu_metrics =
-   (struct gpu_metrics_v1_1 *)smu_table->gpu_metrics_table;
+   struct gpu_metrics_v1_3 *gpu_metrics =
+   (struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table;
 SmuMetrics_legacy_t metrics;
 int ret = 0;
+   int freq = 0, dpm = 0;

 mutex_lock(&smu->metrics_lock);

@@ -2646,7 +2647,7 @@ static ssize_t navi10_get_legacy_gpu_metrics(struct 
smu_context *smu,

 mutex_unlock(&smu->metrics_lock);

-   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 1);
+   smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3);

 gpu_metrics->temperature_edge = metrics.TemperatureEdge;
 gpu_metrics->temperature_hotspot = metrics.TemperatureHotspot; @@ 
-2681,19 +2682,119 @@ static ssize_t navi10_get_legacy_gpu_metrics(struct 
smu_context *smu,

 gpu_metrics->system_clock_counter = ktime_get_boottime_ns();

+   gpu_metrics->voltage_gfx = (155000 - 625 * 
metrics.CurrGfxVoltageOffset) / 100;
+   gpu_metrics->voltage_mem = (155000 - 625 * metrics.CurrMemVidOffset) / 
100;
+   gpu_metrics->voltage_soc = (155000 - 625 *
+metrics.CurrSocVoltageOffset) / 100;
+
+   gpu_metrics->max_socket_power = smu->power_limit;
+
+   /* Frequency and DPM ranges */
+
+   ret = smu_v11_0_get_dpm_freq_by_index(smu, SMU_GFXCLK, 0, &freq);
+   if (ret)
+   goto out;
+   gpu_metrics->min_gfxclk_frequency = freq;
+
+   ret = smu_v11_0_get_dpm_freq_by_index(smu, SMU_SOCCLK, 0, &freq);
+   if (ret)
+   goto out;
+   gpu_metrics->min_socclk_frequency = freq;
+
+   ret = smu_v11_0_get_dpm_freq_by_index(smu, SMU_UCLK, 0, &freq);
+   if (ret)
+   goto out;
+   gpu_metrics->min_uclk_frequency = freq;
+
+   ret = smu_v11_0_get_dpm_freq_by_index(smu, SMU_VCLK, 0, &freq);
+   if (ret)
+   goto out;
+   gpu_metrics->min_vclk0_frequency = freq;
+
+   ret = smu_v11_0_get_dpm_freq_by_index(smu, SMU_DCLK, 0, &freq);
+   if (ret)
+   goto out;
+   gpu_metrics->min_dclk0_frequency = freq;
+
+   ret = smu_v11_0_get_dpm_level_count(smu, SMU_GFXCLK, &dpm);
+   if (ret)
+   goto out;
+   gpu_metrics->max_gfxclk_dpm = dpm;
+
+   ret = smu_v11_0_get_dpm_freq_by_index(smu, SMU_GFXCLK,
+   gpu_metrics->max_gfxclk_dpm - 1, &freq);
+   if (ret)
+   goto out;
+
+   gpu_metrics->max_gfxclk_frequency = freq;
+
+   ret = smu_v11_0_get_dpm_level_count(smu, SMU_SOCCLK, &dpm);
+   if (ret)
+   goto out;
+
+   gpu_metrics->max_socclk_dpm = dpm;
+
+   ret = smu_v11_0_get_dpm_freq_by_index(smu, SMU_SOCCLK,
+  

Re: [PATCH] drm/amdgpu: Expose rfc4122 compliant UUID

2021-05-17 Thread Nieto, David M
[AMD Official Use Only]

It is for unique identification of the GPU in system management applications, 
the 64 bit asic number is only available in Vega10 and later and not compliant 
with RFC4122.

David

From: Christian König 
Sent: Sunday, May 16, 2021 11:52 PM
To: Gu, JiaWei (Will) ; amd-gfx@lists.freedesktop.org 

Cc: Deng, Emily ; Nieto, David M 
Subject: Re: [PATCH] drm/amdgpu: Expose rfc4122 compliant UUID

Am 17.05.21 um 07:54 schrieb Jiawei Gu:
> Introduce an RFC 4122 compliant UUID for the GPUs derived
> from the unique GPU serial number (from Vega10) on gpus.
> Where this serial number is not available, use a compliant
> random UUID.
>
> For virtualization, the unique ID is passed by the host driver
> in the PF2VF structure.

The question is why this is useful.

>
> Signed-off-by: Jiawei Gu 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h | 36 
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 96 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c|  4 +
>   drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h |  4 +-
>   drivers/gpu/drm/amd/amdgpu/nv.c |  5 ++
>   drivers/gpu/drm/amd/amdgpu/nv.h |  3 +
>   6 files changed, 146 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 3147c1c935c8..ad6d4b55be6c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -802,6 +802,40 @@ struct amd_powerplay {
>  (rid == 0x01) || \
>  (rid == 0x10
>
> +union amdgpu_uuid_info {
> + struct {
> + union {
> + struct {
> + uint32_t did: 16;
> + uint32_t fcn: 8;
> + uint32_t asic_7 : 8;
> + };

Bitfields are not allowed in structures used for communication with
hardware or uAPI.

Regards,
Christian.

> + uint32_t time_low;
> + };
> +
> + struct {
> + uint32_t time_mid  : 16;
> + uint32_t time_high : 12;
> + uint32_t version   : 4;
> + };
> +
> + struct {
> + struct {
> + uint8_t clk_seq_hi : 6;
> + uint8_t variant: 2;
> + };
> + union {
> + uint8_t clk_seq_low;
> + uint8_t asic_6;
> + };
> + uint16_t asic_4;
> + };
> +
> + uint32_t asic_0;
> + };
> + char as_char[16];
> +};
> +
>   #define AMDGPU_RESET_MAGIC_NUM 64
>   #define AMDGPU_MAX_DF_PERFMONS 4
>   struct amdgpu_device {
> @@ -1074,6 +1108,8 @@ struct amdgpu_device {
>charproduct_name[32];
>charserial[20];
>
> + union amdgpu_uuid_info uuid_info;
> +
>struct amdgpu_autodump  autodump;
>
>atomic_tthrottling_logging_enabled;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 7c6c435e5d02..079841e1cb52 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -37,6 +37,7 @@
>   #include 
>   #include 
>   #include 
> +#include 
>   #include "amdgpu.h"
>   #include "amdgpu_trace.h"
>   #include "amdgpu_i2c.h"
> @@ -3239,11 +3240,104 @@ static int 
> amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
>return ret;
>   }
>
> +static bool amdgpu_is_uuid_info_empty(union amdgpu_uuid_info *uuid_info)
> +{
> + return (uuid_info->time_low== 0 &&
> + uuid_info->time_mid== 0 &&
> + uuid_info->time_high   == 0 &&
> + uuid_info->version == 0 &&
> + uuid_info->clk_seq_hi  == 0 &&
> + uuid_info->variant == 0 &&
> + uuid_info->clk_seq_low == 0 &&
> + uuid_info->asic_4  == 0 &&
> + uuid_info->asic_0  == 0);
> +}
> +
> +static void amdgpu_gen_uuid_info(union amdgpu_uuid_info *uuid_info,
> + uint64_t serial, uint16_t did, uint8_t idx)
> +{
> + uint16_t clk_seq 

Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface

2021-05-17 Thread Nieto, David M
[AMD Official Use Only - Internal Distribution Only]

The serial number is ASIC information, not VBIOS information, and it is still 
available as a sysfs node... I don't think we should put it there.

Not sure what dbdf stands for.

From: Gu, JiaWei (Will) 
Sent: Monday, May 17, 2021 7:11 PM
To: Koenig, Christian ; amd-gfx@lists.freedesktop.org 
; Nieto, David M ; 
mar...@gmail.com ; Deucher, Alexander 

Cc: Deng, Emily 
Subject: RE: [PATCH] drm/amdgpu: Add vbios info ioctl interface

[AMD Official Use Only - Internal Distribution Only]

So I guess the dbdf is also needed to be removed?
And how about serial?

> +struct drm_amdgpu_info_vbios {
> + __u8 name[64];
> + __u32 dbdf; // do we need this?
> + __u8 vbios_pn[64];
> + __u32 version;
> + __u8 vbios_ver_str[32];
> + __u8 date[32];
> + __u64 serial; // do we need this?
> +};

Best regards,
Jiawei

-Original Message-
From: Koenig, Christian 
Sent: Monday, May 17, 2021 8:26 PM
To: Gu, JiaWei (Will) ; amd-gfx@lists.freedesktop.org; 
Nieto, David M ; mar...@gmail.com; Deucher, Alexander 

Cc: Deng, Emily 
Subject: Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface

I'm not very familiar with the technical background why we have the fields here 
once more.

But of hand we should at least remove everything which is also available from 
the PCI information.

E.g. dev_id, rev_id, sub_dev_id, sub_ved_id.

Regards,
Christian.

Am 17.05.21 um 14:17 schrieb Gu, JiaWei (Will):
> [AMD Official Use Only - Internal Distribution Only]
>
> Hi all,
>
> Thanks Christian's suggestion.
> I reverted the previous patches and squash them into this single one.
>
> As this patch shows, the current uapi change looks like this:
>
> +struct drm_amdgpu_info_vbios {
> + __u8 name[64];
> + __u32 dbdf;
> + __u8 vbios_pn[64];
> + __u32 version;
> + __u8 vbios_ver_str[32];
> + __u8 date[32];
> + __u64 serial;
> + __u32 dev_id;
> + __u32 rev_id;
> + __u32 sub_dev_id;
> + __u32 sub_ved_id;
> +};
>
> As we know there's some redundant info in this struct.
> Please feel free to give any comments or suggestion about what it should & 
> shouldn't include.
>
> Best regards,
> Jiawei
>
> -Original Message-
> From: Jiawei Gu 
> Sent: Monday, May 17, 2021 8:08 PM
> To: amd-gfx@lists.freedesktop.org; Koenig, Christian
> ; Nieto, David M ;
> mar...@gmail.com; Deucher, Alexander 
> Cc: Deng, Emily ; Gu, JiaWei (Will)
> 
> Subject: [PATCH] drm/amdgpu: Add vbios info ioctl interface
>
> Add AMDGPU_INFO_VBIOS_INFO subquery id for detailed vbios info.
>
> Provides a way for the user application to get the VBIOS information without 
> having to parse the binary.
> It is useful for the user to be able to display in a simple way the VBIOS 
> version in their system if they happen to encounter an issue.
>
> V2:
> Use numeric serial.
> Parse and expose vbios version string.
>
> Signed-off-by: Jiawei Gu 
> Acked-by: Christian König 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  21 +++
>   drivers/gpu/drm/amd/amdgpu/atom.c  | 174 +
>   drivers/gpu/drm/amd/amdgpu/atom.h  |  12 ++
>   drivers/gpu/drm/amd/include/atomfirmware.h |   5 +
>   include/uapi/drm/amdgpu_drm.h  |  16 ++
>   5 files changed, 228 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 8d12e474745a..30e4fed3de22 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -861,6 +861,27 @@ int amdgpu_info_ioctl(struct drm_device *dev, void 
> *data, struct drm_file *filp)
>min((size_t)size, 
> (size_t)(bios_size - bios_offset)))
>? -EFAULT : 0;
>}
> + case AMDGPU_INFO_VBIOS_INFO: {
> + struct drm_amdgpu_info_vbios vbios_info = {};
> + struct atom_context *atom_context;
> +
> + atom_context = adev->mode_info.atom_context;
> + memcpy(vbios_info.name, atom_context->name, 
> sizeof(atom_context->name));
> + vbios_info.dbdf = PCI_DEVID(adev->pdev->bus->number, 
> adev->pdev->devfn);
> + memcpy(vbios_info.vbios_pn, atom_context->vbios_pn, 
> sizeof(atom_context->vbios_pn));
> + vbios_info.version = atom_context->version;
> + memcpy(vbios_info.vbios_ver_str, 
> atom_context->vbios_ver_str,
> +  

Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface

2021-05-17 Thread Nieto, David M
[Public]

Yes, let's remove that too,

Thanks,

David

From: Gu, JiaWei (Will) 
Sent: Monday, May 17, 2021 8:07 PM
To: Nieto, David M ; Koenig, Christian 
; amd-gfx@lists.freedesktop.org 
; mar...@gmail.com ; Deucher, 
Alexander 
Cc: Deng, Emily 
Subject: RE: [PATCH] drm/amdgpu: Add vbios info ioctl interface


[AMD Official Use Only - Internal Distribution Only]



OK let’s remove serial.



dbdf comes from this:

vbios_info.dbdf = PCI_DEVID(adev->pdev->bus->number, adev->pdev->devfn);



I think we can remove dbdf as well.



Best regards,

Jiawei



From: Nieto, David M 
Sent: Tuesday, May 18, 2021 10:45 AM
To: Gu, JiaWei (Will) ; Koenig, Christian 
; amd-gfx@lists.freedesktop.org; mar...@gmail.com; 
Deucher, Alexander 
Cc: Deng, Emily 
Subject: Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface



[AMD Official Use Only - Internal Distribution Only]



The serial number is ASIC information, not VBIOS information, and it is still 
available as a sysfs node... I don't think we should put it there.



Not sure what dbdf stands for.



From: Gu, JiaWei (Will) mailto:jiawei...@amd.com>>
Sent: Monday, May 17, 2021 7:11 PM
To: Koenig, Christian 
mailto:christian.koe...@amd.com>>; 
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> 
mailto:amd-gfx@lists.freedesktop.org>>; Nieto, 
David M mailto:david.ni...@amd.com>>; 
mar...@gmail.com<mailto:mar...@gmail.com> 
mailto:mar...@gmail.com>>; Deucher, Alexander 
mailto:alexander.deuc...@amd.com>>
Cc: Deng, Emily mailto:emily.d...@amd.com>>
Subject: RE: [PATCH] drm/amdgpu: Add vbios info ioctl interface



[AMD Official Use Only - Internal Distribution Only]

So I guess the dbdf is also needed to be removed?
And how about serial?

> +struct drm_amdgpu_info_vbios {
> + __u8 name[64];
> + __u32 dbdf; // do we need this?
> + __u8 vbios_pn[64];
> + __u32 version;
> + __u8 vbios_ver_str[32];
> + __u8 date[32];
> + __u64 serial; // do we need this?
> +};

Best regards,
Jiawei

-Original Message-
From: Koenig, Christian 
mailto:christian.koe...@amd.com>>
Sent: Monday, May 17, 2021 8:26 PM
To: Gu, JiaWei (Will) mailto:jiawei...@amd.com>>; 
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>; Nieto, 
David M mailto:david.ni...@amd.com>>; 
mar...@gmail.com<mailto:mar...@gmail.com>; Deucher, Alexander 
mailto:alexander.deuc...@amd.com>>
Cc: Deng, Emily mailto:emily.d...@amd.com>>
Subject: Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface

I'm not very familiar with the technical background why we have the fields here 
once more.

But of hand we should at least remove everything which is also available from 
the PCI information.

E.g. dev_id, rev_id, sub_dev_id, sub_ved_id.

Regards,
Christian.

Am 17.05.21 um 14:17 schrieb Gu, JiaWei (Will):
> [AMD Official Use Only - Internal Distribution Only]
>
> Hi all,
>
> Thanks Christian's suggestion.
> I reverted the previous patches and squash them into this single one.
>
> As this patch shows, the current uapi change looks like this:
>
> +struct drm_amdgpu_info_vbios {
> + __u8 name[64];
> + __u32 dbdf;
> + __u8 vbios_pn[64];
> + __u32 version;
> + __u8 vbios_ver_str[32];
> + __u8 date[32];
> + __u64 serial;
> + __u32 dev_id;
> + __u32 rev_id;
> + __u32 sub_dev_id;
> + __u32 sub_ved_id;
> +};
>
> As we know there's some redundant info in this struct.
> Please feel free to give any comments or suggestion about what it should & 
> shouldn't include.
>
> Best regards,
> Jiawei
>
> -Original Message-
> From: Jiawei Gu mailto:jiawei...@amd.com>>
> Sent: Monday, May 17, 2021 8:08 PM
> To: amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>; 
> Koenig, Christian
> mailto:christian.koe...@amd.com>>; Nieto, David M 
> mailto:david.ni...@amd.com>>;
> mar...@gmail.com<mailto:mar...@gmail.com>; Deucher, Alexander 
> mailto:alexander.deuc...@amd.com>>
> Cc: Deng, Emily mailto:emily.d...@amd.com>>; Gu, JiaWei 
> (Will)
> mailto:jiawei...@amd.com>>
> Subject: [PATCH] drm/amdgpu: Add vbios info ioctl interface
>
> Add AMDGPU_INFO_VBIOS_INFO subquery id for detailed vbios info.
>
> Provides a way for the user application to get the VBIOS information without 
> having to parse the binary.
> It is useful for the user to be able to display in a simple way the VBIOS 
> version in their system if they happen to encounter an issue.
>
> V2:
> Use numeric serial.
> Parse and expose vbios version string.
>
> Signed-off-by: Jiawei Gu mailto:jiawei...@amd.com>>

Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface

2021-05-18 Thread Nieto, David M
[Public]

That looks like the right output to me.

From: Gu, JiaWei (Will) 
Sent: Monday, May 17, 2021 10:58 PM
To: Nieto, David M ; Koenig, Christian 
; amd-gfx@lists.freedesktop.org 
; mar...@gmail.com ; Deucher, 
Alexander 
Cc: Deng, Emily 
Subject: RE: [PATCH] drm/amdgpu: Add vbios info ioctl interface


[Public]


Hi all,



Then the struct looks like:



> +struct drm_amdgpu_info_vbios {
> + __u8 name[64];
> + __u8 vbios_pn[64];
> + __u32 version;
> + __u8 vbios_ver_str[32];
> + __u8 date[32];
> +};



Sample output:



vbios name : NAVI12 A0 XT D30501 8GB EVAL 1150e/334m HYN/SAM
vbios pn : 113-D3050100-104
vbios version : 285409288
vbios ver_str : 017.003.000.008.016956
vbios date : 2021/05/03 23:32


Please help double confirm that we’re all fine with it and there’s no need to 
add & remove anything.



Best regards,

Jiawei



From: Nieto, David M 
Sent: Tuesday, May 18, 2021 12:40 PM
To: Gu, JiaWei (Will) ; Koenig, Christian 
; amd-gfx@lists.freedesktop.org; mar...@gmail.com; 
Deucher, Alexander 
Cc: Deng, Emily 
Subject: Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface



[Public]



Yes, let's remove that too,



Thanks,



David



From: Gu, JiaWei (Will) mailto:jiawei...@amd.com>>
Sent: Monday, May 17, 2021 8:07 PM
To: Nieto, David M mailto:david.ni...@amd.com>>; Koenig, 
Christian mailto:christian.koe...@amd.com>>; 
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> 
mailto:amd-gfx@lists.freedesktop.org>>; 
mar...@gmail.com<mailto:mar...@gmail.com> 
mailto:mar...@gmail.com>>; Deucher, Alexander 
mailto:alexander.deuc...@amd.com>>
Cc: Deng, Emily mailto:emily.d...@amd.com>>
Subject: RE: [PATCH] drm/amdgpu: Add vbios info ioctl interface



[AMD Official Use Only - Internal Distribution Only]



OK let’s remove serial.



dbdf comes from this:

vbios_info.dbdf = PCI_DEVID(adev->pdev->bus->number, adev->pdev->devfn);



I think we can remove dbdf as well.



Best regards,

Jiawei



From: Nieto, David M mailto:david.ni...@amd.com>>
Sent: Tuesday, May 18, 2021 10:45 AM
To: Gu, JiaWei (Will) mailto:jiawei...@amd.com>>; Koenig, 
Christian mailto:christian.koe...@amd.com>>; 
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>; 
mar...@gmail.com<mailto:mar...@gmail.com>; Deucher, Alexander 
mailto:alexander.deuc...@amd.com>>
Cc: Deng, Emily mailto:emily.d...@amd.com>>
Subject: Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface



[AMD Official Use Only - Internal Distribution Only]



The serial number is ASIC information, not VBIOS information, and it is still 
available as a sysfs node... I don't think we should put it there.



Not sure what dbdf stands for.



From: Gu, JiaWei (Will) mailto:jiawei...@amd.com>>
Sent: Monday, May 17, 2021 7:11 PM
To: Koenig, Christian 
mailto:christian.koe...@amd.com>>; 
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> 
mailto:amd-gfx@lists.freedesktop.org>>; Nieto, 
David M mailto:david.ni...@amd.com>>; 
mar...@gmail.com<mailto:mar...@gmail.com> 
mailto:mar...@gmail.com>>; Deucher, Alexander 
mailto:alexander.deuc...@amd.com>>
Cc: Deng, Emily mailto:emily.d...@amd.com>>
Subject: RE: [PATCH] drm/amdgpu: Add vbios info ioctl interface



[AMD Official Use Only - Internal Distribution Only]

So I guess the dbdf is also needed to be removed?
And how about serial?

> +struct drm_amdgpu_info_vbios {
> + __u8 name[64];
> + __u32 dbdf; // do we need this?
> + __u8 vbios_pn[64];
> + __u32 version;
> + __u8 vbios_ver_str[32];
> + __u8 date[32];
> + __u64 serial; // do we need this?
> +};

Best regards,
Jiawei

-Original Message-
From: Koenig, Christian 
mailto:christian.koe...@amd.com>>
Sent: Monday, May 17, 2021 8:26 PM
To: Gu, JiaWei (Will) mailto:jiawei...@amd.com>>; 
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>; Nieto, 
David M mailto:david.ni...@amd.com>>; 
mar...@gmail.com<mailto:mar...@gmail.com>; Deucher, Alexander 
mailto:alexander.deuc...@amd.com>>
Cc: Deng, Emily mailto:emily.d...@amd.com>>
Subject: Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface

I'm not very familiar with the technical background why we have the fields here 
once more.

But of hand we should at least remove everything which is also available from 
the PCI information.

E.g. dev_id, rev_id, sub_dev_id, sub_ved_id.

Regards,
Christian.

Am 17.05.21 um 14:17 schrieb Gu, JiaWei (Will):
> [AMD Official Use Only - Internal Distribution Only]
>
> Hi all,
>
> Thanks Christian's suggestion.
> I reverted the previous patches and squash them into this single 

Re: [PATCH] drm/amdgpu: Expose rfc4122 compliant UUID

2021-05-18 Thread Nieto, David M
[AMD Official Use Only]

I think the sysfs node should be moved into amdgpu_pm instead of the 
amdgpu_device.c and generation of the unique_id should be moved to 
navi10_ppt.c, similarly to other chips.

Thinking it better, generating a random UUID makes no sense in the driver 
level, any application can do the same thing on userspace if the UUID sysfs 
node is empty.

So, I think we should do the same as with the unique_id node, if the unique_id 
is not present, just return.

David

From: Alex Deucher 
Sent: Tuesday, May 18, 2021 7:12 AM
To: Gu, JiaWei (Will) 
Cc: amd-gfx list ; Deng, Emily 
; Nieto, David M 
Subject: Re: [PATCH] drm/amdgpu: Expose rfc4122 compliant UUID

On Mon, May 17, 2021 at 1:54 AM Jiawei Gu  wrote:
>
> Introduce an RFC 4122 compliant UUID for the GPUs derived
> from the unique GPU serial number (from Vega10) on gpus.
> Where this serial number is not available, use a compliant
> random UUID.
>
> For virtualization, the unique ID is passed by the host driver
> in the PF2VF structure.
>
> Signed-off-by: Jiawei Gu 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h | 36 
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 96 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c|  4 +
>  drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h |  4 +-
>  drivers/gpu/drm/amd/amdgpu/nv.c |  5 ++
>  drivers/gpu/drm/amd/amdgpu/nv.h |  3 +
>  6 files changed, 146 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 3147c1c935c8..ad6d4b55be6c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -802,6 +802,40 @@ struct amd_powerplay {
>   (rid == 0x01) || \
>   (rid == 0x10
>
> +union amdgpu_uuid_info {
> +   struct {
> +   union {
> +   struct {
> +   uint32_t did: 16;
> +   uint32_t fcn: 8;
> +   uint32_t asic_7 : 8;
> +   };
> +   uint32_t time_low;
> +   };
> +
> +   struct {
> +   uint32_t time_mid  : 16;
> +   uint32_t time_high : 12;
> +   uint32_t version   : 4;
> +   };
> +
> +   struct {
> +   struct {
> +   uint8_t clk_seq_hi : 6;
> +   uint8_t variant: 2;
> +   };
> +   union {
> +   uint8_t clk_seq_low;
> +   uint8_t asic_6;
> +   };
> +   uint16_t asic_4;
> +   };
> +
> +   uint32_t asic_0;
> +   };
> +   char as_char[16];
> +};
> +
>  #define AMDGPU_RESET_MAGIC_NUM 64
>  #define AMDGPU_MAX_DF_PERFMONS 4
>  struct amdgpu_device {
> @@ -1074,6 +1108,8 @@ struct amdgpu_device {
> charproduct_name[32];
> charserial[20];
>
> +   union amdgpu_uuid_info uuid_info;
> +
> struct amdgpu_autodump  autodump;
>
> atomic_tthrottling_logging_enabled;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 7c6c435e5d02..079841e1cb52 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -37,6 +37,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include "amdgpu.h"
>  #include "amdgpu_trace.h"
>  #include "amdgpu_i2c.h"
> @@ -3239,11 +3240,104 @@ static int 
> amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
> return ret;
>  }
>
> +static bool amdgpu_is_uuid_info_empty(union amdgpu_uuid_info *uuid_info)
> +{
> +   return (uuid_info->time_low== 0 &&
> +   uuid_info->time_mid== 0 &&
> +   uuid_info->time_high   == 0 &&
> +   uuid_info->version == 0 &&
> +   uuid_info->clk_seq_hi  == 0 &&
> +   uuid_info->variant == 0 &&
> +   uuid_info->clk_seq_low == 0 &&
> +   uuid_info->asic_4  == 0 &&
> +   uuid_info->asic_0  ==

Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface

2021-05-18 Thread Nieto, David M
[Public]

That is correct, it is 0x11030008, that matches the FW information.

From: Lazar, Lijo 
Sent: Tuesday, May 18, 2021 3:50 AM
To: Gu, JiaWei (Will) ; Nieto, David M 
; Koenig, Christian ; 
amd-gfx@lists.freedesktop.org ; mar...@gmail.com 
; Deucher, Alexander 
Cc: Deng, Emily 
Subject: RE: [PATCH] drm/amdgpu: Add vbios info ioctl interface


[Public]


Not sure about this one –  vbios version : 285409288



Windows driver reports VBIOS version from atom_firmware_info_v3_1 / 
firmware_revision.



Thanks,

Lijo



From: amd-gfx  On Behalf Of Gu, JiaWei 
(Will)
Sent: Tuesday, May 18, 2021 11:28 AM
To: Nieto, David M ; Koenig, Christian 
; amd-gfx@lists.freedesktop.org; mar...@gmail.com; 
Deucher, Alexander 
Cc: Deng, Emily 
Subject: RE: [PATCH] drm/amdgpu: Add vbios info ioctl interface



[Public]



[Public]



Hi all,



Then the struct looks like:



> +struct drm_amdgpu_info_vbios {
> + __u8 name[64];
> + __u8 vbios_pn[64];
> + __u32 version;
> + __u8 vbios_ver_str[32];
> + __u8 date[32];
> +};



Sample output:



vbios name : NAVI12 A0 XT D30501 8GB EVAL 1150e/334m HYN/SAM
vbios pn : 113-D3050100-104
vbios version : 285409288
vbios ver_str : 017.003.000.008.016956
vbios date : 2021/05/03 23:32

Please help double confirm that we’re all fine with it and there’s no need to 
add & remove anything.



Best regards,

Jiawei



From: Nieto, David M mailto:david.ni...@amd.com>>
Sent: Tuesday, May 18, 2021 12:40 PM
To: Gu, JiaWei (Will) mailto:jiawei...@amd.com>>; Koenig, 
Christian mailto:christian.koe...@amd.com>>; 
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>; 
mar...@gmail.com<mailto:mar...@gmail.com>; Deucher, Alexander 
mailto:alexander.deuc...@amd.com>>
Cc: Deng, Emily mailto:emily.d...@amd.com>>
Subject: Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface



[Public]



Yes, let's remove that too,



Thanks,



David



From: Gu, JiaWei (Will) mailto:jiawei...@amd.com>>
Sent: Monday, May 17, 2021 8:07 PM
To: Nieto, David M mailto:david.ni...@amd.com>>; Koenig, 
Christian mailto:christian.koe...@amd.com>>; 
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> 
mailto:amd-gfx@lists.freedesktop.org>>; 
mar...@gmail.com<mailto:mar...@gmail.com> 
mailto:mar...@gmail.com>>; Deucher, Alexander 
mailto:alexander.deuc...@amd.com>>
Cc: Deng, Emily mailto:emily.d...@amd.com>>
Subject: RE: [PATCH] drm/amdgpu: Add vbios info ioctl interface



[AMD Official Use Only - Internal Distribution Only]



OK let’s remove serial.



dbdf comes from this:

vbios_info.dbdf = PCI_DEVID(adev->pdev->bus->number, adev->pdev->devfn);



I think we can remove dbdf as well.



Best regards,

Jiawei



From: Nieto, David M mailto:david.ni...@amd.com>>
Sent: Tuesday, May 18, 2021 10:45 AM
To: Gu, JiaWei (Will) mailto:jiawei...@amd.com>>; Koenig, 
Christian mailto:christian.koe...@amd.com>>; 
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>; 
mar...@gmail.com<mailto:mar...@gmail.com>; Deucher, Alexander 
mailto:alexander.deuc...@amd.com>>
Cc: Deng, Emily mailto:emily.d...@amd.com>>
Subject: Re: [PATCH] drm/amdgpu: Add vbios info ioctl interface



[AMD Official Use Only - Internal Distribution Only]



The serial number is ASIC information, not VBIOS information, and it is still 
available as a sysfs node... I don't think we should put it there.



Not sure what dbdf stands for.



From: Gu, JiaWei (Will) mailto:jiawei...@amd.com>>
Sent: Monday, May 17, 2021 7:11 PM
To: Koenig, Christian 
mailto:christian.koe...@amd.com>>; 
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> 
mailto:amd-gfx@lists.freedesktop.org>>; Nieto, 
David M mailto:david.ni...@amd.com>>; 
mar...@gmail.com<mailto:mar...@gmail.com> 
mailto:mar...@gmail.com>>; Deucher, Alexander 
mailto:alexander.deuc...@amd.com>>
Cc: Deng, Emily mailto:emily.d...@amd.com>>
Subject: RE: [PATCH] drm/amdgpu: Add vbios info ioctl interface



[AMD Official Use Only - Internal Distribution Only]

So I guess the dbdf is also needed to be removed?
And how about serial?

> +struct drm_amdgpu_info_vbios {
> + __u8 name[64];
> + __u32 dbdf; // do we need this?
> + __u8 vbios_pn[64];
> + __u32 version;
> + __u8 vbios_ver_str[32];
> + __u8 date[32];
> + __u64 serial; // do we need this?
> +};

Best regards,
Jiawei

-Original Message-
From: Koenig, Christian 
mailto:christian.koe...@amd.com>>
Sent: Monday, May 17, 2021 8:26 PM
To: Gu, JiaWei (Will) mailto:jiawei...@amd.com>>; 
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>; Nieto, 
David M mailto:david.ni...

Re: [PATCH] drm/amdgpu: Expose rfc4122 compliant UUID

2021-05-19 Thread Nieto, David M
[AMD Official Use Only]

For the case of virtualization, for example, the serial number has no relation 
to the uuid. Which means that at least for virtualization the node needs to be 
created. This may also be the case on other gpus.


From: Christian König 
Sent: Wednesday, May 19, 2021 3:58:35 AM
To: Nieto, David M ; Alex Deucher ; 
Gu, JiaWei (Will) 
Cc: Deng, Emily ; amd-gfx list 

Subject: Re: [PATCH] drm/amdgpu: Expose rfc4122 compliant UUID

Well I don't think generating an UUID in the kernel makes sense in general.

What we can do is to expose the serial number of the device, so that userspace 
can create an UUID if necessary.

Christian.

Am 18.05.21 um 22:37 schrieb Nieto, David M:

[AMD Official Use Only]

I think the sysfs node should be moved into amdgpu_pm instead of the 
amdgpu_device.c and generation of the unique_id should be moved to 
navi10_ppt.c, similarly to other chips.

Thinking it better, generating a random UUID makes no sense in the driver 
level, any application can do the same thing on userspace if the UUID sysfs 
node is empty.

So, I think we should do the same as with the unique_id node, if the unique_id 
is not present, just return.

David

From: Alex Deucher <mailto:alexdeuc...@gmail.com>
Sent: Tuesday, May 18, 2021 7:12 AM
To: Gu, JiaWei (Will) <mailto:jiawei...@amd.com>
Cc: amd-gfx list 
<mailto:amd-gfx@lists.freedesktop.org>; Deng, 
Emily <mailto:emily.d...@amd.com>; Nieto, David M 
<mailto:david.ni...@amd.com>
Subject: Re: [PATCH] drm/amdgpu: Expose rfc4122 compliant UUID

On Mon, May 17, 2021 at 1:54 AM Jiawei Gu 
<mailto:jiawei...@amd.com> wrote:
>
> Introduce an RFC 4122 compliant UUID for the GPUs derived
> from the unique GPU serial number (from Vega10) on gpus.
> Where this serial number is not available, use a compliant
> random UUID.
>
> For virtualization, the unique ID is passed by the host driver
> in the PF2VF structure.
>
> Signed-off-by: Jiawei Gu <mailto:jiawei...@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h | 36 
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 96 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c|  4 +
>  drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h |  4 +-
>  drivers/gpu/drm/amd/amdgpu/nv.c |  5 ++
>  drivers/gpu/drm/amd/amdgpu/nv.h |  3 +
>  6 files changed, 146 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 3147c1c935c8..ad6d4b55be6c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -802,6 +802,40 @@ struct amd_powerplay {
>   (rid == 0x01) || \
>   (rid == 0x10
>
> +union amdgpu_uuid_info {
> +   struct {
> +   union {
> +   struct {
> +   uint32_t did: 16;
> +   uint32_t fcn: 8;
> +   uint32_t asic_7 : 8;
> +   };
> +   uint32_t time_low;
> +   };
> +
> +   struct {
> +   uint32_t time_mid  : 16;
> +   uint32_t time_high : 12;
> +   uint32_t version   : 4;
> +   };
> +
> +   struct {
> +   struct {
> +   uint8_t clk_seq_hi : 6;
> +   uint8_t variant: 2;
> +   };
> +   union {
> +   uint8_t clk_seq_low;
> +   uint8_t asic_6;
> +   };
> +   uint16_t asic_4;
> +   };
> +
> +   uint32_t asic_0;
> +   };
> +   char as_char[16];
> +};
> +
>  #define AMDGPU_RESET_MAGIC_NUM 64
>  #define AMDGPU_MAX_DF_PERFMONS 4
>  struct amdgpu_device {
> @@ -1074,6 +1108,8 @@ struct amdgpu_device {
> charproduct_name[32];
> charserial[20];
>
> +   union amdgpu_uuid_info uuid_info;
> +
> struct amdgpu_autodump  autodump;
>
> atomic_tthrottling_logging_enabled;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 7c6c435e5d02..079841e1cb52 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -37,6 +37,7 @@
>  #include 
>  #include 
>  #include 
> +#