On 6/7/2024 12:31 PM, SRINIVASAN SHANMUGAM wrote:
>
> On 6/6/2024 10:58 PM, Lazar, Lijo wrote:
>> On 6/6/2024 5:35 PM, Srinivasan Shanmugam wrote:
>>> Previously, this check was performed in the gfx_v9_4_3_sw_init function,
>>> and the amdgpu_gfx_sysfs_compute_init function was only called if the
>>> GPU was not a VF in SR-IOV mode. This is because the sysfs entries
>>> created by amdgpu_gfx_sysfs_compute_init are specific to compute
>>> partitions, which are only applicable on GFX9 and not on a VF in SR-IOV
>>> mode.
>>>
>>> By moving the check into amdgpu_gfx_sysfs_compute_init, we make this
>>> function responsible for deciding whether or not to create the compute
>>> partition sysfs entries.
>>>
>>> This change improves the code organization and maintainability. If in
>>> the future the conditions for creating the compute partition sysfs
>>> entries change, we would only need to update the
>>> amdgpu_gfx_sysfs_compute_init function.
>> This is not correct. If this has to be true, this will reside somewhere
>> in amdgpu_gfx and you would also need IP version inside this one. If for
>> a new IP version say gfx v9.4.5 this needs to be created for VF also,
>
> In this case, how about below?
>
> int amdgpu_gfx_sysfs_compute_init(struct amdgpu_device *adev, bool
> check_sriov)
> {
> int r;
>
> if (!check_sriov || !amdgpu_sriov_vf(adev)) {
> r = device_create_file(adev->dev,
> &dev_attr_current_compute_partition);
> if (r)
> return r;
>
> r = device_create_file(adev->dev,
> &dev_attr_available_compute_partition);
> if (r)
> return r;
> }
>
> return 0;
> }
>
> In gfx_v9_4_3_sw_init you would call amdgpu_gfx_sysfs_compute_init(adev,
> true),
>
> to perform the check, and in gfx_v9_4_5_sw_init you would call
> amdgpu_gfx_sysfs_compute_init(adev, false) to skip the check.
>
> This way, we can control the behavior of the function without needing to
> put condition in IP code version.?
>
> But would like have Christian's view also, onto this "a new IP version
> say gfx v9.4.5 this needs to be created for VF also,
>
Drop the patch. As you see, the patch is just adding more complexity
with more variables rather than simplifying anything.
Thanks,
Lijo
> "
>
>> then this check here won't work. This is the specific reason why we put
>> the conditions inside IP code.
>>
>> Thanks,
>> Lijo
>>
>>> Cc: Alex Deucher <alexander.deuc...@amd.com>
>>> Cc: Christian König <christian.koe...@amd.com>
>>> Suggested-by: Christian König <christian.koe...@amd.com>
>>> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmu...@amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 24 +++++++++++++++---------
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 ++--
>>> drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 11 +++++------
>>> 3 files changed, 22 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> index 19b1817b55d7..72477a5aedca 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> @@ -1376,21 +1376,27 @@ static DEVICE_ATTR(current_compute_partition, 0644,
>>> static DEVICE_ATTR(available_compute_partition, 0444,
>>> amdgpu_gfx_get_available_compute_partition, NULL);
>>>
>>> -int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
>>> +int amdgpu_gfx_sysfs_compute_init(struct amdgpu_device *adev)
>>> {
>>> int r;
>>>
>>> - r = device_create_file(adev->dev, &dev_attr_current_compute_partition);
>>> - if (r)
>>> - return r;
>>> + if (!amdgpu_sriov_vf(adev)) {
>>> + r = device_create_file(adev->dev,
>>> &dev_attr_current_compute_partition);
>>> + if (r)
>>> + return r;
>>>
>>> - r = device_create_file(adev->dev,
>>> &dev_attr_available_compute_partition);
>>> + r = device_create_file(adev->dev,
>>> &dev_attr_available_compute_partition);
>>> + if (r)
>>> + return r;
>>> + }
>>>
>>> - return r;
>>> + return 0;
>>> }
>>>
>>> -void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
>>> +void amdgpu_gfx_sysfs_compute_fini(struct amdgpu_device *adev)
>>> {
>>> - device_remove_file(adev->dev, &dev_attr_current_compute_partition);
>>> - device_remove_file(adev->dev, &dev_attr_available_compute_partition);
>>> + if (!amdgpu_sriov_vf(adev)) {
>>> + device_remove_file(adev->dev,
>>> &dev_attr_current_compute_partition);
>>> + device_remove_file(adev->dev,
>>> &dev_attr_available_compute_partition);
>>> + }
>>> }
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>> index 6b0416777c5b..b65c459b3aa9 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>> @@ -533,8 +533,8 @@ int amdgpu_gfx_poison_consumption_handler(struct
>>> amdgpu_device *adev,
>>> struct amdgpu_iv_entry *entry);
>>>
>>> bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id);
>>> -int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev);
>>> -void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev);
>>> +int amdgpu_gfx_sysfs_compute_init(struct amdgpu_device *adev);
>>> +void amdgpu_gfx_sysfs_compute_fini(struct amdgpu_device *adev);
>>> void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
>>> void *ras_error_status,
>>> void (*func)(struct amdgpu_device *adev, void *ras_error_status,
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
>>> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
>>> index aecc2bcea145..07ce614ef282 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
>>> @@ -939,11 +939,11 @@ static int gfx_v9_4_3_sw_init(void *handle)
>>> if (r)
>>> return r;
>>>
>>> + r = amdgpu_gfx_sysfs_compute_init(adev);
>>> + if (r)
>>> + return r;
>>>
>>> - if (!amdgpu_sriov_vf(adev))
>>> - r = amdgpu_gfx_sysfs_init(adev);
>>> -
>>> - return r;
>>> + return 0;
>>> }
>>>
>>> static int gfx_v9_4_3_sw_fini(void *handle)
>>> @@ -964,8 +964,7 @@ static int gfx_v9_4_3_sw_fini(void *handle)
>>> gfx_v9_4_3_mec_fini(adev);
>>> amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
>>> gfx_v9_4_3_free_microcode(adev);
>>> - if (!amdgpu_sriov_vf(adev))
>>> - amdgpu_gfx_sysfs_fini(adev);
>>> + amdgpu_gfx_sysfs_compute_fini(adev);
>>>
>>> return 0;
>>> }