On 2019-10-25 2:54 a.m., Koenig, Christian wrote:
> Am 25.10.19 um 01:44 schrieb Tuikov, Luben:
>> Simplify padding calculations.
>>
>> Signed-off-by: Luben Tuikov <luben.tui...@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/cik_sdma.c  |  4 ++--
>>   drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c |  4 ++--
>>   drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c |  4 ++--
>>   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  4 ++--
>>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 14 +++++++++-----
>>   5 files changed, 17 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c 
>> b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>> index c45304f1047c..1ea9e18d6f08 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>> @@ -228,7 +228,7 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring 
>> *ring,
>>      u32 extra_bits = vmid & 0xf;
>>   
>>      /* IB packet must end on a 8 DW boundary */
>> -    cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 
>> 8);
>> +    cik_sdma_ring_insert_nop(ring, (4-lower_32_bits(ring->wptr)) & 7);
>>   
>>      amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, 
>> extra_bits));
>>      amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 
>> byte aligned */
>> @@ -811,7 +811,7 @@ static void cik_sdma_ring_pad_ib(struct amdgpu_ring 
>> *ring, struct amdgpu_ib *ib)
>>      u32 pad_count;
>>      int i;
>>   
>> -    pad_count = (8 - (ib->length_dw & 0x7)) % 8;
>> +    pad_count = (-ib->length_dw) & 7;
>>      for (i = 0; i < pad_count; i++)
>>              if (sdma && sdma->burst_nop && (i == 0))
>>                      ib->ptr[ib->length_dw++] =
>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c 
>> b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>> index a10175838013..d340f179401a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>> @@ -255,7 +255,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring 
>> *ring,
>>      unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>>   
>>      /* IB packet must end on a 8 DW boundary */
>> -    sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) 
>> % 8);
>> +    sdma_v2_4_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
>>   
>>      amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
>>                        SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
>> @@ -750,7 +750,7 @@ static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring 
>> *ring, struct amdgpu_ib *ib
>>      u32 pad_count;
>>      int i;
>>   
>> -    pad_count = (8 - (ib->length_dw & 0x7)) % 8;
>> +    pad_count = (-ib->length_dw) & 7;
>>      for (i = 0; i < pad_count; i++)
>>              if (sdma && sdma->burst_nop && (i == 0))
>>                      ib->ptr[ib->length_dw++] =
>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>> index 5f4e2c616241..5c3c310188b6 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>> @@ -429,7 +429,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring 
>> *ring,
>>      unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>>   
>>      /* IB packet must end on a 8 DW boundary */
>> -    sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) 
>> % 8);
>> +    sdma_v3_0_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
>>   
>>      amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
>>                        SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
>> @@ -1021,7 +1021,7 @@ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring 
>> *ring, struct amdgpu_ib *ib
>>      u32 pad_count;
>>      int i;
>>   
>> -    pad_count = (8 - (ib->length_dw & 0x7)) % 8;
>> +    pad_count = (-ib->length_dw) & 7;
>>      for (i = 0; i < pad_count; i++)
>>              if (sdma && sdma->burst_nop && (i == 0))
>>                      ib->ptr[ib->length_dw++] =
>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>> index 45bd538ba97e..7c71c88e38a4 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>> @@ -698,7 +698,7 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring 
>> *ring,
>>      unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>>   
>>      /* IB packet must end on a 8 DW boundary */
>> -    sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) 
>> % 8);
>> +    sdma_v4_0_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
>>   
>>      amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
>>                        SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
>> @@ -1580,7 +1580,7 @@ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring 
>> *ring, struct amdgpu_ib *ib
>>      u32 pad_count;
>>      int i;
>>   
>> -    pad_count = (8 - (ib->length_dw & 0x7)) % 8;
>> +    pad_count = (-ib->length_dw) & 7;
>>      for (i = 0; i < pad_count; i++)
>>              if (sdma && sdma->burst_nop && (i == 0))
>>                      ib->ptr[ib->length_dw++] =
>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>> index 0c41b4fdc58b..67ede9e4df01 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>> @@ -382,8 +382,12 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring 
>> *ring,
>>      unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>>      uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
>>   
>> -    /* IB packet must end on a 8 DW boundary */
>> -    sdma_v5_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) 
>> % 8);
>> +    /* An IB packet must end on a 8 DW boundary--the next dword must
>> +     * be on a 8-dword boundary. Our IB below is 6 dwords long,
> 
> That should probably read "our IB command" or "our IB packet".

Done.

> 
>> +     * thus add x number of NOPs, such that
>> +     * wptr + 6 + x = 8k, k >= 0.
> 
> Maybe write (wrptr + 6 + x) % 8 = 0 to make that more obvious.

That's less obvious. In my mind I always translate it to
a congruence expression. It's what I used to derive the algebraic
simplifications of this patch.

I'll add the highly cryptic "(wrptr + 6 + x) % 8 = 0" to make you happy,
below the algebraic expression already there.

> 
>> +     */
>> +    sdma_v5_0_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
> 
> Spaces between operators please.

Yeah, I wanted to put an emphasis on the AND-op, but I'll add them.

Regards,
Luben


> 
> Apart form that looks good to me,
> Christian.
> 
>>   
>>      amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
>>                        SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
>> @@ -1086,10 +1090,10 @@ static void sdma_v5_0_vm_set_pte_pde(struct 
>> amdgpu_ib *ib,
>>   }
>>   
>>   /**
>> - * sdma_v5_0_ring_pad_ib - pad the IB to the required number of dw
>> - *
>> + * sdma_v5_0_ring_pad_ib - pad the IB
>>    * @ib: indirect buffer to fill with padding
>>    *
>> + * Pad the IB with NOPs to a boundary multiple of 8.
>>    */
>>   static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct 
>> amdgpu_ib *ib)
>>   {
>> @@ -1097,7 +1101,7 @@ static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring 
>> *ring, struct amdgpu_ib *ib
>>      u32 pad_count;
>>      int i;
>>   
>> -    pad_count = (8 - (ib->length_dw & 0x7)) % 8;
>> +    pad_count = (-ib->length_dw) & 0x7;
>>      for (i = 0; i < pad_count; i++)
>>              if (sdma && sdma->burst_nop && (i == 0))
>>                      ib->ptr[ib->length_dw++] =
> 

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to