By caching frequently used ring fields (the write pointer and the buffer mask) in local variables, we allow the compiler to emit more compact code, because it no longer has to constantly reload those values from the ring structure.

Before and after size comparisons:

    text    data     bss      dec    hex filename
10708384  547307  213512 11469203 af0193 amdgpu.ko.0
10688632  547307  213512 11449451 aeb46b amdgpu.ko.1

add/remove: 0/0 grow/shrink: 3/340 up/down: 29/-20025 (-19996)
Function                                     old     new   delta
amdgpu_ring_write_multiple                   600     612     +12
amdgpu_umsch_mm_submit_pkt                   196     207     +11
amdgpu_ring_write_multiple.constprop         453     459      +6
vcn_v2_0_enc_ring_insert_end                  69      64      -5
...
jpeg_v4_0_3_dec_ring_emit_ib                1281    1045    -236
jpeg_v2_0_dec_ring_emit_ib                  1402    1147    -255
jpeg_v1_0_decode_ring_emit_fence            1788    1507    -281
Total: Before=8949691, After=8929695, chg -0.22%

Signed-off-by: Tvrtko Ursulin <tvrtko.ursu...@igalia.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index afaf951b0b78..d37e822ff46e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -467,8 +467,10 @@ static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
 
 static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
 {
-	ring->ring[ring->wptr++ & ring->buf_mask] = v;
-	ring->wptr &= ring->ptr_mask;
+	u64 wptr = ring->wptr;
+
+	ring->ring[wptr++ & ring->buf_mask] = v;
+	ring->wptr = wptr & ring->ptr_mask;
 	ring->count_dw--;
 }
 
@@ -476,9 +478,11 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
 					      void *src, int count_dw)
 {
 	unsigned occupied, chunk1, chunk2;
+	u32 buf_mask = ring->buf_mask;
+	u64 wptr = ring->wptr;
 
-	occupied = ring->wptr & ring->buf_mask;
-	chunk1 = ring->buf_mask + 1 - occupied;
+	occupied = wptr & buf_mask;
+	chunk1 = buf_mask + 1 - occupied;
 	chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1;
 	chunk2 = count_dw - chunk1;
 	chunk1 <<= 2;
@@ -492,8 +496,8 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
 		memcpy(ring->ring, src, chunk2);
 	}
 
-	ring->wptr += count_dw;
-	ring->wptr &= ring->ptr_mask;
+	wptr += count_dw;
+	ring->wptr = wptr & ring->ptr_mask;
 	ring->count_dw -= count_dw;
 }
 
-- 
2.48.0