On 11.03.2015 16:44, Alex Deucher wrote:
> radeon_bo_create() calls radeon_ttm_placement_from_domain()
> before ttm_bo_init() is called. radeon_ttm_placement_from_domain()
> uses the ttm bo size to determine when to select top down
> allocation but since the ttm bo is not initialized yet the
> check is always false.
>
> Noticed-by: Oded Gabbay <oded.gabbay at amd.com>
> Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
> Cc: stable at vger.kernel.org
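Just to spell out for anyone following along why the old check could never
trigger, here is a minimal, standalone sketch of the ordering described
above. The mock_* types and functions are made up for illustration only
(they are not the real radeon/TTM structures); the point is just that
reading rbo->tbo.mem.size before ttm_bo_init() has run always sees 0,
while passing the size explicitly behaves as intended.

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

/* Mocked-up stand-ins for the TTM/radeon structures, illustration only. */
struct mock_ttm_mem_reg {
	uint64_t size;			/* stays 0 until mock_ttm_bo_init() runs */
};

struct mock_ttm_bo {
	struct mock_ttm_mem_reg mem;
};

struct mock_radeon_bo {
	struct mock_ttm_bo tbo;
	bool topdown;
};

/* Old behaviour: looks at rbo->tbo.mem.size, which is still 0 at create
 * time because ttm_bo_init() has not run yet, so TOPDOWN is never set. */
static void placement_old(struct mock_radeon_bo *rbo)
{
	rbo->topdown = rbo->tbo.mem.size > 512 * 1024;
}

/* New behaviour: the caller passes the size it already knows. */
static void placement_new(struct mock_radeon_bo *rbo, uint64_t size)
{
	rbo->topdown = size > 512 * 1024;
}

/* Stand-in for ttm_bo_init(): only here does the ttm bo learn its size. */
static void mock_ttm_bo_init(struct mock_radeon_bo *rbo, uint64_t size)
{
	rbo->tbo.mem.size = size;
}

int main(void)
{
	struct mock_radeon_bo bo = { 0 };
	uint64_t size = 1024 * 1024;	/* 1 MiB, well above the 512 KiB cutoff */

	/* radeon_bo_create() ordering: placement is chosen before init */
	placement_old(&bo);
	printf("old check: topdown = %d\n", bo.topdown);	/* prints 0 */

	placement_new(&bo, size);
	printf("new check: topdown = %d\n", bo.topdown);	/* prints 1 */

	mock_ttm_bo_init(&bo, size);	/* only now is tbo.mem.size valid */
	return 0;
}

Built with any C compiler, the old-style check prints topdown = 0 even for
a 1 MiB buffer, while the new-style check prints topdown = 1.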
And I was already wondering why the heck the BOs always made this
ping/pong in memory after creation.

Patch is Reviewed-by: Christian König <christian.koenig at amd.com>

Regards,
Christian.

> ---
>  drivers/gpu/drm/radeon/radeon.h        |  3 ++-
>  drivers/gpu/drm/radeon/radeon_gem.c    |  2 +-
>  drivers/gpu/drm/radeon/radeon_mn.c     |  2 +-
>  drivers/gpu/drm/radeon/radeon_object.c | 17 ++++++++++-------
>  drivers/gpu/drm/radeon/radeon_ttm.c    | 12 ++++++++----
>  5 files changed, 22 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 5587603..726e89f 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -2970,7 +2970,8 @@ extern void radeon_surface_init(struct radeon_device *rdev);
>  extern int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data);
>  extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enable);
>  extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable);
> -extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain);
> +extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain,
> +					     u64 size);
>  extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo);
>  extern int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
>  				     uint32_t flags);
> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
> index ac3c131..d613d0c 100644
> --- a/drivers/gpu/drm/radeon/radeon_gem.c
> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
> @@ -337,7 +337,7 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data,
>  		goto release_object;
>  	}
>
> -	radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT);
> +	radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT, bo->tbo.mem.size);
>  	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
>  	radeon_bo_unreserve(bo);
>  	up_read(&current->mm->mmap_sem);
> diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c
> index a69bd44..e51f09b 100644
> --- a/drivers/gpu/drm/radeon/radeon_mn.c
> +++ b/drivers/gpu/drm/radeon/radeon_mn.c
> @@ -141,7 +141,7 @@ static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
>  				DRM_ERROR("(%d) failed to wait for user bo\n", r);
>  		}
>
> -		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU);
> +		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU, bo->tbo.mem.size);
>  		r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
>  		if (r)
>  			DRM_ERROR("(%d) failed to validate user bo\n", r);
> diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
> index 43e0994..07f8fd5 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.c
> +++ b/drivers/gpu/drm/radeon/radeon_object.c
> @@ -93,7 +93,8 @@ bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
>  	return false;
>  }
>
> -void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
> +void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain,
> +				      u64 size)
>  {
>  	u32 c = 0, i;
>
> @@ -179,7 +180,7 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
>  	 * improve fragmentation quality.
>  	 * 512kb was measured as the most optimal number.
>  	 */
> -	if (rbo->tbo.mem.size > 512 * 1024) {
> +	if (size > 512 * 1024) {
>  		for (i = 0; i < c; i++) {
>  			rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
>  		}
> @@ -252,7 +253,7 @@ int radeon_bo_create(struct radeon_device *rdev,
>  		bo->flags &= ~RADEON_GEM_GTT_WC;
>  #endif
>
> -	radeon_ttm_placement_from_domain(bo, domain);
> +	radeon_ttm_placement_from_domain(bo, domain, size);
>  	/* Kernel allocation are uninterruptible */
>  	down_read(&rdev->pm.mclk_lock);
>  	r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
> @@ -350,7 +351,7 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
>
>  		return 0;
>  	}
> -	radeon_ttm_placement_from_domain(bo, domain);
> +	radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size);
>  	for (i = 0; i < bo->placement.num_placement; i++) {
>  		/* force to pin into visible video ram */
>  		if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
> @@ -557,7 +558,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
>  		}
>
>  	retry:
> -		radeon_ttm_placement_from_domain(bo, domain);
> +		radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size);
>  		if (ring == R600_RING_TYPE_UVD_INDEX)
>  			radeon_uvd_force_into_uvd_segment(bo, allowed);
>
> @@ -800,7 +801,8 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
>  		return 0;
>
>  	/* hurrah the memory is not visible ! */
> -	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
> +	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM,
> +					 rbo->tbo.mem.size);
>  	lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT;
>  	for (i = 0; i < rbo->placement.num_placement; i++) {
>  		/* Force into visible VRAM */
> @@ -810,7 +812,8 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
>  	}
>  	r = ttm_bo_validate(bo, &rbo->placement, false, false);
>  	if (unlikely(r == -ENOMEM)) {
> -		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
> +		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT,
> +						 rbo->tbo.mem.size);
>  		return ttm_bo_validate(bo, &rbo->placement, false, false);
>  	} else if (unlikely(r != 0)) {
>  		return r;
> diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
> index d02aa1d..ce8ed2d 100644
> --- a/drivers/gpu/drm/radeon/radeon_ttm.c
> +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
> @@ -197,7 +197,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
>  	switch (bo->mem.mem_type) {
>  	case TTM_PL_VRAM:
>  		if (rbo->rdev->ring[radeon_copy_ring_index(rbo->rdev)].ready == false)
> -			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU);
> +			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU,
> +							 rbo->tbo.mem.size);
>  		else if (rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size &&
>  			 bo->mem.start < (rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT)) {
>  			unsigned fpfn = rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
> @@ -209,7 +210,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
>  			 * BOs to be evicted from VRAM
>  			 */
>  			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM |
> -							 RADEON_GEM_DOMAIN_GTT);
> +							 RADEON_GEM_DOMAIN_GTT,
> +							 rbo->tbo.mem.size);
>  			rbo->placement.num_busy_placement = 0;
>  			for (i = 0; i < rbo->placement.num_placement; i++) {
>  				if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) {
> @@ -222,11 +224,13 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
>  				}
>  			}
>  		} else
> -			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
> +			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT,
> +							 rbo->tbo.mem.size);
>  		break;
>  	case TTM_PL_TT:
>  	default:
> -		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU);
> +		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU,
> +						 rbo->tbo.mem.size);
>  	}
>  	*placement = rbo->placement;
>  }