On 3/21/25 10:43, Bert Karwatzki wrote: > I did some monitoring using this patch (on top of 6.12.18): > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c > index 0760e70402ec..ccd0c9058cee 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c > @@ -121,6 +121,8 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager > *man, > int r; > > node = kzalloc(struct_size(node, mm_nodes, 1), GFP_KERNEL); > + if (!strcmp(get_current()->comm, "stellaris")) > + printk(KERN_INFO "%s: node = %px\n", __func__, node); > if (!node) > return -ENOMEM; > > @@ -142,10 +144,16 @@ static int amdgpu_gtt_mgr_new(struct > ttm_resource_manager > *man, > goto err_free; > > node->base.start = node->mm_nodes[0].start; > + if (!strcmp(get_current()->comm, "stellaris")) > + printk(KERN_INFO "%s %d: node->base.start = 0x%lx node- >> base.size = 0x%lx\n", > + __func__, __LINE__, node->base.start, node- >> base.size); > } else { > node->mm_nodes[0].start = 0; > node->mm_nodes[0].size = PFN_UP(node->base.size); > node->base.start = AMDGPU_BO_INVALID_OFFSET; > + if (!strcmp(get_current()->comm, "stellaris")) > + printk(KERN_INFO "%s %d: node->base.start = 0x%lx node- >> base.size = 0x%lx\n", > + __func__, __LINE__, node->base.start, node- >> base.size); > } > > *res = &node->base; > @@ -170,6 +178,8 @@ static void amdgpu_gtt_mgr_del(struct ttm_resource_manager > *man, > { > struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res); > struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man); > + if (!strcmp(get_current()->comm, "stellaris")) > + printk(KERN_INFO "%s: node = %px\n", __func__, node); > > spin_lock(&mgr->lock); > if (drm_mm_node_allocated(&node->mm_nodes[0])) > @@ -217,7 +227,11 @@ static bool amdgpu_gtt_mgr_intersects(struct > ttm_resource_manager *man, > const struct ttm_place *place, > size_t size) > { > - return !place->lpfn || amdgpu_gtt_mgr_has_gart_addr(res); > + bool ret; 
> + ret = !place->lpfn || amdgpu_gtt_mgr_has_gart_addr(res); > + if (!strcmp(get_current()->comm, "stellaris")) > + printk(KERN_INFO, "%s: returning ret = %d", __func__, ret); > + return ret; > } > > /** > @@ -235,7 +249,11 @@ static bool amdgpu_gtt_mgr_compatible(struct > ttm_resource_manager *man, > const struct ttm_place *place, > size_t size) > { > - return !place->lpfn || amdgpu_gtt_mgr_has_gart_addr(res); > + bool ret; > + ret = !place->lpfn || amdgpu_gtt_mgr_has_gart_addr(res); > + if (!strcmp(get_current()->comm, "stellaris")) > + printk(KERN_INFO, "%s: returning ret = %d", __func__, ret); > + return ret; > } > > /** > @@ -288,6 +306,8 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, > uint64_t > gtt_size) > drm_mm_init(&mgr->mm, start, size); > spin_lock_init(&mgr->lock); > > + dev_info(adev->dev, "%s: start = 0x%llx size = 0x%llx\n", __func__, > start, size); > + > ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager); > ttm_resource_manager_set_used(man, true); > return 0; > diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c > index 1ed68d3cd80b..e525a1276304 100644 > --- a/drivers/gpu/drm/drm_mm.c > +++ b/drivers/gpu/drm/drm_mm.c > @@ -223,6 +223,13 @@ static void insert_hole_size(struct rb_root_cached *root, > struct rb_node **link = &root->rb_root.rb_node, *rb = NULL; > u64 x = node->hole_size; > bool first = true; > + int count = 0; > + > + if (!strcmp(get_current()->comm, "stellaris")) { > + for(struct rb_node *first = rb_first_cached(root); first; first > = rb_next(first)) > + count++; > + printk(KERN_INFO "%s: RB count = %d\n", __func__, count); > + } > > while (*link) { > rb = *link; > @@ -247,6 +254,13 @@ static void insert_hole_addr(struct rb_root *root, struct > drm_mm_node *node) > struct rb_node **link = &root->rb_node, *rb_parent = NULL; > u64 start = HOLE_ADDR(node), subtree_max_hole = node->subtree_max_hole; > struct drm_mm_node *parent; > + int count = 0; > + > + if (!strcmp(get_current()->comm, 
"stellaris")) { > + for(struct rb_node *first = rb_first(root); first; first = > rb_next(first)) > + count++; > + printk(KERN_INFO "%s: RB count = %d\n", __func__, count); > + } > > while (*link) { > rb_parent = *link; > > > With this I ran stellaris (just opening the game then closing it again from the > game menu) > > The findings are: > (a) The size of the RB tree is the same in the working and non-working case > (50- > 60) > (b) The number of calls to amdgpu_gtt_mgr_new() is ~2000 in both cases > (c) In the non-working case amdgpu_gtt_mgr_del() is called far more often than > in the working case: > Non-working case (cmdline: nokaslr) 834 calls to amdgpu_gtt_mgr_del() > Working case (cmdline: nokaslr amdgpu.vramlimit=512) 51 calls to > amdgpu_gtt_mgr_del() > Working case (cmdline: no additional arguments) 44 calls to > amdgpu_gtt_mgr_del() >
I am not an expert in amdgpu or gtt_mgr, but I wonder if some of the deletes are coming from forceful eviction of memory during allocation? Have you filed a bug report for the nokaslr case? Balbir Singh