Am Mittwoch, dem 26.03.2025 um 15:58 -0700 schrieb Linus Torvalds:
> On Wed, 26 Mar 2025 at 15:00, Bert Karwatzki <spassw...@web.de> wrote:
> >
> > As Balbir Singh found out this memory comes from amdkfd
> > (kgd2kfd_init_zone_device()) with CONFIG_HSA_AMD_SVM=y. The memory gets 
> > placed
> > by devm_request_free_mem_region() which places the memory at the end of the
> > physical address space (DIRECT_MAP_PHYSMEM_END). DIRECT_MAP_PHYSMEM_END 
> > changes
> > when using nokaslr and so the memory shifts.
>
> So I just want to say that having followed the thread as a spectator,
> big kudos to everybody involved in this thing. Particularly to you,
> Bart, for all your debugging and testing, and to Balbir for following
> up and figuring it out.
>
> Because this was a strange one.
>
> >  One can work around this by removing the GFR_DESCENDING flag from
> > devm_request_free_mem_region() so the memory gets placed right after the 
> > other
> > resources:
>
> I worry that there might be other machines where that completely breaks 
> things.
>
> There are various historical reasons why we look for addresses in high
> regions, ie on machines where there are various hidden IO regions that
> aren't enumerated by e280 and aren't found by our usual PCI BAR
> discovery because they are special hidden ones.
>
> So then users of [devm_]request_free_mem_region() might end up getting
> allocated a region that has some magic system resource in it.
>
> And no, this shouldn't happen on any normal machine, but it has
> definitely been a thing in the past.
>
> So I'm very happy that you guys figured out what ended up happening,
> but I'm not convinced that the devm_request_free_mem_region()
> workaround is tenable.
>
> So I think it needs to be more targeted to the HSA_AMD_SVM case than
> touch the devm_request_free_mem_region() logic for everybody.
>
>            Linus

This patch adds another function devm_request_free_mem_region_from_end()
with an additional argument which allows to choose the end address from
which to place the resource.
The problem here is this uses dma_get_mask(adev->dev) as end address which
uses the dma mask for the discrete GPU while it should use the dma mask for
the built-in GPU (In my case both are equal (44bits), but I'm not sure if
this is always the case)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index d05d199b5e44..e1942fef3637 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -1042,7 +1042,8 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
                pgmap->range.end = adev->gmc.aper_base + adev->gmc.aper_size -
1;
                pgmap->type = MEMORY_DEVICE_COHERENT;
        } else {
-               res = devm_request_free_mem_region(adev->dev, &iomem_resource,
size);
+               res = devm_request_free_mem_region_from_end(adev->dev,
+                               &iomem_resource, size, dma_get_mask(adev-
>dev));
                if (IS_ERR(res))
                        return PTR_ERR(res);
                pgmap->range.start = res->start;
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 5385349f0b8a..a9a765721ab4 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -407,6 +407,9 @@ walk_iomem_res_desc(unsigned long desc, unsigned long flags,
u64 start, u64 end,

 struct resource *devm_request_free_mem_region(struct device *dev,
                struct resource *base, unsigned long size);
+struct resource *devm_request_free_mem_region_from_end(struct device *dev,
+               struct resource *base, unsigned long size,
+               resource_size_t seek_end);
 struct resource *request_free_mem_region(struct resource *base,
                unsigned long size, const char *name);
 struct resource *alloc_free_mem_region(struct resource *base,
diff --git a/kernel/resource.c b/kernel/resource.c
index 12004452d999..82f40407c02d 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1875,12 +1875,14 @@ EXPORT_SYMBOL(resource_list_free);
 #endif

 static resource_size_t gfr_start(struct resource *base, resource_size_t size,
-                                resource_size_t align, unsigned long flags)
+                                resource_size_t align, resource_size_t
seek_end,
+                                unsigned long flags)
 {
        if (flags & GFR_DESCENDING) {
                resource_size_t end;

                end = min_t(resource_size_t, base->end,
DIRECT_MAP_PHYSMEM_END);
+               end = min_t(resource_size_t, end, seek_end);
                return end - size + 1;
        }

@@ -1920,8 +1922,8 @@ static void remove_free_mem_region(void *_res)
 static struct resource *
 get_free_mem_region(struct device *dev, struct resource *base,
                    resource_size_t size, const unsigned long align,
-                   const char *name, const unsigned long desc,
-                   const unsigned long flags)
+                   resource_size_t seek_end, const char *name,
+                   const unsigned long desc, const unsigned long flags)
 {
        resource_size_t addr;
        struct resource *res;
@@ -1946,7 +1948,7 @@ get_free_mem_region(struct device *dev, struct resource
*base,
        }

        write_lock(&resource_lock);
-       for (addr = gfr_start(base, size, align, flags);
+       for (addr = gfr_start(base, size, align, seek_end, flags);
             gfr_continue(base, addr, align, flags);
             addr = gfr_next(addr, align, flags)) {
                if (__region_intersects(base, addr, size, 0, IORES_DESC_NONE)
!=
@@ -2021,17 +2023,30 @@ struct resource *devm_request_free_mem_region(struct
device *dev,
        unsigned long flags = GFR_DESCENDING | GFR_REQUEST_REGION;

        return get_free_mem_region(dev, base, size, GFR_DEFAULT_ALIGN,
-                                  dev_name(dev),
+                                  DIRECT_MAP_PHYSMEM_END, dev_name(dev),
                                   IORES_DESC_DEVICE_PRIVATE_MEMORY, flags);
 }
 EXPORT_SYMBOL_GPL(devm_request_free_mem_region);

+struct resource *devm_request_free_mem_region_from_end(struct device *dev,
+               struct resource *base, unsigned long size,
+               resource_size_t seek_end)
+{
+       unsigned long flags = GFR_DESCENDING | GFR_REQUEST_REGION;
+
+       return get_free_mem_region(dev, base, size, GFR_DEFAULT_ALIGN,
+                                  seek_end, dev_name(dev),
+                                  IORES_DESC_DEVICE_PRIVATE_MEMORY, flags);
+}
+EXPORT_SYMBOL_GPL(devm_request_free_mem_region_from_end);
+
 struct resource *request_free_mem_region(struct resource *base,
                unsigned long size, const char *name)
 {
        unsigned long flags = GFR_DESCENDING | GFR_REQUEST_REGION;

-       return get_free_mem_region(NULL, base, size, GFR_DEFAULT_ALIGN, name,
+       return get_free_mem_region(NULL, base, size, GFR_DEFAULT_ALIGN,
+                                  DIRECT_MAP_PHYSMEM_END, name,
                                   IORES_DESC_DEVICE_PRIVATE_MEMORY, flags);
 }
 EXPORT_SYMBOL_GPL(request_free_mem_region);
@@ -2055,8 +2070,8 @@ struct resource *alloc_free_mem_region(struct resource
*base,
        /* Default of ascending direction and insert resource */
        unsigned long flags = 0;

-       return get_free_mem_region(NULL, base, size, align, name,
-                                  IORES_DESC_NONE, flags);
+       return get_free_mem_region(NULL, base, size, align,
DIRECT_MAP_PHYSMEM_END,
+                                  name, IORES_DESC_NONE, flags);
 }
 EXPORT_SYMBOL_GPL(alloc_free_mem_region);
 #endif /* CONFIG_GET_FREE_REGION */



Bert Karwatzki

Reply via email to