On 5/23/26 2:43 AM, Anisa Su wrote:
> Replace the no-op ack stub for cxl_rm_extent() with the real teardown:
> resolve the released DPA range to its region and endpoint decoder,
> locate the matching dc_extent in cxlr_dax->dc_extents (filtering by
> cxled, range containment, and tag), and tear down the entire containing
> tag group atomically through rm_tag_group(). Partial release is not
> supported.
>
> rm_tag_group() invalidates caches once for the whole group (no mappings
> exist at this point — partial release is not supported, so all members
> are leaving together), then walks the group's dc_extents and releases
> each via its devm action installed at online_tag_group() time.
>
> cxl_region_invalidate_memregion() becomes non-static and is declared
> in core.h so rm_tag_group() can flush caches before tearing the group down.
>
> When the released range maps to no region (host crashed before
> persisting acceptance, region destruction raced device release, or the
> device is confused) the host has nothing to drop, so reply via
> memdev_release_extent() to keep the device's view consistent.
>
> Based on an original patch by Navneet Singh.
>
> Signed-off-by: Ira Weiny <[email protected]>
> Signed-off-by: Anisa Su <[email protected]>
>
> ---
> Changes:
> [anisa: restructured from the original "Process dynamic partition
> events" monolith; this commit replaces the stubbed release with the
> real walk-and-tear-down of the matching tag group.]
> ---
> drivers/cxl/core/core.h | 8 +++
> drivers/cxl/core/extent.c | 101 ++++++++++++++++++++++++++++++++++++++
> drivers/cxl/core/mbox.c | 19 -------
> drivers/cxl/core/region.c | 2 +-
> 4 files changed, 110 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
> index 30b6b05b155b..65daaaadf68e 100644
> --- a/drivers/cxl/core/core.h
> +++ b/drivers/cxl/core/core.h
> @@ -28,6 +28,8 @@ cxled_to_mds(struct cxl_endpoint_decoder *cxled)
> return container_of(cxlds, struct cxl_memdev_state, cxlds);
> }
>
> +int cxl_region_invalidate_memregion(struct cxl_region *cxlr);
Doesn't this need to go within CONFIG_CXL_REGION?
> +
> #ifdef CONFIG_CXL_REGION
>
> struct cxl_region_context {
> @@ -67,6 +69,7 @@ int devm_cxl_add_pmem_region(struct cxl_region *cxlr);
>
> int cxl_add_extent(struct cxl_memdev_state *mds, struct cxl_extent *extent,
> u16 seq_num);
> +int cxl_rm_extent(struct cxl_memdev_state *mds, struct cxl_extent *extent);
> int online_tag_group(struct cxl_dc_tag_group *group);
> #else
> static inline u64 cxl_dpa_to_hpa(struct cxl_region *cxlr,
> @@ -79,6 +82,11 @@ static inline int cxl_add_extent(struct cxl_memdev_state
> *mds,
> {
> return 0;
> }
> +static inline int cxl_rm_extent(struct cxl_memdev_state *mds,
> + struct cxl_extent *extent)
> +{
> + return 0;
> +}
> static inline int online_tag_group(struct cxl_dc_tag_group *group)
> {
> return 0;
> diff --git a/drivers/cxl/core/extent.c b/drivers/cxl/core/extent.c
> index b01507022cff..51116c8139ed 100644
> --- a/drivers/cxl/core/extent.c
> +++ b/drivers/cxl/core/extent.c
> @@ -344,6 +344,107 @@ static void dc_extent_unregister(void *ext)
> device_unregister(&dc_extent->dev);
> }
>
> +static void rm_tag_group(struct cxl_dc_tag_group *group)
> +{
> + struct device *region_dev = &group->cxlr_dax->dev;
> + struct dc_extent *dc_extent;
> + unsigned long index;
> +
> + /*
> + * Tagged allocations release atomically. Invalidate caches once
> + * for the whole group (no mappings exist at this point — partial
> + * release is not supported, so all members are leaving use
> + * together) before tearing down each dc_extent device.
> + *
> + * Pin @group across the walk: each devm_release_action runs the
> + * dc_extent_unregister action synchronously, which drops the last
> + * reference on the dc_extent device and fires dc_extent_release.
> + * The release decrements group->nr_extents and, on the final
> + * decrement, frees @group. Without the pin the next iteration's
> + * xa_find_after() dereferences a freed xarray.
> + */
> + cxl_region_invalidate_memregion(group->cxlr_dax->cxlr);
check return value?
> +
> + group->nr_extents++;
> + xa_for_each(&group->dc_extents, index, dc_extent)
> + devm_release_action(region_dev, dc_extent_unregister,
> dc_extent);
> + group->nr_extents--;
> + if (!group->nr_extents)
> + free_tag_group(group);
> +}
> +
> +int cxl_rm_extent(struct cxl_memdev_state *mds, struct cxl_extent *extent)
> +{
> + u64 start_dpa = le64_to_cpu(extent->start_dpa);
> + struct cxl_memdev *cxlmd = mds->cxlds.cxlmd;
> + struct cxl_endpoint_decoder *cxled;
> + struct cxl_dax_region *cxlr_dax;
> + struct cxl_dc_tag_group *group;
> + struct dc_extent *dc_extent;
> + struct cxl_region *cxlr;
> + struct range dpa_range;
> + unsigned long idx;
> + uuid_t tag;
> +
> + dpa_range = (struct range) {
> + .start = start_dpa,
> + .end = start_dpa + le64_to_cpu(extent->length) - 1,
> + };
> +
> + guard(rwsem_read)(&cxl_rwsem.region);
> + cxlr = cxl_dpa_to_region(cxlmd, start_dpa, &cxled);
> + if (!cxlr) {
> + /*
> + * No region can happen here for a few reasons:
> + *
> + * 1) Extents were accepted and the host crashed/rebooted
> + * leaving them in an accepted state. On reboot the host
> + * has not yet created a region to own them.
> + *
> + * 2) Region destruction won the race with the device releasing
> + * all the extents. Here the release will be a duplicate of
> + * the one sent via region destruction.
> + *
> + * 3) The device is confused and releasing extents for which no
> + * region ever existed.
> + *
> + * In all these cases make sure the device knows we are not
> + * using this extent.
> + */
> + memdev_release_extent(mds, &dpa_range);
> + return -ENXIO;
> + }
> +
> + cxlr_dax = cxlr->cxlr_dax;
Does it need to check if cxlr_dax is NULL?
DJ
> + import_uuid(&tag, extent->uuid);
> +
> + /*
> + * Find the dc_extent whose DPA range covers the released range and
> + * whose tag matches. The release targets the entire containing
> + * tag group atomically; partial release is not supported.
> + */
> + group = NULL;
> + xa_for_each(&cxlr_dax->dc_extents, idx, dc_extent) {
> + if (dc_extent->cxled != cxled)
> + continue;
> + if (!range_contains(&dc_extent->dpa_range, &dpa_range))
> + continue;
> + if (!uuid_equal(&dc_extent->group->uuid, &tag))
> + continue;
> + group = dc_extent->group;
> + break;
> + }
> + if (!group) {
> + dev_err(&cxlr_dax->dev,
> + "release DPA %pra (%pU) matches no dc_extent\n",
> + &dpa_range, &tag);
> + return -EINVAL;
> + }
> +
> + rm_tag_group(group);
> + return 0;
> +}
> +
> static void cleanup_pending_dc_extent(struct dc_extent *dc_extent)
> {
> struct cxl_dc_tag_group *group = dc_extent->group;
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index 545c48c9c373..70e6c4c9743c 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -1587,25 +1587,6 @@ static int handle_add_event(struct cxl_memdev_state
> *mds,
> return rc;
> }
>
> -/*
> - * Stub: ack the release back to the device so it knows we are not
> - * using the range. A later commit replaces this with the real
> - * teardown that walks the region's tag group and tears down the
> - * member dc_extent devices.
> - */
> -static int cxl_rm_extent(struct cxl_memdev_state *mds,
> - struct cxl_extent *extent)
> -{
> - u64 start_dpa = le64_to_cpu(extent->start_dpa);
> - struct range dpa_range = {
> - .start = start_dpa,
> - .end = start_dpa + le64_to_cpu(extent->length) - 1,
> - };
> -
> - memdev_release_extent(mds, &dpa_range);
> - return 0;
> -}
> -
> static char *cxl_dcd_evt_type_str(u8 type)
> {
> switch (type) {
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 733d77c07493..317630d8bf2e 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -222,7 +222,7 @@ static struct cxl_region_ref *cxl_rr_load(struct cxl_port
> *port,
> return xa_load(&port->regions, (unsigned long)cxlr);
> }
>
> -static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
> +int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
> {
> if (!cpu_cache_has_invalidate_memregion()) {
> if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) {