On 5/23/26 2:43 AM, Anisa Su wrote:
> Implement the release path that mirrors the add path: when the
> device asks for capacity back, the dax layer tears down the
> per-extent resources for the whole tag group atomically.
>
> If any extent in the group is still mapped by a dev_dax, the release
> is refused with -EBUSY and no state changes; the cxl side then leaves
> the tag group intact and the device retries.
>
> Also add a rollback to the add path: if any per-extent registration
> fails midway through a group, undo the ones already added so a
> partial group never leaks into the dax region.
>
> Based on an original patch by Navneet Singh.
>
> Signed-off-by: Ira Weiny <[email protected]>
> Signed-off-by: Anisa Su <[email protected]>
Just a nit below
Reviewed-by: Dave Jiang <[email protected]>
>
> ---
> Changes:
> [anisa: split out from the original "Surface dc_extents" commit;
> fills in the RELEASE half of the bridge, moves the cxl-side RELEASE
> notify into this commit, and adds the rollback path to ADD.]
> ---
> drivers/cxl/core/extent.c | 13 +++++++++
> drivers/dax/bus.c | 59 +++++++++++++++++++++++++++++++++++++++
> drivers/dax/cxl.c | 54 +++++++++++++++++++++++++++--------
> drivers/dax/dax-private.h | 8 ++++--
> 4 files changed, 120 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/cxl/core/extent.c b/drivers/cxl/core/extent.c
> index 3fc4b7292664..2c8edfe53c0a 100644
> --- a/drivers/cxl/core/extent.c
> +++ b/drivers/cxl/core/extent.c
> @@ -532,6 +532,7 @@ int cxl_rm_extent(struct cxl_memdev_state *mds, struct
> cxl_extent *extent)
> struct range dpa_range;
> unsigned long idx;
> uuid_t tag;
> + int rc;
>
> dpa_range = (struct range) {
> .start = start_dpa,
> @@ -588,6 +589,18 @@ int cxl_rm_extent(struct cxl_memdev_state *mds, struct
> cxl_extent *extent)
> return -EINVAL;
> }
>
> + rc = cxlr_notify_extent(cxlr, DCD_RELEASE_CAPACITY, group);
> + if (rc) {
> + /*
> + * dax layer refused (-EBUSY) or failed (-ENOMEM, etc.). Do
> + * not proceed to tear down the tag group — leave its
> + * dax_resources alive so we do not free them out from under
> + * live dev_dax ranges. The device will retry the release.
> + */
> + return 0;
> + }
> +
> + /* Release the entire tag group */
> rm_tag_group(group);
> return 0;
> }
> diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
> index a6ee59f2d8a1..6368bdfdf93a 100644
> --- a/drivers/dax/bus.c
> +++ b/drivers/dax/bus.c
> @@ -253,6 +253,65 @@ int dax_region_add_resource(struct dax_region
> *dax_region,
> }
> EXPORT_SYMBOL_GPL(dax_region_add_resource);
>
> +int dax_region_rm_resource(struct dax_region *dax_region,
> + struct device *dev)
> +{
> + struct dax_resource *dax_resource;
> +
> + guard(rwsem_write)(&dax_region_rwsem);
> +
> + dax_resource = dev_get_drvdata(dev);
> + if (!dax_resource)
> + return 0;
> +
> + if (dax_resource->use_cnt)
> + return -EBUSY;
> +
> + /*
> + * release the resource under dax_region_rwsem to avoid races with
> + * users trying to use the extent
> + */
> + __dax_release_resource(dax_resource);
> + dev_set_drvdata(dev, NULL);
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(dax_region_rm_resource);
No reason to export. Seems only used within DAX.
DJ
> +
> +/**
> + * dax_region_rm_resources - atomically remove a set of dax_resources.
> + *
> + * Walk @devs twice under dax_region_rwsem. First pass refuses the
> + * operation if any member's use_cnt is non-zero; second pass releases
> + * each. This gives refuse-all-or-none semantics across the set, which
> + * a tag group's atomic release relies on. Devices with no
> + * dax_resource attached are silently skipped.
> + */
> +int dax_region_rm_resources(struct dax_region *dax_region,
> + struct device * const *devs, unsigned int n)
> +{
> + unsigned int i;
> +
> + guard(rwsem_write)(&dax_region_rwsem);
> +
> + for (i = 0; i < n; i++) {
> + struct dax_resource *r = dev_get_drvdata(devs[i]);
> +
> + if (r && r->use_cnt)
> + return -EBUSY;
> + }
> +
> + for (i = 0; i < n; i++) {
> + struct dax_resource *r = dev_get_drvdata(devs[i]);
> +
> + if (!r)
> + continue;
> + __dax_release_resource(r);
> + dev_set_drvdata(devs[i], NULL);
> + }
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(dax_region_rm_resources);
> +
> bool static_dev_dax(struct dev_dax *dev_dax)
> {
> return is_static(dev_dax->region);
> diff --git a/drivers/dax/cxl.c b/drivers/dax/cxl.c
> index 690cf625e052..04b73315a8f2 100644
> --- a/drivers/dax/cxl.c
> +++ b/drivers/dax/cxl.c
> @@ -44,19 +44,52 @@ static int cxl_dax_group_add(struct dax_region
> *dax_region,
>
> xa_for_each(&group->dc_extents, index, dc_extent) {
> rc = __cxl_dax_add_resource(dax_region, dc_extent);
> - if (rc)
> + if (rc) {
> + /*
> + * Unwind every dax_resource already added for this
> + * group; one rm per owner suffices.
> + */
> + struct dc_extent *u;
> + unsigned long uidx;
> +
> + xa_for_each(&group->dc_extents, uidx, u) {
> + if (u == dc_extent)
> + break;
> + dax_region_rm_resource(dax_region, &u->dev);
> + }
> return rc;
> + }
> }
> return 0;
> }
>
> -/*
> - * RELEASE is still a stub here — the atomic dax_region_rm_resources API
> - * and its wire-up land in the next commit. An incoming RELEASE returns
> - * success and the cxl side proceeds to rm_tag_group(), which device-
> - * unregisters each dc_extent; the devm action armed by
> - * dax_region_add_resource() then tears down each dax_resource.
> - */
> +static int cxl_dax_group_rm(struct dax_region *dax_region,
> + struct cxl_dc_tag_group *group)
> +{
> + struct dc_extent *dc_extent;
> + struct device **devs;
> + unsigned long index;
> + unsigned int n = 0;
> + int rc;
> +
> + if (!group->nr_extents)
> + return 0;
> +
> + devs = kmalloc_array(group->nr_extents, sizeof(*devs), GFP_KERNEL);
> + if (!devs)
> + return -ENOMEM;
> +
> + xa_for_each(&group->dc_extents, index, dc_extent) {
> + if (n == group->nr_extents)
> + break;
> + devs[n++] = &dc_extent->dev;
> + }
> +
> + rc = dax_region_rm_resources(dax_region, devs, n);
> + kfree(devs);
> + return rc;
> +}
> +
> static int cxl_dax_region_notify(struct device *dev,
> struct cxl_notify_data *notify_data)
> {
> @@ -68,10 +101,7 @@ static int cxl_dax_region_notify(struct device *dev,
> case DCD_ADD_CAPACITY:
> return cxl_dax_group_add(dax_region, group);
> case DCD_RELEASE_CAPACITY:
> - dev_dbg(&cxlr_dax->dev,
> - "DCD RELEASE notify (tag %pUb): no-op (stub)\n",
> - &group->uuid);
> - return 0;
> + return cxl_dax_group_rm(dax_region, group);
> case DCD_FORCED_CAPACITY_RELEASE:
> default:
> dev_err(&cxlr_dax->dev, "Unknown DC event %d\n",
> diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h
> index f2ae5918f94d..414813a6137f 100644
> --- a/drivers/dax/dax-private.h
> +++ b/drivers/dax/dax-private.h
> @@ -146,13 +146,17 @@ struct dax_resource {
> };
>
> /*
> - * Similar to run_dax() dax_region_add_resource() is exported but is not
> - * intended to be a generic operation outside the dax subsystem. It is only
> + * Similar to run_dax() dax_region_{add,rm}_resource() are exported but are
> not
> + * intended to be generic operations outside the dax subsystem. They are
> only
> * generic between the dax layer and the dax drivers.
> */
> int dax_region_add_resource(struct dax_region *dax_region, struct device
> *dev,
> resource_size_t start, resource_size_t length,
> const uuid_t *tag, u16 seq_num);
> +int dax_region_rm_resource(struct dax_region *dax_region,
> + struct device *dev);
> +int dax_region_rm_resources(struct dax_region *dax_region,
> + struct device * const *devs, unsigned int n);
>
> static inline struct dev_dax *to_dev_dax(struct device *dev)
> {