On Thu, May 28, 2026 at 03:13:29PM -0700, Dave Jiang wrote:
> 
> 
> On 5/23/26 2:43 AM, Anisa Su wrote:
> > Replace the no-op ack stub for cxl_rm_extent() with the real teardown:
> > resolve the released DPA range to its region and endpoint decoder,
> > locate the matching dc_extent in cxlr_dax->dc_extents (filtering by
> > cxled, range containment, and tag), and tear down the entire containing
> > tag group atomically through rm_tag_group().  Partial release is not
> > supported.
> > 
> > rm_tag_group() invalidates caches once for the whole group (no mappings
> > exist at this point — partial release is not supported, so all members
> > are leaving together), then walks the group's dc_extents and releases
> > each via its devm action installed at online_tag_group() time.
> > 
> > cxl_region_invalidate_memregion() becomes non-static and is declared
> > in core.h so rm_tag_group() can flush caches before tearing the group down.
> > 
> > When the released range maps to no region (host crashed before
> > persisting acceptance, region destruction raced device release, or the
> > device is confused) the host has nothing to drop, so reply via
> > memdev_release_extent() to keep the device's view consistent.
> > 
> > Based on an original patch by Navneet Singh.
> > 
> > Signed-off-by: Ira Weiny <[email protected]>
> > Signed-off-by: Anisa Su <[email protected]>
> > 
> > ---
> > Changes:
> > [anisa: restructured from the original "Process dynamic partition
> >  events" monolith; this commit replaces the stubbed release with the
> >  real walk-and-tear-down of the matching tag group.]
> > ---
> >  drivers/cxl/core/core.h   |   8 +++
> >  drivers/cxl/core/extent.c | 101 ++++++++++++++++++++++++++++++++++++++
> >  drivers/cxl/core/mbox.c   |  19 -------
> >  drivers/cxl/core/region.c |   2 +-
> >  4 files changed, 110 insertions(+), 20 deletions(-)
> > 
> > diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
> > index 30b6b05b155b..65daaaadf68e 100644
> > --- a/drivers/cxl/core/core.h
> > +++ b/drivers/cxl/core/core.h
> > @@ -28,6 +28,8 @@ cxled_to_mds(struct cxl_endpoint_decoder *cxled)
> >     return container_of(cxlds, struct cxl_memdev_state, cxlds);
> >  }
> >  
> > +int cxl_region_invalidate_memregion(struct cxl_region *cxlr);
> 
> Doesn't this need to go within CONFIG_CXL_REGION?
> 
oopsie yes
> > +
> >  #ifdef CONFIG_CXL_REGION
> >  
> >  struct cxl_region_context {
> > @@ -67,6 +69,7 @@ int devm_cxl_add_pmem_region(struct cxl_region *cxlr);
> >  
> >  int cxl_add_extent(struct cxl_memdev_state *mds, struct cxl_extent *extent,
> >                u16 seq_num);
> > +int cxl_rm_extent(struct cxl_memdev_state *mds, struct cxl_extent *extent);
> >  int online_tag_group(struct cxl_dc_tag_group *group);
> >  #else
> >  static inline u64 cxl_dpa_to_hpa(struct cxl_region *cxlr,
> > @@ -79,6 +82,11 @@ static inline int cxl_add_extent(struct cxl_memdev_state 
> > *mds,
> >  {
> >     return 0;
> >  }
> > +static inline int cxl_rm_extent(struct cxl_memdev_state *mds,
> > +                           struct cxl_extent *extent)
> > +{
> > +   return 0;
> > +}
> >  static inline int online_tag_group(struct cxl_dc_tag_group *group)
> >  {
> >     return 0;
> > diff --git a/drivers/cxl/core/extent.c b/drivers/cxl/core/extent.c
> > index b01507022cff..51116c8139ed 100644
> > --- a/drivers/cxl/core/extent.c
> > +++ b/drivers/cxl/core/extent.c
> > @@ -344,6 +344,107 @@ static void dc_extent_unregister(void *ext)
> >     device_unregister(&dc_extent->dev);
> >  }
> >  
> > +static void rm_tag_group(struct cxl_dc_tag_group *group)
> > +{
> > +   struct device *region_dev = &group->cxlr_dax->dev;
> > +   struct dc_extent *dc_extent;
> > +   unsigned long index;
> > +
> > +   /*
> > +    * Tagged allocations release atomically.  Invalidate caches once
> > +    * for the whole group (no mappings exist at this point — partial
> > +    * release is not supported, so all members are leaving use
> > +    * together) before tearing down each dc_extent device.
> > +    *
> > +    * Pin @group across the walk: each devm_release_action runs the
> > +    * dc_extent_unregister action synchronously, which drops the last
> > +    * reference on the dc_extent device and fires dc_extent_release.
> > +    * The release decrements group->nr_extents and, on the final
> > +    * decrement, frees @group.  Without the pin the next iteration's
> > +    * xa_find_after() dereferences a freed xarray.
> > +    */
> > +   cxl_region_invalidate_memregion(group->cxlr_dax->cxlr);
> 
> check return value?
> 
done. if invalidate_memregion fails, doesn't release and device needs to
retry

> > +
> > +   group->nr_extents++;
> > +   xa_for_each(&group->dc_extents, index, dc_extent)
> > +           devm_release_action(region_dev, dc_extent_unregister, 
> > dc_extent);
> > +   group->nr_extents--;
> > +   if (!group->nr_extents)
> > +           free_tag_group(group);
> > +}
> > +
> > +int cxl_rm_extent(struct cxl_memdev_state *mds, struct cxl_extent *extent)
> > +{
> > +   u64 start_dpa = le64_to_cpu(extent->start_dpa);
> > +   struct cxl_memdev *cxlmd = mds->cxlds.cxlmd;
> > +   struct cxl_endpoint_decoder *cxled;
> > +   struct cxl_dax_region *cxlr_dax;
> > +   struct cxl_dc_tag_group *group;
> > +   struct dc_extent *dc_extent;
> > +   struct cxl_region *cxlr;
> > +   struct range dpa_range;
> > +   unsigned long idx;
> > +   uuid_t tag;
> > +
> > +   dpa_range = (struct range) {
> > +           .start = start_dpa,
> > +           .end = start_dpa + le64_to_cpu(extent->length) - 1,
> > +   };
> > +
> > +   guard(rwsem_read)(&cxl_rwsem.region);
> > +   cxlr = cxl_dpa_to_region(cxlmd, start_dpa, &cxled);
> > +   if (!cxlr) {
> > +           /*
> > +            * No region can happen here for a few reasons:
> > +            *
> > +            * 1) Extents were accepted and the host crashed/rebooted
> > +            *    leaving them in an accepted state.  On reboot the host
> > +            *    has not yet created a region to own them.
> > +            *
> > +            * 2) Region destruction won the race with the device releasing
> > +            *    all the extents.  Here the release will be a duplicate of
> > +            *    the one sent via region destruction.
> > +            *
> > +            * 3) The device is confused and releasing extents for which no
> > +            *    region ever existed.
> > +            *
> > +            * In all these cases make sure the device knows we are not
> > +            * using this extent.
> > +            */
> > +           memdev_release_extent(mds, &dpa_range);
> > +           return -ENXIO;
> > +   }
> > +
> > +   cxlr_dax = cxlr->cxlr_dax;
> 
> Does it need to check if cxlr_dax is NULL?
> 
added check. same behavior as if !cxlr, bc if !cxlr_dax, we're not
tracking or using any extents, so safe to reply to device with release

> DJ
> 
Thanks,
Anisa
> > +   import_uuid(&tag, extent->uuid);
> > +
> > +   /*
> > +    * Find the dc_extent whose DPA range covers the released range and
> > +    * whose tag matches.  The release targets the entire containing
> > +    * tag group atomically; partial release is not supported.
> > +    */
> > +   group = NULL;
> > +   xa_for_each(&cxlr_dax->dc_extents, idx, dc_extent) {
> > +           if (dc_extent->cxled != cxled)
> > +                   continue;
> > +           if (!range_contains(&dc_extent->dpa_range, &dpa_range))
> > +                   continue;
> > +           if (!uuid_equal(&dc_extent->group->uuid, &tag))
> > +                   continue;
> > +           group = dc_extent->group;
> > +           break;
> > +   }
> > +   if (!group) {
> > +           dev_err(&cxlr_dax->dev,
> > +                   "release DPA %pra (%pU) matches no dc_extent\n",
> > +                   &dpa_range, &tag);
> > +           return -EINVAL;
> > +   }
> > +
> > +   rm_tag_group(group);
> > +   return 0;
> > +}
> > +
> >  static void cleanup_pending_dc_extent(struct dc_extent *dc_extent)
> >  {
> >     struct cxl_dc_tag_group *group = dc_extent->group;
> > diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> > index 545c48c9c373..70e6c4c9743c 100644
> > --- a/drivers/cxl/core/mbox.c
> > +++ b/drivers/cxl/core/mbox.c
> > @@ -1587,25 +1587,6 @@ static int handle_add_event(struct cxl_memdev_state 
> > *mds,
> >     return rc;
> >  }
> >  
> > -/*
> > - * Stub: ack the release back to the device so it knows we are not
> > - * using the range.  A later commit replaces this with the real
> > - * teardown that walks the region's tag group and tears down the
> > - * member dc_extent devices.
> > - */
> > -static int cxl_rm_extent(struct cxl_memdev_state *mds,
> > -                    struct cxl_extent *extent)
> > -{
> > -   u64 start_dpa = le64_to_cpu(extent->start_dpa);
> > -   struct range dpa_range = {
> > -           .start = start_dpa,
> > -           .end = start_dpa + le64_to_cpu(extent->length) - 1,
> > -   };
> > -
> > -   memdev_release_extent(mds, &dpa_range);
> > -   return 0;
> > -}
> > -
> >  static char *cxl_dcd_evt_type_str(u8 type)
> >  {
> >     switch (type) {
> > diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> > index 733d77c07493..317630d8bf2e 100644
> > --- a/drivers/cxl/core/region.c
> > +++ b/drivers/cxl/core/region.c
> > @@ -222,7 +222,7 @@ static struct cxl_region_ref *cxl_rr_load(struct 
> > cxl_port *port,
> >     return xa_load(&port->regions, (unsigned long)cxlr);
> >  }
> >  
> > -static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
> > +int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
> >  {
> >     if (!cpu_cache_has_invalidate_memregion()) {
> >             if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) {
> 

Reply via email to