On 5/23/26 2:43 AM, Anisa Su wrote:
> The per-region scan in cxl_tag_already_committed() only catches a tag
> re-appearing on the same cxlr_dax.  The orchestrator owns tag
> allocation and is responsible for global uniqueness, but a buggy FM
> (or firmware redelivering a tag for a previously-closed allocation)
> can still hand the same uuid to extents on two different regions or
> memdevs, and the per-region check accepts the second one — leaving
> two independent cxl_dc_tag_group objects with the same uuid.
> 
> Add a host-wide registry of live tag groups with non-null uuids.
> alloc_tag_group() inserts on success, free_tag_group() removes; both
> skip the null-uuid case since the spec defines no cross-chain identity
> for untagged allocations.
> 
> An attempt to add a second group with the same uuid fails with
> -EBUSY.
> 
> No exit hook is needed: cxl_core only unloads after every dependent
> module has, by which point every live tag group has been freed and
> the registry is empty.
> 
> Signed-off-by: Anisa Su <[email protected]>

Reviewed-by: Dave Jiang <[email protected]>

> ---
>  drivers/cxl/core/core.h   |  5 ++++
>  drivers/cxl/core/extent.c | 60 +++++++++++++++++++++++++++++++++++++++
>  drivers/cxl/core/mbox.c   | 19 +++++++++++++
>  drivers/cxl/cxl.h         |  3 ++
>  4 files changed, 87 insertions(+)
> 
> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
> index 65daaaadf68e..02b36728c22d 100644
> --- a/drivers/cxl/core/core.h
> +++ b/drivers/cxl/core/core.h
> @@ -69,6 +69,7 @@ int devm_cxl_add_pmem_region(struct cxl_region *cxlr);
>  
>  int cxl_add_extent(struct cxl_memdev_state *mds, struct cxl_extent *extent,
>                  u16 seq_num);
> +bool cxl_tag_already_committed(const uuid_t *tag);
>  int cxl_rm_extent(struct cxl_memdev_state *mds, struct cxl_extent *extent);
>  int online_tag_group(struct cxl_dc_tag_group *group);
>  #else
> @@ -91,6 +92,10 @@ static inline int online_tag_group(struct cxl_dc_tag_group 
> *group)
>  {
>       return 0;
>  }
> +static inline bool cxl_tag_already_committed(const uuid_t *tag)
> +{
> +     return false;
> +}
>  static inline
>  struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa,
>                                    struct cxl_endpoint_decoder **cxled)
> diff --git a/drivers/cxl/core/extent.c b/drivers/cxl/core/extent.c
> index 51116c8139ed..f66fa8c600c5 100644
> --- a/drivers/cxl/core/extent.c
> +++ b/drivers/cxl/core/extent.c
> @@ -18,8 +18,60 @@ static void cxled_release_extent(struct 
> cxl_endpoint_decoder *cxled,
>       memdev_release_extent(mds, &dc_extent->dpa_range);
>  }
>  
> +/*
> + * Host-wide registry of live tag groups with non-null uuids.  Enforces
> + * that within this host, a tag uuid identifies exactly one allocation
> + * across all regions and memdevs — closing the gap left by the
> + * per-region scans in cxlr_add_extent() and uuid_claim_tagged().  The
> + * orchestrator (FM) owns tag-uuid allocation per spec; this is a
> + * defense against firmware bugs and orchestrator misbehavior.  Untagged
> + * (null uuid) allocations are not tracked: the spec defines no
> + * cross-chain identity for them.
> + */
> +static DEFINE_MUTEX(cxl_tag_lock);
> +static LIST_HEAD(cxl_tag_groups);
> +
> +static int cxl_tag_register(struct cxl_dc_tag_group *grp)
> +{
> +     struct cxl_dc_tag_group *g;
> +
> +     if (uuid_is_null(&grp->uuid))
> +             return 0;
> +
> +     guard(mutex)(&cxl_tag_lock);
> +     list_for_each_entry(g, &cxl_tag_groups, registry_node)
> +             if (uuid_equal(&g->uuid, &grp->uuid))
> +                     return -EBUSY;
> +     list_add_tail(&grp->registry_node, &cxl_tag_groups);
> +     return 0;
> +}
> +
> +static void cxl_tag_unregister(struct cxl_dc_tag_group *grp)
> +{
> +     if (uuid_is_null(&grp->uuid))
> +             return;
> +
> +     guard(mutex)(&cxl_tag_lock);
> +     list_del(&grp->registry_node);
> +}
> +
> +bool cxl_tag_already_committed(const uuid_t *tag)
> +{
> +     struct cxl_dc_tag_group *g;
> +
> +     if (uuid_is_null(tag))
> +             return false;
> +
> +     guard(mutex)(&cxl_tag_lock);
> +     list_for_each_entry(g, &cxl_tag_groups, registry_node)
> +             if (uuid_equal(&g->uuid, tag))
> +                     return true;
> +     return false;
> +}
> +
>  static void free_tag_group(struct cxl_dc_tag_group *group)
>  {
> +     cxl_tag_unregister(group);
>       xa_destroy(&group->dc_extents);
>       kfree(group);
>  }
> @@ -54,12 +106,20 @@ alloc_tag_group(struct cxl_dax_region *cxlr_dax, uuid_t 
> *uuid)
>  {
>       struct cxl_dc_tag_group *group __free(kfree) =
>                               kzalloc(sizeof(*group), GFP_KERNEL);
> +     int rc;
> +
>       if (!group)
>               return ERR_PTR(-ENOMEM);
>  
>       group->cxlr_dax = cxlr_dax;
>       uuid_copy(&group->uuid, uuid);
>       xa_init(&group->dc_extents);
> +     INIT_LIST_HEAD(&group->registry_node);
> +
> +     rc = cxl_tag_register(group);
> +     if (rc)
> +             return ERR_PTR(rc);
> +
>       return no_free_ptr(group);
>  }
>  
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index 70e6c4c9743c..85959dee35ea 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -1474,6 +1474,25 @@ static int cxl_add_pending(struct cxl_memdev_state 
> *mds)
>               extract_tag_group(pending, &tag, &group);
>               list_sort(NULL, &group, extent_seq_compare);
>  
> +             /*
> +              * Cross-More-chain uniqueness.  A non-null tag seen in this
> +              * group must not already correspond to a committed tag group
> +              * anywhere on this host.  More=0 was supposed to close that
> +              * allocation, and tag uuids must be unique across all regions
> +              * and memdevs (the orchestrator owns assignment per spec).
> +              * Either constraint failing — same chain redelivered, or two
> +              * distinct allocations colliding on the same uuid — is a
> +              * firmware/orchestrator bug; reject the whole group.
> +              */
> +             if (cxl_tag_already_committed(&tag)) {
> +                     dev_warn(dev,
> +                              "Tag %pUb: dropping group, tag already 
> committed (firmware/orchestrator bug)\n",
> +                              &tag);
> +                     list_for_each_entry_safe(pos, tmp, &group, list)
> +                             delete_extent_node(pos);
> +                     continue;
> +             }
> +
>               /* Sequence-number integrity */
>               if (cxl_check_group_seq(dev, &tag, &group)) {
>                       list_for_each_entry_safe(pos, tmp, &group, list)
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index cbbfba92fea9..a28e7b12a4a8 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -598,12 +598,15 @@ struct cxl_dax_region {
>   *           allocations.
>   * @nr_extents: live count of dc_extents in the group; the group is freed
>   *           when the last dc_extent device is released.
> + * @registry_node: anchor in the host-wide non-null-tag registry that
> + *           enforces tag uuid uniqueness across all regions and memdevs.
>   */
>  struct cxl_dc_tag_group {
>       struct cxl_dax_region *cxlr_dax;
>       uuid_t uuid;
>       struct xarray dc_extents;
>       unsigned int nr_extents;
> +     struct list_head registry_node;
>  };
>  
>  bool is_dc_extent(struct device *dev);


Reply via email to