On Wed, May 27, 2026 at 03:28:56PM -0700, Dave Jiang wrote:
> 
> 
> On 5/23/26 2:42 AM, Anisa Su wrote:
> > From: Ira Weiny <[email protected]>
> > 
> > Devices which optionally support Dynamic Capacity (DC) are configured
> > via mailbox commands.  CXL 3.2 section 9.13.3 requires the host to issue
> 
> 4.0
> 
done
> > the Get DC Configuration command in order to properly configure DCDs.
> > Without the Get DC Configuration command DCD can't be supported.
> > 
> > Implement the DC mailbox commands as specified in CXL 3.2 section
> 
> 4.0
> 
done :)
> > 8.2.10.9.9 (opcodes 48XXh) to read and store the DCD configuration
> > information.  Disable DCD if an invalid configuration is found.
> > 
> > Linux has no support for more than one dynamic capacity partition.  Read
> > and validate all the partitions but configure only the first partition
> > as 'dynamic ram A'.  Additional partitions can be added in the future if
> > such a device ever materializes.  Additionally is it anticipated that no
> > skips will be present from the end of the pmem partition.  Check for an
> > disallow this configuration as well.
> > 
> > Linux has no use for the trailing fields of the Get Dynamic Capacity
> > Configuration Output Payload (Total number of supported extents, number
> > of available extents, total number of supported tags, and number of
> > available tags).  Avoid defining those fields to use the more useful
> > dynamic C array.
> > 
> > Based on an original patch by Navneet Singh.
> > 
> > Signed-off-by: Ira Weiny <[email protected]>
> 
> Missing Anisa sign off
> 
Added
> > 
> > ---
> > Changes:
> > [anisa: rebase]
> > [jonathan: mbox.c: use max possible size for get_dc_config command to
> > avoid vmalloc]
> > [jonathan & fan: cxlmem.h: remove unused struct cxl_mem_dev_info]
> > ---
> >  drivers/cxl/core/hdm.c  |   2 +
> >  drivers/cxl/core/mbox.c | 182 ++++++++++++++++++++++++++++++++++++++++
> >  drivers/cxl/cxlmem.h    |  47 +++++++++++
> >  drivers/cxl/pci.c       |   3 +
> >  include/cxl/cxl.h       |   3 +-
> >  5 files changed, 236 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> > index 3930e130d6b6..28974adaab75 100644
> > --- a/drivers/cxl/core/hdm.c
> > +++ b/drivers/cxl/core/hdm.c
> > @@ -453,6 +453,8 @@ static const char *cxl_mode_name(enum 
> > cxl_partition_mode mode)
> >             return "ram";
> >     case CXL_PARTMODE_PMEM:
> >             return "pmem";
> > +   case CXL_PARTMODE_DYNAMIC_RAM_A:
> > +           return "dynamic_ram_a";
> >     default:
> >             return "";
> >     };
> > diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> > index 7ef5708bf210..71b29cd6abfe 100644
> > --- a/drivers/cxl/core/mbox.c
> > +++ b/drivers/cxl/core/mbox.c
> > @@ -1351,6 +1351,156 @@ int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 
> > cmd)
> >     return -EBUSY;
> >  }
> >  
> > +static int cxl_dc_check(struct device *dev, struct cxl_dc_partition_info 
> > *part_array,
> > +                   u8 index, struct cxl_dc_partition *dev_part)
> > +{
> > +   size_t blk_size = le64_to_cpu(dev_part->block_size);
> > +   size_t len = le64_to_cpu(dev_part->length);
> > +
> > +   part_array[index].start = le64_to_cpu(dev_part->base);
> > +   part_array[index].size = le64_to_cpu(dev_part->decode_length);
> > +   part_array[index].size *= CXL_CAPACITY_MULTIPLIER;
> > +
> > +   /* Check partitions are in increasing DPA order */
> > +   if (index > 0) {
> > +           struct cxl_dc_partition_info *prev_part = &part_array[index - 
> > 1];
> > +
> > +           if ((prev_part->start + prev_part->size) >
> > +                part_array[index].start) {
> > +                   dev_err(dev,
> > +                           "DPA ordering violation for DC partition %d and 
> > %d\n",
> > +                           index - 1, index);
> > +                   return -EINVAL;
> > +           }
> > +   }
> > +
> > +   if (!IS_ALIGNED(part_array[index].start, SZ_256M) ||
> > +       !IS_ALIGNED(part_array[index].start, blk_size)) {
> > +           dev_err(dev, "DC partition %d invalid start %zu blk size %zu\n",
> > +                   index, part_array[index].start, blk_size);
> > +           return -EINVAL;
> > +   }
> > +
> > +   if (part_array[index].size == 0 || len == 0 ||
> > +       part_array[index].size < len || !IS_ALIGNED(len, blk_size)) {
> > +           dev_err(dev, "DC partition %d invalid length; size %zu len %zu 
> > blk size %zu\n",
> > +                   index, part_array[index].size, len, blk_size);
> > +           return -EINVAL;
> > +   }
> > +
> > +   if (blk_size == 0 || blk_size % CXL_DCD_BLOCK_LINE_SIZE ||
> > +       !is_power_of_2(blk_size)) {
> > +           dev_err(dev, "DC partition %d invalid block size; %zu\n",
> 
> size: instead of size;
> 
fixed!
> > +                   index, blk_size);
> > +           return -EINVAL;
> > +   }
> > +
> > +   dev_dbg(dev, "DC partition %d start %zu start %zu size %zu\n",
> 
> should it be "DC partition %d start %zu size %zu blk_size: %zu\n"?
> 
yep, fixed! Also I changed the type of
struct cxl_dc_partition_info->start/size from size_t to u64 so
the print specifier uses %llu now. Unless it's better to stick with
size_t?

> > +           index, part_array[index].start, part_array[index].size,
> > +           blk_size);
> > +
> > +   return 0;
> > +}
> > +
> > +/* Returns the number of partitions in dc_resp or -ERRNO */
> > +static int cxl_get_dc_config(struct cxl_mailbox *mbox, u8 start_partition,
> > +                        struct cxl_mbox_get_dc_config_out *dc_resp,
> > +                        size_t dc_resp_size)
> > +{
> > +   struct cxl_mbox_get_dc_config_in get_dc = (struct 
> > cxl_mbox_get_dc_config_in) {
> > +           .partition_count = CXL_MAX_DC_PARTITIONS,
> > +           .start_partition_index = start_partition,
> > +   };
> > +   struct cxl_mbox_cmd mbox_cmd = (struct cxl_mbox_cmd) {
> > +           .opcode = CXL_MBOX_OP_GET_DC_CONFIG,
> > +           .payload_in = &get_dc,
> > +           .size_in = sizeof(get_dc),
> > +           .size_out = dc_resp_size,
> > +           .payload_out = dc_resp,
> > +           .min_out = 8,
> > +   };
> > +   int rc;
> > +
> > +   rc = cxl_internal_send_cmd(mbox, &mbox_cmd);
> > +   if (rc < 0)
> > +           return rc;
> > +
> > +   dev_dbg(mbox->host, "Read %d/%d DC partitions\n",
> > +           dc_resp->partitions_returned, dc_resp->avail_partition_count);
> > +   return dc_resp->partitions_returned;
> > +}
> > +
> > +/**
> > + * cxl_dev_dc_identify() - Reads the dynamic capacity information from the
> > + *                         device.
> > + * @mbox: Mailbox to query
> > + * @dc_info: The dynamic partition information to return
> > + *
> > + * Read Dynamic Capacity information from the device and return the 
> > partition
> > + * information.
> > + *
> > + * Return: 0 if identify was executed successfully, -ERRNO on error.
> > + *         on error only dynamic_bytes is left unchanged.
> > + */
> > +int cxl_dev_dc_identify(struct cxl_mailbox *mbox,
> > +                   struct cxl_dc_partition_info *dc_info)
> > +{
> > +   struct cxl_dc_partition_info partitions[CXL_MAX_DC_PARTITIONS];
> > +   struct device *dev = mbox->host;
> > +   size_t dc_resp_size =
> > +           sizeof(struct cxl_mbox_get_dc_config_out) + sizeof(partitions);
> 
> I think it needs to be something like below because of the 'partition' flex 
> array:
> size_t dc_resp_size = struct_size(dc_resp, partition, CXL_MAX_DC_PARTITIONS);
> 
> partitions is type 'struct cxl_dc_partition_info'. and dc_resp->partition is 
> type 'struct cxl_dc_partition'. So the size calucation is wrong. It should at 
> least be:
> size_t dc_resp_size = sizeof(struct cxl_mbox_get_dc_config_out) + 
> sizeof(struct cxl_dc_partition) * CXL_MAX_DC_PARTITIONS;
> 
Fixed!
> 
> > +   u8 start_partition;
> > +   u8 num_partitions;
> > +
> > +   struct cxl_mbox_get_dc_config_out *dc_resp __free(kfree) =
> > +                                   kmalloc(dc_resp_size, GFP_KERNEL);
> > +   if (!dc_resp)
> > +           return -ENOMEM;
> > +
> > +   /**
> 
> /*
> 
> > +    * Read and check all partition information for validity and potential
> > +    * debugging; see debug output in cxl_dc_check()
> > +    */
> > +   start_partition = 0;
> > +   num_partitions = 0;
> > +   do {
> > +           int rc, i, j;
> > +
> > +           rc = cxl_get_dc_config(mbox, start_partition, dc_resp, 
> > dc_resp_size);
> > +           if (rc < 0) {
> > +                   dev_err(dev, "Failed to get DC config: %d\n", rc);
> > +                   return rc;
> > +           }
> > +
                if (rc == 0) {
                        dev_err(dev,
                                "Device reported %u partitions available but 
returned none at index %u\n",
                                dc_resp->avail_partition_count, 
start_partition);
                        return -EIO;
                }
> > +           num_partitions += rc;
> 
> Would cxl_get_dc_config() keep returning 0 be a problem? Not likely to happen 
> unless device is malicious.
> 
Not sure but I added a check anyway. ^ See above. It prohibits
cxl_get_dc_config() returning 0 at all though. But could be changed to
err only if 0 partitions are returned X amount of times...?
> > +
> > +           if (num_partitions < 1 || num_partitions > 
> > CXL_MAX_DC_PARTITIONS) {
> > +                   dev_err(dev, "Invalid num of dynamic capacity 
> > partitions %d\n",
> > +                           num_partitions);
> > +                   return -EINVAL;
> > +           }
> > +
> > +           for (i = start_partition, j = 0; i < num_partitions; i++, j++) {
> > +                   rc = cxl_dc_check(dev, partitions, i,
> > +                                     &dc_resp->partition[j]);
> > +                   if (rc)
> > +                           return rc;
> > +           }
> > +
> > +           start_partition = num_partitions;
> > +
> > +   } while (num_partitions < dc_resp->avail_partition_count);
> > +
> > +   /* Return 1st partition */
> > +   dc_info->start = partitions[0].start;
> > +   dc_info->size = partitions[0].size;
> > +   dev_dbg(dev, "Returning partition 0 %zu size %zu\n",
> > +           dc_info->start, dc_info->size);
> > +
> > +   return 0;
> > +}
> > +EXPORT_SYMBOL_NS_GPL(cxl_dev_dc_identify, "CXL");
> > +
> >  static void add_part(struct cxl_dpa_info *info, u64 start, u64 size, enum 
> > cxl_partition_mode mode)
> >  {
> >     int i = info->nr_partitions;
> > @@ -1421,6 +1571,38 @@ int cxl_get_dirty_count(struct cxl_memdev_state 
> > *mds, u32 *count)
> >  }
> >  EXPORT_SYMBOL_NS_GPL(cxl_get_dirty_count, "CXL");
> >  
> > +void cxl_configure_dcd(struct cxl_memdev_state *mds, struct cxl_dpa_info 
> > *info)
> > +{
> > +   struct cxl_dc_partition_info dc_info = { 0 };
> > +   struct device *dev = mds->cxlds.dev;
> > +   size_t skip;
> > +   int rc;
> > +
> > +   rc = cxl_dev_dc_identify(&mds->cxlds.cxl_mbox, &dc_info);
> > +   if (rc) {
> > +           dev_warn(dev,
> > +                    "Failed to read Dynamic Capacity config: %d\n", rc);
> > +           cxl_disable_dcd(mds);
> > +           return;
> > +   }
> > +
> > +   /* Skips between pmem and the dynamic partition are not supported */
> > +   skip = dc_info.start - info->size;
> > +   if (skip) {
> 
> Would this be sufficient?
> 
> if (dc_info.start != info->size)
> 
Fixed!
> DJ
Thanks,
Anisa
> > +           dev_warn(dev,
> > +                    "Dynamic Capacity skip from pmem not supported: %zu\n",
> > +                    skip);
> > +           cxl_disable_dcd(mds);
> > +           return;
> > +   }
> > +
> > +   info->size += dc_info.size;
> > +   dev_dbg(dev, "Adding dynamic ram partition A; %zu size %zu\n",
> > +           dc_info.start, dc_info.size);
> > +   add_part(info, dc_info.start, dc_info.size, CXL_PARTMODE_DYNAMIC_RAM_A);
> > +}
> > +EXPORT_SYMBOL_NS_GPL(cxl_configure_dcd, "CXL");
> > +
> >  int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds)
> >  {
> >     struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
> > diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> > index 53444af448d7..87386488ad10 100644
> > --- a/drivers/cxl/cxlmem.h
> > +++ b/drivers/cxl/cxlmem.h
> > @@ -380,6 +380,8 @@ struct cxl_security_state {
> >     struct kernfs_node *sanitize_node;
> >  };
> >  
> > +#define CXL_MAX_DC_PARTITIONS 8
> > +
> >  static inline resource_size_t cxl_pmem_size(struct cxl_dev_state *cxlds)
> >  {
> >     /*
> > @@ -664,6 +666,31 @@ struct cxl_mbox_set_shutdown_state_in {
> >     u8 state;
> >  } __packed;
> >  
> > +/* See CXL 3.2 Table 8-178 get dynamic capacity config Input Payload */
> > +struct cxl_mbox_get_dc_config_in {
> > +   u8 partition_count;
> > +   u8 start_partition_index;
> > +} __packed;
> > +
> > +/* See CXL 3.2 Table 8-179 get dynamic capacity config Output Payload */
> > +struct cxl_mbox_get_dc_config_out {
> > +   u8 avail_partition_count;
> > +   u8 partitions_returned;
> > +   u8 rsvd[6];
> > +   /* See CXL 3.2 Table 8-180 */
> > +   struct cxl_dc_partition {
> > +           __le64 base;
> > +           __le64 decode_length;
> > +           __le64 length;
> > +           __le64 block_size;
> > +           __le32 dsmad_handle;
> > +           u8 flags;
> > +           u8 rsvd[3];
> > +   } __packed partition[] __counted_by(partitions_returned);
> > +   /* Trailing fields unused */
> > +} __packed;
> > +#define CXL_DCD_BLOCK_LINE_SIZE 0x40
> > +
> >  /* Set Timestamp CXL 3.0 Spec 8.2.9.4.2 */
> >  struct cxl_mbox_set_timestamp_in {
> >     __le64 timestamp;
> > @@ -787,9 +814,18 @@ enum {
> >  int cxl_internal_send_cmd(struct cxl_mailbox *cxl_mbox,
> >                       struct cxl_mbox_cmd *cmd);
> >  int cxl_dev_state_identify(struct cxl_memdev_state *mds);
> > +
> > +struct cxl_dc_partition_info {
> > +   size_t start;
> > +   size_t size;
> > +};
> > +
> > +int cxl_dev_dc_identify(struct cxl_mailbox *mbox,
> > +                   struct cxl_dc_partition_info *dc_info);
> >  int cxl_await_media_ready(struct cxl_dev_state *cxlds);
> >  int cxl_enumerate_cmds(struct cxl_memdev_state *mds);
> >  int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info 
> > *info);
> > +void cxl_configure_dcd(struct cxl_memdev_state *mds, struct cxl_dpa_info 
> > *info);
> >  struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev, u64 
> > serial,
> >                                              u16 dvsec);
> >  void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
> > @@ -803,6 +839,17 @@ void cxl_event_trace_record(struct cxl_memdev *cxlmd,
> >                         const uuid_t *uuid, union cxl_event *evt);
> >  int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count);
> >  int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds);
> > +
> > +static inline bool cxl_dcd_supported(struct cxl_memdev_state *mds)
> > +{
> > +   return mds->dcd_supported;
> > +}
> > +
> > +static inline void cxl_disable_dcd(struct cxl_memdev_state *mds)
> > +{
> > +   mds->dcd_supported = false;
> > +}
> > +
> >  int cxl_set_timestamp(struct cxl_memdev_state *mds);
> >  int cxl_poison_state_init(struct cxl_memdev_state *mds);
> >  int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
> > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> > index bace662dc988..60f9fa05d9ef 100644
> > --- a/drivers/cxl/pci.c
> > +++ b/drivers/cxl/pci.c
> > @@ -870,6 +870,9 @@ static int cxl_pci_probe(struct pci_dev *pdev, const 
> > struct pci_device_id *id)
> >     if (rc)
> >             return rc;
> >  
> > +   if (cxl_dcd_supported(mds))
> > +           cxl_configure_dcd(mds, &range_info);
> > +
> >     rc = cxl_dpa_setup(cxlds, &range_info);
> >     if (rc)
> >             return rc;
> > diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h
> > index fa7269154620..bb1df0cef863 100644
> > --- a/include/cxl/cxl.h
> > +++ b/include/cxl/cxl.h
> > @@ -133,6 +133,7 @@ struct cxl_dpa_perf {
> >  enum cxl_partition_mode {
> >     CXL_PARTMODE_RAM,
> >     CXL_PARTMODE_PMEM,
> > +   CXL_PARTMODE_DYNAMIC_RAM_A,
> >  };
> >  
> >  /**
> > @@ -147,7 +148,7 @@ struct cxl_dpa_partition {
> >     enum cxl_partition_mode mode;
> >  };
> >  
> > -#define CXL_NR_PARTITIONS_MAX 2
> > +#define CXL_NR_PARTITIONS_MAX 3
> >  
> >  /**
> >   * struct cxl_dev_state - The driver device state
> 

Reply via email to