From: Zhi Wang <zhiw...@kernel.org> Introduce a CXL type-2 device emulation that provides a minimum base for testing kernel CXL core type-2 support and CXL type-2 virtualization. It is also a good base for introducing more emulated features.
Currently, it only supports: - Emulating component registers with HDM decoders. - Volatile memory backend and emulation of region access. The emulation is intentionally not tightly coupled with the current CXL type-3 emulation, since many advanced CXL type-3 emulation features are not implemented in a CXL type-2 device. Co-developed-by: Ira Weiny <ira.we...@intel.com> Signed-off-by: Zhi Wang <zhiw...@kernel.org> --- MAINTAINERS | 1 + docs/system/devices/cxl.rst | 11 ++ hw/cxl/cxl-component-utils.c | 2 + hw/cxl/cxl-host.c | 19 +- hw/mem/Kconfig | 5 + hw/mem/cxl_accel.c | 319 +++++++++++++++++++++++++++++++++ hw/mem/meson.build | 1 + include/hw/cxl/cxl_component.h | 1 + include/hw/cxl/cxl_device.h | 25 +++ include/hw/pci/pci_ids.h | 1 + 10 files changed, 382 insertions(+), 3 deletions(-) create mode 100644 hw/mem/cxl_accel.c diff --git a/MAINTAINERS b/MAINTAINERS index aaf0505a21..72a6a505eb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2914,6 +2914,7 @@ R: Fan Ni <fan...@samsung.com> S: Supported F: hw/cxl/ F: hw/mem/cxl_type3.c +F: hw/mem/cxl_accel.c F: include/hw/cxl/ F: qapi/cxl.json diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst index 882b036f5e..13cc2417f2 100644 --- a/docs/system/devices/cxl.rst +++ b/docs/system/devices/cxl.rst @@ -332,6 +332,17 @@ The same volatile setup may optionally include an LSA region:: -device cxl-type3,bus=root_port13,volatile-memdev=vmem0,lsa=cxl-lsa0,id=cxl-vmem0 \ -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G +A very simple setup with just one directly attached CXL Type 2 Volatile Memory +Accelerator device:: + + qemu-system-x86_64 -M q35,cxl=on -m 4G,maxmem=8G,slots=8 -smp 4 \ + ... 
+ -object memory-backend-ram,id=vmem0,share=on,size=256M \ + -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ + -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \ + -device cxl-accel,bus=root_port13,volatile-memdev=vmem0,id=cxl-accel0 \ + -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G + A setup suitable for 4 way interleave. Only one fixed window provided, to enable 2 way interleave across 2 CXL host bridges. Each host bridge has 2 CXL Root Ports, with the CXL Type3 device directly attached (no switches).:: diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c index 355103d165..717ef117ac 100644 --- a/hw/cxl/cxl-component-utils.c +++ b/hw/cxl/cxl-component-utils.c @@ -262,6 +262,7 @@ static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk, write_msk[R_CXL_HDM_DECODER0_CTRL + i * hdm_inc] = 0x13ff; if (type == CXL2_DEVICE || type == CXL2_TYPE3_DEVICE || + type == CXL3_TYPE2_DEVICE || type == CXL2_LOGICAL_DEVICE) { write_msk[R_CXL_HDM_DECODER0_TARGET_LIST_LO + i * hdm_inc] = 0xf0000000; @@ -293,6 +294,7 @@ void cxl_component_register_init_common(uint32_t *reg_state, case CXL2_UPSTREAM_PORT: case CXL2_TYPE3_DEVICE: case CXL2_LOGICAL_DEVICE: + case CXL3_TYPE2_DEVICE: /* + HDM */ caps = 3; break; diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c index e9f2543c43..e603a3f2fc 100644 --- a/hw/cxl/cxl-host.c +++ b/hw/cxl/cxl-host.c @@ -201,7 +201,8 @@ static PCIDevice *cxl_cfmws_find_device(CXLFixedWindow *fw, hwaddr addr) return NULL; } - if (object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) { + if (object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3) || + object_dynamic_cast(OBJECT(d), TYPE_CXL_ACCEL)) { return d; } @@ -256,7 +257,13 @@ static MemTxResult cxl_read_cfmws(void *opaque, hwaddr addr, uint64_t *data, return MEMTX_ERROR; } - return cxl_type3_read(d, addr + fw->base, data, size, attrs); + if (object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) { + return cxl_type3_read(d, addr + fw->base, data, size, attrs); + } else if 
(object_dynamic_cast(OBJECT(d), TYPE_CXL_ACCEL)) { + return cxl_accel_read(d, addr + fw->base, data, size, attrs); + } + + return MEMTX_ERROR; } static MemTxResult cxl_write_cfmws(void *opaque, hwaddr addr, @@ -272,7 +279,13 @@ static MemTxResult cxl_write_cfmws(void *opaque, hwaddr addr, return MEMTX_OK; } - return cxl_type3_write(d, addr + fw->base, data, size, attrs); + if (object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) { + return cxl_type3_write(d, addr + fw->base, data, size, attrs); + } else if (object_dynamic_cast(OBJECT(d), TYPE_CXL_ACCEL)) { + return cxl_accel_write(d, addr + fw->base, data, size, attrs); + } + + return MEMTX_ERROR; } const MemoryRegionOps cfmws_ops = { diff --git a/hw/mem/Kconfig b/hw/mem/Kconfig index 73c5ae8ad9..1f7d08c17d 100644 --- a/hw/mem/Kconfig +++ b/hw/mem/Kconfig @@ -16,3 +16,8 @@ config CXL_MEM_DEVICE bool default y if CXL select MEM_DEVICE + +config CXL_ACCEL_DEVICE + bool + default y if CXL + select MEM_DEVICE diff --git a/hw/mem/cxl_accel.c b/hw/mem/cxl_accel.c new file mode 100644 index 0000000000..770072126d --- /dev/null +++ b/hw/mem/cxl_accel.c @@ -0,0 +1,319 @@ +/* + * CXL accel (type-2) device + * + * Copyright(C) 2024 NVIDIA Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2. See the + * COPYING file in the top-level directory. 
+ * + * SPDX-License-Identifier: GPL-2.0-only + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qemu/error-report.h" +#include "hw/mem/memory-device.h" +#include "hw/mem/pc-dimm.h" +#include "hw/pci/pci.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "qemu/range.h" +#include "sysemu/hostmem.h" +#include "sysemu/numa.h" +#include "hw/cxl/cxl.h" +#include "hw/pci/msix.h" + +static void update_dvsecs(CXLAccelDev *acceld) +{ + CXLComponentState *cxl_cstate = &acceld->cxl_cstate; + uint8_t *dvsec; + uint32_t range1_size_hi = 0, range1_size_lo = 0, + range1_base_hi = 0, range1_base_lo = 0; + + if (acceld->hostvmem) { + range1_size_hi = acceld->hostvmem->size >> 32; + range1_size_lo = (2 << 5) | (2 << 2) | 0x3 | + (acceld->hostvmem->size & 0xF0000000); + } + + dvsec = (uint8_t *)&(CXLDVSECDevice){ + .cap = 0x1e, + .ctrl = 0x2, + .status2 = 0x2, + .range1_size_hi = range1_size_hi, + .range1_size_lo = range1_size_lo, + .range1_base_hi = range1_base_hi, + .range1_base_lo = range1_base_lo, + }; + cxl_component_update_dvsec(cxl_cstate, PCIE_CXL_DEVICE_DVSEC_LENGTH, + PCIE_CXL_DEVICE_DVSEC, dvsec); + + dvsec = (uint8_t *)&(CXLDVSECRegisterLocator){ + .rsvd = 0, + .reg0_base_lo = RBI_COMPONENT_REG | CXL_COMPONENT_REG_BAR_IDX, + .reg0_base_hi = 0, + }; + cxl_component_update_dvsec(cxl_cstate, REG_LOC_DVSEC_LENGTH, + REG_LOC_DVSEC, dvsec); + + dvsec = (uint8_t *)&(CXLDVSECPortFlexBus){ + .cap = 0x26, /* 68B, IO, Mem, non-MLD */ + .ctrl = 0x02, /* IO always enabled */ + .status = 0x26, /* same as capabilities */ + .rcvd_mod_ts_data_phase1 = 0xef, /* WTF? 
*/ + }; + cxl_component_update_dvsec(cxl_cstate, PCIE_CXL3_FLEXBUS_PORT_DVSEC_LENGTH, + PCIE_FLEXBUS_PORT_DVSEC, dvsec); +} + +static void build_dvsecs(CXLAccelDev *acceld) +{ + CXLComponentState *cxl_cstate = &acceld->cxl_cstate; + + cxl_component_create_dvsec(cxl_cstate, CXL3_TYPE2_DEVICE, + PCIE_CXL_DEVICE_DVSEC_LENGTH, + PCIE_CXL_DEVICE_DVSEC, + PCIE_CXL31_DEVICE_DVSEC_REVID, NULL); + + cxl_component_create_dvsec(cxl_cstate, CXL3_TYPE2_DEVICE, + REG_LOC_DVSEC_LENGTH, REG_LOC_DVSEC, + REG_LOC_DVSEC_REVID, NULL); + + cxl_component_create_dvsec(cxl_cstate, CXL3_TYPE2_DEVICE, + PCIE_CXL3_FLEXBUS_PORT_DVSEC_LENGTH, + PCIE_FLEXBUS_PORT_DVSEC, + PCIE_CXL3_FLEXBUS_PORT_DVSEC_REVID, NULL); + update_dvsecs(acceld); +} + +static bool cxl_accel_dpa(CXLAccelDev *acceld, hwaddr host_addr, uint64_t *dpa) +{ + return cxl_host_addr_to_dpa(&acceld->cxl_cstate, host_addr, dpa); +} + +static int cxl_accel_hpa_to_as_and_dpa(CXLAccelDev *acceld, + hwaddr host_addr, + unsigned int size, + AddressSpace **as, + uint64_t *dpa_offset) +{ + MemoryRegion *vmr = NULL; + uint64_t vmr_size = 0; + + if (!acceld->hostvmem) { + return -ENODEV; + } + + vmr = host_memory_backend_get_memory(acceld->hostvmem); + if (!vmr) { + return -ENODEV; + } + + vmr_size = memory_region_size(vmr); + + if (!cxl_accel_dpa(acceld, host_addr, dpa_offset)) { + return -EINVAL; + } + + if (*dpa_offset >= vmr_size) { + return -EINVAL; + } + + *as = &acceld->hostvmem_as; + return 0; +} + +MemTxResult cxl_accel_read(PCIDevice *d, hwaddr host_addr, uint64_t *data, + unsigned size, MemTxAttrs attrs) +{ + CXLAccelDev *acceld = CXL_ACCEL(d); + uint64_t dpa_offset = 0; + AddressSpace *as = NULL; + int res; + + res = cxl_accel_hpa_to_as_and_dpa(acceld, host_addr, size, + &as, &dpa_offset); + if (res) { + return MEMTX_ERROR; + } + + return address_space_read(as, dpa_offset, attrs, data, size); +} + +MemTxResult cxl_accel_write(PCIDevice *d, hwaddr host_addr, uint64_t data, + unsigned size, MemTxAttrs attrs) +{ + CXLAccelDev 
*acceld = CXL_ACCEL(d); + uint64_t dpa_offset = 0; + AddressSpace *as = NULL; + int res; + + res = cxl_accel_hpa_to_as_and_dpa(acceld, host_addr, size, + &as, &dpa_offset); + if (res) { + return MEMTX_ERROR; + } + + return address_space_write(as, dpa_offset, attrs, &data, size); +} + +static void clean_memory(PCIDevice *pci_dev) +{ + CXLAccelDev *acceld = CXL_ACCEL(pci_dev); + + if (acceld->hostvmem) { + address_space_destroy(&acceld->hostvmem_as); + } +} + +static bool setup_memory(PCIDevice *pci_dev, Error **errp) +{ + CXLAccelDev *acceld = CXL_ACCEL(pci_dev); + + if (acceld->hostvmem) { + MemoryRegion *vmr; + char *v_name; + + vmr = host_memory_backend_get_memory(acceld->hostvmem); + if (!vmr) { + error_setg(errp, "volatile memdev must have backing device"); + return false; + } + if (host_memory_backend_is_mapped(acceld->hostvmem)) { + error_setg(errp, "memory backend %s can't be used multiple times.", + object_get_canonical_path_component(OBJECT(acceld->hostvmem))); + return false; + } + memory_region_set_nonvolatile(vmr, false); + memory_region_set_enabled(vmr, true); + host_memory_backend_set_mapped(acceld->hostvmem, true); + v_name = g_strdup("cxl-accel-dpa-vmem-space"); + address_space_init(&acceld->hostvmem_as, vmr, v_name); + g_free(v_name); + } + return true; +} + +static void setup_cxl_regs(PCIDevice *pci_dev) +{ + CXLAccelDev *acceld = CXL_ACCEL(pci_dev); + CXLComponentState *cxl_cstate = &acceld->cxl_cstate; + ComponentRegisters *regs = &cxl_cstate->crb; + MemoryRegion *mr = &regs->component_registers; + + cxl_cstate->dvsec_offset = 0x100; + cxl_cstate->pdev = pci_dev; + + build_dvsecs(acceld); + + cxl_component_register_block_init(OBJECT(pci_dev), cxl_cstate, + TYPE_CXL_ACCEL); + + pci_register_bar( + pci_dev, CXL_COMPONENT_REG_BAR_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, mr); +} + +#define MSIX_NUM 6 + +static int setup_msix(PCIDevice *pci_dev) +{ + int i, rc; + + /* MSI(-X) Initialization */ + rc = 
msix_init_exclusive_bar(pci_dev, MSIX_NUM, 4, NULL); + if (rc) { + return rc; + } + + for (i = 0; i < MSIX_NUM; i++) { + msix_vector_use(pci_dev, i); + } + return 0; +} + +static void cxl_accel_realize(PCIDevice *pci_dev, Error **errp) +{ + ERRP_GUARD(); + int rc; + uint8_t *pci_conf = pci_dev->config; + + if (!setup_memory(pci_dev, errp)) { + return; + } + + pci_config_set_prog_interface(pci_conf, 0x10); + pcie_endpoint_cap_init(pci_dev, 0x80); + + setup_cxl_regs(pci_dev); + + /* MSI(-X) Initialization */ + rc = setup_msix(pci_dev); + if (rc) { + clean_memory(pci_dev); + return; + } +} + +static void cxl_accel_exit(PCIDevice *pci_dev) +{ + clean_memory(pci_dev); +} + +static void cxl_accel_reset(DeviceState *dev) +{ + CXLAccelDev *acceld = CXL_ACCEL(dev); + CXLComponentState *cxl_cstate = &acceld->cxl_cstate; + uint32_t *reg_state = cxl_cstate->crb.cache_mem_registers; + uint32_t *write_msk = cxl_cstate->crb.cache_mem_regs_write_mask; + + update_dvsecs(acceld); + cxl_component_register_init_common(reg_state, write_msk, CXL3_TYPE2_DEVICE); +} + +static Property cxl_accel_props[] = { + DEFINE_PROP_LINK("volatile-memdev", CXLAccelDev, hostvmem, + TYPE_MEMORY_BACKEND, HostMemoryBackend *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void cxl_accel_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc); + + pc->realize = cxl_accel_realize; + pc->exit = cxl_accel_exit; + + pc->class_id = PCI_CLASS_CXL_QEMU_ACCEL; + pc->vendor_id = PCI_VENDOR_ID_INTEL; + pc->device_id = 0xd94; + pc->revision = 1; + + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + dc->desc = "CXL Accelerator Device (Type 2)"; + device_class_set_legacy_reset(dc, cxl_accel_reset); + device_class_set_props(dc, cxl_accel_props); +} + +static const TypeInfo cxl_accel_dev_info = { + .name = TYPE_CXL_ACCEL, + .parent = TYPE_PCI_DEVICE, + .class_size = sizeof(struct CXLAccelClass), + .class_init = cxl_accel_class_init, + .instance_size = 
sizeof(CXLAccelDev), + .interfaces = (InterfaceInfo[]) { + { INTERFACE_CXL_DEVICE }, + { INTERFACE_PCIE_DEVICE }, + {} + }, +}; + +static void cxl_accel_dev_registers(void) +{ + type_register_static(&cxl_accel_dev_info); +} + +type_init(cxl_accel_dev_registers); diff --git a/hw/mem/meson.build b/hw/mem/meson.build index 1c1c6da24b..36a395dbb6 100644 --- a/hw/mem/meson.build +++ b/hw/mem/meson.build @@ -4,6 +4,7 @@ mem_ss.add(when: 'CONFIG_DIMM', if_true: files('pc-dimm.c')) mem_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_mc.c')) mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c')) mem_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_true: files('cxl_type3.c')) +mem_ss.add(when: 'CONFIG_CXL_ACCEL_DEVICE', if_true: files('cxl_accel.c')) system_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_false: files('cxl_type3_stubs.c')) system_ss.add(when: 'CONFIG_MEM_DEVICE', if_false: files('memory-device-stubs.c')) diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h index 30fe4bfa24..0e78db26b8 100644 --- a/include/hw/cxl/cxl_component.h +++ b/include/hw/cxl/cxl_component.h @@ -29,6 +29,7 @@ enum reg_type { CXL2_UPSTREAM_PORT, CXL2_DOWNSTREAM_PORT, CXL3_SWITCH_MAILBOX_CCI, + CXL3_TYPE2_DEVICE, }; /* diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index 561b375dc8..ac26b264da 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -630,6 +630,26 @@ struct CSWMBCCIDev { CXLCCI *cci; }; +struct CXLAccelDev { + /* Private */ + PCIDevice parent_obj; + + /* Properties */ + HostMemoryBackend *hostvmem; + + /* State */ + AddressSpace hostvmem_as; + CXLComponentState cxl_cstate; +}; + +struct CXLAccelClass { + /* Private */ + PCIDeviceClass parent_class; +}; + +#define TYPE_CXL_ACCEL "cxl-accel" +OBJECT_DECLARE_TYPE(CXLAccelDev, CXLAccelClass, CXL_ACCEL) + #define TYPE_CXL_SWITCH_MAILBOX_CCI "cxl-switch-mailbox-cci" OBJECT_DECLARE_TYPE(CSWMBCCIDev, CSWMBCCIClass, CXL_SWITCH_MAILBOX_CCI) @@ -638,6 +658,11 @@ 
MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data, MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data, unsigned size, MemTxAttrs attrs); +MemTxResult cxl_accel_read(PCIDevice *d, hwaddr host_addr, uint64_t *data, + unsigned size, MemTxAttrs attrs); +MemTxResult cxl_accel_write(PCIDevice *d, hwaddr host_addr, uint64_t data, + unsigned size, MemTxAttrs attrs); + uint64_t cxl_device_get_timestamp(CXLDeviceState *cxlds); void cxl_event_init(CXLDeviceState *cxlds, int start_msg_num); diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h index f1a53fea8d..08bc469316 100644 --- a/include/hw/pci/pci_ids.h +++ b/include/hw/pci/pci_ids.h @@ -55,6 +55,7 @@ #define PCI_CLASS_MEMORY_RAM 0x0500 #define PCI_CLASS_MEMORY_FLASH 0x0501 #define PCI_CLASS_MEMORY_CXL 0x0502 +#define PCI_CLASS_CXL_QEMU_ACCEL 0x0503 #define PCI_CLASS_MEMORY_OTHER 0x0580 #define PCI_BASE_CLASS_BRIDGE 0x06 -- 2.43.5