On 3/3/23 16:09, Jonathan Cameron wrote:
Inject poison using qmp command cxl-inject-poison to add an entry to the
poison list.
For now, the poison is not returned CXL.mem reads,
What do you mean?
but only via the
mailbox command Get Poison List.
See CXL rev 3.0, sec 8.2.9.8.4.1 Get Poison list (Opcode 4300h)
Kernel patches to use this interface here:
https://lore.kernel.org/linux-cxl/cover.1665606782.git.alison.schofi...@intel.com/
To inject poison using qmp (telnet to the qmp port)
{ "execute": "qmp_capabilities" }
{ "execute": "cxl-inject-poison",
"arguments": {
"path": "/machine/peripheral/cxl-pmem0",
"start": 2048,
"length": 256
}
}
Adjusted to select a device on your machine.
Note that the poison list supported is kept short enough to avoid the
complexity of state machine that is needed to handle the MORE flag.
Signed-off-by: Jonathan Cameron <jonathan.came...@huawei.com>
---
v4:
- Widen the mask on Poison source (lower bits of the address)
to allow for Vendor Defined. Change will make it easier to potentially
add a means to inject such poison in the future. Today it has no
impact.
---
hw/cxl/cxl-mailbox-utils.c | 90 +++++++++++++++++++++++++++++++++++++
hw/mem/cxl_type3.c | 56 +++++++++++++++++++++++
hw/mem/cxl_type3_stubs.c | 6 +++
include/hw/cxl/cxl_device.h | 20 +++++++++
qapi/cxl.json | 18 ++++++++
5 files changed, 190 insertions(+)
+/*
+ * This is very inefficient, but good enough for now!
+ * Also the payload will always fit, so no need to handle the MORE flag and
+ * make this stateful. We may want to allow longer poison lists to aid
+ * testing that kernel functionality.
+ */
+static CXLRetCode cmd_media_get_poison_list(struct cxl_cmd *cmd,
+ CXLDeviceState *cxl_dstate,
+ uint16_t *len)
+{
+ struct get_poison_list_pl {
+ uint64_t pa;
+ uint64_t length;
+ } QEMU_PACKED;
+
+ struct get_poison_list_out_pl {
+ uint8_t flags;
+ uint8_t rsvd1;
+ uint64_t overflow_timestamp;
+ uint16_t count;
+ uint8_t rsvd2[0x14];
+ struct {
+ uint64_t addr;
+ uint32_t length;
+ uint32_t resv;
+ } QEMU_PACKED records[];
+ } QEMU_PACKED;
+
+ struct get_poison_list_pl *in = (void *)cmd->payload;
+ struct get_poison_list_out_pl *out = (void *)cmd->payload;
+ CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
+ uint16_t record_count = 0, i = 0;
+ uint64_t query_start, query_length;
+ CXLPoisonList *poison_list = &ct3d->poison_list;
+ CXLPoison *ent;
+ uint16_t out_pl_len;
+
+ query_start = ldq_le_p(&in->pa);
+ /* 64 byte alignemnt required */
+ if (query_start & 0x3f) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+ query_length = ldq_le_p(&in->length) * 64;
+
+ QLIST_FOREACH(ent, poison_list, node) {
+ /* Check for no overlap */
+ if (ent->start >= query_start + query_length ||
+ ent->start + ent->length <= query_start) {
+ continue;
+ }
+ record_count++;
+ }
+ out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
+ assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
+
+ memset(out, 0, out_pl_len);
+ QLIST_FOREACH(ent, poison_list, node) {
+ uint64_t start, stop;
+
+ /* Check for no overlap */
+ if (ent->start >= query_start + query_length ||
+ ent->start + ent->length <= query_start) {
+ continue;
+ }
+
+ /* Deal with overlap */
+ start = MAX(ent->start & 0xffffffffffffffc0, query_start);
+ stop = MIN((ent->start & 0xffffffffffffffc0) + ent->length,
~63ull or ROUND_DOWN(, 64ull) could be easier to read.
+ query_start + query_length);
+ stq_le_p(&out->records[i].addr, start | (ent->type & 0x7));
+ stl_le_p(&out->records[i].length, (stop - start) / 64);
+ i++;
+ }
+ if (ct3d->poison_list_overflowed) {
+ out->flags = (1 << 1);
+ stq_le_p(&out->overflow_timestamp, ct3d->poison_list_overflow_ts);
+ }
+ stw_le_p(&out->count, record_count);
+ *len = out_pl_len;
+ return CXL_MBOX_SUCCESS;
+}
+
diff --git a/qapi/cxl.json b/qapi/cxl.json
index 4be7d46041..9ebd680dfe 100644
--- a/qapi/cxl.json
+++ b/qapi/cxl.json
@@ -5,6 +5,24 @@
# = CXL devices
##
+##
+# @cxl-inject-poison:
+#
+# Poison records indicate that a CXL memory device knows that a particular
+# memory region may be corrupted. This may be because of locally detected
+# errors (e.g. ECC failure) or poisoned writes received from other components
+# in the system. This injection mechanism enables testing of the OS handling
+# of poison records which may be queried via the CXL mailbox.
+#
+# @path: CXL type 3 device canonical QOM path
+# @start: Start address - must be 64 byte aligned.
+# @length: Length of poison to inject - must be a multiple of 64 bytes.
+#
+# Since: 8.0
8.1 per the cover.
+##
+{ 'command': 'cxl-inject-poison',
+ 'data': { 'path': 'str', 'start': 'uint64', 'length': 'uint64' }}
+