From: Mate Kukri <k...@mkukri.xyz> It is based on existing SeaBIOS code, so the license is LGPLv3.
Tested as a coreboot payload on the following targets: - QEMU Q35 with emulated NVMe disk - ASUS H610M-K with Intel 660p NVMe disk Signed-off-by: Mate Kukri <k...@mkukri.xyz> --- Makefile.am | 2 +- grub-core/Makefile.core.def | 6 + grub-core/commands/nativedisk.c | 1 + grub-core/disk/nvme.c | 861 ++++++++++++++++++++++++++++++++ include/grub/disk.h | 1 + include/grub/nvme.h | 225 +++++++++ 6 files changed, 1095 insertions(+), 1 deletion(-) create mode 100644 grub-core/disk/nvme.c create mode 100644 include/grub/nvme.h diff --git a/Makefile.am b/Makefile.am index 43635d5ff..2c86dbbf6 100644 --- a/Makefile.am +++ b/Makefile.am @@ -434,7 +434,7 @@ if COND_i386_coreboot FS_PAYLOAD_MODULES ?= $(shell cat grub-core/fs.lst) default_payload.elf: grub-mkstandalone grub-mkimage FORCE test -f $@ && rm $@ || true - pkgdatadir=. ./grub-mkstandalone --grub-mkimage=./grub-mkimage -O i386-coreboot -o $@ --modules='ahci pata ehci uhci ohci usb_keyboard usbms part_msdos ext2 fat at_keyboard part_gpt usbserial_usbdebug cbfs' --install-modules='ls linux search configfile normal cbtime cbls memrw iorw minicmd lsmmap lspci halt reboot hexdump pcidump regexp setpci lsacpi chain test serial multiboot cbmemc linux16 gzio echo help syslinuxcfg xnu $(FS_PAYLOAD_MODULES) password_pbkdf2 $(EXTRA_PAYLOAD_MODULES)' --fonts= --themes= --locales= -d grub-core/ /boot/grub/grub.cfg=$(srcdir)/coreboot.cfg + pkgdatadir=. 
./grub-mkstandalone --grub-mkimage=./grub-mkimage -O i386-coreboot -o $@ --modules='ahci pata nvme ehci uhci ohci usb_keyboard usbms part_msdos ext2 fat at_keyboard part_gpt usbserial_usbdebug cbfs' --install-modules='ls linux search configfile normal cbtime cbls memrw iorw minicmd lsmmap lspci halt reboot hexdump pcidump regexp setpci lsacpi chain test serial multiboot cbmemc linux16 gzio echo help syslinuxcfg xnu $(FS_PAYLOAD_MODULES) password_pbkdf2 $(EXTRA_PAYLOAD_MODULES)' --fonts= --themes= --locales= -d grub-core/ /boot/grub/grub.cfg=$(srcdir)/coreboot.cfg endif endif diff --git a/grub-core/Makefile.core.def b/grub-core/Makefile.core.def index 063ef5dd7..7b4cf972a 100644 --- a/grub-core/Makefile.core.def +++ b/grub-core/Makefile.core.def @@ -2606,3 +2606,9 @@ module = { enable = efi; depends = part_gpt; }; + +module = { + name = nvme; + common = disk/nvme.c; + enable = pci; +}; diff --git a/grub-core/commands/nativedisk.c b/grub-core/commands/nativedisk.c index 580c8d3b0..a2c766fbd 100644 --- a/grub-core/commands/nativedisk.c +++ b/grub-core/commands/nativedisk.c @@ -78,6 +78,7 @@ get_uuid (const char *name, char **uuid, int getnative) case GRUB_DISK_DEVICE_ATA_ID: case GRUB_DISK_DEVICE_SCSI_ID: case GRUB_DISK_DEVICE_XEN: + case GRUB_DISK_DEVICE_NVME_ID: if (getnative) break; /* FALLTHROUGH */ diff --git a/grub-core/disk/nvme.c b/grub-core/disk/nvme.c new file mode 100644 index 000000000..b6a886464 --- /dev/null +++ b/grub-core/disk/nvme.c @@ -0,0 +1,861 @@ +/* nvme.c - NVMe support. */ +/* + * Based on SeaBIOS NVMe driver, ported to GRUB2 by Mate Kukri + * + * Copyright 2017 Amazon.com, Inc. or its affiliates. + * Copyright 2024 Mate Kukri <k...@mkukri.xyz> + * + * This file may be distributed under the terms of the GNU LGPLv3 license. 
+ */ + +#include <grub/disk.h> +#include <grub/dl.h> +#include <grub/nvme.h> +#include <grub/pci.h> +#include <grub/time.h> + +GRUB_MOD_LICENSE ("GPLv3"); /* LGPLv3 is GPLv3 compatible */ + +static grub_uint32_t grub_nvme_ctrlcnt; +static grub_uint32_t grub_nvme_nscnt; + +static struct grub_nvme_namespace *grub_nvme_namespaces; + +/* Page aligned "dma bounce buffer" of size GRUB_NVME_PAGE_SIZE */ +static void *nvme_dma_buffer; + +static void * +zalloc_page_aligned (grub_uint32_t size) +{ + void *res = grub_memalign (GRUB_NVME_PAGE_SIZE, size); + if (res) + grub_memset (res, 0, size); + return res; +} + +static void +nvme_init_queue_common (struct grub_nvme_ctrl *ctrl, + struct grub_nvme_queue *q, grub_uint16_t q_idx, + grub_uint16_t length) +{ + grub_memset (q, 0, sizeof (*q)); + q->dbl = + (grub_uint32_t *) ((char *) ctrl->reg + 0x1000 + + q_idx * ctrl->doorbell_stride); + grub_dprintf ("nvme", " q %p q_idx %u dbl %p\n", q, q_idx, q->dbl); + q->mask = length - 1; +} + +static int +nvme_init_sq (struct grub_nvme_ctrl *ctrl, struct grub_nvme_sq *sq, + grub_uint16_t q_idx, grub_uint16_t length, + struct grub_nvme_cq *cq) +{ + nvme_init_queue_common (ctrl, &sq->common, q_idx, length); + sq->sqe = zalloc_page_aligned (sizeof (*sq->sqe) * length); + + if (!sq->sqe) + { + return -1; + } + + grub_dprintf ("nvme", "sq %p q_idx %u sqe %p\n", sq, q_idx, sq->sqe); + sq->cq = cq; + sq->head = 0; + sq->tail = 0; + + return 0; +} + +static int +nvme_init_cq (struct grub_nvme_ctrl *ctrl, struct grub_nvme_cq *cq, + grub_uint16_t q_idx, grub_uint16_t length) +{ + nvme_init_queue_common (ctrl, &cq->common, q_idx, length); + cq->cqe = zalloc_page_aligned (sizeof (*cq->cqe) * length); + if (!cq->cqe) + { + return -1; + } + + cq->head = 0; + + /* All CQE phase bits are initialized to zero. This means initially we wait + for the host controller to set these to 1. 
*/ + cq->phase = 1; + + return 0; +} + +static int +nvme_poll_cq (struct grub_nvme_cq *cq) +{ + grub_uint32_t dw3 = *(volatile grub_uint32_t *) &cq->cqe[cq->head].dword[3]; + return (!!(dw3 & GRUB_NVME_CQE_DW3_P) == cq->phase); +} + +static int +nvme_is_cqe_success (const struct grub_nvme_cqe *cqe) +{ + return ((cqe->status >> 1) & 0xFF) == 0; +} + +static struct grub_nvme_cqe +nvme_error_cqe (void) +{ + struct grub_nvme_cqe r; + + /* 0xFF is a vendor specific status code != success. Should be okay for + indicating failure. */ + grub_memset (&r, 0xFF, sizeof (r)); + return r; +} + +static struct grub_nvme_cqe +nvme_consume_cqe (struct grub_nvme_sq *sq) +{ + struct grub_nvme_cq *cq = sq->cq; + + if (!nvme_poll_cq (cq)) + { + /* Cannot consume a completion queue entry, if there is none ready. */ + return nvme_error_cqe (); + } + + struct grub_nvme_cqe *cqe = &cq->cqe[cq->head]; + grub_uint16_t cq_next_head = (cq->head + 1) & cq->common.mask; + grub_dprintf ("nvme", "cq %p head %u -> %u\n", cq, cq->head, cq_next_head); + if (cq_next_head < cq->head) + { + grub_dprintf ("nvme", "cq %p wrap\n", cq); + cq->phase = ~cq->phase; + } + cq->head = cq_next_head; + + /* Update the submission queue head. */ + if (cqe->sq_head != sq->head) + { + sq->head = cqe->sq_head; + grub_dprintf ("nvme", "sq %p advanced to %u\n", sq, cqe->sq_head); + } + + /* Tell the controller that we consumed the completion. */ + *(volatile grub_uint32_t *) cq->common.dbl = cq->head; + + return *cqe; +} + +static struct grub_nvme_cqe +nvme_wait (struct grub_nvme_sq *sq) +{ + const grub_uint64_t limit_ms = grub_get_time_ms () + 5000; + + while (!nvme_poll_cq (sq->cq)) + { + if (limit_ms < grub_get_time_ms ()) + { + grub_dprintf ("nvme", "NVME SQ wait timeout\n"); + return nvme_error_cqe (); + } + } + + return nvme_consume_cqe (sq); +} + +/* Returns the next submission queue entry (or NULL if the queue is full). It + also fills out Command Dword 0 and clears the rest. 
*/ +static struct grub_nvme_sqe * +nvme_get_next_sqe (struct grub_nvme_sq *sq, grub_uint8_t opc, void *metadata, + void *data, void *data2) +{ + if (((sq->head + 1) & sq->common.mask) == sq->tail) + { + grub_dprintf ("nvme", "submission queue is full\n"); + return NULL; + } + + struct grub_nvme_sqe *sqe = &sq->sqe[sq->tail]; + grub_dprintf ("nvme", "sq %p next_sqe %u\n", sq, sq->tail); + + grub_memset (sqe, 0, sizeof (*sqe)); + sqe->cdw0 = opc | (sq->tail << 16 /* CID */ ); + sqe->mptr = (grub_uint32_t) metadata; + sqe->dptr_prp1 = (grub_uint32_t) data; + sqe->dptr_prp2 = (grub_uint32_t) data2; + + return sqe; +} + +/* Call this after you've filled out an sqe that you've got from nvme_get_next_sqe. */ +static void +nvme_commit_sqe (struct grub_nvme_sq *sq) +{ + grub_dprintf ("nvme", "sq %p commit_sqe %u\n", sq, sq->tail); + sq->tail = (sq->tail + 1) & sq->common.mask; + *(volatile grub_uint32_t *) sq->common.dbl = sq->tail; +} + +/* Perform an identify command on the admin queue and return the resulting + buffer. This may be a NULL pointer, if something failed. This function + cannot be used after initialization, because it uses buffers in tmp zone. 
*/ +static union nvme_identify * +nvme_admin_identify (struct grub_nvme_ctrl *ctrl, grub_uint8_t cns, + grub_uint32_t nsid) +{ + union nvme_identify *identify_buf = zalloc_page_aligned (4096); + if (!identify_buf) + return NULL; + + struct grub_nvme_sqe *cmd_identify; + cmd_identify = nvme_get_next_sqe (&ctrl->admin_sq, + GRUB_NVME_SQE_OPC_ADMIN_IDENTIFY, NULL, + identify_buf, NULL); + if (!cmd_identify) + goto error; + + cmd_identify->nsid = nsid; + cmd_identify->dword[10] = cns; + + nvme_commit_sqe (&ctrl->admin_sq); + + struct grub_nvme_cqe cqe = nvme_wait (&ctrl->admin_sq); + + if (!nvme_is_cqe_success (&cqe)) + { + goto error; + } + + return identify_buf; +error: + grub_free (identify_buf); + return NULL; +} + +static struct grub_nvme_identify_ctrl * +nvme_admin_identify_ctrl (struct grub_nvme_ctrl *ctrl) +{ + return &nvme_admin_identify (ctrl, GRUB_NVME_ADMIN_IDENTIFY_CNS_ID_CTRL, + 0)->ctrl; +} + +static struct grub_nvme_identify_ns * +nvme_admin_identify_ns (struct grub_nvme_ctrl *ctrl, grub_uint32_t ns_id) +{ + return &nvme_admin_identify (ctrl, GRUB_NVME_ADMIN_IDENTIFY_CNS_ID_NS, + ns_id)->ns; +} + +static void +nvme_probe_ns (struct grub_nvme_ctrl *ctrl, grub_uint32_t ns_idx, + grub_uint8_t mdts) +{ + grub_uint32_t ns_id = ns_idx + 1; + + struct grub_nvme_identify_ns *id = nvme_admin_identify_ns (ctrl, ns_id); + if (!id) + { + grub_dprintf ("nvme", "NVMe couldn't identify namespace %u.\n", ns_id); + goto free_buffer; + } + + grub_uint8_t current_lba_format = id->flbas & 0xF; + if (current_lba_format > id->nlbaf) + { + grub_dprintf ("nvme", + "NVMe NS %u: current LBA format %u is beyond what the " + " namespace supports (%u)?\n", ns_id, current_lba_format, + id->nlbaf + 1); + goto free_buffer; + } + + if (!id->nsze) + { + grub_dprintf ("nvme", "NVMe NS %u is inactive.\n", ns_id); + goto free_buffer; + } + + if (!nvme_dma_buffer) + { + nvme_dma_buffer = zalloc_page_aligned (GRUB_NVME_PAGE_SIZE); + if (!nvme_dma_buffer) + { + goto free_buffer; + } + } + + 
struct grub_nvme_namespace *ns = grub_malloc (sizeof (*ns)); + if (!ns) + { + goto free_buffer; + } + grub_memset (ns, 0, sizeof (*ns)); + ns->ctrl = ctrl; + ns->ns_id = ns_id; + ns->lba_count = id->nsze; + + struct grub_nvme_lba_format *fmt = &id->lbaf[current_lba_format]; + + ns->block_size = 1U << fmt->lbads; + ns->metadata_size = fmt->ms; + + if (ns->block_size > GRUB_NVME_PAGE_SIZE) + { + /* If we see devices that trigger this path, we need to increase our + buffer size. */ + grub_free (ns); + goto free_buffer; + } + + if (mdts) + { + ns->max_req_size = + ((1U << mdts) * GRUB_NVME_PAGE_SIZE) / ns->block_size; + grub_dprintf ("nvme", "NVME NS %u max request size: %d sectors\n", + ns_id, ns->max_req_size); + } + else + { + ns->max_req_size = -1U; + } + + ns->devname = grub_xasprintf ("nvme%un%u", ctrl->ctrlnum, ns_id); + ns->nsnum = grub_nvme_nscnt++; + + grub_list_push (GRUB_AS_LIST_P (&grub_nvme_namespaces), GRUB_AS_LIST (ns)); + +free_buffer: + grub_free (id); +} + + +/* Release memory allocated for a completion queue */ +static void +nvme_destroy_cq (struct grub_nvme_cq *cq) +{ + grub_free (cq->cqe); + cq->cqe = NULL; +} + +/* Release memory allocated for a submission queue */ +static void +nvme_destroy_sq (struct grub_nvme_sq *sq) +{ + grub_free (sq->sqe); + sq->sqe = NULL; +} + +/* Returns 0 on success. 
*/ +static int +nvme_create_io_cq (struct grub_nvme_ctrl *ctrl, struct grub_nvme_cq *cq, + grub_uint16_t q_idx) +{ + int rc; + struct grub_nvme_sqe *cmd_create_cq; + grub_uint32_t length = 1 + (ctrl->reg->cap & 0xffff); + if (length > GRUB_NVME_PAGE_SIZE / sizeof (struct grub_nvme_cqe)) + length = GRUB_NVME_PAGE_SIZE / sizeof (struct grub_nvme_cqe); + + rc = nvme_init_cq (ctrl, cq, q_idx, length); + if (rc) + { + goto err; + } + + cmd_create_cq = nvme_get_next_sqe (&ctrl->admin_sq, + GRUB_NVME_SQE_OPC_ADMIN_CREATE_IO_CQ, + NULL, cq->cqe, NULL); + if (!cmd_create_cq) + { + goto err_destroy_cq; + } + + cmd_create_cq->dword[10] = (cq->common.mask << 16) | (q_idx >> 1); + cmd_create_cq->dword[11] = 1 /* physically contiguous */ ; + + nvme_commit_sqe (&ctrl->admin_sq); + + struct grub_nvme_cqe cqe = nvme_wait (&ctrl->admin_sq); + + if (!nvme_is_cqe_success (&cqe)) + { + grub_dprintf ("nvme", "create io cq failed: %08x %08x %08x %08x\n", + cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]); + + goto err_destroy_cq; + } + + return 0; + +err_destroy_cq: + nvme_destroy_cq (cq); +err: + return -1; +} + +/* Returns 0 on success. 
*/ +static int +nvme_create_io_sq (struct grub_nvme_ctrl *ctrl, struct grub_nvme_sq *sq, + grub_uint16_t q_idx, struct grub_nvme_cq *cq) +{ + int rc; + struct grub_nvme_sqe *cmd_create_sq; + grub_uint32_t length = 1 + (ctrl->reg->cap & 0xffff); + if (length > GRUB_NVME_PAGE_SIZE / sizeof (struct grub_nvme_cqe)) + length = GRUB_NVME_PAGE_SIZE / sizeof (struct grub_nvme_cqe); + + rc = nvme_init_sq (ctrl, sq, q_idx, length, cq); + if (rc) + { + goto err; + } + + cmd_create_sq = nvme_get_next_sqe (&ctrl->admin_sq, + GRUB_NVME_SQE_OPC_ADMIN_CREATE_IO_SQ, + NULL, sq->sqe, NULL); + if (!cmd_create_sq) + { + goto err_destroy_sq; + } + + cmd_create_sq->dword[10] = (sq->common.mask << 16) | (q_idx >> 1); + cmd_create_sq->dword[11] = (q_idx >> 1) << 16 | 1 /* contiguous */ ; + grub_dprintf ("nvme", "sq %p create dword10 %08x dword11 %08x\n", sq, + cmd_create_sq->dword[10], cmd_create_sq->dword[11]); + + nvme_commit_sqe (&ctrl->admin_sq); + + struct grub_nvme_cqe cqe = nvme_wait (&ctrl->admin_sq); + + if (!nvme_is_cqe_success (&cqe)) + { + grub_dprintf ("nvme", "create io sq failed: %08x %08x %08x %08x\n", + cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]); + goto err_destroy_sq; + } + + return 0; + +err_destroy_sq: + nvme_destroy_sq (sq); +err: + return -1; +} + +/* Reads count sectors into buf. The buffer cannot cross page boundaries. */ +static int +nvme_io_xfer (struct grub_nvme_namespace *ns, grub_uint64_t lba, void *prp1, + void *prp2, grub_uint16_t count, int write) +{ + if (((grub_uint32_t) prp1 & 0x3) || ((grub_uint32_t) prp2 & 0x3)) + { + /* Buffer is misaligned */ + return -1; + } + + struct grub_nvme_sqe *io_read = nvme_get_next_sqe (&ns->ctrl->io_sq, + write ? 
+ GRUB_NVME_SQE_OPC_IO_WRITE + : + GRUB_NVME_SQE_OPC_IO_READ, + NULL, prp1, prp2); + io_read->nsid = ns->ns_id; + io_read->dword[10] = (grub_uint32_t) lba; + io_read->dword[11] = (grub_uint32_t) (lba >> 32); + io_read->dword[12] = (1U << 31 /* limited retry */ ) | (count - 1); + + nvme_commit_sqe (&ns->ctrl->io_sq); + + struct grub_nvme_cqe cqe = nvme_wait (&ns->ctrl->io_sq); + + if (!nvme_is_cqe_success (&cqe)) + { + grub_dprintf ("nvme", "read io: %08x %08x %08x %08x\n", + cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]); + + return -1; + } + + grub_dprintf ("nvme", "ns %u %s lba %llu+%u\n", ns->ns_id, + write ? "write" : "read", lba, count); + return count; +} + +/* Transfer up to one page of data using the internal dma bounce buffer */ +static int +nvme_bounce_xfer (struct grub_nvme_namespace *ns, grub_uint64_t lba, + void *buf, grub_uint16_t count, int write) +{ + grub_uint16_t const max_blocks = GRUB_NVME_PAGE_SIZE / ns->block_size; + grub_uint16_t blocks = count < max_blocks ? 
count : max_blocks; + + if (write) + grub_memcpy (nvme_dma_buffer, buf, blocks * ns->block_size); + + int res = nvme_io_xfer (ns, lba, nvme_dma_buffer, NULL, blocks, write); + + if (!write && res >= 0) + grub_memcpy (buf, nvme_dma_buffer, res * ns->block_size); + + return res; +} + +#define GRUB_NVME_MAX_PRPL_ENTRIES 15 /* Allows requests up to 64kb */ + +/* Transfer data using page list (if applicable) */ +static int +nvme_prpl_xfer (struct grub_nvme_namespace *ns, grub_uint64_t lba, void *buf, + grub_uint16_t count, int write) +{ + grub_uint32_t base = (long) buf; + grub_int32_t size; + + if (count > ns->max_req_size) + count = ns->max_req_size; + + size = count * ns->block_size; + /* Special case for transfers that fit into PRP1, but are unaligned */ + if (((size + (base & ~GRUB_NVME_PAGE_MASK)) <= GRUB_NVME_PAGE_SIZE)) + goto single; + + /* Every request has to be page aligned */ + if (base & ~GRUB_NVME_PAGE_MASK) + goto bounce; + + /* Make sure a full block fits into the last chunk */ + if (size & (ns->block_size - 1ULL)) + goto bounce; + + /* Build PRP list if we need to describe more than 2 pages */ + if ((ns->block_size * count) > (GRUB_NVME_PAGE_SIZE * 2)) + { + grub_uint32_t prpl_len = 0; + grub_uint64_t *prpl = nvme_dma_buffer; + int first_page = 1; + for (; size > 0; + base += GRUB_NVME_PAGE_SIZE, size -= GRUB_NVME_PAGE_SIZE) + { + if (first_page) + { + /* First page is special */ + first_page = 0; + continue; + } + if (prpl_len >= GRUB_NVME_MAX_PRPL_ENTRIES) + goto bounce; + prpl[prpl_len++] = base; + } + return nvme_io_xfer (ns, lba, buf, prpl, count, write); + } + + /* Directly embed the 2nd page if we only need 2 pages */ + if ((ns->block_size * count) > GRUB_NVME_PAGE_SIZE) + return nvme_io_xfer (ns, lba, buf, (char *) buf + GRUB_NVME_PAGE_SIZE, + count, write); + +single: + /* One page is enough, don't expose anything else */ + return nvme_io_xfer (ns, lba, buf, NULL, count, write); + +bounce: + /* Use bounce buffer to make transfer */ + return 
nvme_bounce_xfer (ns, lba, buf, count, write); +} + +static int +nvme_create_io_queues (struct grub_nvme_ctrl *ctrl) +{ + if (nvme_create_io_cq (ctrl, &ctrl->io_cq, 3)) + goto err; + + if (nvme_create_io_sq (ctrl, &ctrl->io_sq, 2, &ctrl->io_cq)) + goto err_free_cq; + + return 0; + +err_free_cq: + nvme_destroy_cq (&ctrl->io_cq); +err: + return -1; +} + +/* Waits for CSTS.RDY to match rdy. Returns 0 on success. */ +static int +nvme_wait_csts_rdy (struct grub_nvme_ctrl *ctrl, unsigned rdy) +{ + const grub_uint64_t limit_ms = + grub_get_time_ms () + 500 /* ms */ * ((ctrl->reg->cap >> 24) & 0xFFU); + grub_uint32_t csts; + + while (rdy != ((csts = ctrl->reg->csts) & GRUB_NVME_CSTS_RDY)) + { + if (csts & GRUB_NVME_CSTS_FATAL) + { + grub_dprintf ("nvme", + "NVMe fatal error during controller shutdown\n"); + return -1; + } + + if (limit_ms < grub_get_time_ms ()) + { + grub_dprintf ("nvme", "NVMe timeout waiting for CSTS.RDY\n"); + return -1; + } + } + + return 0; +} + +static int +grub_nvme_controller_enable (struct grub_nvme_ctrl *ctrl) +{ + grub_pci_address_t addr; + int rc; + + addr = grub_pci_make_address (ctrl->pci, GRUB_PCI_REG_COMMAND); + grub_pci_write_word (addr, + grub_pci_read_word (addr) | + GRUB_PCI_COMMAND_BUS_MASTER); + + /* Turn the controller off. 
*/ + ctrl->reg->cc = 0; + if (nvme_wait_csts_rdy (ctrl, 0)) + { + grub_dprintf ("nvme", "NVMe fatal error during controller shutdown\n"); + return -1; + } + + ctrl->doorbell_stride = 4U << ((ctrl->reg->cap >> 32) & 0xF); + + rc = nvme_init_cq (ctrl, &ctrl->admin_cq, 1, + GRUB_NVME_PAGE_SIZE / sizeof (struct grub_nvme_cqe)); + if (rc) + { + return -1; + } + + rc = nvme_init_sq (ctrl, &ctrl->admin_sq, 0, + GRUB_NVME_PAGE_SIZE / sizeof (struct grub_nvme_sqe), + &ctrl->admin_cq); + if (rc) + { + goto err_destroy_admin_cq; + } + + ctrl->reg->aqa = ctrl->admin_cq.common.mask << 16 + | ctrl->admin_sq.common.mask; + + ctrl->reg->asq = (grub_uint32_t) ctrl->admin_sq.sqe; + ctrl->reg->acq = (grub_uint32_t) ctrl->admin_cq.cqe; + + grub_dprintf ("nvme", " admin submission queue: %p\n", ctrl->admin_sq.sqe); + grub_dprintf ("nvme", " admin completion queue: %p\n", ctrl->admin_cq.cqe); + + ctrl->reg->cc = GRUB_NVME_CC_EN | (GRUB_NVME_CQE_SIZE_LOG << 20) + | (GRUB_NVME_SQE_SIZE_LOG << 16 /* IOSQES */ ); + + if (nvme_wait_csts_rdy (ctrl, 1)) + { + grub_dprintf ("nvme", "NVMe fatal error while enabling controller\n"); + goto err_destroy_admin_sq; + } + + /* The admin queue is set up and the controller is ready. Let's figure out + what namespaces we have. */ + + struct grub_nvme_identify_ctrl *identify = nvme_admin_identify_ctrl (ctrl); + + if (!identify) + { + grub_dprintf ("nvme", "NVMe couldn't identify controller.\n"); + goto err_destroy_admin_sq; + } + + grub_dprintf ("nvme", "NVMe has %u namespace%s.\n", + identify->nn, (identify->nn == 1) ? "" : "s"); + + ctrl->ns_count = identify->nn; + grub_uint8_t mdts = identify->mdts; + grub_free (identify); + + if ((ctrl->ns_count == 0) || nvme_create_io_queues (ctrl)) + { + /* No point to continue, if the controller says it doesn't have + namespaces or we couldn't create I/O queues. 
*/ + goto err_destroy_admin_sq; + } + + /* Give the controller a global number */ + ctrl->ctrlnum = grub_nvme_ctrlcnt++; + + /* Populate namespace IDs */ + for (grub_uint32_t ns_idx = 0; ns_idx < ctrl->ns_count; ns_idx++) + { + nvme_probe_ns (ctrl, ns_idx, mdts); + } + + grub_dprintf ("nvme", "NVMe initialization complete!\n"); + return 0; + +err_destroy_admin_sq: + nvme_destroy_sq (&ctrl->admin_sq); +err_destroy_admin_cq: + nvme_destroy_cq (&ctrl->admin_cq); + return -1; +} + +static int +grub_nvme_pci_probe (grub_pci_device_t dev, grub_pci_id_t pciid + __attribute__((unused)), void *data + __attribute__((unused))) +{ + grub_pci_address_t addr; + grub_uint32_t class, bar, version; + struct grub_nvme_reg volatile *reg; + + class = grub_pci_read (grub_pci_make_address (dev, GRUB_PCI_REG_CLASS)); + if (class >> 16 != 0x0108) + return 0; + if ((class >> 8 & 0xff) != 2) + { /* as of NVM 1.0e */ + grub_dprintf ("nvme", "Found incompatble NVMe: prog-if=%02x\n", + class >> 8 & 0xff); + return 0; + } + + bar = + grub_pci_read (grub_pci_make_address (dev, GRUB_PCI_REG_ADDRESS_REG0)); + reg = + grub_pci_device_map_range (dev, bar & GRUB_PCI_ADDR_MEM_MASK, + sizeof (*reg)); + + addr = grub_pci_make_address (dev, GRUB_PCI_REG_COMMAND); + grub_pci_write_word (addr, + grub_pci_read_word (addr) | + GRUB_PCI_COMMAND_MEM_ENABLED); + + version = reg->vs; + grub_dprintf ("nvme", "Found NVMe controller with version %u.%u.%u.\n", + version >> 16, (version >> 8) & 0xFF, version & 0xFF); + grub_dprintf ("nvme", " Capabilities %016llx\n", reg->cap); + + if (~reg->cap & GRUB_NVME_CAP_CSS_NVME) + { + grub_dprintf ("nvme", + "Controller doesn't speak NVMe command set. 
Skipping.\n"); + goto err; + } + + struct grub_nvme_ctrl *ctrl = grub_malloc (sizeof (*ctrl)); + if (!ctrl) + goto err; + + grub_memset (ctrl, 0, sizeof (*ctrl)); + + ctrl->reg = reg; + ctrl->pci = dev; + + if (grub_nvme_controller_enable (ctrl)) + goto err_free_ctrl; + + return 0; + +err_free_ctrl: + grub_free (ctrl); +err: + grub_dprintf ("nvme", "Failed to enable NVMe controller.\n"); + return 0; +} + +static int +grub_nvme_iterate (grub_disk_dev_iterate_hook_t hook, void *hook_data, + grub_disk_pull_t pull) +{ + struct grub_nvme_namespace *ns; + + if (pull != GRUB_DISK_PULL_NONE) + return 0; + + FOR_LIST_ELEMENTS (ns, grub_nvme_namespaces) + if (hook (ns->devname, hook_data)) + return 1; + + return 0; +} + +static grub_err_t +grub_nvme_open (const char *name __attribute ((unused)), + grub_disk_t disk __attribute ((unused))) +{ + struct grub_nvme_namespace *ns; + + FOR_LIST_ELEMENTS (ns, grub_nvme_namespaces) + if (grub_strcmp (ns->devname, name) == 0) + break; + + if (!ns) + return grub_error (GRUB_ERR_UNKNOWN_DEVICE, "can't open device"); + + disk->total_sectors = ns->lba_count; + disk->max_agglomerate = ns->max_req_size; + + disk->id = ns->nsnum; /* global id of the namespace */ + + disk->data = ns; + + return 0; +} + +static grub_err_t +nvme_readwrite (struct grub_nvme_namespace *ns, grub_disk_addr_t sector, + grub_size_t num_sectors, char *buf, int write) +{ + for (grub_size_t i = 0; i < num_sectors;) + { + grub_uint16_t blocks_remaining = num_sectors - i; + char *op_buf = buf + i * ns->block_size; + int blocks = + nvme_prpl_xfer (ns, sector + i, op_buf, blocks_remaining, write); + if (blocks < 0) + return GRUB_ERR_IO; + i += blocks; + } + return GRUB_ERR_NONE; +} + +static grub_err_t +grub_nvme_read (grub_disk_t disk, grub_disk_addr_t sector, + grub_size_t num_sectors, char *buf) +{ + return nvme_readwrite ((struct grub_nvme_namespace *) disk->data, sector, + num_sectors, buf, 0); +} + +static grub_err_t +grub_nvme_write (grub_disk_t disk, grub_disk_addr_t 
sector, + grub_size_t num_sectors, const char *buf) +{ + return nvme_readwrite ((struct grub_nvme_namespace *) disk->data, sector, + num_sectors, (char *) buf, 1); +} + +static struct grub_disk_dev grub_nvme_dev = { + .name = "nvme", + .id = GRUB_DISK_DEVICE_NVME_ID, + .disk_iterate = grub_nvme_iterate, + .disk_open = grub_nvme_open, + .disk_read = grub_nvme_read, + .disk_write = grub_nvme_write, + .next = 0 +}; + +GRUB_MOD_INIT (nvme) +{ + grub_stop_disk_firmware (); + grub_pci_iterate (grub_nvme_pci_probe, NULL); + grub_disk_dev_register (&grub_nvme_dev); +} + +GRUB_MOD_FINI (nvme) +{ + grub_disk_dev_unregister (&grub_nvme_dev); +} diff --git a/include/grub/disk.h b/include/grub/disk.h index fbf23df7f..186e76f0b 100644 --- a/include/grub/disk.h +++ b/include/grub/disk.h @@ -52,6 +52,7 @@ enum grub_disk_dev_id GRUB_DISK_DEVICE_UBOOTDISK_ID, GRUB_DISK_DEVICE_XEN, GRUB_DISK_DEVICE_OBDISK_ID, + GRUB_DISK_DEVICE_NVME_ID }; struct grub_disk; diff --git a/include/grub/nvme.h b/include/grub/nvme.h new file mode 100644 index 000000000..5f66aa232 --- /dev/null +++ b/include/grub/nvme.h @@ -0,0 +1,225 @@ +/* nvme.h - NVMe datastructures and constants */ +/* + * Copyright 2017 Amazon.com, Inc. or its affiliates. + * + * This file may be distributed under the terms of the GNU LGPLv3 license. + */ + +#ifndef GRUB_NVME_HEADER +#define GRUB_NVME_HEADER 1 + +#include <grub/pci.h> +#include <grub/types.h> + +/* The register file of a NVMe host controller. This struct follows the naming + scheme in the NVMe specification. 
*/ +struct grub_nvme_reg +{ + grub_uint64_t cap; /* controller capabilities */ + grub_uint32_t vs; /* version */ + grub_uint32_t intms; /* interrupt mask set */ + grub_uint32_t intmc; /* interrupt mask clear */ + grub_uint32_t cc; /* controller configuration */ + grub_uint32_t _res0; + grub_uint32_t csts; /* controller status */ + grub_uint32_t _res1; + grub_uint32_t aqa; /* admin queue attributes */ + grub_uint64_t asq; /* admin submission queue base address */ + grub_uint64_t acq; /* admin completion queue base address */ +}; + +/* Submission queue entry */ +struct grub_nvme_sqe +{ + union + { + grub_uint32_t dword[16]; + struct + { + grub_uint32_t cdw0; /* Command DWORD 0 */ + grub_uint32_t nsid; /* Namespace ID */ + grub_uint64_t _res0; + grub_uint64_t mptr; /* metadata ptr */ + + grub_uint64_t dptr_prp1; + grub_uint64_t dptr_prp2; + }; + }; +}; + +/* Completion queue entry */ +struct grub_nvme_cqe +{ + union + { + grub_uint32_t dword[4]; + struct + { + grub_uint32_t cdw0; + grub_uint32_t _res0; + grub_uint16_t sq_head; + grub_uint16_t sq_id; + grub_uint16_t cid; + grub_uint16_t status; + }; + }; +}; + +/* The common part of every submission or completion queue. */ +struct grub_nvme_queue +{ + grub_uint32_t *dbl; /* doorbell */ + grub_uint16_t mask; /* length - 1 */ +}; + +struct grub_nvme_cq +{ + struct grub_nvme_queue common; + struct grub_nvme_cqe *cqe; + + /* We have read upto (but not including) this entry in the queue. */ + grub_uint16_t head; + + /* The current phase bit the controller uses to indicate that it has written + a new entry. This is inverted after each wrap. */ + unsigned phase:1; +}; + +struct grub_nvme_sq +{ + struct grub_nvme_queue common; + struct grub_nvme_sqe *sqe; + + /* Corresponding completion queue. We only support a single SQ per CQ. */ + struct grub_nvme_cq *cq; + + /* The last entry the controller has fetched. */ + grub_uint16_t head; + + /* The last value we have written to the tail doorbell. 
*/ + grub_uint16_t tail; +}; + +struct grub_nvme_ctrl +{ + grub_pci_device_t pci; + struct grub_nvme_reg volatile *reg; + + grub_uint32_t ctrlnum; + + grub_uint32_t doorbell_stride; /* in bytes */ + + struct grub_nvme_sq admin_sq; + struct grub_nvme_cq admin_cq; + + grub_uint32_t ns_count; + + struct grub_nvme_sq io_sq; + struct grub_nvme_cq io_cq; +}; + +struct grub_nvme_namespace +{ + struct grub_nvme_namespace *next; + struct grub_nvme_namespace **prev; + + char *devname; + + grub_uint32_t nsnum; + + struct grub_nvme_ctrl *ctrl; + + grub_uint32_t ns_id; + + grub_uint64_t lba_count; /* The total amount of sectors. */ + + grub_uint32_t block_size; + grub_uint32_t metadata_size; + grub_uint32_t max_req_size; +}; + +/* Data structures for NVMe admin identify commands */ + +struct grub_nvme_identify_ctrl +{ + grub_uint16_t vid; + grub_uint16_t ssvid; + char sn[20]; + char mn[40]; + char fr[8]; + + grub_uint8_t rab; + grub_uint8_t ieee[3]; + grub_uint8_t cmic; + grub_uint8_t mdts; + + char _boring[516 - 78]; + + grub_uint32_t nn; /* number of namespaces */ +}; + +struct grub_nvme_identify_ns_list +{ + grub_uint32_t ns_id[1024]; +}; + +struct grub_nvme_lba_format +{ + grub_uint16_t ms; + grub_uint8_t lbads; + grub_uint8_t rp; +}; + +struct grub_nvme_identify_ns +{ + grub_uint64_t nsze; + grub_uint64_t ncap; + grub_uint64_t nuse; + grub_uint8_t nsfeat; + grub_uint8_t nlbaf; + grub_uint8_t flbas; + + char _boring[128 - 27]; + + struct grub_nvme_lba_format lbaf[16]; +}; + +union nvme_identify +{ + struct grub_nvme_identify_ns ns; + struct grub_nvme_identify_ctrl ctrl; + struct grub_nvme_identify_ns_list ns_list; +}; + +/* NVMe constants */ + +#define GRUB_NVME_CAP_CSS_NVME (1ULL << 37) + +#define GRUB_NVME_CSTS_FATAL (1U << 1) +#define GRUB_NVME_CSTS_RDY (1U << 0) + +#define GRUB_NVME_CC_EN (1U << 0) + +#define GRUB_NVME_SQE_OPC_ADMIN_CREATE_IO_SQ 1U +#define GRUB_NVME_SQE_OPC_ADMIN_CREATE_IO_CQ 5U +#define GRUB_NVME_SQE_OPC_ADMIN_IDENTIFY 6U + +#define 
GRUB_NVME_SQE_OPC_IO_WRITE 1U +#define GRUB_NVME_SQE_OPC_IO_READ 2U + +#define GRUB_NVME_ADMIN_IDENTIFY_CNS_ID_NS 0U +#define GRUB_NVME_ADMIN_IDENTIFY_CNS_ID_CTRL 1U +#define GRUB_NVME_ADMIN_IDENTIFY_CNS_GET_NS_LIST 2U + +#define GRUB_NVME_CQE_DW3_P (1U << 16) + +#define GRUB_NVME_PAGE_SIZE 4096 +#define GRUB_NVME_PAGE_MASK ~(GRUB_NVME_PAGE_SIZE - 1) + +/* Length for the queue entries. */ +#define GRUB_NVME_SQE_SIZE_LOG 6 +#define GRUB_NVME_CQE_SIZE_LOG 4 + +#endif + +/* EOF */ -- 2.39.2 _______________________________________________ Grub-devel mailing list Grub-devel@gnu.org https://lists.gnu.org/mailman/listinfo/grub-devel