Thank you for your effort, but we already have an NVMe driver in the review queue. See my email from the 16th of May.
On Sun, Oct 13, 2024, 22:38, <k...@mkukri.xyz> wrote:
> From: Mate Kukri <k...@mkukri.xyz>
>
> It is based on existing SeaBIOS code, so the license is LGPLv3.
>
> Tested as a coreboot payload on the following targets:
> - QEMU Q35 with emulated NVMe disk
> - ASUS H610M-K with Intel 660p NVMe disk
>
> Signed-off-by: Mate Kukri <k...@mkukri.xyz>
> ---
>  Makefile.am                     |   2 +-
>  grub-core/Makefile.core.def     |   6 +
>  grub-core/commands/nativedisk.c |   1 +
>  grub-core/disk/nvme.c           | 861 ++++++++++++++++++++++++++++++++
>  include/grub/disk.h             |   1 +
>  include/grub/nvme.h             | 225 +++++++++
>  6 files changed, 1095 insertions(+), 1 deletion(-)
>  create mode 100644 grub-core/disk/nvme.c
>  create mode 100644 include/grub/nvme.h
>
> diff --git a/Makefile.am b/Makefile.am
> index 43635d5ff..2c86dbbf6 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -434,7 +434,7 @@ if COND_i386_coreboot
>  FS_PAYLOAD_MODULES ?= $(shell cat grub-core/fs.lst)
>  default_payload.elf: grub-mkstandalone grub-mkimage FORCE
>  	test -f $@ && rm $@ || true
> -	pkgdatadir=. ./grub-mkstandalone --grub-mkimage=./grub-mkimage -O i386-coreboot -o $@ --modules='ahci pata ehci uhci ohci usb_keyboard usbms part_msdos ext2 fat at_keyboard part_gpt usbserial_usbdebug cbfs' --install-modules='ls linux search configfile normal cbtime cbls memrw iorw minicmd lsmmap lspci halt reboot hexdump pcidump regexp setpci lsacpi chain test serial multiboot cbmemc linux16 gzio echo help syslinuxcfg xnu $(FS_PAYLOAD_MODULES) password_pbkdf2 $(EXTRA_PAYLOAD_MODULES)' --fonts= --themes= --locales= -d grub-core/ /boot/grub/grub.cfg=$(srcdir)/coreboot.cfg
> +	pkgdatadir=. ./grub-mkstandalone --grub-mkimage=./grub-mkimage -O i386-coreboot -o $@ --modules='ahci pata nvme ehci uhci ohci usb_keyboard usbms part_msdos ext2 fat at_keyboard part_gpt usbserial_usbdebug cbfs' --install-modules='ls linux search configfile normal cbtime cbls memrw iorw minicmd lsmmap lspci halt reboot hexdump pcidump regexp setpci lsacpi chain test serial multiboot cbmemc linux16 gzio echo help syslinuxcfg xnu $(FS_PAYLOAD_MODULES) password_pbkdf2 $(EXTRA_PAYLOAD_MODULES)' --fonts= --themes= --locales= -d grub-core/ /boot/grub/grub.cfg=$(srcdir)/coreboot.cfg
>  endif
>
>  endif
> diff --git a/grub-core/Makefile.core.def b/grub-core/Makefile.core.def
> index 063ef5dd7..7b4cf972a 100644
> --- a/grub-core/Makefile.core.def
> +++ b/grub-core/Makefile.core.def
> @@ -2606,3 +2606,9 @@ module = {
>    enable = efi;
>    depends = part_gpt;
>  };
> +
> +module = {
> +  name = nvme;
> +  common = disk/nvme.c;
> +  enable = pci;
> +};
> diff --git a/grub-core/commands/nativedisk.c b/grub-core/commands/nativedisk.c
> index 580c8d3b0..a2c766fbd 100644
> --- a/grub-core/commands/nativedisk.c
> +++ b/grub-core/commands/nativedisk.c
> @@ -78,6 +78,7 @@ get_uuid (const char *name, char **uuid, int getnative)
>      case GRUB_DISK_DEVICE_ATA_ID:
>      case GRUB_DISK_DEVICE_SCSI_ID:
>      case GRUB_DISK_DEVICE_XEN:
> +    case GRUB_DISK_DEVICE_NVME_ID:
>        if (getnative)
>  	break;
>      /* FALLTHROUGH */
> diff --git a/grub-core/disk/nvme.c b/grub-core/disk/nvme.c
> new file mode 100644
> index 000000000..b6a886464
> --- /dev/null
> +++ b/grub-core/disk/nvme.c
> @@ -0,0 +1,861 @@
> +/* nvme.c - NVMe support. */
> +/*
> + * Based on SeaBIOS NVMe driver, ported to GRUB2 by Mate Kukri
> + *
> + * Copyright 2017 Amazon.com, Inc. or its affiliates.
> + * Copyright 2024 Mate Kukri <k...@mkukri.xyz>
> + *
> + * This file may be distributed under the terms of the GNU LGPLv3 license.
> + */
> +
> +#include <grub/disk.h>
> +#include <grub/dl.h>
> +#include <grub/nvme.h>
> +#include <grub/pci.h>
> +#include <grub/time.h>
> +
> +GRUB_MOD_LICENSE ("GPLv3"); /* LGPLv3 is GPLv3 compatible */
> +
> +static grub_uint32_t grub_nvme_ctrlcnt;
> +static grub_uint32_t grub_nvme_nscnt;
> +
> +static struct grub_nvme_namespace *grub_nvme_namespaces;
> +
> +/* Page aligned "dma bounce buffer" of size GRUB_NVME_PAGE_SIZE */
> +static void *nvme_dma_buffer;
> +
> +static void *
> +zalloc_page_aligned (grub_uint32_t size)
> +{
> +  void *res = grub_memalign (GRUB_NVME_PAGE_SIZE, size);
> +  if (res)
> +    grub_memset (res, 0, size);
> +  return res;
> +}
> +
> +static void
> +nvme_init_queue_common (struct grub_nvme_ctrl *ctrl,
> +                        struct grub_nvme_queue *q, grub_uint16_t q_idx,
> +                        grub_uint16_t length)
> +{
> +  grub_memset (q, 0, sizeof (*q));
> +  q->dbl =
> +    (grub_uint32_t *) ((char *) ctrl->reg + 0x1000 +
> +                       q_idx * ctrl->doorbell_stride);
> +  grub_dprintf ("nvme", " q %p q_idx %u dbl %p\n", q, q_idx, q->dbl);
> +  q->mask = length - 1;
> +}
> +
> +static int
> +nvme_init_sq (struct grub_nvme_ctrl *ctrl, struct grub_nvme_sq *sq,
> +              grub_uint16_t q_idx, grub_uint16_t length,
> +              struct grub_nvme_cq *cq)
> +{
> +  nvme_init_queue_common (ctrl, &sq->common, q_idx, length);
> +  sq->sqe = zalloc_page_aligned (sizeof (*sq->sqe) * length);
> +
> +  if (!sq->sqe)
> +    {
> +      return -1;
> +    }
> +
> +  grub_dprintf ("nvme", "sq %p q_idx %u sqe %p\n", sq, q_idx, sq->sqe);
> +  sq->cq = cq;
> +  sq->head = 0;
> +  sq->tail = 0;
> +
> +  return 0;
> +}
> +
> +static int
> +nvme_init_cq (struct grub_nvme_ctrl *ctrl, struct grub_nvme_cq *cq,
> +              grub_uint16_t q_idx, grub_uint16_t length)
> +{
> +  nvme_init_queue_common (ctrl, &cq->common, q_idx, length);
> +  cq->cqe = zalloc_page_aligned (sizeof (*cq->cqe) * length);
> +  if (!cq->cqe)
> +    {
> +      return -1;
> +    }
> +
> +  cq->head = 0;
> +
> +  /* All CQE phase bits are initialized to zero. This means initially we wait
> +     for the host controller to set these to 1. */
> +  cq->phase = 1;
> +
> +  return 0;
> +}
> +
> +static int
> +nvme_poll_cq (struct grub_nvme_cq *cq)
> +{
> +  grub_uint32_t dw3 = *(volatile grub_uint32_t *) &cq->cqe[cq->head].dword[3];
> +  return (!!(dw3 & GRUB_NVME_CQE_DW3_P) == cq->phase);
> +}
> +
> +static int
> +nvme_is_cqe_success (const struct grub_nvme_cqe *cqe)
> +{
> +  return ((cqe->status >> 1) & 0xFF) == 0;
> +}
> +
> +static struct grub_nvme_cqe
> +nvme_error_cqe (void)
> +{
> +  struct grub_nvme_cqe r;
> +
> +  /* 0xFF is a vendor specific status code != success. Should be okay for
> +     indicating failure. */
> +  grub_memset (&r, 0xFF, sizeof (r));
> +  return r;
> +}
> +
> +static struct grub_nvme_cqe
> +nvme_consume_cqe (struct grub_nvme_sq *sq)
> +{
> +  struct grub_nvme_cq *cq = sq->cq;
> +
> +  if (!nvme_poll_cq (cq))
> +    {
> +      /* Cannot consume a completion queue entry, if there is none ready. */
> +      return nvme_error_cqe ();
> +    }
> +
> +  struct grub_nvme_cqe *cqe = &cq->cqe[cq->head];
> +  grub_uint16_t cq_next_head = (cq->head + 1) & cq->common.mask;
> +  grub_dprintf ("nvme", "cq %p head %u -> %u\n", cq, cq->head, cq_next_head);
> +  if (cq_next_head < cq->head)
> +    {
> +      grub_dprintf ("nvme", "cq %p wrap\n", cq);
> +      cq->phase = ~cq->phase;
> +    }
> +  cq->head = cq_next_head;
> +
> +  /* Update the submission queue head. */
> +  if (cqe->sq_head != sq->head)
> +    {
> +      sq->head = cqe->sq_head;
> +      grub_dprintf ("nvme", "sq %p advanced to %u\n", sq, cqe->sq_head);
> +    }
> +
> +  /* Tell the controller that we consumed the completion. */
> +  *(volatile grub_uint32_t *) cq->common.dbl = cq->head;
> +
> +  return *cqe;
> +}
> +
> +static struct grub_nvme_cqe
> +nvme_wait (struct grub_nvme_sq *sq)
> +{
> +  const grub_uint64_t limit_ms = grub_get_time_ms () + 5000;
> +
> +  while (!nvme_poll_cq (sq->cq))
> +    {
> +      if (limit_ms < grub_get_time_ms ())
> +        {
> +          grub_dprintf ("nvme", "NVME SQ wait timeout\n");
> +          return nvme_error_cqe ();
> +        }
> +    }
> +
> +  return nvme_consume_cqe (sq);
> +}
> +
> +/* Returns the next submission queue entry (or NULL if the queue is full). It
> +   also fills out Command Dword 0 and clears the rest. */
> +static struct grub_nvme_sqe *
> +nvme_get_next_sqe (struct grub_nvme_sq *sq, grub_uint8_t opc, void *metadata,
> +                   void *data, void *data2)
> +{
> +  if (((sq->head + 1) & sq->common.mask) == sq->tail)
> +    {
> +      grub_dprintf ("nvme", "submission queue is full\n");
> +      return NULL;
> +    }
> +
> +  struct grub_nvme_sqe *sqe = &sq->sqe[sq->tail];
> +  grub_dprintf ("nvme", "sq %p next_sqe %u\n", sq, sq->tail);
> +
> +  grub_memset (sqe, 0, sizeof (*sqe));
> +  sqe->cdw0 = opc | (sq->tail << 16 /* CID */ );
> +  sqe->mptr = (grub_uint32_t) metadata;
> +  sqe->dptr_prp1 = (grub_uint32_t) data;
> +  sqe->dptr_prp2 = (grub_uint32_t) data2;
> +
> +  return sqe;
> +}
> +
> +/* Call this after you've filled out an sqe that you've got from nvme_get_next_sqe. */
> +static void
> +nvme_commit_sqe (struct grub_nvme_sq *sq)
> +{
> +  grub_dprintf ("nvme", "sq %p commit_sqe %u\n", sq, sq->tail);
> +  sq->tail = (sq->tail + 1) & sq->common.mask;
> +  *(volatile grub_uint32_t *) sq->common.dbl = sq->tail;
> +}
> +
> +/* Perform an identify command on the admin queue and return the resulting
> +   buffer. This may be a NULL pointer, if something failed. This function
> +   cannot be used after initialization, because it uses buffers in tmp zone. */
> +static union nvme_identify *
> +nvme_admin_identify (struct grub_nvme_ctrl *ctrl, grub_uint8_t cns,
> +                     grub_uint32_t nsid)
> +{
> +  union nvme_identify *identify_buf = zalloc_page_aligned (4096);
> +  if (!identify_buf)
> +    return NULL;
> +
> +  struct grub_nvme_sqe *cmd_identify;
> +  cmd_identify = nvme_get_next_sqe (&ctrl->admin_sq,
> +                                    GRUB_NVME_SQE_OPC_ADMIN_IDENTIFY, NULL,
> +                                    identify_buf, NULL);
> +  if (!cmd_identify)
> +    goto error;
> +
> +  cmd_identify->nsid = nsid;
> +  cmd_identify->dword[10] = cns;
> +
> +  nvme_commit_sqe (&ctrl->admin_sq);
> +
> +  struct grub_nvme_cqe cqe = nvme_wait (&ctrl->admin_sq);
> +
> +  if (!nvme_is_cqe_success (&cqe))
> +    {
> +      goto error;
> +    }
> +
> +  return identify_buf;
> +error:
> +  grub_free (identify_buf);
> +  return NULL;
> +}
> +
> +static struct grub_nvme_identify_ctrl *
> +nvme_admin_identify_ctrl (struct grub_nvme_ctrl *ctrl)
> +{
> +  return &nvme_admin_identify (ctrl, GRUB_NVME_ADMIN_IDENTIFY_CNS_ID_CTRL,
> +                               0)->ctrl;
> +}
> +
> +static struct grub_nvme_identify_ns *
> +nvme_admin_identify_ns (struct grub_nvme_ctrl *ctrl, grub_uint32_t ns_id)
> +{
> +  return &nvme_admin_identify (ctrl, GRUB_NVME_ADMIN_IDENTIFY_CNS_ID_NS,
> +                               ns_id)->ns;
> +}
> +
> +static void
> +nvme_probe_ns (struct grub_nvme_ctrl *ctrl, grub_uint32_t ns_idx,
> +               grub_uint8_t mdts)
> +{
> +  grub_uint32_t ns_id = ns_idx + 1;
> +
> +  struct grub_nvme_identify_ns *id = nvme_admin_identify_ns (ctrl, ns_id);
> +  if (!id)
> +    {
> +      grub_dprintf ("nvme", "NVMe couldn't identify namespace %u.\n", ns_id);
> +      goto free_buffer;
> +    }
> +
> +  grub_uint8_t current_lba_format = id->flbas & 0xF;
> +  if (current_lba_format > id->nlbaf)
> +    {
> +      grub_dprintf ("nvme",
> +                    "NVMe NS %u: current LBA format %u is beyond what the "
> +                    " namespace supports (%u)?\n", ns_id, current_lba_format,
> +                    id->nlbaf + 1);
> +      goto free_buffer;
> +    }
> +
> +  if (!id->nsze)
> +    {
> +      grub_dprintf ("nvme", "NVMe NS %u is inactive.\n", ns_id);
> +      goto free_buffer;
> +    }
> +
> +  if (!nvme_dma_buffer)
> +    {
> +      nvme_dma_buffer = zalloc_page_aligned (GRUB_NVME_PAGE_SIZE);
> +      if (!nvme_dma_buffer)
> +        {
> +          goto free_buffer;
> +        }
> +    }
> +
> +  struct grub_nvme_namespace *ns = grub_malloc (sizeof (*ns));
> +  if (!ns)
> +    {
> +      goto free_buffer;
> +    }
> +  grub_memset (ns, 0, sizeof (*ns));
> +  ns->ctrl = ctrl;
> +  ns->ns_id = ns_id;
> +  ns->lba_count = id->nsze;
> +
> +  struct grub_nvme_lba_format *fmt = &id->lbaf[current_lba_format];
> +
> +  ns->block_size = 1U << fmt->lbads;
> +  ns->metadata_size = fmt->ms;
> +
> +  if (ns->block_size > GRUB_NVME_PAGE_SIZE)
> +    {
> +      /* If we see devices that trigger this path, we need to increase our
> +         buffer size. */
> +      grub_free (ns);
> +      goto free_buffer;
> +    }
> +
> +  if (mdts)
> +    {
> +      ns->max_req_size =
> +        ((1U << mdts) * GRUB_NVME_PAGE_SIZE) / ns->block_size;
> +      grub_dprintf ("nvme", "NVME NS %u max request size: %d sectors\n",
> +                    ns_id, ns->max_req_size);
> +    }
> +  else
> +    {
> +      ns->max_req_size = -1U;
> +    }
> +
> +  ns->devname = grub_xasprintf ("nvme%un%u", ctrl->ctrlnum, ns_id);
> +  ns->nsnum = grub_nvme_nscnt++;
> +
> +  grub_list_push (GRUB_AS_LIST_P (&grub_nvme_namespaces), GRUB_AS_LIST (ns));
> +
> +free_buffer:
> +  grub_free (id);
> +}
> +
> +
> +/* Release memory allocated for a completion queue */
> +static void
> +nvme_destroy_cq (struct grub_nvme_cq *cq)
> +{
> +  grub_free (cq->cqe);
> +  cq->cqe = NULL;
> +}
> +
> +/* Release memory allocated for a submission queue */
> +static void
> +nvme_destroy_sq (struct grub_nvme_sq *sq)
> +{
> +  grub_free (sq->sqe);
> +  sq->sqe = NULL;
> +}
> +
> +/* Returns 0 on success. */
> +static int
> +nvme_create_io_cq (struct grub_nvme_ctrl *ctrl, struct grub_nvme_cq *cq,
> +                   grub_uint16_t q_idx)
> +{
> +  int rc;
> +  struct grub_nvme_sqe *cmd_create_cq;
> +  grub_uint32_t length = 1 + (ctrl->reg->cap & 0xffff);
> +  if (length > GRUB_NVME_PAGE_SIZE / sizeof (struct grub_nvme_cqe))
> +    length = GRUB_NVME_PAGE_SIZE / sizeof (struct grub_nvme_cqe);
> +
> +  rc = nvme_init_cq (ctrl, cq, q_idx, length);
> +  if (rc)
> +    {
> +      goto err;
> +    }
> +
> +  cmd_create_cq = nvme_get_next_sqe (&ctrl->admin_sq,
> +                                     GRUB_NVME_SQE_OPC_ADMIN_CREATE_IO_CQ,
> +                                     NULL, cq->cqe, NULL);
> +  if (!cmd_create_cq)
> +    {
> +      goto err_destroy_cq;
> +    }
> +
> +  cmd_create_cq->dword[10] = (cq->common.mask << 16) | (q_idx >> 1);
> +  cmd_create_cq->dword[11] = 1 /* physically contiguous */ ;
> +
> +  nvme_commit_sqe (&ctrl->admin_sq);
> +
> +  struct grub_nvme_cqe cqe = nvme_wait (&ctrl->admin_sq);
> +
> +  if (!nvme_is_cqe_success (&cqe))
> +    {
> +      grub_dprintf ("nvme", "create io cq failed: %08x %08x %08x %08x\n",
> +                    cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]);
> +
> +      goto err_destroy_cq;
> +    }
> +
> +  return 0;
> +
> +err_destroy_cq:
> +  nvme_destroy_cq (cq);
> +err:
> +  return -1;
> +}
> +
> +/* Returns 0 on success. */
> +static int
> +nvme_create_io_sq (struct grub_nvme_ctrl *ctrl, struct grub_nvme_sq *sq,
> +                   grub_uint16_t q_idx, struct grub_nvme_cq *cq)
> +{
> +  int rc;
> +  struct grub_nvme_sqe *cmd_create_sq;
> +  grub_uint32_t length = 1 + (ctrl->reg->cap & 0xffff);
> +  if (length > GRUB_NVME_PAGE_SIZE / sizeof (struct grub_nvme_cqe))
> +    length = GRUB_NVME_PAGE_SIZE / sizeof (struct grub_nvme_cqe);
> +
> +  rc = nvme_init_sq (ctrl, sq, q_idx, length, cq);
> +  if (rc)
> +    {
> +      goto err;
> +    }
> +
> +  cmd_create_sq = nvme_get_next_sqe (&ctrl->admin_sq,
> +                                     GRUB_NVME_SQE_OPC_ADMIN_CREATE_IO_SQ,
> +                                     NULL, sq->sqe, NULL);
> +  if (!cmd_create_sq)
> +    {
> +      goto err_destroy_sq;
> +    }
> +
> +  cmd_create_sq->dword[10] = (sq->common.mask << 16) | (q_idx >> 1);
> +  cmd_create_sq->dword[11] = (q_idx >> 1) << 16 | 1 /* contiguous */ ;
> +  grub_dprintf ("nvme", "sq %p create dword10 %08x dword11 %08x\n", sq,
> +                cmd_create_sq->dword[10], cmd_create_sq->dword[11]);
> +
> +  nvme_commit_sqe (&ctrl->admin_sq);
> +
> +  struct grub_nvme_cqe cqe = nvme_wait (&ctrl->admin_sq);
> +
> +  if (!nvme_is_cqe_success (&cqe))
> +    {
> +      grub_dprintf ("nvme", "create io sq failed: %08x %08x %08x %08x\n",
> +                    cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]);
> +      goto err_destroy_sq;
> +    }
> +
> +  return 0;
> +
> +err_destroy_sq:
> +  nvme_destroy_sq (sq);
> +err:
> +  return -1;
> +}
> +
> +/* Reads count sectors into buf. The buffer cannot cross page boundaries. */
> +static int
> +nvme_io_xfer (struct grub_nvme_namespace *ns, grub_uint64_t lba, void *prp1,
> +              void *prp2, grub_uint16_t count, int write)
> +{
> +  if (((grub_uint32_t) prp1 & 0x3) || ((grub_uint32_t) prp2 & 0x3))
> +    {
> +      /* Buffer is misaligned */
> +      return -1;
> +    }
> +
> +  struct grub_nvme_sqe *io_read = nvme_get_next_sqe (&ns->ctrl->io_sq,
> +                                                     write ?
> +                                                     GRUB_NVME_SQE_OPC_IO_WRITE
> +                                                     :
> +                                                     GRUB_NVME_SQE_OPC_IO_READ,
> +                                                     NULL, prp1, prp2);
> +  io_read->nsid = ns->ns_id;
> +  io_read->dword[10] = (grub_uint32_t) lba;
> +  io_read->dword[11] = (grub_uint32_t) (lba >> 32);
> +  io_read->dword[12] = (1U << 31 /* limited retry */ ) | (count - 1);
> +
> +  nvme_commit_sqe (&ns->ctrl->io_sq);
> +
> +  struct grub_nvme_cqe cqe = nvme_wait (&ns->ctrl->io_sq);
> +
> +  if (!nvme_is_cqe_success (&cqe))
> +    {
> +      grub_dprintf ("nvme", "read io: %08x %08x %08x %08x\n",
> +                    cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]);
> +
> +      return -1;
> +    }
> +
> +  grub_dprintf ("nvme", "ns %u %s lba %llu+%u\n", ns->ns_id,
> +                write ? "write" : "read", lba, count);
> +  return count;
> +}
> +
> +/* Transfer up to one page of data using the internal dma bounce buffer */
> +static int
> +nvme_bounce_xfer (struct grub_nvme_namespace *ns, grub_uint64_t lba,
> +                  void *buf, grub_uint16_t count, int write)
> +{
> +  grub_uint16_t const max_blocks = GRUB_NVME_PAGE_SIZE / ns->block_size;
> +  grub_uint16_t blocks = count < max_blocks ? count : max_blocks;
> +
> +  if (write)
> +    grub_memcpy (nvme_dma_buffer, buf, blocks * ns->block_size);
> +
> +  int res = nvme_io_xfer (ns, lba, nvme_dma_buffer, NULL, blocks, write);
> +
> +  if (!write && res >= 0)
> +    grub_memcpy (buf, nvme_dma_buffer, res * ns->block_size);
> +
> +  return res;
> +}
> +
> +#define GRUB_NVME_MAX_PRPL_ENTRIES 15 /* Allows requests up to 64kb */
> +
> +/* Transfer data using page list (if applicable) */
> +static int
> +nvme_prpl_xfer (struct grub_nvme_namespace *ns, grub_uint64_t lba, void *buf,
> +                grub_uint16_t count, int write)
> +{
> +  grub_uint32_t base = (long) buf;
> +  grub_int32_t size;
> +
> +  if (count > ns->max_req_size)
> +    count = ns->max_req_size;
> +
> +  size = count * ns->block_size;
> +  /* Special case for transfers that fit into PRP1, but are unaligned */
> +  if (((size + (base & ~GRUB_NVME_PAGE_MASK)) <= GRUB_NVME_PAGE_SIZE))
> +    goto single;
> +
> +  /* Every request has to be page aligned */
> +  if (base & ~GRUB_NVME_PAGE_MASK)
> +    goto bounce;
> +
> +  /* Make sure a full block fits into the last chunk */
> +  if (size & (ns->block_size - 1ULL))
> +    goto bounce;
> +
> +  /* Build PRP list if we need to describe more than 2 pages */
> +  if ((ns->block_size * count) > (GRUB_NVME_PAGE_SIZE * 2))
> +    {
> +      grub_uint32_t prpl_len = 0;
> +      grub_uint64_t *prpl = nvme_dma_buffer;
> +      int first_page = 1;
> +      for (; size > 0;
> +           base += GRUB_NVME_PAGE_SIZE, size -= GRUB_NVME_PAGE_SIZE)
> +        {
> +          if (first_page)
> +            {
> +              /* First page is special */
> +              first_page = 0;
> +              continue;
> +            }
> +          if (prpl_len >= GRUB_NVME_MAX_PRPL_ENTRIES)
> +            goto bounce;
> +          prpl[prpl_len++] = base;
> +        }
> +      return nvme_io_xfer (ns, lba, buf, prpl, count, write);
> +    }
> +
> +  /* Directly embed the 2nd page if we only need 2 pages */
> +  if ((ns->block_size * count) > GRUB_NVME_PAGE_SIZE)
> +    return nvme_io_xfer (ns, lba, buf, (char *) buf + GRUB_NVME_PAGE_SIZE,
> +                         count, write);
> +
> +single:
> +  /* One page is enough, don't expose anything else */
> +  return nvme_io_xfer (ns, lba, buf, NULL, count, write);
> +
> +bounce:
> +  /* Use bounce buffer to make transfer */
> +  return nvme_bounce_xfer (ns, lba, buf, count, write);
> +}
> +
> +static int
> +nvme_create_io_queues (struct grub_nvme_ctrl *ctrl)
> +{
> +  if (nvme_create_io_cq (ctrl, &ctrl->io_cq, 3))
> +    goto err;
> +
> +  if (nvme_create_io_sq (ctrl, &ctrl->io_sq, 2, &ctrl->io_cq))
> +    goto err_free_cq;
> +
> +  return 0;
> +
> +err_free_cq:
> +  nvme_destroy_cq (&ctrl->io_cq);
> +err:
> +  return -1;
> +}
> +
> +/* Waits for CSTS.RDY to match rdy. Returns 0 on success. */
> +static int
> +nvme_wait_csts_rdy (struct grub_nvme_ctrl *ctrl, unsigned rdy)
> +{
> +  const grub_uint64_t limit_ms =
> +    grub_get_time_ms () + 500 /* ms */ * ((ctrl->reg->cap >> 24) & 0xFFU);
> +  grub_uint32_t csts;
> +
> +  while (rdy != ((csts = ctrl->reg->csts) & GRUB_NVME_CSTS_RDY))
> +    {
> +      if (csts & GRUB_NVME_CSTS_FATAL)
> +        {
> +          grub_dprintf ("nvme",
> +                        "NVMe fatal error during controller shutdown\n");
> +          return -1;
> +        }
> +
> +      if (limit_ms < grub_get_time_ms ())
> +        {
> +          grub_dprintf ("nvme", "NVMe timeout waiting for CSTS.RDY\n");
> +          return -1;
> +        }
> +    }
> +
> +  return 0;
> +}
> +
> +static int
> +grub_nvme_controller_enable (struct grub_nvme_ctrl *ctrl)
> +{
> +  grub_pci_address_t addr;
> +  int rc;
> +
> +  addr = grub_pci_make_address (ctrl->pci, GRUB_PCI_REG_COMMAND);
> +  grub_pci_write_word (addr,
> +                       grub_pci_read_word (addr) |
> +                       GRUB_PCI_COMMAND_BUS_MASTER);
> +
> +  /* Turn the controller off. */
> +  ctrl->reg->cc = 0;
> +  if (nvme_wait_csts_rdy (ctrl, 0))
> +    {
> +      grub_dprintf ("nvme", "NVMe fatal error during controller shutdown\n");
> +      return -1;
> +    }
> +
> +  ctrl->doorbell_stride = 4U << ((ctrl->reg->cap >> 32) & 0xF);
> +
> +  rc = nvme_init_cq (ctrl, &ctrl->admin_cq, 1,
> +                     GRUB_NVME_PAGE_SIZE / sizeof (struct grub_nvme_cqe));
> +  if (rc)
> +    {
> +      return -1;
> +    }
> +
> +  rc = nvme_init_sq (ctrl, &ctrl->admin_sq, 0,
> +                     GRUB_NVME_PAGE_SIZE / sizeof (struct grub_nvme_sqe),
> +                     &ctrl->admin_cq);
> +  if (rc)
> +    {
> +      goto err_destroy_admin_cq;
> +    }
> +
> +  ctrl->reg->aqa = ctrl->admin_cq.common.mask << 16
> +    | ctrl->admin_sq.common.mask;
> +
> +  ctrl->reg->asq = (grub_uint32_t) ctrl->admin_sq.sqe;
> +  ctrl->reg->acq = (grub_uint32_t) ctrl->admin_cq.cqe;
> +
> +  grub_dprintf ("nvme", " admin submission queue: %p\n", ctrl->admin_sq.sqe);
> +  grub_dprintf ("nvme", " admin completion queue: %p\n", ctrl->admin_cq.cqe);
> +
> +  ctrl->reg->cc = GRUB_NVME_CC_EN | (GRUB_NVME_CQE_SIZE_LOG << 20)
> +    | (GRUB_NVME_SQE_SIZE_LOG << 16 /* IOSQES */ );
> +
> +  if (nvme_wait_csts_rdy (ctrl, 1))
> +    {
> +      grub_dprintf ("nvme", "NVMe fatal error while enabling controller\n");
> +      goto err_destroy_admin_sq;
> +    }
> +
> +  /* The admin queue is set up and the controller is ready. Let's figure out
> +     what namespaces we have. */
> +
> +  struct grub_nvme_identify_ctrl *identify = nvme_admin_identify_ctrl (ctrl);
> +
> +  if (!identify)
> +    {
> +      grub_dprintf ("nvme", "NVMe couldn't identify controller.\n");
> +      goto err_destroy_admin_sq;
> +    }
> +
> +  grub_dprintf ("nvme", "NVMe has %u namespace%s.\n",
> +                identify->nn, (identify->nn == 1) ? "" : "s");
> +
> +  ctrl->ns_count = identify->nn;
> +  grub_uint8_t mdts = identify->mdts;
> +  grub_free (identify);
> +
> +  if ((ctrl->ns_count == 0) || nvme_create_io_queues (ctrl))
> +    {
> +      /* No point to continue, if the controller says it doesn't have
> +         namespaces or we couldn't create I/O queues. */
> +      goto err_destroy_admin_sq;
> +    }
> +
> +  /* Give the controller a global number */
> +  ctrl->ctrlnum = grub_nvme_ctrlcnt++;
> +
> +  /* Populate namespace IDs */
> +  for (grub_uint32_t ns_idx = 0; ns_idx < ctrl->ns_count; ns_idx++)
> +    {
> +      nvme_probe_ns (ctrl, ns_idx, mdts);
> +    }
> +
> +  grub_dprintf ("nvme", "NVMe initialization complete!\n");
> +  return 0;
> +
> +err_destroy_admin_sq:
> +  nvme_destroy_sq (&ctrl->admin_sq);
> +err_destroy_admin_cq:
> +  nvme_destroy_cq (&ctrl->admin_cq);
> +  return -1;
> +}
> +
> +static int
> +grub_nvme_pci_probe (grub_pci_device_t dev, grub_pci_id_t pciid
> +                     __attribute__((unused)), void *data
> +                     __attribute__((unused)))
> +{
> +  grub_pci_address_t addr;
> +  grub_uint32_t class, bar, version;
> +  struct grub_nvme_reg volatile *reg;
> +
> +  class = grub_pci_read (grub_pci_make_address (dev, GRUB_PCI_REG_CLASS));
> +  if (class >> 16 != 0x0108)
> +    return 0;
> +  if ((class >> 8 & 0xff) != 2)
> +    { /* as of NVM 1.0e */
> +      grub_dprintf ("nvme", "Found incompatible NVMe: prog-if=%02x\n",
> +                    class >> 8 & 0xff);
> +      return 0;
> +    }
> +
> +  bar =
> +    grub_pci_read (grub_pci_make_address (dev, GRUB_PCI_REG_ADDRESS_REG0));
> +  reg =
> +    grub_pci_device_map_range (dev, bar & GRUB_PCI_ADDR_MEM_MASK,
> +                               sizeof (*reg));
> +
> +  addr = grub_pci_make_address (dev, GRUB_PCI_REG_COMMAND);
> +  grub_pci_write_word (addr,
> +                       grub_pci_read_word (addr) |
> +                       GRUB_PCI_COMMAND_MEM_ENABLED);
> +
> +  version = reg->vs;
> +  grub_dprintf ("nvme", "Found NVMe controller with version %u.%u.%u.\n",
> +                version >> 16, (version >> 8) & 0xFF, version & 0xFF);
> +  grub_dprintf ("nvme", " Capabilities %016llx\n", reg->cap);
> +
> +  if (~reg->cap & GRUB_NVME_CAP_CSS_NVME)
> +    {
> +      grub_dprintf ("nvme",
> +                    "Controller doesn't speak NVMe command set. Skipping.\n");
> +      goto err;
> +    }
> +
> +  struct grub_nvme_ctrl *ctrl = grub_malloc (sizeof (*ctrl));
> +  if (!ctrl)
> +    goto err;
> +
> +  grub_memset (ctrl, 0, sizeof (*ctrl));
> +
> +  ctrl->reg = reg;
> +  ctrl->pci = dev;
> +
> +  if (grub_nvme_controller_enable (ctrl))
> +    goto err_free_ctrl;
> +
> +  return 0;
> +
> +err_free_ctrl:
> +  grub_free (ctrl);
> +err:
> +  grub_dprintf ("nvme", "Failed to enable NVMe controller.\n");
> +  return 0;
> +}
> +
> +static int
> +grub_nvme_iterate (grub_disk_dev_iterate_hook_t hook, void *hook_data,
> +                   grub_disk_pull_t pull)
> +{
> +  struct grub_nvme_namespace *ns;
> +
> +  if (pull != GRUB_DISK_PULL_NONE)
> +    return 0;
> +
> +  FOR_LIST_ELEMENTS (ns, grub_nvme_namespaces)
> +    if (hook (ns->devname, hook_data))
> +      return 1;
> +
> +  return 0;
> +}
> +
> +static grub_err_t
> +grub_nvme_open (const char *name __attribute ((unused)),
> +                grub_disk_t disk __attribute ((unused)))
> +{
> +  struct grub_nvme_namespace *ns;
> +
> +  FOR_LIST_ELEMENTS (ns, grub_nvme_namespaces)
> +    if (grub_strcmp (ns->devname, name) == 0)
> +      break;
> +
> +  if (!ns)
> +    return grub_error (GRUB_ERR_UNKNOWN_DEVICE, "can't open device");
> +
> +  disk->total_sectors = ns->lba_count;
> +  disk->max_agglomerate = ns->max_req_size;
> +
> +  disk->id = ns->nsnum; /* global id of the namespace */
> +
> +  disk->data = ns;
> +
> +  return 0;
> +}
> +
> +static grub_err_t
> +nvme_readwrite (struct grub_nvme_namespace *ns, grub_disk_addr_t sector,
> +                grub_size_t num_sectors, char *buf, int write)
> +{
> +  for (grub_size_t i = 0; i < num_sectors;)
> +    {
> +      grub_uint16_t blocks_remaining = num_sectors - i;
> +      char *op_buf = buf + i * ns->block_size;
> +      int blocks =
> +        nvme_prpl_xfer (ns, sector + i, op_buf, blocks_remaining, write);
> +      if (blocks < 0)
> +        return GRUB_ERR_IO;
> +      i += blocks;
> +    }
> +  return GRUB_ERR_NONE;
> +}
> +
> +static grub_err_t
> +grub_nvme_read (grub_disk_t disk, grub_disk_addr_t sector,
> +                grub_size_t num_sectors, char *buf)
> +{
> +  return nvme_readwrite ((struct grub_nvme_namespace *) disk->data, sector,
> +                         num_sectors, buf, 0);
> +}
> +
> +static grub_err_t
> +grub_nvme_write (grub_disk_t disk, grub_disk_addr_t sector,
> +                 grub_size_t num_sectors, const char *buf)
> +{
> +  return nvme_readwrite ((struct grub_nvme_namespace *) disk->data, sector,
> +                         num_sectors, (char *) buf, 1);
> +}
> +
> +static struct grub_disk_dev grub_nvme_dev = {
> +  .name = "nvme",
> +  .id = GRUB_DISK_DEVICE_NVME_ID,
> +  .disk_iterate = grub_nvme_iterate,
> +  .disk_open = grub_nvme_open,
> +  .disk_read = grub_nvme_read,
> +  .disk_write = grub_nvme_write,
> +  .next = 0
> +};
> +
> +GRUB_MOD_INIT (nvme)
> +{
> +  grub_stop_disk_firmware ();
> +  grub_pci_iterate (grub_nvme_pci_probe, NULL);
> +  grub_disk_dev_register (&grub_nvme_dev);
> +}
> +
> +GRUB_MOD_FINI (nvme)
> +{
> +  grub_disk_dev_unregister (&grub_nvme_dev);
> +}
> diff --git a/include/grub/disk.h b/include/grub/disk.h
> index fbf23df7f..186e76f0b 100644
> --- a/include/grub/disk.h
> +++ b/include/grub/disk.h
> @@ -52,6 +52,7 @@ enum grub_disk_dev_id
>      GRUB_DISK_DEVICE_UBOOTDISK_ID,
>      GRUB_DISK_DEVICE_XEN,
>      GRUB_DISK_DEVICE_OBDISK_ID,
> +    GRUB_DISK_DEVICE_NVME_ID
>    };
>
>  struct grub_disk;
> diff --git a/include/grub/nvme.h b/include/grub/nvme.h
> new file mode 100644
> index 000000000..5f66aa232
> --- /dev/null
> +++ b/include/grub/nvme.h
> @@ -0,0 +1,225 @@
> +/* nvme.h - NVMe data structures and constants */
> +/*
> + * Copyright 2017 Amazon.com, Inc. or its affiliates.
> + *
> + * This file may be distributed under the terms of the GNU LGPLv3 license.
> + */
> +
> +#ifndef GRUB_NVME_HEADER
> +#define GRUB_NVME_HEADER 1
> +
> +#include <grub/pci.h>
> +#include <grub/types.h>
> +
> +/* The register file of an NVMe host controller. This struct follows the
> +   naming scheme in the NVMe specification. */
> +struct grub_nvme_reg
> +{
> +  grub_uint64_t cap; /* controller capabilities */
> +  grub_uint32_t vs; /* version */
> +  grub_uint32_t intms; /* interrupt mask set */
> +  grub_uint32_t intmc; /* interrupt mask clear */
> +  grub_uint32_t cc; /* controller configuration */
> +  grub_uint32_t _res0;
> +  grub_uint32_t csts; /* controller status */
> +  grub_uint32_t _res1;
> +  grub_uint32_t aqa; /* admin queue attributes */
> +  grub_uint64_t asq; /* admin submission queue base address */
> +  grub_uint64_t acq; /* admin completion queue base address */
> +};
> +
> +/* Submission queue entry */
> +struct grub_nvme_sqe
> +{
> +  union
> +  {
> +    grub_uint32_t dword[16];
> +    struct
> +    {
> +      grub_uint32_t cdw0; /* Command DWORD 0 */
> +      grub_uint32_t nsid; /* Namespace ID */
> +      grub_uint64_t _res0;
> +      grub_uint64_t mptr; /* metadata ptr */
> +
> +      grub_uint64_t dptr_prp1;
> +      grub_uint64_t dptr_prp2;
> +    };
> +  };
> +};
> +
> +/* Completion queue entry */
> +struct grub_nvme_cqe
> +{
> +  union
> +  {
> +    grub_uint32_t dword[4];
> +    struct
> +    {
> +      grub_uint32_t cdw0;
> +      grub_uint32_t _res0;
> +      grub_uint16_t sq_head;
> +      grub_uint16_t sq_id;
> +      grub_uint16_t cid;
> +      grub_uint16_t status;
> +    };
> +  };
> +};
> +
> +/* The common part of every submission or completion queue. */
> +struct grub_nvme_queue
> +{
> +  grub_uint32_t *dbl; /* doorbell */
> +  grub_uint16_t mask; /* length - 1 */
> +};
> +
> +struct grub_nvme_cq
> +{
> +  struct grub_nvme_queue common;
> +  struct grub_nvme_cqe *cqe;
> +
> +  /* We have read up to (but not including) this entry in the queue. */
> +  grub_uint16_t head;
> +
> +  /* The current phase bit the controller uses to indicate that it has written
> +     a new entry. This is inverted after each wrap. */
> +  unsigned phase:1;
> +};
> +
> +struct grub_nvme_sq
> +{
> +  struct grub_nvme_queue common;
> +  struct grub_nvme_sqe *sqe;
> +
> +  /* Corresponding completion queue. We only support a single SQ per CQ. */
> +  struct grub_nvme_cq *cq;
> +
> +  /* The last entry the controller has fetched. */
> +  grub_uint16_t head;
> +
> +  /* The last value we have written to the tail doorbell. */
> +  grub_uint16_t tail;
> +};
> +
> +struct grub_nvme_ctrl
> +{
> +  grub_pci_device_t pci;
> +  struct grub_nvme_reg volatile *reg;
> +
> +  grub_uint32_t ctrlnum;
> +
> +  grub_uint32_t doorbell_stride; /* in bytes */
> +
> +  struct grub_nvme_sq admin_sq;
> +  struct grub_nvme_cq admin_cq;
> +
> +  grub_uint32_t ns_count;
> +
> +  struct grub_nvme_sq io_sq;
> +  struct grub_nvme_cq io_cq;
> +};
> +
> +struct grub_nvme_namespace
> +{
> +  struct grub_nvme_namespace *next;
> +  struct grub_nvme_namespace **prev;
> +
> +  char *devname;
> +
> +  grub_uint32_t nsnum;
> +
> +  struct grub_nvme_ctrl *ctrl;
> +
> +  grub_uint32_t ns_id;
> +
> +  grub_uint64_t lba_count; /* The total number of sectors. */
> +
> +  grub_uint32_t block_size;
> +  grub_uint32_t metadata_size;
> +  grub_uint32_t max_req_size;
> +};
> +
> +/* Data structures for NVMe admin identify commands */
> +
> +struct grub_nvme_identify_ctrl
> +{
> +  grub_uint16_t vid;
> +  grub_uint16_t ssvid;
> +  char sn[20];
> +  char mn[40];
> +  char fr[8];
> +
> +  grub_uint8_t rab;
> +  grub_uint8_t ieee[3];
> +  grub_uint8_t cmic;
> +  grub_uint8_t mdts;
> +
> +  char _boring[516 - 78];
> +
> +  grub_uint32_t nn; /* number of namespaces */
> +};
> +
> +struct grub_nvme_identify_ns_list
> +{
> +  grub_uint32_t ns_id[1024];
> +};
> +
> +struct grub_nvme_lba_format
> +{
> +  grub_uint16_t ms;
> +  grub_uint8_t lbads;
> +  grub_uint8_t rp;
> +};
> +
> +struct grub_nvme_identify_ns
> +{
> +  grub_uint64_t nsze;
> +  grub_uint64_t ncap;
> +  grub_uint64_t nuse;
> +  grub_uint8_t nsfeat;
> +  grub_uint8_t nlbaf;
> +  grub_uint8_t flbas;
> +
> +  char _boring[128 - 27];
> +
> +  struct grub_nvme_lba_format lbaf[16];
> +};
> +
> +union nvme_identify
> +{
> +  struct grub_nvme_identify_ns ns;
> +  struct grub_nvme_identify_ctrl ctrl;
> +  struct grub_nvme_identify_ns_list ns_list;
> +};
> +
> +/* NVMe constants */
> +
> +#define GRUB_NVME_CAP_CSS_NVME (1ULL << 37)
> +
> +#define GRUB_NVME_CSTS_FATAL (1U << 1)
> +#define GRUB_NVME_CSTS_RDY (1U << 0)
> +
> +#define GRUB_NVME_CC_EN (1U << 0)
> +
> +#define GRUB_NVME_SQE_OPC_ADMIN_CREATE_IO_SQ 1U
> +#define GRUB_NVME_SQE_OPC_ADMIN_CREATE_IO_CQ 5U
> +#define GRUB_NVME_SQE_OPC_ADMIN_IDENTIFY 6U
> +
> +#define GRUB_NVME_SQE_OPC_IO_WRITE 1U
> +#define GRUB_NVME_SQE_OPC_IO_READ 2U
> +
> +#define GRUB_NVME_ADMIN_IDENTIFY_CNS_ID_NS 0U
> +#define GRUB_NVME_ADMIN_IDENTIFY_CNS_ID_CTRL 1U
> +#define GRUB_NVME_ADMIN_IDENTIFY_CNS_GET_NS_LIST 2U
> +
> +#define GRUB_NVME_CQE_DW3_P (1U << 16)
> +
> +#define GRUB_NVME_PAGE_SIZE 4096
> +#define GRUB_NVME_PAGE_MASK ~(GRUB_NVME_PAGE_SIZE - 1)
> +
> +/* Length for the queue entries. */
> +#define GRUB_NVME_SQE_SIZE_LOG 6
> +#define GRUB_NVME_CQE_SIZE_LOG 4
> +
> +#endif
> +
> +/* EOF */
> --
> 2.39.2
>
>
> _______________________________________________
> Grub-devel mailing list
> Grub-devel@gnu.org
> https://lists.gnu.org/mailman/listinfo/grub-devel
>
_______________________________________________
Grub-devel mailing list
Grub-devel@gnu.org
https://lists.gnu.org/mailman/listinfo/grub-devel