Thank you for your effort, but we already have an NVMe driver in the review
queue. See my email from the 16th of May.

Le dim. 13 oct. 2024, 22:38, <k...@mkukri.xyz> a écrit :

> From: Mate Kukri <k...@mkukri.xyz>
>
> It is based on existing SeaBIOS code, so the license is LGPLv3.
>
> Tested as a coreboot payload on the following targets:
> - QEMU Q35 with emulated NVMe disk
> - ASUS H610M-K with Intel 660p NVMe disk
>
> Signed-off-by: Mate Kukri <k...@mkukri.xyz>
> ---
>  Makefile.am                     |   2 +-
>  grub-core/Makefile.core.def     |   6 +
>  grub-core/commands/nativedisk.c |   1 +
>  grub-core/disk/nvme.c           | 861 ++++++++++++++++++++++++++++++++
>  include/grub/disk.h             |   1 +
>  include/grub/nvme.h             | 225 +++++++++
>  6 files changed, 1095 insertions(+), 1 deletion(-)
>  create mode 100644 grub-core/disk/nvme.c
>  create mode 100644 include/grub/nvme.h
>
> diff --git a/Makefile.am b/Makefile.am
> index 43635d5ff..2c86dbbf6 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -434,7 +434,7 @@ if COND_i386_coreboot
>  FS_PAYLOAD_MODULES ?= $(shell cat grub-core/fs.lst)
>  default_payload.elf: grub-mkstandalone grub-mkimage FORCE
>         test -f $@ && rm $@ || true
> -       pkgdatadir=. ./grub-mkstandalone --grub-mkimage=./grub-mkimage -O
> i386-coreboot -o $@ --modules='ahci pata ehci uhci ohci usb_keyboard usbms
> part_msdos ext2 fat at_keyboard part_gpt usbserial_usbdebug cbfs'
> --install-modules='ls linux search configfile normal cbtime cbls memrw iorw
> minicmd lsmmap lspci halt reboot hexdump pcidump regexp setpci lsacpi chain
> test serial multiboot cbmemc linux16 gzio echo help syslinuxcfg xnu
> $(FS_PAYLOAD_MODULES) password_pbkdf2 $(EXTRA_PAYLOAD_MODULES)' --fonts=
> --themes= --locales= -d grub-core/
> /boot/grub/grub.cfg=$(srcdir)/coreboot.cfg
> +       pkgdatadir=. ./grub-mkstandalone --grub-mkimage=./grub-mkimage -O
> i386-coreboot -o $@ --modules='ahci pata nvme ehci uhci ohci usb_keyboard
> usbms part_msdos ext2 fat at_keyboard part_gpt usbserial_usbdebug cbfs'
> --install-modules='ls linux search configfile normal cbtime cbls memrw iorw
> minicmd lsmmap lspci halt reboot hexdump pcidump regexp setpci lsacpi chain
> test serial multiboot cbmemc linux16 gzio echo help syslinuxcfg xnu
> $(FS_PAYLOAD_MODULES) password_pbkdf2 $(EXTRA_PAYLOAD_MODULES)' --fonts=
> --themes= --locales= -d grub-core/
> /boot/grub/grub.cfg=$(srcdir)/coreboot.cfg
>  endif
>
>  endif
> diff --git a/grub-core/Makefile.core.def b/grub-core/Makefile.core.def
> index 063ef5dd7..7b4cf972a 100644
> --- a/grub-core/Makefile.core.def
> +++ b/grub-core/Makefile.core.def
> @@ -2606,3 +2606,9 @@ module = {
>    enable = efi;
>    depends = part_gpt;
>  };
> +
> +module = {
> +  name = nvme;
> +  common = disk/nvme.c;
> +  enable = pci;
> +};
> diff --git a/grub-core/commands/nativedisk.c
> b/grub-core/commands/nativedisk.c
> index 580c8d3b0..a2c766fbd 100644
> --- a/grub-core/commands/nativedisk.c
> +++ b/grub-core/commands/nativedisk.c
> @@ -78,6 +78,7 @@ get_uuid (const char *name, char **uuid, int getnative)
>      case GRUB_DISK_DEVICE_ATA_ID:
>      case GRUB_DISK_DEVICE_SCSI_ID:
>      case GRUB_DISK_DEVICE_XEN:
> +    case GRUB_DISK_DEVICE_NVME_ID:
>        if (getnative)
>         break;
>        /* FALLTHROUGH */
> diff --git a/grub-core/disk/nvme.c b/grub-core/disk/nvme.c
> new file mode 100644
> index 000000000..b6a886464
> --- /dev/null
> +++ b/grub-core/disk/nvme.c
> @@ -0,0 +1,861 @@
> +/* nvme.c - NVMe support. */
> +/*
> + * Based on SeaBIOS NVMe driver, ported to GRUB2 by Mate Kukri
> + *
> + * Copyright 2017 Amazon.com, Inc. or its affiliates.
> + * Copyright 2024 Mate Kukri <k...@mkukri.xyz>
> + *
> + * This file may be distributed under the terms of the GNU LGPLv3 license.
> + */
> +
> +#include <grub/disk.h>
> +#include <grub/dl.h>
> +#include <grub/nvme.h>
> +#include <grub/pci.h>
> +#include <grub/time.h>
> +
> +GRUB_MOD_LICENSE ("GPLv3");    /* LGPLv3 is GPLv3 compatible */
> +
> +static grub_uint32_t grub_nvme_ctrlcnt;
> +static grub_uint32_t grub_nvme_nscnt;
> +
> +static struct grub_nvme_namespace *grub_nvme_namespaces;
> +
> +/* Page aligned "dma bounce buffer" of size GRUB_NVME_PAGE_SIZE */
> +static void *nvme_dma_buffer;
> +
> +static void *
> +zalloc_page_aligned (grub_uint32_t size)
> +{
> +  void *res = grub_memalign (GRUB_NVME_PAGE_SIZE, size);
> +  if (res)
> +    grub_memset (res, 0, size);
> +  return res;
> +}
> +
> +static void
> +nvme_init_queue_common (struct grub_nvme_ctrl *ctrl,
> +                       struct grub_nvme_queue *q, grub_uint16_t q_idx,
> +                       grub_uint16_t length)
> +{
> +  grub_memset (q, 0, sizeof (*q));
> +  q->dbl =
> +    (grub_uint32_t *) ((char *) ctrl->reg + 0x1000 +
> +                      q_idx * ctrl->doorbell_stride);
> +  grub_dprintf ("nvme", " q %p q_idx %u dbl %p\n", q, q_idx, q->dbl);
> +  q->mask = length - 1;
> +}
> +
> +static int
> +nvme_init_sq (struct grub_nvme_ctrl *ctrl, struct grub_nvme_sq *sq,
> +             grub_uint16_t q_idx, grub_uint16_t length,
> +             struct grub_nvme_cq *cq)
> +{
> +  nvme_init_queue_common (ctrl, &sq->common, q_idx, length);
> +  sq->sqe = zalloc_page_aligned (sizeof (*sq->sqe) * length);
> +
> +  if (!sq->sqe)
> +    {
> +      return -1;
> +    }
> +
> +  grub_dprintf ("nvme", "sq %p q_idx %u sqe %p\n", sq, q_idx, sq->sqe);
> +  sq->cq = cq;
> +  sq->head = 0;
> +  sq->tail = 0;
> +
> +  return 0;
> +}
> +
> +static int
> +nvme_init_cq (struct grub_nvme_ctrl *ctrl, struct grub_nvme_cq *cq,
> +             grub_uint16_t q_idx, grub_uint16_t length)
> +{
> +  nvme_init_queue_common (ctrl, &cq->common, q_idx, length);
> +  cq->cqe = zalloc_page_aligned (sizeof (*cq->cqe) * length);
> +  if (!cq->cqe)
> +    {
> +      return -1;
> +    }
> +
> +  cq->head = 0;
> +
> +  /* All CQE phase bits are initialized to zero. This means initially we
> wait
> +     for the host controller to set these to 1. */
> +  cq->phase = 1;
> +
> +  return 0;
> +}
> +
> +static int
> +nvme_poll_cq (struct grub_nvme_cq *cq)
> +{
> +  grub_uint32_t dw3 = *(volatile grub_uint32_t *)
> &cq->cqe[cq->head].dword[3];
> +  return (!!(dw3 & GRUB_NVME_CQE_DW3_P) == cq->phase);
> +}
> +
> +static int
> +nvme_is_cqe_success (const struct grub_nvme_cqe *cqe)
> +{
> +  return ((cqe->status >> 1) & 0xFF) == 0;
> +}
> +
> +static struct grub_nvme_cqe
> +nvme_error_cqe (void)
> +{
> +  struct grub_nvme_cqe r;
> +
> +  /* 0xFF is a vendor specific status code != success. Should be okay for
> +     indicating failure. */
> +  grub_memset (&r, 0xFF, sizeof (r));
> +  return r;
> +}
> +
> +static struct grub_nvme_cqe
> +nvme_consume_cqe (struct grub_nvme_sq *sq)
> +{
> +  struct grub_nvme_cq *cq = sq->cq;
> +
> +  if (!nvme_poll_cq (cq))
> +    {
> +      /* Cannot consume a completion queue entry, if there is none ready.
> */
> +      return nvme_error_cqe ();
> +    }
> +
> +  struct grub_nvme_cqe *cqe = &cq->cqe[cq->head];
> +  grub_uint16_t cq_next_head = (cq->head + 1) & cq->common.mask;
> +  grub_dprintf ("nvme", "cq %p head %u -> %u\n", cq, cq->head,
> cq_next_head);
> +  if (cq_next_head < cq->head)
> +    {
> +      grub_dprintf ("nvme", "cq %p wrap\n", cq);
> +      cq->phase = ~cq->phase;
> +    }
> +  cq->head = cq_next_head;
> +
> +  /* Update the submission queue head. */
> +  if (cqe->sq_head != sq->head)
> +    {
> +      sq->head = cqe->sq_head;
> +      grub_dprintf ("nvme", "sq %p advanced to %u\n", sq, cqe->sq_head);
> +    }
> +
> +  /* Tell the controller that we consumed the completion. */
> +  *(volatile grub_uint32_t *) cq->common.dbl = cq->head;
> +
> +  return *cqe;
> +}
> +
> +static struct grub_nvme_cqe
> +nvme_wait (struct grub_nvme_sq *sq)
> +{
> +  const grub_uint64_t limit_ms = grub_get_time_ms () + 5000;
> +
> +  while (!nvme_poll_cq (sq->cq))
> +    {
> +      if (limit_ms < grub_get_time_ms ())
> +       {
> +         grub_dprintf ("nvme", "NVME SQ wait timeout\n");
> +         return nvme_error_cqe ();
> +       }
> +    }
> +
> +  return nvme_consume_cqe (sq);
> +}
> +
> +/* Returns the next submission queue entry (or NULL if the queue is
> full). It
> +   also fills out Command Dword 0 and clears the rest. */
> +static struct grub_nvme_sqe *
> +nvme_get_next_sqe (struct grub_nvme_sq *sq, grub_uint8_t opc, void
> *metadata,
> +                  void *data, void *data2)
> +{
> +  if (((sq->head + 1) & sq->common.mask) == sq->tail)
> +    {
> +      grub_dprintf ("nvme", "submission queue is full\n");
> +      return NULL;
> +    }
> +
> +  struct grub_nvme_sqe *sqe = &sq->sqe[sq->tail];
> +  grub_dprintf ("nvme", "sq %p next_sqe %u\n", sq, sq->tail);
> +
> +  grub_memset (sqe, 0, sizeof (*sqe));
> +  sqe->cdw0 = opc | (sq->tail << 16 /* CID */ );
> +  sqe->mptr = (grub_uint32_t) metadata;
> +  sqe->dptr_prp1 = (grub_uint32_t) data;
> +  sqe->dptr_prp2 = (grub_uint32_t) data2;
> +
> +  return sqe;
> +}
> +
> +/* Call this after you've filled out an sqe that you've got from
> nvme_get_next_sqe. */
> +static void
> +nvme_commit_sqe (struct grub_nvme_sq *sq)
> +{
> +  grub_dprintf ("nvme", "sq %p commit_sqe %u\n", sq, sq->tail);
> +  sq->tail = (sq->tail + 1) & sq->common.mask;
> +  *(volatile grub_uint32_t *) sq->common.dbl = sq->tail;
> +}
> +
> +/* Perform an identify command on the admin queue and return the resulting
> +   buffer. This may be a NULL pointer, if something failed. This function
> +   cannot be used after initialization, because it uses buffers in tmp
> zone. */
> +static union nvme_identify *
> +nvme_admin_identify (struct grub_nvme_ctrl *ctrl, grub_uint8_t cns,
> +                    grub_uint32_t nsid)
> +{
> +  union nvme_identify *identify_buf = zalloc_page_aligned (4096);
> +  if (!identify_buf)
> +    return NULL;
> +
> +  struct grub_nvme_sqe *cmd_identify;
> +  cmd_identify = nvme_get_next_sqe (&ctrl->admin_sq,
> +                                   GRUB_NVME_SQE_OPC_ADMIN_IDENTIFY, NULL,
> +                                   identify_buf, NULL);
> +  if (!cmd_identify)
> +    goto error;
> +
> +  cmd_identify->nsid = nsid;
> +  cmd_identify->dword[10] = cns;
> +
> +  nvme_commit_sqe (&ctrl->admin_sq);
> +
> +  struct grub_nvme_cqe cqe = nvme_wait (&ctrl->admin_sq);
> +
> +  if (!nvme_is_cqe_success (&cqe))
> +    {
> +      goto error;
> +    }
> +
> +  return identify_buf;
> +error:
> +  grub_free (identify_buf);
> +  return NULL;
> +}
> +
> +static struct grub_nvme_identify_ctrl *
> +nvme_admin_identify_ctrl (struct grub_nvme_ctrl *ctrl)
> +{
> +  return &nvme_admin_identify (ctrl, GRUB_NVME_ADMIN_IDENTIFY_CNS_ID_CTRL,
> +                              0)->ctrl;
> +}
> +
> +static struct grub_nvme_identify_ns *
> +nvme_admin_identify_ns (struct grub_nvme_ctrl *ctrl, grub_uint32_t ns_id)
> +{
> +  return &nvme_admin_identify (ctrl, GRUB_NVME_ADMIN_IDENTIFY_CNS_ID_NS,
> +                              ns_id)->ns;
> +}
> +
> +static void
> +nvme_probe_ns (struct grub_nvme_ctrl *ctrl, grub_uint32_t ns_idx,
> +              grub_uint8_t mdts)
> +{
> +  grub_uint32_t ns_id = ns_idx + 1;
> +
> +  struct grub_nvme_identify_ns *id = nvme_admin_identify_ns (ctrl, ns_id);
> +  if (!id)
> +    {
> +      grub_dprintf ("nvme", "NVMe couldn't identify namespace %u.\n",
> ns_id);
> +      goto free_buffer;
> +    }
> +
> +  grub_uint8_t current_lba_format = id->flbas & 0xF;
> +  if (current_lba_format > id->nlbaf)
> +    {
> +      grub_dprintf ("nvme",
> +                   "NVMe NS %u: current LBA format %u is beyond what the "
> +                   " namespace supports (%u)?\n", ns_id,
> current_lba_format,
> +                   id->nlbaf + 1);
> +      goto free_buffer;
> +    }
> +
> +  if (!id->nsze)
> +    {
> +      grub_dprintf ("nvme", "NVMe NS %u is inactive.\n", ns_id);
> +      goto free_buffer;
> +    }
> +
> +  if (!nvme_dma_buffer)
> +    {
> +      nvme_dma_buffer = zalloc_page_aligned (GRUB_NVME_PAGE_SIZE);
> +      if (!nvme_dma_buffer)
> +       {
> +         goto free_buffer;
> +       }
> +    }
> +
> +  struct grub_nvme_namespace *ns = grub_malloc (sizeof (*ns));
> +  if (!ns)
> +    {
> +      goto free_buffer;
> +    }
> +  grub_memset (ns, 0, sizeof (*ns));
> +  ns->ctrl = ctrl;
> +  ns->ns_id = ns_id;
> +  ns->lba_count = id->nsze;
> +
> +  struct grub_nvme_lba_format *fmt = &id->lbaf[current_lba_format];
> +
> +  ns->block_size = 1U << fmt->lbads;
> +  ns->metadata_size = fmt->ms;
> +
> +  if (ns->block_size > GRUB_NVME_PAGE_SIZE)
> +    {
> +      /* If we see devices that trigger this path, we need to increase our
> +         buffer size. */
> +      grub_free (ns);
> +      goto free_buffer;
> +    }
> +
> +  if (mdts)
> +    {
> +      ns->max_req_size =
> +       ((1U << mdts) * GRUB_NVME_PAGE_SIZE) / ns->block_size;
> +      grub_dprintf ("nvme", "NVME NS %u max request size: %d sectors\n",
> +                   ns_id, ns->max_req_size);
> +    }
> +  else
> +    {
> +      ns->max_req_size = -1U;
> +    }
> +
> +  ns->devname = grub_xasprintf ("nvme%un%u", ctrl->ctrlnum, ns_id);
> +  ns->nsnum = grub_nvme_nscnt++;
> +
> +  grub_list_push (GRUB_AS_LIST_P (&grub_nvme_namespaces), GRUB_AS_LIST
> (ns));
> +
> +free_buffer:
> +  grub_free (id);
> +}
> +
> +
> +/* Release memory allocated for a completion queue */
> +static void
> +nvme_destroy_cq (struct grub_nvme_cq *cq)
> +{
> +  grub_free (cq->cqe);
> +  cq->cqe = NULL;
> +}
> +
> +/* Release memory allocated for a submission queue */
> +static void
> +nvme_destroy_sq (struct grub_nvme_sq *sq)
> +{
> +  grub_free (sq->sqe);
> +  sq->sqe = NULL;
> +}
> +
> +/* Returns 0 on success. */
> +static int
> +nvme_create_io_cq (struct grub_nvme_ctrl *ctrl, struct grub_nvme_cq *cq,
> +                  grub_uint16_t q_idx)
> +{
> +  int rc;
> +  struct grub_nvme_sqe *cmd_create_cq;
> +  grub_uint32_t length = 1 + (ctrl->reg->cap & 0xffff);
> +  if (length > GRUB_NVME_PAGE_SIZE / sizeof (struct grub_nvme_cqe))
> +    length = GRUB_NVME_PAGE_SIZE / sizeof (struct grub_nvme_cqe);
> +
> +  rc = nvme_init_cq (ctrl, cq, q_idx, length);
> +  if (rc)
> +    {
> +      goto err;
> +    }
> +
> +  cmd_create_cq = nvme_get_next_sqe (&ctrl->admin_sq,
> +                                    GRUB_NVME_SQE_OPC_ADMIN_CREATE_IO_CQ,
> +                                    NULL, cq->cqe, NULL);
> +  if (!cmd_create_cq)
> +    {
> +      goto err_destroy_cq;
> +    }
> +
> +  cmd_create_cq->dword[10] = (cq->common.mask << 16) | (q_idx >> 1);
> +  cmd_create_cq->dword[11] = 1 /* physically contiguous */ ;
> +
> +  nvme_commit_sqe (&ctrl->admin_sq);
> +
> +  struct grub_nvme_cqe cqe = nvme_wait (&ctrl->admin_sq);
> +
> +  if (!nvme_is_cqe_success (&cqe))
> +    {
> +      grub_dprintf ("nvme", "create io cq failed: %08x %08x %08x %08x\n",
> +                   cqe.dword[0], cqe.dword[1], cqe.dword[2],
> cqe.dword[3]);
> +
> +      goto err_destroy_cq;
> +    }
> +
> +  return 0;
> +
> +err_destroy_cq:
> +  nvme_destroy_cq (cq);
> +err:
> +  return -1;
> +}
> +
> +/* Returns 0 on success. */
> +static int
> +nvme_create_io_sq (struct grub_nvme_ctrl *ctrl, struct grub_nvme_sq *sq,
> +                  grub_uint16_t q_idx, struct grub_nvme_cq *cq)
> +{
> +  int rc;
> +  struct grub_nvme_sqe *cmd_create_sq;
> +  grub_uint32_t length = 1 + (ctrl->reg->cap & 0xffff);
> +  if (length > GRUB_NVME_PAGE_SIZE / sizeof (struct grub_nvme_cqe))
> +    length = GRUB_NVME_PAGE_SIZE / sizeof (struct grub_nvme_cqe);
> +
> +  rc = nvme_init_sq (ctrl, sq, q_idx, length, cq);
> +  if (rc)
> +    {
> +      goto err;
> +    }
> +
> +  cmd_create_sq = nvme_get_next_sqe (&ctrl->admin_sq,
> +                                    GRUB_NVME_SQE_OPC_ADMIN_CREATE_IO_SQ,
> +                                    NULL, sq->sqe, NULL);
> +  if (!cmd_create_sq)
> +    {
> +      goto err_destroy_sq;
> +    }
> +
> +  cmd_create_sq->dword[10] = (sq->common.mask << 16) | (q_idx >> 1);
> +  cmd_create_sq->dword[11] = (q_idx >> 1) << 16 | 1 /* contiguous */ ;
> +  grub_dprintf ("nvme", "sq %p create dword10 %08x dword11 %08x\n", sq,
> +               cmd_create_sq->dword[10], cmd_create_sq->dword[11]);
> +
> +  nvme_commit_sqe (&ctrl->admin_sq);
> +
> +  struct grub_nvme_cqe cqe = nvme_wait (&ctrl->admin_sq);
> +
> +  if (!nvme_is_cqe_success (&cqe))
> +    {
> +      grub_dprintf ("nvme", "create io sq failed: %08x %08x %08x %08x\n",
> +                   cqe.dword[0], cqe.dword[1], cqe.dword[2],
> cqe.dword[3]);
> +      goto err_destroy_sq;
> +    }
> +
> +  return 0;
> +
> +err_destroy_sq:
> +  nvme_destroy_sq (sq);
> +err:
> +  return -1;
> +}
> +
> +/* Reads count sectors into buf. The buffer cannot cross page boundaries.
> */
> +static int
> +nvme_io_xfer (struct grub_nvme_namespace *ns, grub_uint64_t lba, void
> *prp1,
> +             void *prp2, grub_uint16_t count, int write)
> +{
> +  if (((grub_uint32_t) prp1 & 0x3) || ((grub_uint32_t) prp2 & 0x3))
> +    {
> +      /* Buffer is misaligned */
> +      return -1;
> +    }
> +
> +  struct grub_nvme_sqe *io_read = nvme_get_next_sqe (&ns->ctrl->io_sq,
> +                                                    write ?
> +
> GRUB_NVME_SQE_OPC_IO_WRITE
> +                                                    :
> +
> GRUB_NVME_SQE_OPC_IO_READ,
> +                                                    NULL, prp1, prp2);
> +  io_read->nsid = ns->ns_id;
> +  io_read->dword[10] = (grub_uint32_t) lba;
> +  io_read->dword[11] = (grub_uint32_t) (lba >> 32);
> +  io_read->dword[12] = (1U << 31 /* limited retry */ ) | (count - 1);
> +
> +  nvme_commit_sqe (&ns->ctrl->io_sq);
> +
> +  struct grub_nvme_cqe cqe = nvme_wait (&ns->ctrl->io_sq);
> +
> +  if (!nvme_is_cqe_success (&cqe))
> +    {
> +      grub_dprintf ("nvme", "read io: %08x %08x %08x %08x\n",
> +                   cqe.dword[0], cqe.dword[1], cqe.dword[2],
> cqe.dword[3]);
> +
> +      return -1;
> +    }
> +
> +  grub_dprintf ("nvme", "ns %u %s lba %llu+%u\n", ns->ns_id,
> +               write ? "write" : "read", lba, count);
> +  return count;
> +}
> +
> +/* Transfer up to one page of data using the internal dma bounce buffer */
> +static int
> +nvme_bounce_xfer (struct grub_nvme_namespace *ns, grub_uint64_t lba,
> +                 void *buf, grub_uint16_t count, int write)
> +{
> +  grub_uint16_t const max_blocks = GRUB_NVME_PAGE_SIZE / ns->block_size;
> +  grub_uint16_t blocks = count < max_blocks ? count : max_blocks;
> +
> +  if (write)
> +    grub_memcpy (nvme_dma_buffer, buf, blocks * ns->block_size);
> +
> +  int res = nvme_io_xfer (ns, lba, nvme_dma_buffer, NULL, blocks, write);
> +
> +  if (!write && res >= 0)
> +    grub_memcpy (buf, nvme_dma_buffer, res * ns->block_size);
> +
> +  return res;
> +}
> +
> +#define GRUB_NVME_MAX_PRPL_ENTRIES 15  /* Allows requests up to 64kb */
> +
> +/* Transfer data using page list (if applicable) */
> +static int
> +nvme_prpl_xfer (struct grub_nvme_namespace *ns, grub_uint64_t lba, void
> *buf,
> +               grub_uint16_t count, int write)
> +{
> +  grub_uint32_t base = (long) buf;
> +  grub_int32_t size;
> +
> +  if (count > ns->max_req_size)
> +    count = ns->max_req_size;
> +
> +  size = count * ns->block_size;
> +  /* Special case for transfers that fit into PRP1, but are unaligned */
> +  if (((size + (base & ~GRUB_NVME_PAGE_MASK)) <= GRUB_NVME_PAGE_SIZE))
> +    goto single;
> +
> +  /* Every request has to be page aligned */
> +  if (base & ~GRUB_NVME_PAGE_MASK)
> +    goto bounce;
> +
> +  /* Make sure a full block fits into the last chunk */
> +  if (size & (ns->block_size - 1ULL))
> +    goto bounce;
> +
> +  /* Build PRP list if we need to describe more than 2 pages */
> +  if ((ns->block_size * count) > (GRUB_NVME_PAGE_SIZE * 2))
> +    {
> +      grub_uint32_t prpl_len = 0;
> +      grub_uint64_t *prpl = nvme_dma_buffer;
> +      int first_page = 1;
> +      for (; size > 0;
> +          base += GRUB_NVME_PAGE_SIZE, size -= GRUB_NVME_PAGE_SIZE)
> +       {
> +         if (first_page)
> +           {
> +             /* First page is special */
> +             first_page = 0;
> +             continue;
> +           }
> +         if (prpl_len >= GRUB_NVME_MAX_PRPL_ENTRIES)
> +           goto bounce;
> +         prpl[prpl_len++] = base;
> +       }
> +      return nvme_io_xfer (ns, lba, buf, prpl, count, write);
> +    }
> +
> +  /* Directly embed the 2nd page if we only need 2 pages */
> +  if ((ns->block_size * count) > GRUB_NVME_PAGE_SIZE)
> +    return nvme_io_xfer (ns, lba, buf, (char *) buf + GRUB_NVME_PAGE_SIZE,
> +                        count, write);
> +
> +single:
> +  /* One page is enough, don't expose anything else */
> +  return nvme_io_xfer (ns, lba, buf, NULL, count, write);
> +
> +bounce:
> +  /* Use bounce buffer to make transfer */
> +  return nvme_bounce_xfer (ns, lba, buf, count, write);
> +}
> +
> +static int
> +nvme_create_io_queues (struct grub_nvme_ctrl *ctrl)
> +{
> +  if (nvme_create_io_cq (ctrl, &ctrl->io_cq, 3))
> +    goto err;
> +
> +  if (nvme_create_io_sq (ctrl, &ctrl->io_sq, 2, &ctrl->io_cq))
> +    goto err_free_cq;
> +
> +  return 0;
> +
> +err_free_cq:
> +  nvme_destroy_cq (&ctrl->io_cq);
> +err:
> +  return -1;
> +}
> +
> +/* Waits for CSTS.RDY to match rdy. Returns 0 on success. */
> +static int
> +nvme_wait_csts_rdy (struct grub_nvme_ctrl *ctrl, unsigned rdy)
> +{
> +  const grub_uint64_t limit_ms =
> +    grub_get_time_ms () + 500 /* ms */  * ((ctrl->reg->cap >> 24) &
> 0xFFU);
> +  grub_uint32_t csts;
> +
> +  while (rdy != ((csts = ctrl->reg->csts) & GRUB_NVME_CSTS_RDY))
> +    {
> +      if (csts & GRUB_NVME_CSTS_FATAL)
> +       {
> +         grub_dprintf ("nvme",
> +                       "NVMe fatal error during controller shutdown\n");
> +         return -1;
> +       }
> +
> +      if (limit_ms < grub_get_time_ms ())
> +       {
> +         grub_dprintf ("nvme", "NVMe timeout waiting for CSTS.RDY\n");
> +         return -1;
> +       }
> +    }
> +
> +  return 0;
> +}
> +
> +static int
> +grub_nvme_controller_enable (struct grub_nvme_ctrl *ctrl)
> +{
> +  grub_pci_address_t addr;
> +  int rc;
> +
> +  addr = grub_pci_make_address (ctrl->pci, GRUB_PCI_REG_COMMAND);
> +  grub_pci_write_word (addr,
> +                      grub_pci_read_word (addr) |
> +                      GRUB_PCI_COMMAND_BUS_MASTER);
> +
> +  /* Turn the controller off. */
> +  ctrl->reg->cc = 0;
> +  if (nvme_wait_csts_rdy (ctrl, 0))
> +    {
> +      grub_dprintf ("nvme", "NVMe fatal error during controller
> shutdown\n");
> +      return -1;
> +    }
> +
> +  ctrl->doorbell_stride = 4U << ((ctrl->reg->cap >> 32) & 0xF);
> +
> +  rc = nvme_init_cq (ctrl, &ctrl->admin_cq, 1,
> +                    GRUB_NVME_PAGE_SIZE / sizeof (struct grub_nvme_cqe));
> +  if (rc)
> +    {
> +      return -1;
> +    }
> +
> +  rc = nvme_init_sq (ctrl, &ctrl->admin_sq, 0,
> +                    GRUB_NVME_PAGE_SIZE / sizeof (struct grub_nvme_sqe),
> +                    &ctrl->admin_cq);
> +  if (rc)
> +    {
> +      goto err_destroy_admin_cq;
> +    }
> +
> +  ctrl->reg->aqa = ctrl->admin_cq.common.mask << 16
> +    | ctrl->admin_sq.common.mask;
> +
> +  ctrl->reg->asq = (grub_uint32_t) ctrl->admin_sq.sqe;
> +  ctrl->reg->acq = (grub_uint32_t) ctrl->admin_cq.cqe;
> +
> +  grub_dprintf ("nvme", "  admin submission queue: %p\n",
> ctrl->admin_sq.sqe);
> +  grub_dprintf ("nvme", "  admin completion queue: %p\n",
> ctrl->admin_cq.cqe);
> +
> +  ctrl->reg->cc = GRUB_NVME_CC_EN | (GRUB_NVME_CQE_SIZE_LOG << 20)
> +    | (GRUB_NVME_SQE_SIZE_LOG << 16 /* IOSQES */ );
> +
> +  if (nvme_wait_csts_rdy (ctrl, 1))
> +    {
> +      grub_dprintf ("nvme", "NVMe fatal error while enabling
> controller\n");
> +      goto err_destroy_admin_sq;
> +    }
> +
> +  /* The admin queue is set up and the controller is ready. Let's figure
> out
> +     what namespaces we have. */
> +
> +  struct grub_nvme_identify_ctrl *identify = nvme_admin_identify_ctrl
> (ctrl);
> +
> +  if (!identify)
> +    {
> +      grub_dprintf ("nvme", "NVMe couldn't identify controller.\n");
> +      goto err_destroy_admin_sq;
> +    }
> +
> +  grub_dprintf ("nvme", "NVMe has %u namespace%s.\n",
> +               identify->nn, (identify->nn == 1) ? "" : "s");
> +
> +  ctrl->ns_count = identify->nn;
> +  grub_uint8_t mdts = identify->mdts;
> +  grub_free (identify);
> +
> +  if ((ctrl->ns_count == 0) || nvme_create_io_queues (ctrl))
> +    {
> +      /* No point to continue, if the controller says it doesn't have
> +         namespaces or we couldn't create I/O queues. */
> +      goto err_destroy_admin_sq;
> +    }
> +
> +  /* Give the controller a global number */
> +  ctrl->ctrlnum = grub_nvme_ctrlcnt++;
> +
> +  /* Populate namespace IDs */
> +  for (grub_uint32_t ns_idx = 0; ns_idx < ctrl->ns_count; ns_idx++)
> +    {
> +      nvme_probe_ns (ctrl, ns_idx, mdts);
> +    }
> +
> +  grub_dprintf ("nvme", "NVMe initialization complete!\n");
> +  return 0;
> +
> +err_destroy_admin_sq:
> +  nvme_destroy_sq (&ctrl->admin_sq);
> +err_destroy_admin_cq:
> +  nvme_destroy_cq (&ctrl->admin_cq);
> +  return -1;
> +}
> +
> +static int
> +grub_nvme_pci_probe (grub_pci_device_t dev, grub_pci_id_t pciid
> +                    __attribute__((unused)), void *data
> +                    __attribute__((unused)))
> +{
> +  grub_pci_address_t addr;
> +  grub_uint32_t class, bar, version;
> +  struct grub_nvme_reg volatile *reg;
> +
> +  class = grub_pci_read (grub_pci_make_address (dev, GRUB_PCI_REG_CLASS));
> +  if (class >> 16 != 0x0108)
> +    return 0;
> +  if ((class >> 8 & 0xff) != 2)
> +    {                          /* as of NVM 1.0e */
> +      grub_dprintf ("nvme", "Found incompatble NVMe: prog-if=%02x\n",
> +                   class >> 8 & 0xff);
> +      return 0;
> +    }
> +
> +  bar =
> +    grub_pci_read (grub_pci_make_address (dev,
> GRUB_PCI_REG_ADDRESS_REG0));
> +  reg =
> +    grub_pci_device_map_range (dev, bar & GRUB_PCI_ADDR_MEM_MASK,
> +                              sizeof (*reg));
> +
> +  addr = grub_pci_make_address (dev, GRUB_PCI_REG_COMMAND);
> +  grub_pci_write_word (addr,
> +                      grub_pci_read_word (addr) |
> +                      GRUB_PCI_COMMAND_MEM_ENABLED);
> +
> +  version = reg->vs;
> +  grub_dprintf ("nvme", "Found NVMe controller with version %u.%u.%u.\n",
> +               version >> 16, (version >> 8) & 0xFF, version & 0xFF);
> +  grub_dprintf ("nvme", "  Capabilities %016llx\n", reg->cap);
> +
> +  if (~reg->cap & GRUB_NVME_CAP_CSS_NVME)
> +    {
> +      grub_dprintf ("nvme",
> +                   "Controller doesn't speak NVMe command set.
> Skipping.\n");
> +      goto err;
> +    }
> +
> +  struct grub_nvme_ctrl *ctrl = grub_malloc (sizeof (*ctrl));
> +  if (!ctrl)
> +    goto err;
> +
> +  grub_memset (ctrl, 0, sizeof (*ctrl));
> +
> +  ctrl->reg = reg;
> +  ctrl->pci = dev;
> +
> +  if (grub_nvme_controller_enable (ctrl))
> +    goto err_free_ctrl;
> +
> +  return 0;
> +
> +err_free_ctrl:
> +  grub_free (ctrl);
> +err:
> +  grub_dprintf ("nvme", "Failed to enable NVMe controller.\n");
> +  return 0;
> +}
> +
> +static int
> +grub_nvme_iterate (grub_disk_dev_iterate_hook_t hook, void *hook_data,
> +                  grub_disk_pull_t pull)
> +{
> +  struct grub_nvme_namespace *ns;
> +
> +  if (pull != GRUB_DISK_PULL_NONE)
> +    return 0;
> +
> +  FOR_LIST_ELEMENTS (ns, grub_nvme_namespaces)
> +    if (hook (ns->devname, hook_data))
> +    return 1;
> +
> +  return 0;
> +}
> +
> +static grub_err_t
> +grub_nvme_open (const char *name __attribute ((unused)),
> +               grub_disk_t disk __attribute ((unused)))
> +{
> +  struct grub_nvme_namespace *ns;
> +
> +  FOR_LIST_ELEMENTS (ns, grub_nvme_namespaces)
> +    if (grub_strcmp (ns->devname, name) == 0)
> +    break;
> +
> +  if (!ns)
> +    return grub_error (GRUB_ERR_UNKNOWN_DEVICE, "can't open device");
> +
> +  disk->total_sectors = ns->lba_count;
> +  disk->max_agglomerate = ns->max_req_size;
> +
> +  disk->id = ns->nsnum;                /* global id of the namespace */
> +
> +  disk->data = ns;
> +
> +  return 0;
> +}
> +
> +static grub_err_t
> +nvme_readwrite (struct grub_nvme_namespace *ns, grub_disk_addr_t sector,
> +               grub_size_t num_sectors, char *buf, int write)
> +{
> +  for (grub_size_t i = 0; i < num_sectors;)
> +    {
> +      grub_uint16_t blocks_remaining = num_sectors - i;
> +      char *op_buf = buf + i * ns->block_size;
> +      int blocks =
> +       nvme_prpl_xfer (ns, sector + i, op_buf, blocks_remaining, write);
> +      if (blocks < 0)
> +       return GRUB_ERR_IO;
> +      i += blocks;
> +    }
> +  return GRUB_ERR_NONE;
> +}
> +
> +static grub_err_t
> +grub_nvme_read (grub_disk_t disk, grub_disk_addr_t sector,
> +               grub_size_t num_sectors, char *buf)
> +{
> +  return nvme_readwrite ((struct grub_nvme_namespace *) disk->data,
> sector,
> +                        num_sectors, buf, 0);
> +}
> +
> +static grub_err_t
> +grub_nvme_write (grub_disk_t disk, grub_disk_addr_t sector,
> +                grub_size_t num_sectors, const char *buf)
> +{
> +  return nvme_readwrite ((struct grub_nvme_namespace *) disk->data,
> sector,
> +                        num_sectors, (char *) buf, 1);
> +}
> +
> +static struct grub_disk_dev grub_nvme_dev = {
> +  .name = "nvme",
> +  .id = GRUB_DISK_DEVICE_NVME_ID,
> +  .disk_iterate = grub_nvme_iterate,
> +  .disk_open = grub_nvme_open,
> +  .disk_read = grub_nvme_read,
> +  .disk_write = grub_nvme_write,
> +  .next = 0
> +};
> +
> +GRUB_MOD_INIT (nvme)
> +{
> +  grub_stop_disk_firmware ();
> +  grub_pci_iterate (grub_nvme_pci_probe, NULL);
> +  grub_disk_dev_register (&grub_nvme_dev);
> +}
> +
> +GRUB_MOD_FINI (nvme)
> +{
> +  grub_disk_dev_unregister (&grub_nvme_dev);
> +}
> diff --git a/include/grub/disk.h b/include/grub/disk.h
> index fbf23df7f..186e76f0b 100644
> --- a/include/grub/disk.h
> +++ b/include/grub/disk.h
> @@ -52,6 +52,7 @@ enum grub_disk_dev_id
>      GRUB_DISK_DEVICE_UBOOTDISK_ID,
>      GRUB_DISK_DEVICE_XEN,
>      GRUB_DISK_DEVICE_OBDISK_ID,
> +    GRUB_DISK_DEVICE_NVME_ID
>    };
>
>  struct grub_disk;
> diff --git a/include/grub/nvme.h b/include/grub/nvme.h
> new file mode 100644
> index 000000000..5f66aa232
> --- /dev/null
> +++ b/include/grub/nvme.h
> @@ -0,0 +1,225 @@
> +/* nvme.h - NVMe datastructures and constants */
> +/*
> + * Copyright 2017 Amazon.com, Inc. or its affiliates.
> + *
> + * This file may be distributed under the terms of the GNU LGPLv3 license.
> + */
> +
> +#ifndef GRUB_NVME_HEADER
> +#define GRUB_NVME_HEADER  1
> +
> +#include <grub/pci.h>
> +#include <grub/types.h>
> +
> +/* The register file of a NVMe host controller. This struct follows the
> naming
> +   scheme in the NVMe specification. */
> +struct grub_nvme_reg
> +{
> +  grub_uint64_t cap;           /* controller capabilities */
> +  grub_uint32_t vs;            /* version */
> +  grub_uint32_t intms;         /* interrupt mask set */
> +  grub_uint32_t intmc;         /* interrupt mask clear */
> +  grub_uint32_t cc;            /* controller configuration */
> +  grub_uint32_t _res0;
> +  grub_uint32_t csts;          /* controller status */
> +  grub_uint32_t _res1;
> +  grub_uint32_t aqa;           /* admin queue attributes */
> +  grub_uint64_t asq;           /* admin submission queue base address */
> +  grub_uint64_t acq;           /* admin completion queue base address */
> +};
> +
> +/* Submission queue entry */
> +struct grub_nvme_sqe
> +{
> +  union
> +  {
> +    grub_uint32_t dword[16];
> +    struct
> +    {
> +      grub_uint32_t cdw0;      /* Command DWORD 0 */
> +      grub_uint32_t nsid;      /* Namespace ID */
> +      grub_uint64_t _res0;
> +      grub_uint64_t mptr;      /* metadata ptr */
> +
> +      grub_uint64_t dptr_prp1;
> +      grub_uint64_t dptr_prp2;
> +    };
> +  };
> +};
> +
> +/* Completion queue entry */
> +struct grub_nvme_cqe
> +{
> +  union
> +  {
> +    grub_uint32_t dword[4];
> +    struct
> +    {
> +      grub_uint32_t cdw0;
> +      grub_uint32_t _res0;
> +      grub_uint16_t sq_head;
> +      grub_uint16_t sq_id;
> +      grub_uint16_t cid;
> +      grub_uint16_t status;
> +    };
> +  };
> +};
> +
> +/* The common part of every submission or completion queue. */
> +struct grub_nvme_queue
> +{
> +  grub_uint32_t *dbl;          /* doorbell */
> +  grub_uint16_t mask;          /* length - 1 */
> +};
> +
> +struct grub_nvme_cq
> +{
> +  struct grub_nvme_queue common;
> +  struct grub_nvme_cqe *cqe;
> +
> +  /* We have read up to (but not including) this entry in the queue. */
> +  grub_uint16_t head;
> +
> +  /* The current phase bit the controller uses to indicate that it has
> written
> +     a new entry. This is inverted after each wrap. */
> +  unsigned phase:1;
> +};
> +
> +struct grub_nvme_sq
> +{
> +  struct grub_nvme_queue common;
> +  struct grub_nvme_sqe *sqe;
> +
> +  /* Corresponding completion queue. We only support a single SQ per CQ.
> */
> +  struct grub_nvme_cq *cq;
> +
> +  /* The last entry the controller has fetched. */
> +  grub_uint16_t head;
> +
> +  /* The last value we have written to the tail doorbell. */
> +  grub_uint16_t tail;
> +};
> +
> +struct grub_nvme_ctrl
> +{
> +  grub_pci_device_t pci;
> +  struct grub_nvme_reg volatile *reg;
> +
> +  grub_uint32_t ctrlnum;
> +
> +  grub_uint32_t doorbell_stride;       /* in bytes */
> +
> +  struct grub_nvme_sq admin_sq;
> +  struct grub_nvme_cq admin_cq;
> +
> +  grub_uint32_t ns_count;
> +
> +  struct grub_nvme_sq io_sq;
> +  struct grub_nvme_cq io_cq;
> +};
> +
> +struct grub_nvme_namespace
> +{
> +  struct grub_nvme_namespace *next;
> +  struct grub_nvme_namespace **prev;
> +
> +  char *devname;
> +
> +  grub_uint32_t nsnum;
> +
> +  struct grub_nvme_ctrl *ctrl;
> +
> +  grub_uint32_t ns_id;
> +
> +  grub_uint64_t lba_count;     /* The total amount of sectors. */
> +
> +  grub_uint32_t block_size;
> +  grub_uint32_t metadata_size;
> +  grub_uint32_t max_req_size;
> +};
> +
> +/* Data structures for NVMe admin identify commands */
> +
> +struct grub_nvme_identify_ctrl
> +{
> +  grub_uint16_t vid;
> +  grub_uint16_t ssvid;
> +  char sn[20];
> +  char mn[40];
> +  char fr[8];
> +
> +  grub_uint8_t rab;
> +  grub_uint8_t ieee[3];
> +  grub_uint8_t cmic;
> +  grub_uint8_t mdts;
> +
> +  char _boring[516 - 78];
> +
> +  grub_uint32_t nn;            /* number of namespaces */
> +};
> +
> +struct grub_nvme_identify_ns_list
> +{
> +  grub_uint32_t ns_id[1024];
> +};
> +
> +struct grub_nvme_lba_format
> +{
> +  grub_uint16_t ms;
> +  grub_uint8_t lbads;
> +  grub_uint8_t rp;
> +};
> +
> +struct grub_nvme_identify_ns
> +{
> +  grub_uint64_t nsze;
> +  grub_uint64_t ncap;
> +  grub_uint64_t nuse;
> +  grub_uint8_t nsfeat;
> +  grub_uint8_t nlbaf;
> +  grub_uint8_t flbas;
> +
> +  char _boring[128 - 27];
> +
> +  struct grub_nvme_lba_format lbaf[16];
> +};
> +
> +union nvme_identify
> +{
> +  struct grub_nvme_identify_ns ns;
> +  struct grub_nvme_identify_ctrl ctrl;
> +  struct grub_nvme_identify_ns_list ns_list;
> +};
> +
> +/* NVMe constants */
> +
> +#define GRUB_NVME_CAP_CSS_NVME (1ULL << 37)
> +
> +#define GRUB_NVME_CSTS_FATAL   (1U <<  1)
> +#define GRUB_NVME_CSTS_RDY     (1U <<  0)
> +
> +#define GRUB_NVME_CC_EN        (1U <<  0)
> +
> +#define GRUB_NVME_SQE_OPC_ADMIN_CREATE_IO_SQ 1U
> +#define GRUB_NVME_SQE_OPC_ADMIN_CREATE_IO_CQ 5U
> +#define GRUB_NVME_SQE_OPC_ADMIN_IDENTIFY     6U
> +
> +#define GRUB_NVME_SQE_OPC_IO_WRITE 1U
> +#define GRUB_NVME_SQE_OPC_IO_READ  2U
> +
> +#define GRUB_NVME_ADMIN_IDENTIFY_CNS_ID_NS       0U
> +#define GRUB_NVME_ADMIN_IDENTIFY_CNS_ID_CTRL     1U
> +#define GRUB_NVME_ADMIN_IDENTIFY_CNS_GET_NS_LIST 2U
> +
> +#define GRUB_NVME_CQE_DW3_P (1U << 16)
> +
> +#define GRUB_NVME_PAGE_SIZE 4096
> +#define GRUB_NVME_PAGE_MASK ~(GRUB_NVME_PAGE_SIZE - 1)
> +
> +/* Length for the queue entries. */
> +#define GRUB_NVME_SQE_SIZE_LOG 6
> +#define GRUB_NVME_CQE_SIZE_LOG 4
> +
> +#endif
> +
> +/* EOF */
> --
> 2.39.2
>
>
> _______________________________________________
> Grub-devel mailing list
> Grub-devel@gnu.org
> https://lists.gnu.org/mailman/listinfo/grub-devel
>
_______________________________________________
Grub-devel mailing list
Grub-devel@gnu.org
https://lists.gnu.org/mailman/listinfo/grub-devel

Reply via email to