Quoting "Matthew R. Ochs" <mro...@linux.vnet.ibm.com>:

Add superpipe supporting infrastructure to device driver for the IBM CXL
Flash adapter. This patch allows userspace applications to take advantage
of the accelerated I/O features that this adapter provides and bypass the
traditional filesystem stack.

Signed-off-by: Matthew R. Ochs <mro...@linux.vnet.ibm.com>
Signed-off-by: Manoj N. Kumar <ma...@linux.vnet.ibm.com>
---
Documentation/ioctl/ioctl-number.txt |    1 +
Documentation/powerpc/cxlflash.txt   |  297 +++++
drivers/scsi/cxlflash/Makefile       |    2 +-
drivers/scsi/cxlflash/common.h       |   19 +
drivers/scsi/cxlflash/main.c         |   21 +-
drivers/scsi/cxlflash/superpipe.c | 2206 ++++++++++++++++++++++++++++++++++
drivers/scsi/cxlflash/superpipe.h    |  127 ++
include/uapi/scsi/Kbuild             |    1 +
include/uapi/scsi/cxlflash_ioctl.h   |  139 +++
9 files changed, 2810 insertions(+), 3 deletions(-)
create mode 100644 Documentation/powerpc/cxlflash.txt
create mode 100644 drivers/scsi/cxlflash/superpipe.c
create mode 100644 drivers/scsi/cxlflash/superpipe.h
create mode 100644 include/uapi/scsi/cxlflash_ioctl.h





diff --git a/drivers/scsi/cxlflash/superpipe.c b/drivers/scsi/cxlflash/superpipe.c
new file mode 100644
index 0000000..802f1f5
--- /dev/null
+++ b/drivers/scsi/cxlflash/superpipe.c

+struct ctx_info *get_context(struct cxlflash_cfg *cfg, u64 rctxid,
+                            void *arg, enum ctx_ctrl ctx_ctrl)
+{
+       struct ctx_info *ctxi = NULL;
+       struct lun_access *lun_access = NULL;
+       struct file *file = NULL;
+       struct llun_info *lli = arg;
+       u64 ctxid = DECODE_CTXID(rctxid);
+       int rc;
+       pid_t pid = current->tgid, ctxpid = 0;
+
+       if (ctx_ctrl & CTX_CTRL_FILE) {
+               lli = NULL;
+               file = (struct file *)arg;
+       }
+
+       if (ctx_ctrl & CTX_CTRL_CLONE)
+               pid = current->parent->tgid;
+
+       if (likely(ctxid < MAX_CONTEXT)) {
+retry:
+               rc = mutex_lock_interruptible(&cfg->ctx_tbl_list_mutex);
+               if (rc)
+                       goto out;
+

if (mutex_lock_interruptible(&cfg->ctx_tbl_list_mutex))
       goto out;
or, instead of the "goto out":  return ctxi;

+               ctxi = cfg->ctx_tbl[ctxid];
+               if (ctxi)
+                       if ((file && (ctxi->file != file)) ||
+                           (!file && (ctxi->ctxid != rctxid)))
+                               ctxi = NULL;
+

Should you combine these two "if" statements into a single "if"?

+               if ((ctx_ctrl & CTX_CTRL_ERR) ||
+                   (!ctxi && (ctx_ctrl & CTX_CTRL_ERR_FALLBACK)))
+                       ctxi = find_error_context(cfg, rctxid, file);
+               if (!ctxi) {
+                       mutex_unlock(&cfg->ctx_tbl_list_mutex);
+                       goto out;
+               }
+
+               /*
+                * Need to acquire ownership of the context while still under
+                * the table/list lock to serialize with a remove thread. Use
+                * the 'try' to avoid stalling the table/list lock for a single
+                * context.
+                */
+               rc = mutex_trylock(&ctxi->mutex);
+               mutex_unlock(&cfg->ctx_tbl_list_mutex);
+               if (!rc)
+                       goto retry;
+
+               if (ctxi->unavail)
+                       goto denied;
+
+               ctxpid = ctxi->pid;
+               if (likely(!(ctx_ctrl & CTX_CTRL_NOPID)))
+                       if (pid != ctxpid)
+                               goto denied;

Should you combine the above two "if" statements into a single "if"?

+
+               if (lli) {
+                       list_for_each_entry(lun_access, &ctxi->luns, list)
+                               if (lun_access->lli == lli)
+                                       goto out;
+                       goto denied;
+               }
+       }
+
+out:
+       pr_debug("%s: rctxid=%016llX ctxinfo=%p ctxpid=%u pid=%u ctx_ctrl=%u\n",
+                __func__, rctxid, ctxi, ctxpid, pid, ctx_ctrl);
+
+       return ctxi;
+
+denied:
+       mutex_unlock(&ctxi->mutex);
+       ctxi = NULL;
+       goto out;
+}

+/**
+ * cxlflash_lun_attach() - attaches a user to a LUN and manages the LUN's mode
+ * @gli:       LUN to attach.
+ * @mode:      Desired mode of the LUN.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int cxlflash_lun_attach(struct glun_info *gli, enum lun_mode mode)
+{
+       int rc = 0;
+
+       spin_lock(&gli->slock);
+       if (gli->mode == MODE_NONE)
+               gli->mode = mode;
+       else if (gli->mode != mode) {
+               pr_err("%s: LUN operating in mode %d, requested mode %d\n",
+                      __func__, gli->mode, mode);
+               rc = -EINVAL;
+               goto out;
+       }
+
+       gli->users++;
+       WARN_ON(gli->users <= 0);

Does "gli->users" have an upper limit?

+out:
+       pr_debug("%s: Returning rc=%d gli->mode=%u gli->users=%u\n",
+                __func__, rc, gli->mode, gli->users);
+       spin_unlock(&gli->slock);
+       return rc;
+}
+
+/**
+ * cxlflash_lun_detach() - detaches a user from a LUN and resets the LUN's mode
+ * @gli:       LUN to detach.
+ *
+ * When resetting the mode, terminate block allocation resources as they
+ * are no longer required (service is safe to call even when block allocation
+ * resources were not present - such as when transitioning from physical mode).
+ * These resources will be reallocated when needed (subsequent transition to
+ * virtual mode).
+ */
+void cxlflash_lun_detach(struct glun_info *gli)
+{
+       spin_lock(&gli->slock);
+       WARN_ON(gli->mode == MODE_NONE);
+       if (--gli->users == 0)
+               gli->mode = MODE_NONE;
+       pr_debug("%s: gli->users=%u\n", __func__, gli->users);
+       WARN_ON(gli->users < 0);

Would you like to add a pr_debug(...) here?

+       spin_unlock(&gli->slock);
+}
+
+/**
+ * _cxlflash_disk_release() - releases the specified resource entry
+ * @sdev:      SCSI device associated with LUN.
+ * @ctxi:      Context owning resources.
+ * @release:   Release ioctl data structure.
+ *
+ * For LUN's in virtual mode, the virtual lun associated with the specified
+ * resource handle is resized to 0 prior to releasing the RHTE. Note that the
+ * AFU sync should _not_ be performed when the context is sitting on the error
+ * recovery list. A context on the error recovery list is not known to the AFU
+ * due to reset. When the context is recovered, it will be reattached and made
+ * known again to the AFU.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int _cxlflash_disk_release(struct scsi_device *sdev,
+                          struct ctx_info *ctxi,
+                          struct dk_cxlflash_release *release)
+{
+       struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)sdev->host->hostdata;
+       struct llun_info *lli = sdev->hostdata;
+       struct glun_info *gli = lli->parent;
+       struct afu *afu = cfg->afu;
+       bool unlock_ctx = false;
+
+       res_hndl_t rhndl = release->rsrc_handle;
+
+       int rc = 0;
+       u64 ctxid = DECODE_CTXID(release->context_id),
+           rctxid = release->context_id;
+
+       struct sisl_rht_entry *rhte;
+       struct sisl_rht_entry_f1 *rhte_f1;
+
+       pr_debug("%s: ctxid=%llu rhndl=0x%llx gli->mode=%u gli->users=%u\n",
+                __func__, ctxid, release->rsrc_handle, gli->mode, gli->users);
+
+       if (!ctxi) {
+               ctxi = get_context(cfg, rctxid, lli, CTX_CTRL_ERR_FALLBACK);
+               if (unlikely(!ctxi)) {
+                       pr_err("%s: Bad context! (%llu)\n", __func__, ctxid);
+                       rc = -EINVAL;
+                       goto out;
+               }
+
+               unlock_ctx = true;
+       }
+
+       rhte = get_rhte(ctxi, rhndl, lli);
+       if (unlikely(!rhte)) {
+               pr_err("%s: Bad resource handle! (%d)\n", __func__, rhndl);
+               rc = -EINVAL;
+               goto out;
+       }
+
+       /*
+        * Resize to 0 for virtual LUNS by setting the size
+        * to 0. This will clear LXT_START and LXT_CNT fields
+        * in the RHT entry and properly sync with the AFU.
+        *
+        * Afterwards we clear the remaining fields.
+        */
+       switch (gli->mode) {
+       case MODE_PHYSICAL:
+               /*
+                * Clear the Format 1 RHT entry for direct access
+                * (physical LUN) using the synchronization sequence
+                * defined in the SISLite specification.
+                */
+               rhte_f1 = (struct sisl_rht_entry_f1 *)rhte;
+
+               rhte_f1->valid = 0;
+               dma_wmb(); /* Make revocation of RHT entry visible */
+
+               rhte_f1->lun_id = 0;
+               dma_wmb(); /* Make clearing of LUN id visible */
+
+               rhte_f1->dw = 0;
+               dma_wmb(); /* Make RHT entry bottom-half clearing visible */
+
+               if (!ctxi->err_recovery_active)
+                       cxlflash_afu_sync(afu, ctxid, rhndl, AFU_HW_SYNC);
+               break;
+       default:
+               WARN(1, "Unsupported LUN mode!");
+               goto out;
+       }
+
+       rhte_checkin(ctxi, rhte);
+       cxlflash_lun_detach(gli);
+
+out:
+       if (unlock_ctx)
+               mutex_unlock(&ctxi->mutex);

Should the matching "mutex_lock(&ctxi->mutex);" be in the same function?

+       pr_debug("%s: returning rc=%d\n", __func__, rc);
+       return rc;
+}
+

+ * create_context() - allocates and initializes a context
+ * @cfg:       Internal structure associated with the host.
+ * @ctx:       Previously obtained CXL context reference.
+ * @ctxid:     Previously obtained process element associated with CXL context.
+ * @adap_fd:   Previously obtained adapter fd associated with CXL context.
+ * @file:      Previously obtained file associated with CXL context.
+ * @perms:     User-specified permissions.
+ *
+ * The context's mutex is locked when an allocated context is returned.
+ *
+ * Return: Allocated context on success, NULL on failure
+ */
+static struct ctx_info *create_context(struct cxlflash_cfg *cfg,
+                                      struct cxl_context *ctx, int ctxid,
+                                      int adap_fd, struct file *file,
+                                      u32 perms)
+{
+       char *tmp = NULL;
+       size_t size;
+       struct afu *afu = cfg->afu;
+       struct ctx_info *ctxi = NULL;
+       struct sisl_rht_entry *rhte;
+
+       size = (MAX_RHT_PER_CONTEXT * sizeof(*ctxi->rht_lun));
+       size += sizeof(*ctxi);
+

Combine the above two lines of code into one line?

+       tmp = kzalloc(size, GFP_KERNEL);
+       if (unlikely(!tmp)) {
+               pr_err("%s: Unable to allocate context! (%ld)\n",
+                      __func__, size);
+               goto out;
+       }
+
+       rhte = (struct sisl_rht_entry *)get_zeroed_page(GFP_KERNEL);
+       if (unlikely(!rhte)) {
+               pr_err("%s: Unable to allocate RHT!\n", __func__);
+               goto err;
+       }
+
+       ctxi = (struct ctx_info *)tmp;
+       tmp += sizeof(*ctxi);
+       ctxi->rht_lun = (struct llun_info **)tmp;

Combine the above two lines of code into one line?

+       ctxi->rht_start = rhte;
+       ctxi->rht_perms = perms;
+
+       ctxi->ctrl_map = &afu->afu_map->ctrls[ctxid].ctrl;
+       ctxi->ctxid = ENCODE_CTXID(ctxi, ctxid);
+       ctxi->lfd = adap_fd;
+       ctxi->pid = current->tgid; /* tgid = pid */
+       ctxi->ctx = ctx;
+       ctxi->file = file;
+       mutex_init(&ctxi->mutex);
+       INIT_LIST_HEAD(&ctxi->luns);
+       INIT_LIST_HEAD(&ctxi->list); /* initialize for list_empty() */
+
+       atomic_inc(&cfg->num_user_contexts);
+       mutex_lock(&ctxi->mutex);
+out:

Is it OK to call "mutex_lock(&ctxi->mutex);" in the function that calls "create_context"?

+       return ctxi;
+
+err:
+       kfree(tmp);
+       goto out;
+}
+

+               mutex_unlock(&cfg->ctx_tbl_list_mutex);
+               mutex_unlock(&ctxi->mutex);
+
+               lfd = ctxi->lfd;
+               destroy_context(cfg, ctxi);
+               ctxi = NULL;
+               unlock_ctx = false;
+
+               /*
+                * As a last step, clean up external resources when not
+                * already on an external cleanup thread, ie: close(adap_fd).
+                *
+                * NOTE: this will free up the context from the CXL services,
+                * allowing it to dole out the same context_id on a future
+                * (or even currently in-flight) disk_attach operation.
+                */
+               if (lfd != -1)
+                       sys_close(lfd);
+       }
+
+out:
+       if (unlock_ctx)
+               mutex_unlock(&ctxi->mutex);
+       pr_debug("%s: returning rc=%d\n", __func__, rc);
+       return rc;
+}
+

+/**
+ * cxlflash_manage_lun() - handles lun management activities
+ * @sdev:      SCSI device associated with LUN.
+ * @manage:    Manage ioctl data structure.
+ *
+ * This routine is used to notify the driver about a LUN's WWID and associate
+ * SCSI devices (sdev) with a global LUN instance. Additionally it serves to
+ * change a LUN's operating mode: legacy or superpipe.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+static int cxlflash_manage_lun(struct scsi_device *sdev,
+                              struct dk_cxlflash_manage_lun *manage)
+{
+       int rc = 0;
+       struct llun_info *lli = NULL;
+       u64 flags = manage->hdr.flags;
+       u32 chan = sdev->channel;
+
+       lli = lookup_lun(sdev, manage->wwid);
+       pr_debug("%s: ENTER: WWID = %016llX%016llX, flags = %016llX li = %p\n",
+                __func__, get_unaligned_le64(&manage->wwid[0]),
+                get_unaligned_le64(&manage->wwid[8]),
+                manage->hdr.flags, lli);
+       if (unlikely(!lli)) {
+               rc = -ENOMEM;
+               goto out;
+       }
+

Move the pr_debug(...) under the "if" leg?

+       if (flags & DK_CXLFLASH_MANAGE_LUN_ENABLE_SUPERPIPE) {
+               if (lli->newly_created)
+                       lli->port_sel = CHAN2PORT(chan);
+               else
+                       lli->port_sel = BOTH_PORTS;
+               /* Store off lun in unpacked, AFU-friendly format */
+               lli->lun_id[chan] = lun_to_lunid(sdev->lun);
+               sdev->hostdata = lli;
+       } else if (flags & DK_CXLFLASH_MANAGE_LUN_DISABLE_SUPERPIPE) {
+               if (lli->parent->mode != MODE_NONE)
+                       rc = -EBUSY;
+               else
+                       sdev->hostdata = NULL;
+       }
+
+out:
+       pr_debug("%s: returning rc=%d\n", __func__, rc);
+       return rc;
+}
+
+/**
+ * check_state() - checks and responds to the current adapter state
+ * @cfg:       Internal structure associated with the host.
+ *
+ * This routine can block and should only be used on process context.
+ * Note that when waking up from waiting in limbo, the state is unknown
+ * and must be checked again before proceeding.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+static int check_state(struct cxlflash_cfg *cfg)
+{
+       int rc = 0;
+
+retry:
+       switch (cfg->state) {
+       case STATE_LIMBO:
+               pr_debug("%s: Limbo, going to wait...\n", __func__);
+               rc = wait_event_interruptible(cfg->limbo_waitq,
+                                             cfg->state != STATE_LIMBO);
+               if (unlikely(rc))
+                       goto out;
+               goto retry;
+       case STATE_FAILTERM:
+               pr_debug("%s: Failed/Terminating!\n", __func__);
+               rc = -ENODEV;
+               goto out;

Change "goto out" to "break"?

+       default:
+               break;
+       }
+out:
+       return rc;
+ * cxlflash_afu_recover() - initiates AFU recovery
+ * @sdev:      SCSI device associated with LUN.
+ * @recover:   Recover ioctl data structure.
+ *
+ * Only a single recovery is allowed at a time to avoid exhausting CXL
+ * resources (leading to recovery failure) in the event that we're up
+ * against the maximum number of contexts limit. For similar reasons,
+ * a context recovery is retried if there are multiple recoveries taking
+ * place at the same time and the failure was due to CXL services being
+ * unable to keep up.
+ *
+ * Because a user can detect an error condition before the kernel, it is
+ * quite possible for this routine to act as the kernel's EEH detection
+ * source (MMIO read of mbox_r). Because of this, there is a window of
+ * time where an EEH might have been detected but not yet 'serviced'
+ * (callback invoked, causing the device to enter limbo state). To avoid
+ * looping in this routine during that window, a 1 second sleep is in place
+ * between the time the MMIO failure is detected and the time a wait on the
+ * limbo wait queue is attempted via check_state().
+ *
+ * Return: 0 on success, -errno on failure
+ */
+static int cxlflash_afu_recover(struct scsi_device *sdev,
+                               struct dk_cxlflash_recover_afu *recover)
+{
+       struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)sdev->host->hostdata;
+       struct llun_info *lli = sdev->hostdata;
+       struct afu *afu = cfg->afu;
+       struct ctx_info *ctxi = NULL;
+       struct mutex *mutex = &cfg->ctx_recovery_mutex;
+       u64 ctxid = DECODE_CTXID(recover->context_id),
+           rctxid = recover->context_id;
+       long reg;
+       int lretry = 20; /* up to 2 seconds */
+       int rc = 0;
+
+       atomic_inc(&cfg->recovery_threads);
+       rc = mutex_lock_interruptible(mutex);
+       if (rc)
+               goto out;

Change it to "if (mutex_lock_interruptible(mutex))"? If the lock attempt fails here, why does "out:" need to call mutex_unlock(mutex)? How about just returning the error?

+
+       pr_debug("%s: reason 0x%016llX rctxid=%016llX\n", __func__,
+                recover->reason, rctxid);
+
+retry:
+       /* Ensure that this process is attached to the context */
+       ctxi = get_context(cfg, rctxid, lli, CTX_CTRL_ERR_FALLBACK);
+       if (unlikely(!ctxi)) {
+               pr_err("%s: Bad context! (%llu)\n", __func__, ctxid);
+               rc = -EINVAL;
+               goto out;
+       }
+
+       if (ctxi->err_recovery_active) {
+retry_recover:
+               rc = recover_context(cfg, ctxi);
+               if (unlikely(rc)) {
+                       pr_err("%s: Recovery failed for context %llu (rc=%d)\n",
+                              __func__, ctxid, rc);
+                       if ((rc == -ENODEV) &&
+                           ((atomic_read(&cfg->recovery_threads) > 1) ||
+                            (lretry--))) {
+                               pr_debug("%s: Going to try again!\n", __func__);
+                               mutex_unlock(mutex);
+                               msleep(100);
+                               rc = mutex_lock_interruptible(mutex);
+                               if (rc)
+                                       goto out;

Same here


+                               goto retry_recover;
+                       }
+
+                       goto out;
+               }
+
+               ctxi->err_recovery_active = false;
+               recover->context_id = ctxi->ctxid;
+               recover->adap_fd = ctxi->lfd;
+               recover->mmio_size = sizeof(afu->afu_map->hosts[0].harea);
+               recover->hdr.return_flags |=
+                       DK_CXLFLASH_RECOVER_AFU_CONTEXT_RESET;
+               goto out;
+       }
+
+       /* Test if in error state */
+       reg = readq_be(&afu->ctrl_map->mbox_r);
+       if (reg == -1) {
+               pr_info("%s: MMIO read fail! Wait for recovery...\n", __func__);
+               mutex_unlock(&ctxi->mutex);
+               ctxi = NULL;
+               ssleep(1);
+               rc = check_state(cfg);
+               if (unlikely(rc))
+                       goto out;
+               goto retry;
+       }
+

+       pr_debug("%s: MMIO working, no recovery required!\n", __func__);
+out:
+       if (likely(ctxi))
+               mutex_unlock(&ctxi->mutex);
+       mutex_unlock(mutex);
+       atomic_dec_if_positive(&cfg->recovery_threads);
+       return rc;
+}
+

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to