From: Nipun Gupta <>

The patch adds support for portal migration by disabling stashing
for the portals which is used in the non-affined threads, or on
threads affined to multiple cores

Signed-off-by: Nipun Gupta <>
 drivers/bus/fslmc/portal/dpaa2_hw_dpio.c      |  83 +--
 .../fslmc/qbman/include/fsl_qbman_portal.h    |   8 +-
 drivers/bus/fslmc/qbman/qbman_portal.c        | 554 +++++++++++++++++-
 drivers/bus/fslmc/qbman/qbman_portal.h        |  19 +-
 drivers/bus/fslmc/qbman/qbman_sys.h           | 135 ++++-
 5 files changed, 717 insertions(+), 82 deletions(-)

diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c 
index 054d45306..2102d2981 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
@@ -53,10 +53,6 @@ static uint32_t io_space_count;
 /* Variable to store DPAA2 platform type */
 uint32_t dpaa2_svr_family;
-/* Physical core id for lcores running on dpaa2. */
-/* DPAA2 only support 1 lcore to 1 phy cpu mapping */
-static unsigned int dpaa2_cpu[RTE_MAX_LCORE];
 /* Variable to store DPAA2 DQRR size */
 uint8_t dpaa2_dqrr_size;
 /* Variable to store DPAA2 EQCR size */
@@ -159,7 +155,7 @@ dpaa2_affine_dpio_intr_to_respective_core(int32_t dpio_id, 
int cpu_id)
-       cpu_mask = cpu_mask << dpaa2_cpu[cpu_id];
+       cpu_mask = cpu_mask << cpu_id;
        snprintf(command, COMMAND_LEN, "echo %X > /proc/irq/%s/smp_affinity",
                 cpu_mask, token);
        ret = system(command);
@@ -228,17 +224,9 @@ static void dpaa2_dpio_intr_deinit(struct dpaa2_dpio_dev 
 static int
-dpaa2_configure_stashing(struct dpaa2_dpio_dev *dpio_dev)
+dpaa2_configure_stashing(struct dpaa2_dpio_dev *dpio_dev, int cpu_id)
        int sdest, ret;
-       int cpu_id;
-       /* Set the Stashing Destination */
-       cpu_id = dpaa2_get_core_id();
-       if (cpu_id < 0) {
-               DPAA2_BUS_ERR("Thread not affined to a single core");
-               return -1;
-       }
        /* Set the STASH Destination depending on Current CPU ID.
         * Valid values of SDEST are 4,5,6,7. Where,
@@ -277,6 +265,7 @@ static void dpaa2_put_qbman_swp(struct dpaa2_dpio_dev 
 static struct dpaa2_dpio_dev *dpaa2_get_qbman_swp(void)
        struct dpaa2_dpio_dev *dpio_dev = NULL;
+       int cpu_id;
        int ret;
        /* Get DPIO dev handle from list using index */
@@ -292,11 +281,19 @@ static struct dpaa2_dpio_dev *dpaa2_get_qbman_swp(void)
        DPAA2_BUS_DEBUG("New Portal %p (%d) affined thread - %lu",
                        dpio_dev, dpio_dev->index, syscall(SYS_gettid));
-       ret = dpaa2_configure_stashing(dpio_dev);
-       if (ret) {
-               DPAA2_BUS_ERR("dpaa2_configure_stashing failed");
-               rte_atomic16_clear(&dpio_dev->ref_count);
-               return NULL;
+       /* Set the Stashing Destination */
+       cpu_id = dpaa2_get_core_id();
+       if (cpu_id < 0) {
+               DPAA2_BUS_WARN("Thread not affined to a single core");
+               if (dpaa2_svr_family != SVR_LX2160A)
+                       qbman_swp_update(dpio_dev->sw_portal, 1);
+       } else {
+               ret = dpaa2_configure_stashing(dpio_dev, cpu_id);
+               if (ret) {
+                       DPAA2_BUS_ERR("dpaa2_configure_stashing failed");
+                       rte_atomic16_clear(&dpio_dev->ref_count);
+                       return NULL;
+               }
        ret = pthread_setspecific(dpaa2_portal_key, (void *)dpio_dev);
@@ -363,46 +360,6 @@ static void dpaa2_portal_finish(void *arg)
        pthread_setspecific(dpaa2_portal_key, NULL);
- * This checks for not supported lcore mappings as well as get the physical
- * cpuid for the lcore.
- * one lcore can only map to 1 cpu i.e. 1@10-14 not supported.
- * one cpu can be mapped to more than one lcores.
- */
-static int
-       unsigned int lcore_id, i;
-       int ret = 0;
-       for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
-               dpaa2_cpu[lcore_id] = 0xffffffff;
-       for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
-               rte_cpuset_t cpuset = rte_lcore_cpuset(lcore_id);
-               for (i = 0; i < CPU_SETSIZE; i++) {
-                       if (!CPU_ISSET(i, &cpuset))
-                               continue;
-                       if (i >= RTE_MAX_LCORE) {
-                               DPAA2_BUS_ERR("ERR:lcore map to core %u (>= %u) 
not supported",
-                                       i, RTE_MAX_LCORE);
-                               ret = -1;
-                               continue;
-                       }
-                       RTE_LOG(DEBUG, EAL, "lcore id = %u cpu=%u\n",
-                               lcore_id, i);
-                       if (dpaa2_cpu[lcore_id] != 0xffffffff) {
-                               DPAA2_BUS_ERR("ERR:lcore map to multi-cpu not 
-                               ret = -1;
-                               continue;
-                       }
-                       dpaa2_cpu[lcore_id] = i;
-               }
-       }
-       return ret;
 static int
 dpaa2_create_dpio_device(int vdev_fd,
                         struct vfio_device_info *obj_info,
@@ -413,7 +370,6 @@ dpaa2_create_dpio_device(int vdev_fd,
        struct qbman_swp_desc p_des;
        struct dpio_attr attr;
        int ret;
-       static int check_lcore_cpuset;
        if (obj_info->num_regions < NUM_DPIO_REGIONS) {
                DPAA2_BUS_ERR("Not sufficient number of DPIO regions");
@@ -433,13 +389,6 @@ dpaa2_create_dpio_device(int vdev_fd,
        /* Using single portal  for all devices */
        dpio_dev->mc_portal = rte_mcp_ptr_list[MC_PORTAL_INDEX];
-       if (!check_lcore_cpuset) {
-               check_lcore_cpuset = 1;
-               if (dpaa2_check_lcore_cpuset() < 0)
-                       goto err;
-       }
        dpio_dev->dpio = rte_zmalloc(NULL, sizeof(struct fsl_mc_io),
        if (!dpio_dev->dpio) {
diff --git a/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h 
index 88f0a9968..0d6364d99 100644
--- a/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h
+++ b/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright (C) 2014 Freescale Semiconductor, Inc.
- * Copyright 2015-2019 NXP
+ * Copyright 2015-2020 NXP
@@ -43,6 +43,12 @@ extern uint32_t dpaa2_svr_family;
 struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d);
+ * qbman_swp_update() - Update portal cacheability attributes.
+ * @p: the given qbman swp portal
+ */
+int qbman_swp_update(struct qbman_swp *p, int stash_off);
  * qbman_swp_finish() - Create and destroy a functional object representing
  * the given QBMan portal descriptor.
diff --git a/drivers/bus/fslmc/qbman/qbman_portal.c 
index d4223bdc8..a06b88dd2 100644
--- a/drivers/bus/fslmc/qbman/qbman_portal.c
+++ b/drivers/bus/fslmc/qbman/qbman_portal.c
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
- * Copyright 2018-2019 NXP
+ * Copyright 2018-2020 NXP
@@ -82,6 +82,10 @@ qbman_swp_enqueue_ring_mode_cinh_direct(struct qbman_swp *s,
                const struct qbman_eq_desc *d,
                const struct qbman_fd *fd);
 static int
+qbman_swp_enqueue_ring_mode_cinh_direct(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd);
+static int
 qbman_swp_enqueue_ring_mode_mem_back(struct qbman_swp *s,
                const struct qbman_eq_desc *d,
                const struct qbman_fd *fd);
@@ -99,6 +103,12 @@ qbman_swp_enqueue_multiple_cinh_direct(struct qbman_swp *s,
                uint32_t *flags,
                int num_frames);
 static int
+qbman_swp_enqueue_multiple_cinh_direct(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd,
+               uint32_t *flags,
+               int num_frames);
+static int
 qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
                const struct qbman_eq_desc *d,
                const struct qbman_fd *fd,
@@ -118,6 +128,12 @@ qbman_swp_enqueue_multiple_fd_cinh_direct(struct qbman_swp 
                uint32_t *flags,
                int num_frames);
 static int
+qbman_swp_enqueue_multiple_fd_cinh_direct(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               struct qbman_fd **fd,
+               uint32_t *flags,
+               int num_frames);
+static int
 qbman_swp_enqueue_multiple_fd_mem_back(struct qbman_swp *s,
                const struct qbman_eq_desc *d,
                struct qbman_fd **fd,
@@ -135,6 +151,11 @@ qbman_swp_enqueue_multiple_desc_cinh_direct(struct 
qbman_swp *s,
                const struct qbman_fd *fd,
                int num_frames);
 static int
+qbman_swp_enqueue_multiple_desc_cinh_direct(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd,
+               int num_frames);
+static int
 qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
                const struct qbman_eq_desc *d,
                const struct qbman_fd *fd,
@@ -143,9 +164,12 @@ qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp 
 static int
 qbman_swp_pull_direct(struct qbman_swp *s, struct qbman_pull_desc *d);
 static int
+qbman_swp_pull_cinh_direct(struct qbman_swp *s, struct qbman_pull_desc *d);
+static int
 qbman_swp_pull_mem_back(struct qbman_swp *s, struct qbman_pull_desc *d);
 const struct qbman_result *qbman_swp_dqrr_next_direct(struct qbman_swp *s);
+const struct qbman_result *qbman_swp_dqrr_next_cinh_direct(struct qbman_swp 
 const struct qbman_result *qbman_swp_dqrr_next_mem_back(struct qbman_swp *s);
 static int
@@ -153,6 +177,10 @@ qbman_swp_release_direct(struct qbman_swp *s,
                const struct qbman_release_desc *d,
                const uint64_t *buffers, unsigned int num_buffers);
 static int
+qbman_swp_release_cinh_direct(struct qbman_swp *s,
+               const struct qbman_release_desc *d,
+               const uint64_t *buffers, unsigned int num_buffers);
+static int
 qbman_swp_release_mem_back(struct qbman_swp *s,
                const struct qbman_release_desc *d,
                const uint64_t *buffers, unsigned int num_buffers);
@@ -327,6 +355,28 @@ struct qbman_swp *qbman_swp_init(const struct 
qbman_swp_desc *d)
        return p;
+int qbman_swp_update(struct qbman_swp *p, int stash_off)
+       const struct qbman_swp_desc *d = &p->desc;
+       struct qbman_swp_sys *s = &p->sys;
+       int ret;
+       /* Nothing needs to be done for QBMAN rev > 5000 with fast access */
+       if ((qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
+                       && (d->cena_access_mode == qman_cena_fastest_access))
+               return 0;
+       ret = qbman_swp_sys_update(s, d, p->dqrr.dqrr_size, stash_off);
+       if (ret) {
+               pr_err("qbman_swp_sys_init() failed %d\n", ret);
+               return ret;
+       }
+       p->stash_off = stash_off;
+       return 0;
 void qbman_swp_finish(struct qbman_swp *p)
@@ -462,6 +512,27 @@ void qbman_swp_mc_submit(struct qbman_swp *p, void *cmd, 
uint8_t cmd_verb)
+void qbman_swp_mc_submit_cinh(struct qbman_swp *p, void *cmd, uint8_t cmd_verb)
+       uint8_t *v = cmd;
+       QBMAN_BUG_ON(!(p->mc.check != swp_mc_can_submit));
+       /* TBD: "|=" is going to hurt performance. Need to move as many fields
+        * out of word zero, and for those that remain, the "OR" needs to occur
+        * at the caller side. This debug check helps to catch cases where the
+        * caller wants to OR but has forgotten to do so.
+        */
+       QBMAN_BUG_ON((*v & cmd_verb) != *v);
+       dma_wmb();
+       *v = cmd_verb | p->mc.valid_bit;
+       qbman_cinh_write_complete(&p->sys, QBMAN_CENA_SWP_CR, cmd);
+       clean(cmd);
+       p->mc.check = swp_mc_can_poll;
 void *qbman_swp_mc_result(struct qbman_swp *p)
        uint32_t *ret, verb;
@@ -500,6 +571,27 @@ void *qbman_swp_mc_result(struct qbman_swp *p)
        return ret;
+void *qbman_swp_mc_result_cinh(struct qbman_swp *p)
+       uint32_t *ret, verb;
+       QBMAN_BUG_ON(p->mc.check != swp_mc_can_poll);
+       ret = qbman_cinh_read_shadow(&p->sys,
+                             QBMAN_CENA_SWP_RR(p->mc.valid_bit));
+       /* Remove the valid-bit -
+        * command completed iff the rest is non-zero
+        */
+       verb = ret[0] & ~QB_VALID_BIT;
+       if (!verb)
+               return NULL;
+       p->mc.valid_bit ^= QB_VALID_BIT;
+       p->mc.check = swp_mc_can_start;
+       return ret;
 /* Enqueue */
@@ -640,6 +732,16 @@ static inline void qbman_write_eqcr_am_rt_register(struct 
qbman_swp *p,
+static void memcpy_byte_by_byte(void *to, const void *from, size_t n)
+       const uint8_t *src = from;
+       volatile uint8_t *dest = to;
+       size_t i;
+       for (i = 0; i < n; i++)
+               dest[i] = src[i];
 static int qbman_swp_enqueue_array_mode_direct(struct qbman_swp *s,
                                               const struct qbman_eq_desc *d,
@@ -754,7 +856,7 @@ static int qbman_swp_enqueue_ring_mode_cinh_direct(
                        return -EBUSY;
-       p = qbman_cena_write_start_wo_shadow(&s->sys,
+       p = qbman_cinh_write_start_wo_shadow(&s->sys,
                        QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask));
        memcpy(&p[1], &cl[1], 28);
        memcpy(&p[8], fd, sizeof(*fd));
@@ -762,8 +864,44 @@ static int qbman_swp_enqueue_ring_mode_cinh_direct(
        /* Set the verb byte, have to substitute in the valid-bit */
        p[0] = cl[0] | s->eqcr.pi_vb;
-       qbman_cena_write_complete_wo_shadow(&s->sys,
+       s->eqcr.pi++;
+       s->eqcr.pi &= full_mask;
+       s->eqcr.available--;
+       if (!(s->eqcr.pi & half_mask))
+               s->eqcr.pi_vb ^= QB_VALID_BIT;
+       return 0;
+static int qbman_swp_enqueue_ring_mode_cinh_direct(
+               struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd)
+       uint32_t *p;
+       const uint32_t *cl = qb_cl(d);
+       uint32_t eqcr_ci, full_mask, half_mask;
+       half_mask = (s->eqcr.pi_ci_mask>>1);
+       full_mask = s->eqcr.pi_ci_mask;
+       if (!s->eqcr.available) {
+               eqcr_ci = s->;
+               s-> = qbman_cinh_read(&s->sys,
+                               QBMAN_CINH_SWP_EQCR_CI) & full_mask;
+               s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+                               eqcr_ci, s->;
+               if (!s->eqcr.available)
+                       return -EBUSY;
+       }
+       p = qbman_cinh_write_start_wo_shadow(&s->sys,
                        QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask));
+       memcpy_byte_by_byte(&p[1], &cl[1], 28);
+       memcpy_byte_by_byte(&p[8], fd, sizeof(*fd));
+       lwsync();
+       /* Set the verb byte, have to substitute in the valid-bit */
+       p[0] = cl[0] | s->eqcr.pi_vb;
        s->eqcr.pi &= full_mask;
@@ -815,7 +953,10 @@ static int qbman_swp_enqueue_ring_mode(struct qbman_swp *s,
                                       const struct qbman_eq_desc *d,
                                       const struct qbman_fd *fd)
-       return qbman_swp_enqueue_ring_mode_ptr(s, d, fd);
+       if (!s->stash_off)
+               return qbman_swp_enqueue_ring_mode_ptr(s, d, fd);
+       else
+               return qbman_swp_enqueue_ring_mode_cinh_direct(s, d, fd);
 int qbman_swp_enqueue(struct qbman_swp *s, const struct qbman_eq_desc *d,
@@ -966,6 +1107,67 @@ static int qbman_swp_enqueue_multiple_cinh_direct(
        return num_enqueued;
+static int qbman_swp_enqueue_multiple_cinh_direct(
+               struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd,
+               uint32_t *flags,
+               int num_frames)
+       uint32_t *p = NULL;
+       const uint32_t *cl = qb_cl(d);
+       uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+       int i, num_enqueued = 0;
+       half_mask = (s->eqcr.pi_ci_mask>>1);
+       full_mask = s->eqcr.pi_ci_mask;
+       if (!s->eqcr.available) {
+               eqcr_ci = s->;
+               s-> = qbman_cinh_read(&s->sys,
+                               QBMAN_CINH_SWP_EQCR_CI) & full_mask;
+               s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+                               eqcr_ci, s->;
+               if (!s->eqcr.available)
+                       return 0;
+       }
+       eqcr_pi = s->eqcr.pi;
+       num_enqueued = (s->eqcr.available < num_frames) ?
+                       s->eqcr.available : num_frames;
+       s->eqcr.available -= num_enqueued;
+       /* Fill in the EQCR ring */
+       for (i = 0; i < num_enqueued; i++) {
+               p = qbman_cinh_write_start_wo_shadow(&s->sys,
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               memcpy_byte_by_byte(&p[1], &cl[1], 28);
+               memcpy_byte_by_byte(&p[8], &fd[i], sizeof(*fd));
+               eqcr_pi++;
+       }
+       lwsync();
+       /* Set the verb byte, have to substitute in the valid-bit */
+       eqcr_pi = s->eqcr.pi;
+       for (i = 0; i < num_enqueued; i++) {
+               p = qbman_cinh_write_start_wo_shadow(&s->sys,
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               p[0] = cl[0] | s->eqcr.pi_vb;
+               if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
+                       struct qbman_eq_desc *d = (struct qbman_eq_desc *)p;
+                       d->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
+                               ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
+               }
+               eqcr_pi++;
+               if (!(eqcr_pi & half_mask))
+                       s->eqcr.pi_vb ^= QB_VALID_BIT;
+       }
+       s->eqcr.pi = eqcr_pi & full_mask;
+       return num_enqueued;
 static int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
                                               const struct qbman_eq_desc *d,
                                               const struct qbman_fd *fd,
@@ -1025,7 +1227,12 @@ inline int qbman_swp_enqueue_multiple(struct qbman_swp 
                                      uint32_t *flags,
                                      int num_frames)
-       return qbman_swp_enqueue_multiple_ptr(s, d, fd, flags, num_frames);
+       if (!s->stash_off)
+               return qbman_swp_enqueue_multiple_ptr(s, d, fd, flags,
+                                               num_frames);
+       else
+               return qbman_swp_enqueue_multiple_cinh_direct(s, d, fd, flags,
+                                               num_frames);
 static int qbman_swp_enqueue_multiple_fd_direct(struct qbman_swp *s,
@@ -1167,6 +1374,67 @@ static int qbman_swp_enqueue_multiple_fd_cinh_direct(
        return num_enqueued;
+static int qbman_swp_enqueue_multiple_fd_cinh_direct(
+               struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               struct qbman_fd **fd,
+               uint32_t *flags,
+               int num_frames)
+       uint32_t *p = NULL;
+       const uint32_t *cl = qb_cl(d);
+       uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+       int i, num_enqueued = 0;
+       half_mask = (s->eqcr.pi_ci_mask>>1);
+       full_mask = s->eqcr.pi_ci_mask;
+       if (!s->eqcr.available) {
+               eqcr_ci = s->;
+               s-> = qbman_cinh_read(&s->sys,
+                               QBMAN_CINH_SWP_EQCR_CI) & full_mask;
+               s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+                               eqcr_ci, s->;
+               if (!s->eqcr.available)
+                       return 0;
+       }
+       eqcr_pi = s->eqcr.pi;
+       num_enqueued = (s->eqcr.available < num_frames) ?
+                       s->eqcr.available : num_frames;
+       s->eqcr.available -= num_enqueued;
+       /* Fill in the EQCR ring */
+       for (i = 0; i < num_enqueued; i++) {
+               p = qbman_cinh_write_start_wo_shadow(&s->sys,
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               memcpy_byte_by_byte(&p[1], &cl[1], 28);
+               memcpy_byte_by_byte(&p[8], fd[i], sizeof(struct qbman_fd));
+               eqcr_pi++;
+       }
+       lwsync();
+       /* Set the verb byte, have to substitute in the valid-bit */
+       eqcr_pi = s->eqcr.pi;
+       for (i = 0; i < num_enqueued; i++) {
+               p = qbman_cinh_write_start_wo_shadow(&s->sys,
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               p[0] = cl[0] | s->eqcr.pi_vb;
+               if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
+                       struct qbman_eq_desc *d = (struct qbman_eq_desc *)p;
+                       d->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
+                               ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
+               }
+               eqcr_pi++;
+               if (!(eqcr_pi & half_mask))
+                       s->eqcr.pi_vb ^= QB_VALID_BIT;
+       }
+       s->eqcr.pi = eqcr_pi & full_mask;
+       return num_enqueued;
 static int qbman_swp_enqueue_multiple_fd_mem_back(struct qbman_swp *s,
                                                  const struct qbman_eq_desc *d,
                                                  struct qbman_fd **fd,
@@ -1233,7 +1501,12 @@ inline int qbman_swp_enqueue_multiple_fd(struct 
qbman_swp *s,
                                         uint32_t *flags,
                                         int num_frames)
-       return qbman_swp_enqueue_multiple_fd_ptr(s, d, fd, flags, num_frames);
+       if (!s->stash_off)
+               return qbman_swp_enqueue_multiple_fd_ptr(s, d, fd, flags,
+                                       num_frames);
+       else
+               return qbman_swp_enqueue_multiple_fd_cinh_direct(s, d, fd,
+                                       flags, num_frames);
 static int qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s,
@@ -1365,6 +1638,62 @@ static int qbman_swp_enqueue_multiple_desc_cinh_direct(
        return num_enqueued;
+static int qbman_swp_enqueue_multiple_desc_cinh_direct(
+               struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd,
+               int num_frames)
+       uint32_t *p;
+       const uint32_t *cl;
+       uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+       int i, num_enqueued = 0;
+       half_mask = (s->eqcr.pi_ci_mask>>1);
+       full_mask = s->eqcr.pi_ci_mask;
+       if (!s->eqcr.available) {
+               eqcr_ci = s->;
+               s-> = qbman_cinh_read(&s->sys,
+                               QBMAN_CINH_SWP_EQCR_CI) & full_mask;
+               s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+                                       eqcr_ci, s->;
+               if (!s->eqcr.available)
+                       return 0;
+       }
+       eqcr_pi = s->eqcr.pi;
+       num_enqueued = (s->eqcr.available < num_frames) ?
+                       s->eqcr.available : num_frames;
+       s->eqcr.available -= num_enqueued;
+       /* Fill in the EQCR ring */
+       for (i = 0; i < num_enqueued; i++) {
+               p = qbman_cinh_write_start_wo_shadow(&s->sys,
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               cl = qb_cl(&d[i]);
+               memcpy_byte_by_byte(&p[1], &cl[1], 28);
+               memcpy_byte_by_byte(&p[8], &fd[i], sizeof(*fd));
+               eqcr_pi++;
+       }
+       lwsync();
+       /* Set the verb byte, have to substitute in the valid-bit */
+       eqcr_pi = s->eqcr.pi;
+       for (i = 0; i < num_enqueued; i++) {
+               p = qbman_cinh_write_start_wo_shadow(&s->sys,
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               cl = qb_cl(&d[i]);
+               p[0] = cl[0] | s->eqcr.pi_vb;
+               eqcr_pi++;
+               if (!(eqcr_pi & half_mask))
+                       s->eqcr.pi_vb ^= QB_VALID_BIT;
+       }
+       s->eqcr.pi = eqcr_pi & full_mask;
+       return num_enqueued;
 static int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
                                        const struct qbman_eq_desc *d,
                                        const struct qbman_fd *fd,
@@ -1426,7 +1755,13 @@ inline int qbman_swp_enqueue_multiple_desc(struct 
qbman_swp *s,
                                           const struct qbman_fd *fd,
                                           int num_frames)
-       return qbman_swp_enqueue_multiple_desc_ptr(s, d, fd, num_frames);
+       if (!s->stash_off)
+               return qbman_swp_enqueue_multiple_desc_ptr(s, d, fd,
+                                       num_frames);
+       else
+               return qbman_swp_enqueue_multiple_desc_cinh_direct(s, d, fd,
+                                       num_frames);
@@ -1574,6 +1909,30 @@ static int qbman_swp_pull_direct(struct qbman_swp *s,
        return 0;
+static int qbman_swp_pull_cinh_direct(struct qbman_swp *s,
+                                struct qbman_pull_desc *d)
+       uint32_t *p;
+       uint32_t *cl = qb_cl(d);
+       if (!atomic_dec_and_test(&s->vdq.busy)) {
+               atomic_inc(&s->vdq.busy);
+               return -EBUSY;
+       }
+       d->pull.tok = s->sys.idx + 1;
+       s-> = (void *)(size_t)d->pull.rsp_addr_virt;
+       p = qbman_cinh_write_start_wo_shadow(&s->sys, QBMAN_CENA_SWP_VDQCR);
+       memcpy_byte_by_byte(&p[1], &cl[1], 12);
+       /* Set the verb byte, have to substitute in the valid-bit */
+       lwsync();
+       p[0] = cl[0] | s->vdq.valid_bit;
+       s->vdq.valid_bit ^= QB_VALID_BIT;
+       return 0;
 static int qbman_swp_pull_mem_back(struct qbman_swp *s,
                                   struct qbman_pull_desc *d)
@@ -1601,7 +1960,10 @@ static int qbman_swp_pull_mem_back(struct qbman_swp *s,
 inline int qbman_swp_pull(struct qbman_swp *s, struct qbman_pull_desc *d)
-       return qbman_swp_pull_ptr(s, d);
+       if (!s->stash_off)
+               return qbman_swp_pull_ptr(s, d);
+       else
+               return qbman_swp_pull_cinh_direct(s, d);
@@ -1638,7 +2000,10 @@ void qbman_swp_prefetch_dqrr_next(struct qbman_swp *s)
 inline const struct qbman_result *qbman_swp_dqrr_next(struct qbman_swp *s)
-       return qbman_swp_dqrr_next_ptr(s);
+       if (!s->stash_off)
+               return qbman_swp_dqrr_next_ptr(s);
+       else
+               return qbman_swp_dqrr_next_cinh_direct(s);
 const struct qbman_result *qbman_swp_dqrr_next_direct(struct qbman_swp *s)
@@ -1718,6 +2083,81 @@ const struct qbman_result 
*qbman_swp_dqrr_next_direct(struct qbman_swp *s)
        return p;
+const struct qbman_result *qbman_swp_dqrr_next_cinh_direct(struct qbman_swp *s)
+       uint32_t verb;
+       uint32_t response_verb;
+       uint32_t flags;
+       const struct qbman_result *p;
+       /* Before using valid-bit to detect if something is there, we have to
+        * handle the case of the DQRR reset bug...
+        */
+       if (s->dqrr.reset_bug) {
+               /* We pick up new entries by cache-inhibited producer index,
+                * which means that a non-coherent mapping would require us to
+                * invalidate and read *only* once that PI has indicated that
+                * there's an entry here. The first trip around the DQRR ring
+                * will be much less efficient than all subsequent trips around
+                * it...
+                */
+               uint8_t pi = qbman_cinh_read(&s->sys, QBMAN_CINH_SWP_DQPI) &
+                            QMAN_DQRR_PI_MASK;
+               /* there are new entries if pi != next_idx */
+               if (pi == s->dqrr.next_idx)
+                       return NULL;
+               /* if next_idx is/was the last ring index, and 'pi' is
+                * different, we can disable the workaround as all the ring
+                * entries have now been DMA'd to so valid-bit checking is
+                * repaired. Note: this logic needs to be based on next_idx
+                * (which increments one at a time), rather than on pi (which
+                * can burst and wrap-around between our snapshots of it).
+                */
+               QBMAN_BUG_ON((s->dqrr.dqrr_size - 1) < 0);
+               if (s->dqrr.next_idx == (s->dqrr.dqrr_size - 1u)) {
+                       pr_debug("DEBUG: next_idx=%d, pi=%d, clear reset bug\n",
+                                s->dqrr.next_idx, pi);
+                       s->dqrr.reset_bug = 0;
+               }
+       }
+       p = qbman_cinh_read_wo_shadow(&s->sys,
+                       QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx));
+       verb = p->dq.verb;
+       /* If the valid-bit isn't of the expected polarity, nothing there. Note,
+        * in the DQRR reset bug workaround, we shouldn't need to skip these
+        * check, because we've already determined that a new entry is available
+        * and we've invalidated the cacheline before reading it, so the
+        * valid-bit behaviour is repaired and should tell us what we already
+        * knew from reading PI.
+        */
+       if ((verb & QB_VALID_BIT) != s->dqrr.valid_bit)
+               return NULL;
+       /* There's something there. Move "next_idx" attention to the next ring
+        * entry (and prefetch it) before returning what we found.
+        */
+       s->dqrr.next_idx++;
+       if (s->dqrr.next_idx == s->dqrr.dqrr_size) {
+               s->dqrr.next_idx = 0;
+               s->dqrr.valid_bit ^= QB_VALID_BIT;
+       }
+       /* If this is the final response to a volatile dequeue command
+        * indicate that the vdq is no longer busy
+        */
+       flags = p->dq.stat;
+       response_verb = verb & QBMAN_RESPONSE_VERB_MASK;
+       if ((response_verb == QBMAN_RESULT_DQ) &&
+           (flags & QBMAN_DQ_STAT_VOLATILE) &&
+           (flags & QBMAN_DQ_STAT_EXPIRED))
+               atomic_inc(&s->vdq.busy);
+       return p;
 const struct qbman_result *qbman_swp_dqrr_next_mem_back(struct qbman_swp *s)
        uint32_t verb;
@@ -2096,6 +2536,37 @@ static int qbman_swp_release_direct(struct qbman_swp *s,
        return 0;
+static int qbman_swp_release_cinh_direct(struct qbman_swp *s,
+                                   const struct qbman_release_desc *d,
+                                   const uint64_t *buffers,
+                                   unsigned int num_buffers)
+       uint32_t *p;
+       const uint32_t *cl = qb_cl(d);
+       uint32_t rar = qbman_cinh_read(&s->sys, QBMAN_CINH_SWP_RAR);
+       pr_debug("RAR=%08x\n", rar);
+       if (!RAR_SUCCESS(rar))
+               return -EBUSY;
+       QBMAN_BUG_ON(!num_buffers || (num_buffers > 7));
+       /* Start the release command */
+       p = qbman_cinh_write_start_wo_shadow(&s->sys,
+                                    QBMAN_CENA_SWP_RCR(RAR_IDX(rar)));
+       /* Copy the caller's buffer pointers to the command */
+       memcpy_byte_by_byte(&p[2], buffers, num_buffers * sizeof(uint64_t));
+       /* Set the verb byte, have to substitute in the valid-bit and the
+        * number of buffers.
+        */
+       lwsync();
+       p[0] = cl[0] | RAR_VB(rar) | num_buffers;
+       return 0;
 static int qbman_swp_release_mem_back(struct qbman_swp *s,
                                      const struct qbman_release_desc *d,
                                      const uint64_t *buffers,
@@ -2134,7 +2605,11 @@ inline int qbman_swp_release(struct qbman_swp *s,
                             const uint64_t *buffers,
                             unsigned int num_buffers)
-       return qbman_swp_release_ptr(s, d, buffers, num_buffers);
+       if (!s->stash_off)
+               return qbman_swp_release_ptr(s, d, buffers, num_buffers);
+       else
+               return qbman_swp_release_cinh_direct(s, d, buffers,
+                                               num_buffers);
@@ -2157,8 +2632,8 @@ struct qbman_acquire_rslt {
        uint64_t buf[7];
-int qbman_swp_acquire(struct qbman_swp *s, uint16_t bpid, uint64_t *buffers,
-                     unsigned int num_buffers)
+static int qbman_swp_acquire_direct(struct qbman_swp *s, uint16_t bpid,
+                               uint64_t *buffers, unsigned int num_buffers)
        struct qbman_acquire_desc *p;
        struct qbman_acquire_rslt *r;
@@ -2202,6 +2677,61 @@ int qbman_swp_acquire(struct qbman_swp *s, uint16_t 
bpid, uint64_t *buffers,
        return (int)r->num;
+static int qbman_swp_acquire_cinh_direct(struct qbman_swp *s, uint16_t bpid,
+                       uint64_t *buffers, unsigned int num_buffers)
+       struct qbman_acquire_desc *p;
+       struct qbman_acquire_rslt *r;
+       if (!num_buffers || (num_buffers > 7))
+               return -EINVAL;
+       /* Start the management command */
+       p = qbman_swp_mc_start(s);
+       if (!p)
+               return -EBUSY;
+       /* Encode the caller-provided attributes */
+       p->bpid = bpid;
+       p->num = num_buffers;
+       /* Complete the management command */
+       r = qbman_swp_mc_complete_cinh(s, p, QBMAN_MC_ACQUIRE);
+       if (!r) {
+               pr_err("qbman: acquire from BPID %d failed, no response\n",
+                      bpid);
+               return -EIO;
+       }
+       /* Decode the outcome */
+       /* Determine success or failure */
+       if (r->rslt != QBMAN_MC_RSLT_OK) {
+               pr_err("Acquire buffers from BPID 0x%x failed, code=0x%02x\n",
+                      bpid, r->rslt);
+               return -EIO;
+       }
+       QBMAN_BUG_ON(r->num > num_buffers);
+       /* Copy the acquired buffers to the caller's array */
+       u64_from_le32_copy(buffers, &r->buf[0], r->num);
+       return (int)r->num;
+int qbman_swp_acquire(struct qbman_swp *s, uint16_t bpid, uint64_t *buffers,
+                     unsigned int num_buffers)
+       if (!s->stash_off)
+               return qbman_swp_acquire_direct(s, bpid, buffers, num_buffers);
+       else
+               return qbman_swp_acquire_cinh_direct(s, bpid, buffers,
+                                       num_buffers);
 /* FQ management */
diff --git a/drivers/bus/fslmc/qbman/qbman_portal.h 
index 3aaacae52..1cf791830 100644
--- a/drivers/bus/fslmc/qbman/qbman_portal.h
+++ b/drivers/bus/fslmc/qbman/qbman_portal.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
- * Copyright 2018-2019 NXP
+ * Copyright 2018-2020 NXP
@@ -102,6 +102,7 @@ struct qbman_swp {
                uint32_t ci;
                int available;
        } eqcr;
+       uint8_t stash_off;
 /* -------------------------- */
@@ -118,7 +119,9 @@ struct qbman_swp {
 void *qbman_swp_mc_start(struct qbman_swp *p);
 void qbman_swp_mc_submit(struct qbman_swp *p, void *cmd, uint8_t cmd_verb);
+void qbman_swp_mc_submit_cinh(struct qbman_swp *p, void *cmd, uint8_t 
 void *qbman_swp_mc_result(struct qbman_swp *p);
+void *qbman_swp_mc_result_cinh(struct qbman_swp *p);
 /* Wraps up submit + poll-for-result */
 static inline void *qbman_swp_mc_complete(struct qbman_swp *swp, void *cmd,
@@ -135,6 +138,20 @@ static inline void *qbman_swp_mc_complete(struct qbman_swp 
*swp, void *cmd,
        return cmd;
+static inline void *qbman_swp_mc_complete_cinh(struct qbman_swp *swp, void 
+                                         uint8_t cmd_verb)
+       int loopvar = 1000;
+       qbman_swp_mc_submit_cinh(swp, cmd, cmd_verb);
+       do {
+               cmd = qbman_swp_mc_result_cinh(swp);
+       } while (!cmd && loopvar--);
+       QBMAN_BUG_ON(!loopvar);
+       return cmd;
 /* ---------------------- */
 /* Descriptors/cachelines */
 /* ---------------------- */
diff --git a/drivers/bus/fslmc/qbman/qbman_sys.h 
index 55449edf3..61f817c47 100644
--- a/drivers/bus/fslmc/qbman/qbman_sys.h
+++ b/drivers/bus/fslmc/qbman/qbman_sys.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
- * Copyright 2019 NXP
+ * Copyright 2019-2020 NXP
 /* qbman_sys_decl.h and qbman_sys.h are the two platform-specific files in the
  * driver. They are only included via qbman_private.h, which is itself a
@@ -190,6 +190,34 @@ static inline void qbman_cinh_write(struct qbman_swp_sys 
*s, uint32_t offset,
+static inline void *qbman_cinh_write_start_wo_shadow(struct qbman_swp_sys *s,
+                                                    uint32_t offset)
+       pr_info("qbman_cinh_write_start(%p:%d:0x%03x)\n",
+               s->addr_cinh, s->idx, offset);
+       QBMAN_BUG_ON(offset & 63);
+       return (s->addr_cinh + offset);
+static inline void qbman_cinh_write_complete(struct qbman_swp_sys *s,
+                                            uint32_t offset, void *cmd)
+       const uint32_t *shadow = cmd;
+       int loop;
+       pr_info("qbman_cinh_write_complete(%p:%d:0x%03x) %p\n",
+               s->addr_cinh, s->idx, offset, shadow);
+       hexdump(cmd, 64);
+       for (loop = 15; loop >= 1; loop--)
+               __raw_writel(shadow[loop], s->addr_cinh +
+                                        offset + loop * 4);
+       lwsync();
+       __raw_writel(shadow[0], s->addr_cinh + offset);
 static inline uint32_t qbman_cinh_read(struct qbman_swp_sys *s, uint32_t 
        uint32_t reg = __raw_readl(s->addr_cinh + offset);
@@ -200,6 +228,35 @@ static inline uint32_t qbman_cinh_read(struct 
qbman_swp_sys *s, uint32_t offset)
        return reg;
+static inline void *qbman_cinh_read_shadow(struct qbman_swp_sys *s,
+                                          uint32_t offset)
+       uint32_t *shadow = (uint32_t *)(s->cena + offset);
+       unsigned int loop;
+       pr_info(" %s (%p:%d:0x%03x) %p\n", __func__,
+               s->addr_cinh, s->idx, offset, shadow);
+       for (loop = 0; loop < 16; loop++)
+               shadow[loop] = __raw_readl(s->addr_cinh + offset
+                                       + loop * 4);
+       hexdump(shadow, 64);
+       return shadow;
+static inline void *qbman_cinh_read_wo_shadow(struct qbman_swp_sys *s,
+                                             uint32_t offset)
+       pr_info("qbman_cinh_read(%p:%d:0x%03x)\n",
+               s->addr_cinh, s->idx, offset);
+       return s->addr_cinh + offset;
 static inline void *qbman_cena_write_start(struct qbman_swp_sys *s,
                                           uint32_t offset)
@@ -476,6 +533,82 @@ static inline int qbman_swp_sys_init(struct qbman_swp_sys 
        return 0;
+static inline int qbman_swp_sys_update(struct qbman_swp_sys *s,
+                                    const struct qbman_swp_desc *d,
+                                    uint8_t dqrr_size,
+                                    int stash_off)
+       uint32_t reg;
+       int i;
+       int cena_region_size = 4*1024;
+       uint8_t est = 1;
+#ifdef RTE_ARCH_64
+       uint8_t wn = CENA_WRITE_ENABLE;
+       uint8_t wn = CINH_WRITE_ENABLE;
+       if (stash_off)
+               wn = CINH_WRITE_ENABLE;
+       QBMAN_BUG_ON(d->idx < 0);
+       /* We should never be asked to initialise for a portal that isn't in
+        * the power-on state. (Ie. don't forget to reset portals when they are
+        * decommissioned!)
+        */
+       reg = qbman_cinh_read(s, QBMAN_CINH_SWP_CFG);
+       QBMAN_BUG_ON(reg);
+       if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
+                       && (d->cena_access_mode == qman_cena_fastest_access))
+               memset(s->addr_cena, 0, cena_region_size);
+       else {
+               /* Invalidate the portal memory.
+                * This ensures no stale cache lines
+                */
+               for (i = 0; i < cena_region_size; i += 64)
+                       dccivac(s->addr_cena + i);
+       }
+       if (dpaa2_svr_family == SVR_LS1080A)
+               est = 0;
+       if (s->eqcr_mode == qman_eqcr_vb_array) {
+               reg = qbman_set_swp_cfg(dqrr_size, wn,
+                                       0, 3, 2, 3, 1, 1, 1, 1, 1, 1);
+       } else {
+               if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000 &&
+                           (d->cena_access_mode == qman_cena_fastest_access))
+                       reg = qbman_set_swp_cfg(dqrr_size, wn,
+                                               1, 3, 2, 0, 1, 1, 1, 1, 1, 1);
+               else
+                       reg = qbman_set_swp_cfg(dqrr_size, wn,
+                                               est, 3, 2, 2, 1, 1, 1, 1, 1, 1);
+       }
+       if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
+                       && (d->cena_access_mode == qman_cena_fastest_access))
+               reg |= 1 << SWP_CFG_CPBS_SHIFT | /* memory-backed mode */
+                      1 << SWP_CFG_VPM_SHIFT |  /* VDQCR read triggered mode */
+                      1 << SWP_CFG_CPM_SHIFT;   /* CR read triggered mode */
+       qbman_cinh_write(s, QBMAN_CINH_SWP_CFG, reg);
+       reg = qbman_cinh_read(s, QBMAN_CINH_SWP_CFG);
+       if (!reg) {
+               pr_err("The portal %d is not enabled!\n", s->idx);
+               return -1;
+       }
+       if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
+                       && (d->cena_access_mode == qman_cena_fastest_access)) {
+               qbman_cinh_write(s, QBMAN_CINH_SWP_EQCR_PI, QMAN_RT_MODE);
+               qbman_cinh_write(s, QBMAN_CINH_SWP_RCR_PI, QMAN_RT_MODE);
+       }
+       return 0;
 static inline void qbman_swp_sys_finish(struct qbman_swp_sys *s)

Reply via email to