-----Original Message-----
> Date: Fri,  8 Jun 2018 22:15:13 +0530
> From: Anoob Joseph <anoob.jos...@caviumnetworks.com>
> To: Akhil Goyal <akhil.go...@nxp.com>, Pablo de Lara
>  <pablo.de.lara.gua...@intel.com>, Thomas Monjalon <tho...@monjalon.net>
> Cc: Ankur Dwivedi <ankur.dwiv...@cavium.com>, Jerin Jacob
>  <jerin.ja...@caviumnetworks.com>, Murthy NSSR
>  <nidadavolu.mur...@cavium.com>, Narayana Prasad
>  <narayanaprasad.athr...@caviumnetworks.com>, Nithin Dabilpuram
>  <nithin.dabilpu...@cavium.com>, Ragothaman Jayaraman
>  <ragothaman.jayara...@cavium.com>, Srisivasubramanian Srinivasan
>  <srisivasubramanian.sriniva...@cavium.com>, dev@dpdk.org
> Subject: [PATCH 04/16] crypto/cpt/base: add hardware enq/deq API for CPT
> X-Mailer: git-send-email 2.7.4
> 
> From: Ankur Dwivedi <ankur.dwiv...@cavium.com>
> 
> Adds hardware enqueue/dequeue API of instructions to a queue pair
> for Cavium CPT device.
> 
> Signed-off-by: Ankur Dwivedi <ankur.dwiv...@cavium.com>
> Signed-off-by: Murthy NSSR <nidadavolu.mur...@cavium.com>
> Signed-off-by: Nithin Dabilpuram <nithin.dabilpu...@cavium.com>
> Signed-off-by: Ragothaman Jayaraman <ragothaman.jayara...@cavium.com>
> Signed-off-by: Srisivasubramanian Srinivasan 
> <srisivasubramanian.sriniva...@cavium.com>
> ---
>  drivers/crypto/cpt/base/cpt.h             | 102 +++++++
>  drivers/crypto/cpt/base/cpt_device.c      |   4 +-
>  drivers/crypto/cpt/base/cpt_request_mgr.c | 424 
> ++++++++++++++++++++++++++++++
>  drivers/crypto/cpt/base/cpt_request_mgr.h |  75 ++++++
>  4 files changed, 603 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/crypto/cpt/base/cpt.h
>  create mode 100644 drivers/crypto/cpt/base/cpt_request_mgr.c
>  create mode 100644 drivers/crypto/cpt/base/cpt_request_mgr.h
> 
> diff --git a/drivers/crypto/cpt/base/cpt.h b/drivers/crypto/cpt/base/cpt.h
> new file mode 100644
> index 0000000..11407ae
> --- /dev/null
> +++ b/drivers/crypto/cpt/base/cpt.h
> @@ -0,0 +1,102 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2017 Cavium, Inc
> + */
> +
> +#ifndef __BASE_CPT_H__
> +#define __BASE_CPT_H__
> +
> +/* Linux Includes */
> +#include <endian.h>
> +#include <stdint.h>
> +#include <string.h>
> +#include <stdio.h>
> +#include <stdbool.h>
> +#include <errno.h>
> +#include <sys/cdefs.h>
> +#include <unistd.h>
> +#include <assert.h>

Please keep these includes in alphabetical order.

> +
> +/* DPDK includes */
> +#include <rte_byteorder.h>
> +#include <rte_common.h>
> +#include <rte_errno.h>
> +#include <rte_memory.h>
> +#include <rte_prefetch.h>
> +
> +#include "../cpt_pmd_logs.h"
> +#include "mcode_defines.h"
> +
> +/** @cond __INTERNAL_DOCUMENTATION__ */
> +
> +/* Declarations */
> +typedef struct cpt_instance cpt_instance_t;
> +
> +/*
> + * Generic Defines
> + */
> +
> +/* Buffer pointer */
> +typedef struct buf_ptr {
> +     void *vaddr;
> +     phys_addr_t dma_addr;
> +     uint32_t size;
> +     uint32_t resv;
> +} buf_ptr_t;
> +
> +/* IOV Pointer */
> +typedef struct{
> +     int buf_cnt;
> +     buf_ptr_t bufs[0];
> +} iov_ptr_t;
> +
> +typedef struct app_data {
> +     uint64_t pktout;
> +     void *marker;
> +} app_data_t;
> +
> +/* Instance operations */
> +
> +/* Enqueue an SE/AE request */
> +int cpt_enqueue_req(cpt_instance_t *inst, void *req, uint8_t flags,
> +           void *event, uint64_t event_flags);
> +
> +/* Dequeue completed SE requests as burst */
> +int32_t cpt_dequeue_burst(cpt_instance_t *instance, uint16_t cnt,
> +                       void *resp[], uint8_t cc[]);
> +
> +/* Marks event as done in event driven mode */
> +int32_t cpt_event_mark_done(void *marker, uint8_t *op_error);
> +
> +/* Checks queue full condition */
> +uint16_t cpt_queue_full(cpt_instance_t *instance);
> +
> +/* Misc */
> +uint32_t cpt_get_instance_count(void);
> +
> +#define ENQ_FLAG_SYNC                0x01
> +#define ENQ_FLAG_EVENT               0x02
> +#define ENQ_FLAG_NODOORBELL  0x04
> +#define ENQ_FLAG_ONLY_DOORBELL       0x08
> +
> +
> +#define OCTTX_EVENT_TAG(__flags) (__flags & 0xffffffff)
> +#define OCTTX_EVENT_GRP(__flags) ((__flags >> 32) & 0xffff)
> +#define OCTTX_EVENT_TT(__flags) ((__flags >> 48) & 0xff)
> +
> +#define OCTTX_EVENT_FLAGS(__tag, __grp, __tt)    \
> +     (((uint64_t)__tag & 0xffffffff) |        \
> +      (((uint64_t)__grp & 0xffff) << 32) |    \
> +      (((uint64_t)__tt & 0xff) << 48))
> +
> +
> +/* cpt instance */
> +struct cpt_instance {
> +     /* 0th cache line */
> +     uint32_t queue_id;
> +     uint64_t rsvd;
> +};
> +

#ifndef __hot
> +#define __hot __attribute__((hot))
#endif

> +/** @endcond */
> +
> +#endif /* __BASE_CPT_H__ */
> diff --git a/drivers/crypto/cpt/base/cpt_device.c 
> b/drivers/crypto/cpt/base/cpt_device.c
> index b7cd5b5..a50e5b8 100644
> --- a/drivers/crypto/cpt/base/cpt_device.c
> +++ b/drivers/crypto/cpt/base/cpt_device.c
> @@ -193,7 +193,7 @@ int cptvf_get_resource(struct cpt_vf *dev,
>       uint64_t *next_ptr;
>       uint64_t pg_sz = sysconf(_SC_PAGESIZE);
>  
> -     PMD_DRV_LOG(DEBUG, "Initializing csp resource %s\n", cptvf->dev_name);
> +     PMD_DRV_LOG(DEBUG, "Initializing cpt resource %s\n", cptvf->dev_name);
>  
>       cpt_instance = &cptvf->instance;
>  
> @@ -323,7 +323,7 @@ int cptvf_put_resource(cpt_instance_t *instance)
>               return -EINVAL;
>       }
>  
> -     PMD_DRV_LOG(DEBUG, "Releasing csp device %s\n", cptvf->dev_name);
> +     PMD_DRV_LOG(DEBUG, "Releasing cpt device %s\n", cptvf->dev_name);
>  
>       rz = (struct rte_memzone *)instance->rsvd;
>       rte_memzone_free(rz);
> diff --git a/drivers/crypto/cpt/base/cpt_request_mgr.c 
> b/drivers/crypto/cpt/base/cpt_request_mgr.c
> new file mode 100644
> index 0000000..8b9b1ff
> --- /dev/null
> +++ b/drivers/crypto/cpt/base/cpt_request_mgr.c
> @@ -0,0 +1,424 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2017 Cavium, Inc
> + */
> +
> +#include "cpt_request_mgr.h"
> +#include "cpt_debug.h"
> +#include <rte_atomic.h>
> +
> +#define MOD_INC(i, l)   ((i) == (l - 1) ? (i) = 0 : (i)++)
> +
> +#define __hot __attribute__((hot))

Same as above: wrap this definition in #ifndef __hot / #endif.

> +
> +static inline uint64_t cpu_cycles(void)
> +{
> +     return rte_get_timer_cycles();
> +}
> +
> +static inline uint64_t cpu_cycles_freq(void)
> +{
> +     return rte_get_timer_hz();
> +}
> +
> +static inline void *
> +get_cpt_inst(struct command_queue *cqueue, void *req)
> +{
> +     (void)req;
> +     PMD_TX_LOG(DEBUG, "CPT queue idx %u, req %p\n", cqueue->idx, req);
> +     return &cqueue->qhead[cqueue->idx * CPT_INST_SIZE];
> +}
> +
> +static inline void
> +mark_cpt_inst(struct cpt_vf *cptvf,
> +           struct command_queue *queue,
> +           uint32_t ring_door_bell)
> +{
> +#ifdef CMD_DEBUG
> +     /* DEBUG */
> +     {
> +             uint32_t i = queue->idx * CPT_INST_SIZE;
> +             cpt_inst_s_t *cmd = (void *)&queue->qhead[i];
> +             uint64_t *p = (void *)&queue->qhead[i];
> +
> +             PRINT("\nQUEUE parameters:");
> +             PRINT("Queue index           = %u\n",
> +                   queue->idx);
> +             PRINT("Queue HEAD            = %p\n",
> +                   queue->qhead);
> +             PRINT("Command Entry         = %p\n",
> +                   cmd);
> +
> +             PRINT("\nCPT_INST_S format:");
> +             PRINT("cmd->s.doneint = %x\n", cmd->s.doneint);
> +             PRINT("cmd->s.res_addr  = %lx\n", cmd->s.res_addr);
> +             PRINT("cmd->s.grp       = %x\n", cmd->s.grp);
> +             PRINT("cmd->s.tag       = %x\n", cmd->s.tag);
> +             PRINT("cmd->s.tt        = %x\n", cmd->s.tt);
> +             PRINT("cmd->s.wq_ptr    = %lx\n", cmd->s.wq_ptr);
> +             PRINT("cmd->s.ei0       = %lx\n", cmd->s.ei0);
> +             PRINT("cmd->s.ei1       = %lx\n", cmd->s.ei1);
> +             PRINT("cmd->s.ei2       = %lx\n", cmd->s.ei2);
> +             PRINT("cmd->s.ei3       = %lx\n", cmd->s.ei3);
> +
> +             PRINT("\nCommand dump from queue HEAD:");
> +             for (i = 0; i < CPT_INST_SIZE / 8; i++)
> +                     PRINT("%lx\n", p[i]);
> +     }
> +#endif
> +     if (unlikely(++queue->idx >= DEFAULT_CMD_QCHUNK_SIZE)) {
> +             uint32_t cchunk = queue->cchunk;
> +             MOD_INC(cchunk, DEFAULT_CMD_QCHUNKS);
> +             queue->qhead = queue->chead[cchunk].head;
> +             queue->idx = 0;
> +             queue->cchunk = cchunk;
> +     }
> +
> +     if (ring_door_bell) {
> +             /* Memory barrier to flush pending writes */
> +             rte_smp_wmb();
> +             cptvf_write_vq_doorbell(cptvf, ring_door_bell);
> +     }
> +}
> +
> +static inline uint8_t
> +check_nb_command_id(cpt_request_info_t *user_req, struct cpt_vf *cptvf)
> +{
> +     uint8_t ret = ERR_REQ_PENDING;
> +     volatile cpt_res_s_t *cptres;
> +
> +     cptres = (volatile cpt_res_s_t *)user_req->completion_addr;
> +
> +     if (unlikely(cptres->s.compcode == CPT_COMP_E_NOTDONE)) {
> +             /*
> +              * Wait for some time for this command to get completed
> +              * before timing out
> +              */
> +             if (cpu_cycles() < user_req->time_out)
> +                     return ret;
> +             /*
> +              * TODO: See if alternate caddr can be used to not loop
> +              * longer than needed.
> +              */
> +             if ((cptres->s.compcode == CPT_COMP_E_NOTDONE) &&
> +                 (user_req->extra_time < TIME_IN_RESET_COUNT)) {
> +                     user_req->extra_time++;
> +                     return ret;
> +             }
> +
> +             if (cptres->s.compcode != CPT_COMP_E_NOTDONE)
> +                     goto complete;
> +
> +             ret = ERR_REQ_TIMEOUT;
> +             PMD_DRV_LOG_RAW(ERR, "Request %p timedout\n", user_req);
> +             cptvf_poll_misc(cptvf);
> +             dump_cpt_request_sglist(&user_req->dbg_inst,
> +                                     "Response Packet Gather in", 1, 1);
> +             goto exit;
> +     }
> +
> +complete:
> +     if (likely(cptres->s.compcode == CPT_COMP_E_GOOD)) {
> +             ret = 0; /* success */
> +             PMD_RX_LOG(DEBUG, "MC status %.8x\n",
> +                        *((volatile uint32_t *)user_req->alternate_caddr));
> +             PMD_RX_LOG(DEBUG, "HW status %.8x\n",
> +                        *((volatile uint32_t *)user_req->completion_addr));
> +     } else if ((cptres->s.compcode == CPT_COMP_E_SWERR) ||
> +                (cptres->s.compcode == CPT_COMP_E_FAULT)) {
> +             ret = (uint8_t)*user_req->alternate_caddr;
> +             if (!ret)
> +                     ret = ERR_BAD_ALT_CCODE;
> +             PMD_RX_LOG(DEBUG, "Request %p : failed with %s : err code :"
> +                        "%x\n", user_req,
> +                        (cptres->s.compcode == CPT_COMP_E_FAULT) ?
> +                        "DMA Fault" : "Software error", ret);
> +     } else {
> +             PMD_DRV_LOG_RAW(ERR, "Request %p : unexpected completion code"
> +                        " %d\n",
> +                        user_req, cptres->s.compcode);
> +             ret = (uint8_t)*user_req->alternate_caddr;
> +     }
> +
> +exit:
> +     dump_cpt_request_sglist(&user_req->dbg_inst,
> +                             "Response Packet Scatter Out", 1, 0);
> +     return ret;
> +}
> +
> +
> +/*
> + * cpt_enqueue_req()
> + *
> + * SE & AE request enqueue function
> + */
> +int32_t __hot
> +cpt_enqueue_req(cpt_instance_t *instance, void *req, uint8_t flags,
> +             void *event, uint64_t event_flags)
> +{
> +     struct pending_queue *pqueue;
> +     struct cpt_vf *cptvf;
> +     cpt_inst_s_t *cpt_ist_p = NULL;
> +     cpt_request_info_t *user_req = (cpt_request_info_t *)req;
> +     struct command_queue *cqueue;
> +     int32_t ret = 0;
> +
> +#ifdef CPTVF_STRICT_PARAM_CHECK
> +     if (unlikely(!instance)) {
> +             PMD_DRV_LOG_RAW(ERR, "Invalid inputs (instance: %p, req: %p)\n",
> +                        instance, req);
> +             return -EINVAL;
> +     }
> +#endif
> +
> +     cptvf = (struct cpt_vf *)instance;
> +     pqueue = &cptvf->pqueue;
> +
> +     if (unlikely(!req)) {
> +             /* ring only pending doorbells */
> +             if ((flags & ENQ_FLAG_ONLY_DOORBELL) && pqueue->p_doorbell) {
> +                     /* Memory barrier to flush pending writes */
> +                     rte_smp_wmb();

Cross-check whether rte_wmb() is intended here, as this is a barrier between
the device and the CPU.

> +                     cptvf_write_vq_doorbell(cptvf, pqueue->p_doorbell);
> +                     pqueue->p_doorbell = 0;
> +             }
> +             return 0;
> +     }
> +
> +#if defined(ATOMIC_THROTTLING_COUNTER)

If this config is useful for the end user, then expose it in the config file
and explain the details in the documentation.

> +     /* Ask the application to try again later */
> +     if (unlikely(cpt_pmd_pcount_load(&pqueue->pending_count) >=
> +                  DEFAULT_CMD_QLEN)) {
> +             return -EAGAIN;
> +     }
> +#else
> +     if (unlikely(pqueue->pending_count >= DEFAULT_CMD_QLEN))
> +             return -EAGAIN;
> +#endif
> +     cqueue = &cptvf->cqueue;
> +     cpt_ist_p = get_cpt_inst(cqueue, req);
> +     rte_prefetch_non_temporal(cpt_ist_p);
> +
> +     /* EI0, EI1, EI2, EI3 are already prepared */
> +     /* HW W0 */
> +     cpt_ist_p->u[0] = 0;
> +     /* HW W1 */
> +     cpt_ist_p->s.res_addr = user_req->comp_baddr;
> +     /* HW W2 */
> +     cpt_ist_p->u[2] = 0;
> +     /* HW W3 */
> +     cpt_ist_p->s.wq_ptr = 0;
> +
> +     /* MC EI0 */
> +     cpt_ist_p->s.ei0 = user_req->ist.ei0;
> +     /* MC EI1 */
> +     cpt_ist_p->s.ei1 = user_req->ist.ei1;
> +     /* MC EI2 */
> +     cpt_ist_p->s.ei2 = user_req->ist.ei2;
> +     /* MC EI3 */
> +     cpt_ist_p->s.ei3 = user_req->ist.ei3;
> +
> +     PMD_TX_LOG(DEBUG, "req: %p op: %p dma_mode 0x%x se_req %u\n",
> +                req,
> +                user_req->op,
> +                user_req->dma_mode,
> +                user_req->se_req);
> +
> +#ifdef CPT_DEBUG
> +     {
> +             vq_cmd_word0_t vq_cmd_w0;
> +             vq_cmd_word3_t vq_cmd_w3;
> +
> +             vq_cmd_w3.u64 = cpt_ist_p->s.ei3;
> +             vq_cmd_w0.u64 = be64toh(cpt_ist_p->s.ei0);
> +             user_req->dbg_inst = *cpt_ist_p;
> +
> +             if (vq_cmd_w3.s.cptr) {
> +                     PMD_TX_LOG(DEBUG, "Context Handle: 0x%016lx\n",
> +                                (uint64_t)vq_cmd_w3.s.cptr);
> +                     /* Dump max context i.e 448 bytes */
> +                     cpt_dump_buffer("CONTEXT",
> +                                     os_iova2va((uint64_t)vq_cmd_w3.s.cptr),
> +                                     448);
> +             }
> +
> +             dump_cpt_request_info(user_req, cpt_ist_p);
> +             dump_cpt_request_sglist(cpt_ist_p, "Request (src)", 1, 1);
> +             dump_cpt_request_sglist(cpt_ist_p, "Request (dst)", 0, 0);
> +             cpt_dump_buffer("VQ command word0", &cpt_ist_p->u[4],
> +                             sizeof(vq_cmd_w0));
> +             cpt_dump_buffer("VQ command word1", &cpt_ist_p->u[5],
> +                             sizeof(uint64_t));
> +             cpt_dump_buffer("VQ command word2", &cpt_ist_p->u[6],
> +                             sizeof(uint64_t));
> +             cpt_dump_buffer("VQ command word3", &cpt_ist_p->u[7],
> +                             sizeof(vq_cmd_w3));
> +     }
> +#endif
> +
> +     if (likely(!(flags & ENQ_FLAG_SYNC))) {
> +             void *op = user_req->op;
> +
> +             if (unlikely(flags & ENQ_FLAG_EVENT)) {
> +                     app_data_t *app_data = op;
> +
> +                     /* Event based completion */
> +                     cpt_ist_p->s.tag = OCTTX_EVENT_TAG(event_flags);
> +                     cpt_ist_p->s.grp = OCTTX_EVENT_GRP(event_flags);
> +                     cpt_ist_p->s.tt = OCTTX_EVENT_TT(event_flags);
> +                     cpt_ist_p->s.wq_ptr = (uint64_t)event;
> +
> +#if defined(ATOMIC_THROTTLING_COUNTER)
> +                     app_data->marker = user_req;
> +                     __atomic_fetch_add(&pqueue->pending_count,
> +                                        1, __ATOMIC_RELAXED);
> +#else
> +                     rid_t *rid_e;
> +                     /*
> +                      * Mark it as in progress in pending queue, software
> +                      * will mark it when completion is received
> +                      */
> +                     rid_e = &pqueue->rid_queue[pqueue->enq_tail];
> +                     rid_e->rid = (uint64_t)user_req;
> +                     /* rid_e->op = op; */
> +                     MOD_INC(pqueue->enq_tail, DEFAULT_CMD_QLEN);
> +                     app_data->marker = rid_e;
> +#endif
> +
> +                     cpt_dump_buffer("CPT Instruction with wqe", cpt_ist_p,
> +                                     sizeof(*cpt_ist_p));
> +
> +                     mark_cpt_inst(cptvf, cqueue, 1);
> +
> +             } else {
> +                     uint32_t doorbell = 0;
> +
> +                     if (likely(flags & ENQ_FLAG_NODOORBELL))
> +                             pqueue->p_doorbell++;
> +                     else
> +                             doorbell = ++pqueue->p_doorbell;
> +
> +                     /* Fill time_out cycles */
> +                     user_req->time_out = cpu_cycles() +
> +                             DEFAULT_COMMAND_TIMEOUT * cpu_cycles_freq();
> +                     user_req->extra_time = 0;
> +
> +                     cpt_dump_buffer("CPT Instruction", cpt_ist_p,
> +                                     sizeof(*cpt_ist_p));
> +
> +                     /* Default mode of software queue */
> +                     mark_cpt_inst(cptvf, cqueue, doorbell);
> +
> +                     pqueue->p_doorbell -= doorbell;
> +                     pqueue->rid_queue[pqueue->enq_tail].rid =
> +                             (uint64_t)user_req;
> +                     /* pqueue->rid_queue[pqueue->enq_tail].op = op; */
> +                     /* We will use soft queue length here to limit
> +                      * requests
> +                      */
> +                     MOD_INC(pqueue->enq_tail, DEFAULT_CMD_QLEN);
> +                     pqueue->pending_count += 1;
> +             }
> +
> +             PMD_TX_LOG(DEBUG, "Submitted NB cmd with request: %p op: %p\n",
> +                        user_req, op);
> +     } else {
> +             /*
> +              * Synchronous operation,
> +              * hold until completion / timeout
> +              */
> +             /* Fill time_out cycles */
> +             user_req->time_out = cpu_cycles() +
> +                     DEFAULT_COMMAND_TIMEOUT * cpu_cycles_freq();
> +             user_req->extra_time = 0;
> +
> +             cpt_dump_buffer("CPT Instruction", cpt_ist_p,
> +                             sizeof(*cpt_ist_p));
> +
> +             /* Default mode of software queue */
> +             mark_cpt_inst(cptvf, cqueue, 1);
> +
> +             do {
> +                     /* TODO: should we pause */
> +                     ret = check_nb_command_id(user_req, cptvf);
> +                     cptvf_poll_misc(cptvf);
> +#if 0

??? Why is this block compiled out with #if 0? Please remove the dead code.

> +                     PMD_TX_LOG(DEBUG, "Doorbell count for cptvf %s: %u\n",
> +                                cptvf->dev_name,
> +                                cptvf_read_vq_doorbell(cptvf));
> +#endif
> +             } while (ret == ERR_REQ_PENDING);
> +

Reply via email to