-----Original Message-----
From: Chautru, Nicolas <nicolas.chau...@intel.com>
Sent: Thursday, March 10, 2022 3:49 PM
To: dev@dpdk.org; gak...@marvell.com; t...@redhat.com
Cc: tho...@monjalon.net; hemant.agra...@nxp.com; Zhang, Mingshan
<mingshan.zh...@intel.com>; david.march...@redhat.com; Chautru,
Nicolas <nicolas.chau...@intel.com>
Subject: [PATCH v1] bbdev: add new operation for FFT processing
Extension of bbdev operation to support FFT based operations.
Signed-off-by: Nicolas Chautru <nicolas.chau...@intel.com>
---
doc/guides/prog_guide/bbdev.rst | 130
+++++++++++++++++++++++++++++++++++
lib/bbdev/rte_bbdev.c | 8 +++
lib/bbdev/rte_bbdev.h | 76 ++++++++++++++++++++
lib/bbdev/rte_bbdev_op.h | 149
++++++++++++++++++++++++++++++++++++++++
lib/bbdev/version.map | 10 +++
5 files changed, 373 insertions(+)
diff --git a/doc/guides/prog_guide/bbdev.rst
b/doc/guides/prog_guide/bbdev.rst index 70fa01a..2791286 100644
--- a/doc/guides/prog_guide/bbdev.rst
+++ b/doc/guides/prog_guide/bbdev.rst
@@ -1118,6 +1118,136 @@ Figure :numref:`figure_turbo_tb_decode` above
showing the Turbo decoding of CBs using BBDEV interface in TB-mode is also
valid for LDPC decode.
+BBDEV FFT Operation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This operation allows to run a combination of DFT and/or IDFT and/or time-
domain windowing.
+These can be used in a modular fashion (using bypass modes) or as a
+processing pipeline which can be used for FFT-based baseband signal
processing.
+In more details it allows :
+- to process the data first through an IDFT of adjustable size and
+padding;
+- to perform the windowing as a programmable cyclic shift offset of the
+data followed by a pointwise multiplication by a time domain window;
+- to process the related data through a DFT of adjustable size and
+depadding for each such cyclic shift output.
+
+A flexible number of Rx antennas are being processed in parallel with the
same configuration.
+The API allows more generally for flexibility in what the PMD may
+support (cabability flags) and flexibility to adjust some of the parameters of
the processing.
+
+The operation/capability flags that can be set for each FFT operation are
given below.
+
+ **NOTE:** The actual operation flags that may be used with a specific
+ BBDEV PMD are dependent on the driver capabilities as reported via
+ ``rte_bbdev_info_get()``, and may be a subset of those below.
+
++--------------------------------------------------------------------+
+|Description of FFT capability flags |
++==============================================================
======+
+|RTE_BBDEV_FFT_WINDOWING |
+| Set to enable/support windowing in time domain |
++--------------------------------------------------------------------+
+|RTE_BBDEV_FFT_CS_ADJUSTMENT |
+| Set to enable/support the cyclic shift time offset adjustment |
++--------------------------------------------------------------------+
+|RTE_BBDEV_FFT_DFT_BYPASS |
+| Set to bypass the DFT and use directly the IDFT as an option |
++--------------------------------------------------------------------+
+|RTE_BBDEV_FFT_IDFT_BYPASS |
+| Set to bypass the IDFT and use directly the DFT as an option |
++--------------------------------------------------------------------+
+|RTE_BBDEV_FFT_WINDOWING_BYPASS |
+| Set to bypass the time domain windowing as an option |
++--------------------------------------------------------------------+
+|RTE_BBDEV_FFT_POWER_MEAS |
+| Set to provide an optional power measument of the DFT output |
++--------------------------------------------------------------------+
+|RTE_BBDEV_FFT_FP16_INPUT |
+| Set if the input data shall use FP16 format instead of INT16 |
++--------------------------------------------------------------------+
+|RTE_BBDEV_FFT_FP16_OUTPUT |
+| Set if the output data shall use FP16 format instead of INT16 |
++--------------------------------------------------------------------+
+
+The structure passed for each FFT operation is given below, with the
+operation flags forming a bitmask in the ``op_flags`` field.
+
+.. code-block:: c
+
+ struct rte_bbdev_op_fft {
+ struct rte_bbdev_op_data base_input;
+ struct rte_bbdev_op_data base_output;
+ struct rte_bbdev_op_data power_meas_output;
+ uint32_t op_flags;
+ uint16_t input_sequence_size;
+ uint16_t input_leading_padding;
+ uint16_t output_sequence_size;
+ uint16_t output_leading_depadding;
+ uint8_t window_index[RTE_BBDEV_MAX_CS_2];
+ uint16_t cs_bitmap;
+ uint8_t num_antennas_log2;
+ uint8_t idft_log2;
+ uint8_t dft_log2;
+ int8_t cs_time_adjustment;
+ int8_t idft_shift;
+ int8_t dft_shift;
+ uint16_t ncs_reciprocal;
+ uint16_t power_shift;
+ uint16_t fp16_exp_adjust;
+ };
+
+The FFT parameters are set out in the table below.
+
++----------------------+--------------------------------------------------------------+
+|Parameter |Description
|
++======================+=======================================
========
++===============+
+|base_input |input data
|
++----------------------+--------------------------------------------------------------+
+|base_output |output data
|
++----------------------+--------------------------------------------------------------+
+|power_meas_output |optional output data with power measurement
on DFT output | |
++----------------------+--------------------------------------------------------------+
+|op_flags |bitmask of all active operation capabilities
|
++----------------------+--------------------------------------------------------------+
+|input_sequence_size |size of the input sequence in 32-bits points per
antenna |
++----------------------+--------------------------------------------------------------+
+|input_leading_padding |number of points padded at the start of input
data |
++----------------------+--------------------------------------------------------------+
+|output_sequence_size |size of the output sequence per antenna and
cyclic shift |
++----------------------+--------------------------------------------------------------+
+|output_depadding |number of points depadded at the start of output
data |
++----------------------+--------------------------------------------------------------+
+|window_index |optional windowing profile index used for each cyclic
shift |
++----------------------+--------------------------------------------------------------+
+|cs_bitmap |bitmap of the cyclic shift output requested (LSB for
index 0) |
++----------------------+--------------------------------------------------------------+
+|num_antennas_log2 |number of antennas as a log2 (10 maps to 1024...)
|
++----------------------+--------------------------------------------------------------+
+|idft_log2 |iDFT size as a log2
|
++----------------------+--------------------------------------------------------------+
+|dft_log2 |DFT size as a log2
|
++----------------------+--------------------------------------------------------------+
+|cs_time_adjustment |adjustment of time position of all the cyclic shift
output |
++----------------------+--------------------------------------------------------------+
+|idft_shift |shift down of signal level post iDFT
|
++----------------------+--------------------------------------------------------------+
+|dft_shift |shift down of signal level post DFT
|
++----------------------+--------------------------------------------------------------+
+|ncs_reciprocal |inverse of max number of CS normalized to 15b (ie.
231 for 12)|
++----------------------+--------------------------------------------------------------+
+|power_shift |shift down of level of power measurement when
enabled |
++----------------------+--------------------------------------------------------------+
+|fp16_exp_adjust |value added to FP16 exponent at conversion from
INT16 |
++----------------------+--------------------------------------------------------------+
+
+The mbuf input ``base_input`` is mandatory for all BBDEV PMDs and is
+the incoming data for the processing. Its size may not fit into an
+actual mbuf, but the stucture is used to pass iova address.
+The mbuf output ``output`` is mandatory and is output of the FFT processing
chain.
+Each point is a complex number of 32bits : either as 2 INT16 or as 2
+FP16 based when the option supported.
+The data layout is based on contiguous concatenation of output data
+first by cyclic shift then by antenna.
Sample code
-----------
diff --git a/lib/bbdev/rte_bbdev.c b/lib/bbdev/rte_bbdev.c index
aaee7b7..a72ecba 100644
--- a/lib/bbdev/rte_bbdev.c
+++ b/lib/bbdev/rte_bbdev.c
@@ -850,6 +850,9 @@ struct rte_bbdev *
case RTE_BBDEV_OP_LDPC_ENC:
result = sizeof(struct rte_bbdev_enc_op);
break;
+ case RTE_BBDEV_OP_FFT:
+ result = sizeof(struct rte_bbdev_fft_op);
+ break;
default:
break;
}
@@ -873,6 +876,10 @@ struct rte_bbdev *
struct rte_bbdev_enc_op *op = element;
memset(op, 0, mempool->elt_size);
op->mempool = mempool;
+ } else if (type == RTE_BBDEV_OP_FFT) {
+ struct rte_bbdev_fft_op *op = element;
+ memset(op, 0, mempool->elt_size);
+ op->mempool = mempool;
}
}
@@ -1123,6 +1130,7 @@ struct rte_mempool *
"RTE_BBDEV_OP_TURBO_ENC",
"RTE_BBDEV_OP_LDPC_DEC",
"RTE_BBDEV_OP_LDPC_ENC",
+ "RTE_BBDEV_OP_FFT",
};
if (op_type < RTE_BBDEV_OP_TYPE_COUNT) diff --git
a/lib/bbdev/rte_bbdev.h b/lib/bbdev/rte_bbdev.h index b88c881..e9ca673
100644
--- a/lib/bbdev/rte_bbdev.h
+++ b/lib/bbdev/rte_bbdev.h
@@ -380,6 +380,12 @@ typedef uint16_t
(*rte_bbdev_enqueue_dec_ops_t)(
struct rte_bbdev_dec_op **ops,
uint16_t num);
+/** @internal Enqueue fft operations for processing on queue of a
+device. */ typedef uint16_t (*rte_bbdev_enqueue_fft_ops_t)(
+ struct rte_bbdev_queue_data *q_data,
+ struct rte_bbdev_fft_op **ops,
+ uint16_t num);
+
/** @internal Dequeue encode operations from a queue of a device. */
typedef uint16_t (*rte_bbdev_dequeue_enc_ops_t)(
struct rte_bbdev_queue_data *q_data,
@@ -390,6 +396,11 @@ typedef uint16_t
(*rte_bbdev_dequeue_dec_ops_t)(
struct rte_bbdev_queue_data *q_data,
struct rte_bbdev_dec_op **ops, uint16_t num);
+/** @internal Dequeue fft operations from a queue of a device. */
+typedef uint16_t (*rte_bbdev_dequeue_fft_ops_t)(
+ struct rte_bbdev_queue_data *q_data,
+ struct rte_bbdev_fft_op **ops, uint16_t num);
+
#define RTE_BBDEV_NAME_MAX_LEN 64 /**< Max length of device name
*/
/**
@@ -438,6 +449,10 @@ struct __rte_cache_aligned rte_bbdev {
rte_bbdev_dequeue_enc_ops_t dequeue_ldpc_enc_ops;
/** Dequeue decode function */
rte_bbdev_dequeue_dec_ops_t dequeue_ldpc_dec_ops;
+ /** Enqueue FFT function */
+ rte_bbdev_enqueue_fft_ops_t enqueue_fft_ops;
+ /** Dequeue FFT function */
+ rte_bbdev_dequeue_fft_ops_t dequeue_fft_ops;
const struct rte_bbdev_ops *dev_ops; /**< Functions exported by
PMD */
struct rte_bbdev_data *data; /**< Pointer to device data */
enum rte_bbdev_state state; /**< If device is currently used or not
*/ @@ -570,6 +585,36 @@ struct __rte_cache_aligned rte_bbdev {
return dev->enqueue_ldpc_dec_ops(q_data, ops, num_ops); }
+/**
+ * Enqueue a burst of fft operations to a queue of the device.
+ * This functions only enqueues as many operations as currently
+possible and
+ * does not block until @p num_ops entries in the queue are available.
+ * This function does not provide any error notification to avoid the
+ * corresponding overhead.
+ *
+ * @param dev_id
+ * The identifier of the device.
+ * @param queue_id
+ * The index of the queue.
+ * @param ops
+ * Pointer array containing operations to be enqueued Must have at least
+ * @p num_ops entries
+ * @param num_ops
+ * The maximum number of operations to enqueue.
+ *
+ * @return
+ * The number of operations actually enqueued (this is the number of
processed
+ * entries in the @p ops array).
+ */
+__rte_experimental
+static inline uint16_t
+rte_bbdev_enqueue_fft_ops(uint16_t dev_id, uint16_t queue_id,
+ struct rte_bbdev_fft_op **ops, uint16_t num_ops) {
+ struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
+ struct rte_bbdev_queue_data *q_data = &dev->data-
queues[queue_id];
+ return dev->enqueue_fft_ops(q_data, ops, num_ops); }
/**
* Dequeue a burst of processed encode operations from a queue of the
device.
@@ -695,6 +740,37 @@ struct __rte_cache_aligned rte_bbdev {
return dev->dequeue_ldpc_dec_ops(q_data, ops, num_ops); }
+/**
+ * Dequeue a burst of fft operations from a queue of the device.
+ * This functions returns only the current contents of the queue, and
+does not
+ * block until @ num_ops is available.
+ * This function does not provide any error notification to avoid the
+ * corresponding overhead.
+ *
+ * @param dev_id
+ * The identifier of the device.
+ * @param queue_id
+ * The index of the queue.
+ * @param ops
+ * Pointer array where operations will be dequeued to. Must have at least
+ * @p num_ops entries
+ * @param num_ops
+ * The maximum number of operations to dequeue.
+ *
+ * @return
+ * The number of operations actually dequeued (this is the number of
entries
+ * copied into the @p ops array).
+ */
+__rte_experimental
+static inline uint16_t
+rte_bbdev_dequeue_fft_ops(uint16_t dev_id, uint16_t queue_id,
+ struct rte_bbdev_fft_op **ops, uint16_t num_ops) {
+ struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
+ struct rte_bbdev_queue_data *q_data = &dev->data-
queues[queue_id];
+ return dev->dequeue_fft_ops(q_data, ops, num_ops); }
+
/** Definitions of device event types */ enum rte_bbdev_event_type {
RTE_BBDEV_EVENT_UNKNOWN, /**< unknown event type */ diff --
git a/lib/bbdev/rte_bbdev_op.h b/lib/bbdev/rte_bbdev_op.h index
6d56133..57e35f4 100644
--- a/lib/bbdev/rte_bbdev_op.h
+++ b/lib/bbdev/rte_bbdev_op.h
@@ -47,6 +47,8 @@
#define RTE_BBDEV_TURBO_MAX_CODE_BLOCKS (64)
/* LDPC: Maximum number of Code Blocks in Transport Block.*/ #define
RTE_BBDEV_LDPC_MAX_CODE_BLOCKS (256)
+/* 12 CS maximum */
+#define RTE_BBDEV_MAX_CS_2 (6)
/** Flags for turbo decoder operation and capability structure */ enum
rte_bbdev_op_td_flag_bitmasks { @@ -211,6 +213,26 @@ enum
rte_bbdev_op_ldpcenc_flag_bitmasks {
RTE_BBDEV_LDPC_ENC_CONCATENATION = (1ULL << 7) };
+/** Flags for DFT operation and capability structure */ enum
+rte_bbdev_op_fft_flag_bitmasks {
+ /** Flexible windowing capability */
+ RTE_BBDEV_FFT_WINDOWING = (1ULL << 0),
+ /** Flexible adjustment of Cyclic Shift time offset */
+ RTE_BBDEV_FFT_CS_ADJUSTMENT = (1ULL << 1),
+ /** Set for bypass the DFT and get directly into iDFT input */
+ RTE_BBDEV_FFT_DFT_BYPASS = (1ULL << 2),
+ /** Set for bypass the IDFT and get directly the DFT output */
+ RTE_BBDEV_FFT_IDFT_BYPASS = (1ULL << 3),
+ /** Set for bypass time domain windowing */
+ RTE_BBDEV_FFT_WINDOWING_BYPASS = (1ULL << 4),
+ /** Set for optional power measurement on DFT output */
+ RTE_BBDEV_FFT_POWER_MEAS = (1ULL << 5),
+ /** Set if the the input data used FP16 format */
+ RTE_BBDEV_FFT_FP16_INPUT = (1ULL << 6),
+ /** Set if the the output data uses FP16 format */
+ RTE_BBDEV_FFT_FP16_OUTPUT = (1ULL << 7) };
+
/** Flags for the Code Block/Transport block mode */ enum
rte_bbdev_op_cb_mode {
/** One operation is one or fraction of one transport block */ @@ -
689,6 +711,55 @@ struct rte_bbdev_op_ldpc_enc {
};
};
+/** Operation structure for FFT processing.
+ *
+ * The operation processes the data for multiple antennas in a single
+call
+ * (.i.e for all the REs belonging to a given SRS sequence for
+instance)
+ *
+ * The output mbuf data structure is expected to be allocated by the
+ * application with enough room for the output data.
+ */
+struct rte_bbdev_op_fft {
+ /** Input data starting from first antenna */
+ struct rte_bbdev_op_data base_input;
+ /** Output data starting from first antenna and first cyclic shift */
+ struct rte_bbdev_op_data base_output;
+ /** Optional power measurement output data */
+ struct rte_bbdev_op_data power_meas_output;
+ /** Flags from rte_bbdev_op_fft_flag_bitmasks */
+ uint32_t op_flags;
+ /** Input sequence size in 32-bits points */
+ uint16_t input_sequence_size;
+ /** Padding at the start of the sequence */
+ uint16_t input_leading_padding;
+ /** Output sequence size in 32-bits points */
+ uint16_t output_sequence_size;
+ /** Depadding at the start of the DFT output */
+ uint16_t output_leading_depadding;
+ /** Window index being used for each cyclic shift output */
+ uint8_t window_index[RTE_BBDEV_MAX_CS_2];
+ /** Bitmap of the cyclic shift output requested */
+ uint16_t cs_bitmap;
+ /** Number of antennas as a log2 – 8 to 128 */
+ uint8_t num_antennas_log2;
+ /** iDFT size as a log2 - 32 to 2048 */
+ uint8_t idft_log2;
+ /** DFT size as a log2 - 8 to 2048 */
+ uint8_t dft_log2;
+ /** Adjustment of position of the cyclic shifts - -31 to 31 */
+ int8_t cs_time_adjustment;
+ /** iDFT shift down */
+ int8_t idft_shift;
+ /** DFT shift down */
+ int8_t dft_shift;
+ /** NCS reciprocal factor */
+ uint16_t ncs_reciprocal;
+ /** power measurement out shift down */
+ uint16_t power_shift;
+ /** Adjust the FP6 exponent for INT<->FP16 conversion */
+ uint16_t fp16_exp_adjust;
+};
+
/** List of the capabilities for the Turbo Decoder */ struct
rte_bbdev_op_cap_turbo_dec {
/** Flags from rte_bbdev_op_td_flag_bitmasks */ @@ -741,6
+812,16 @@ struct rte_bbdev_op_cap_ldpc_enc {
uint16_t num_buffers_dst;
};
+/** List of the capabilities for the FFT */ struct rte_bbdev_op_cap_fft
+{
+ /** Flags from rte_bbdev_op_ldpcenc_flag_bitmasks */
+ uint32_t capability_flags;
+ /** Num input code block buffers */
+ uint16_t num_buffers_src;
+ /** Num output code block buffers */
+ uint16_t num_buffers_dst;
+};
+
/** Different operation types supported by the device */ enum
rte_bbdev_op_type {
RTE_BBDEV_OP_NONE, /**< Dummy operation that does nothing */
@@ -748,6 +829,7 @@ enum rte_bbdev_op_type {
RTE_BBDEV_OP_TURBO_ENC, /**< Turbo encode */
RTE_BBDEV_OP_LDPC_DEC, /**< LDPC decode */
RTE_BBDEV_OP_LDPC_ENC, /**< LDPC encode */
+ RTE_BBDEV_OP_FFT, /**< FFT */
RTE_BBDEV_OP_TYPE_COUNT, /**< Count of different op types */
};
@@ -791,6 +873,18 @@ struct rte_bbdev_dec_op {
};
};
+/** Structure specifying a single fft operation */ struct
+rte_bbdev_fft_op {
+ /** Status of operation that was performed */
+ int status;
+ /** Mempool which op instance is in */
+ struct rte_mempool *mempool;
+ /** Opaque pointer for user data */
+ void *opaque_data;
+ /** Contains turbo decoder specific parameters */
+ struct rte_bbdev_op_fft fft;
+};
+
/** Operation capabilities supported by a device */ struct
rte_bbdev_op_cap {
enum rte_bbdev_op_type type; /**< Type of operation */ @@ -
799,6 +893,7 @@ struct rte_bbdev_op_cap {
struct rte_bbdev_op_cap_turbo_enc turbo_enc;
struct rte_bbdev_op_cap_ldpc_dec ldpc_dec;
struct rte_bbdev_op_cap_ldpc_enc ldpc_enc;
+ struct rte_bbdev_op_cap_fft fft;
} cap; /**< Operation-type specific capabilities */ };
@@ -918,6 +1013,42 @@ struct rte_mempool * }
/**
+ * Bulk allocate fft operations from a mempool with parameter defaults
reset.
+ *
+ * @param mempool
+ * Operation mempool, created by rte_bbdev_op_pool_create().
+ * @param ops
+ * Output array to place allocated operations
+ * @param num_ops
+ * Number of operations to allocate
+ *
+ * @returns
+ * - 0 on success
+ * - EINVAL if invalid mempool is provided
+ */
+__rte_experimental
+static inline int
+rte_bbdev_fft_op_alloc_bulk(struct rte_mempool *mempool,
+ struct rte_bbdev_fft_op **ops, uint16_t num_ops) {
+ struct rte_bbdev_op_pool_private *priv;
+ int ret;
+
+ /* Check type */
+ priv = (struct rte_bbdev_op_pool_private *)
+ rte_mempool_get_priv(mempool);
+ if (unlikely(priv->type != RTE_BBDEV_OP_FFT))
+ return -EINVAL;
+
+ /* Get elements */
+ ret = rte_mempool_get_bulk(mempool, (void **)ops, num_ops);
+ if (unlikely(ret < 0))
+ return ret;
+
+ return 0;
+}
+
+/**
* Free decode operation structures that were allocated by
* rte_bbdev_dec_op_alloc_bulk().
* All structures must belong to the same mempool.
@@ -951,6 +1082,24 @@ struct rte_mempool *
rte_mempool_put_bulk(ops[0]->mempool, (void **)ops,
num_ops); }
+/**
+ * Free encode operation structures that were allocated by
+ * rte_bbdev_fft_op_alloc_bulk().
+ * All structures must belong to the same mempool.
+ *
+ * @param ops
+ * Operation structures
+ * @param num_ops
+ * Number of structures
+ */
+__rte_experimental
+static inline void
+rte_bbdev_fft_op_free_bulk(struct rte_bbdev_fft_op **ops, unsigned int
+num_ops) {
+ if (num_ops > 0)
+ rte_mempool_put_bulk(ops[0]->mempool, (void **)ops,
num_ops); }
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/bbdev/version.map b/lib/bbdev/version.map index
cce3f3c..16a16dc 100644
--- a/lib/bbdev/version.map
+++ b/lib/bbdev/version.map
@@ -39,3 +39,13 @@ DPDK_22 {
local: *;
};
+
+EXPERIMENTAL {
+ global:
+
+ # added in 22.06
+ rte_bbdev_enqueue_fft_ops;
+ rte_bbdev_dequeue_fft_ops;
+ rte_bbdev_fft_op_alloc_bulk;
+ rte_bbdev_fft_op_free_bulk;
+};
--
1.8.3.1