[dpdk-dev] [PATCH v2] net/i40e: fix TSO pkt exceeds allowed buf size issue

2019-12-25 Thread Xiaoyun Li
Hardware limits that max buffer size per tx descriptor should be
(16K-1)B. So when TSO enabled, the mbuf data size may exceed the
limit and cause malicious behaviour to the NIC. This patch fixes
this issue by using more tx descs for this kind of large buffer.
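
For a sense of the arithmetic, here is a standalone sketch (hypothetical
segment sizes; the two macros are stand-ins copied here for illustration
only) of the per-packet descriptor count that the new i40e_calc_pkt_desc()
performs:

  #include <stdio.h>
  #include <stdint.h>

  /* Stand-ins for the driver macros, for illustration only. */
  #define MAX_DATA_PER_TXD   (16 * 1024 - 1)   /* (16K-1)B per data descriptor */
  #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

  int main(void)
  {
      /* e.g. a TSO packet carrying two segments of 40000B and 9000B */
      const uint16_t seg_len[] = { 40000, 9000 };
      unsigned int count = 0;
      unsigned int i;

      for (i = 0; i < 2; i++)
          count += DIV_ROUND_UP(seg_len[i], MAX_DATA_PER_TXD);

      /* 3 descriptors for the 40000B segment + 1 for the 9000B one = 4 */
      printf("data descriptors needed: %u\n", count);
      return 0;
  }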

Fixes: 4861cde46116 ("i40e: new poll mode driver")
Cc: sta...@dpdk.org

Signed-off-by: Xiaoyun Li 
---
v2:
 * Each pkt can have several segments so the needed tx descs should sum
 * all segments up.
---
 drivers/net/i40e/i40e_rxtx.c | 44 +++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 17dc8c78f..ce95d8c20 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -989,6 +989,23 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union 
i40e_tx_offload tx_offload)
return ctx_desc;
 }
 
+/* HW requires that Tx buffer size ranges from 1B up to (16K-1)B. */
+#define I40E_MAX_DATA_PER_TXD  (16 * 1024 - 1)
+/* Calculate the number of TX descriptors needed for each pkt */
+static inline uint16_t
+i40e_calc_pkt_desc(struct rte_mbuf *tx_pkt)
+{
+   struct rte_mbuf *txd = tx_pkt;
+   uint16_t count = 0;
+
+   while (txd != NULL) {
+   count += DIV_ROUND_UP(txd->data_len, I40E_MAX_DATA_PER_TXD);
+   txd = txd->next;
+   }
+
+   return count;
+}
+
 uint16_t
 i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
@@ -1046,8 +1063,15 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf 
**tx_pkts, uint16_t nb_pkts)
 * The number of descriptors that must be allocated for
 * a packet equals to the number of the segments of that
 * packet plus 1 context descriptor if needed.
+* Recalculate the needed tx descs when TSO enabled in case
+* the mbuf data size exceeds max data size that hw allows
+* per tx desc.
 */
-   nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
+   if (ol_flags & PKT_TX_TCP_SEG)
+   nb_used = (uint16_t)(i40e_calc_pkt_desc(tx_pkt) +
+nb_ctx);
+   else
+   nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
tx_last = (uint16_t)(tx_id + nb_used - 1);
 
/* Circular ring */
@@ -1160,6 +1184,24 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf 
**tx_pkts, uint16_t nb_pkts)
slen = m_seg->data_len;
buf_dma_addr = rte_mbuf_data_iova(m_seg);
 
+   while ((ol_flags & PKT_TX_TCP_SEG) &&
+   unlikely(slen > I40E_MAX_DATA_PER_TXD)) {
+   txd->buffer_addr =
+   rte_cpu_to_le_64(buf_dma_addr);
+   txd->cmd_type_offset_bsz =
+   i40e_build_ctob(td_cmd,
+   td_offset, I40E_MAX_DATA_PER_TXD,
+   td_tag);
+
+   buf_dma_addr += I40E_MAX_DATA_PER_TXD;
+   slen -= I40E_MAX_DATA_PER_TXD;
+
+   txe->last_id = tx_last;
+   tx_id = txe->next_id;
+   txe = txn;
+   txd = &txr[tx_id];
+   txn = &sw_ring[txe->next_id];
+   }
PMD_TX_LOG(DEBUG, "mbuf: %p, TDD[%u]:\n"
"buf_dma_addr: %#"PRIx64";\n"
"td_cmd: %#x;\n"
-- 
2.17.1



[dpdk-dev] [PATCH] examples/ntb: fix mempool ops setting issue

2019-12-25 Thread Xiaoyun Li
Mempool ops may be registered in a different order when compiling. The
default ops index is always zero, but the wanted ops is ring_mp_mc. This
patch sets the best mempool ops before generating the mbuf pool to fix
this issue.
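
The ordering matters here: the ops name must be set after the empty mempool
is created and before the pool is populated. A minimal, self-contained sketch
of that flow (function and parameter names are illustrative, not the
example's actual code):

  #include <rte_mbuf.h>
  #include <rte_mbuf_pool_ops.h>
  #include <rte_mempool.h>

  /* Sketch only: select the best mbuf mempool ops before populating. */
  static struct rte_mempool *
  demo_mbuf_pool_create(const char *name, unsigned int nb_mbufs,
                        uint16_t data_room, int socket_id)
  {
      struct rte_pktmbuf_pool_private priv = {
          .mbuf_data_room_size = data_room,
          .mbuf_priv_size = 0,
      };
      struct rte_mempool *mp;

      mp = rte_mempool_create_empty(name, nb_mbufs,
                                    sizeof(struct rte_mbuf) + data_room,
                                    256, sizeof(priv), socket_id, 0);
      if (mp == NULL)
          return NULL;

      /* Must happen before the pool gets any memory. */
      if (rte_mempool_set_ops_byname(mp, rte_mbuf_best_mempool_ops(),
                                     NULL) != 0)
          goto fail;

      rte_pktmbuf_pool_init(mp, &priv);
      if (rte_mempool_populate_default(mp) < 0)
          goto fail;
      rte_mempool_obj_iter(mp, rte_pktmbuf_init, NULL);

      return mp;
  fail:
      rte_mempool_free(mp);
      return NULL;
  }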

Fixes: 5194299d6ef5 ("examples/ntb: support more functions")
Cc: sta...@dpdk.org

Signed-off-by: Xiaoyun Li 
---
 examples/ntb/ntb_fwd.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/examples/ntb/ntb_fwd.c b/examples/ntb/ntb_fwd.c
index c914256dd..17eedcf0b 100644
--- a/examples/ntb/ntb_fwd.c
+++ b/examples/ntb/ntb_fwd.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* Per-port statistics struct */
 struct ntb_port_statistics {
@@ -1256,6 +1257,11 @@ ntb_mbuf_pool_create(uint16_t mbuf_seg_size, uint32_t 
nb_mbuf,
if (mp == NULL)
return NULL;
 
+   if (rte_mempool_set_ops_byname(mp, rte_mbuf_best_mempool_ops(), NULL)) {
+   printf("error setting mempool handler\n");
+   goto fail;
+   }
+
memset(&mbp_priv, 0, sizeof(mbp_priv));
mbp_priv.mbuf_data_room_size = mbuf_seg_size;
mbp_priv.mbuf_priv_size = 0;
-- 
2.17.1



Re: [dpdk-dev] [PATCH 11/14] examples/ipsec-secgw: add app processing code

2019-12-25 Thread Ananyev, Konstantin


> +static inline int
> +process_ipsec_ev_inbound(struct ipsec_ctx *ctx, struct route_table *rt,
> + struct rte_event *ev)
> +{
> + struct ipsec_sa *sa = NULL;
> + struct rte_mbuf *pkt;
> + uint16_t port_id = 0;
> + enum pkt_type type;
> + uint32_t sa_idx;
> + uint8_t *nlp;
> +
> + /* Get pkt from event */
> + pkt = ev->mbuf;
> +
> + /* Check the packet type */
> + type = process_ipsec_get_pkt_type(pkt, &nlp);
> +
> + switch (type) {
> + case PKT_TYPE_PLAIN_IPV4:
> + if (pkt->ol_flags & PKT_RX_SEC_OFFLOAD)
> + sa = (struct ipsec_sa *) pkt->udata64;


Shouldn't packets with PKT_RX_SEC_OFFLOAD_FAIL be handled somehow?
Another question - as I can see from the code, right now event mode 
supports only inline-proto, correct?
If so, then probably an error should be reported at startup, if in config file
some other types of sessions were requested.
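
One way the failed-offload check could look (a sketch only, using the generic
PKT_RX_SEC_OFFLOAD_FAILED mbuf flag; this is not code from the patch):

  #include <rte_mbuf.h>

  /* Sketch: reject packets whose inline security processing failed,
   * before any SP/SA lookup is attempted. */
  static inline int
  sec_offload_ok(const struct rte_mbuf *pkt)
  {
      if ((pkt->ol_flags & PKT_RX_SEC_OFFLOAD) &&
          (pkt->ol_flags & PKT_RX_SEC_OFFLOAD_FAILED))
          return 0;   /* caller should drop the packet */
      return 1;
  }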

> +
> + /* Check if we have a match */
> + if (check_sp(ctx->sp4_ctx, nlp, &sa_idx) == 0) {
> + /* No valid match */
> + goto drop_pkt_and_exit;
> + }
> + break;
> +
> + case PKT_TYPE_PLAIN_IPV6:
> + if (pkt->ol_flags & PKT_RX_SEC_OFFLOAD)
> + sa = (struct ipsec_sa *) pkt->udata64;
> +
> + /* Check if we have a match */
> + if (check_sp(ctx->sp6_ctx, nlp, &sa_idx) == 0) {
> + /* No valid match */
> + goto drop_pkt_and_exit;
> + }
> + break;
> +
> + default:
> + RTE_LOG(ERR, IPSEC, "Unsupported packet type = %d\n", type);
> + goto drop_pkt_and_exit;
> + }
> +
> + /* Check if the packet has to be bypassed */
> + if (sa_idx == 0)
> + goto route_and_send_pkt;
> +
> + /* Else the packet has to be protected with SA */
> +
> + /* If the packet was IPsec processed, then SA pointer should be set */
> + if (sa == NULL)
> + goto drop_pkt_and_exit;
> +
> + /* SPI on the packet should match with the one in SA */
> + if (unlikely(sa->spi != sa_idx))
> + goto drop_pkt_and_exit;
> +
> +route_and_send_pkt:
> + port_id = get_route(pkt, rt, type);
> + if (unlikely(port_id == RTE_MAX_ETHPORTS)) {
> + /* no match */
> + goto drop_pkt_and_exit;
> + }
> + /* else, we have a matching route */
> +
> + /* Update mac addresses */
> + update_mac_addrs(pkt, port_id);
> +
> + /* Update the event with the dest port */
> + ipsec_event_pre_forward(pkt, port_id);
> + return 1;
> +
> +drop_pkt_and_exit:
> + RTE_LOG(ERR, IPSEC, "Inbound packet dropped\n");
> + rte_pktmbuf_free(pkt);
> + ev->mbuf = NULL;
> + return 0;
> +}
> +


[dpdk-dev] [PATCH v1 0/3] Introduce new class for vDPA device drivers

2019-12-25 Thread Matan Azrad
As discussed and as described in the RFC "[RFC] net: new vdpa PMD for Mellanox
devices", a new vDPA driver is going to be added for Mellanox devices - vDPA
mlx5 and more.

The only vDPA driver now is the IFC driver, which is located in the net directory.

The IFC driver and the new vDPA mlx5 driver provide the vDPA ops introduced in
librte_vhost and not the eth-dev ops.
All the other drivers in the net class provide the eth-dev ops.
The set of features is also different.

Create a new class for vDPA drivers and move IFC to this class.
Later, all the new drivers that implement the vDPA ops will be added to the 
vDPA class.

Also, a vDPA device driver features list was added to vDPA documentation.

Please review the features list and the series.

Later on, I'm going to send the vDPA mlx5 driver.

Thanks.


Matan Azrad (3):
  drivers: introduce vDPA class
  doc: add vDPA feature table
  drivers: move ifc driver to the vDPA class

 MAINTAINERS   |6 +-
 doc/guides/conf.py|5 +
 doc/guides/index.rst  |1 +
 doc/guides/nics/features/ifcvf.ini|8 -
 doc/guides/nics/ifc.rst   |  106 ---
 doc/guides/nics/index.rst |1 -
 doc/guides/vdpadevs/features/default.ini  |   55 ++
 doc/guides/vdpadevs/features/ifcvf.ini|8 +
 doc/guides/vdpadevs/features_overview.rst |   65 ++
 doc/guides/vdpadevs/ifc.rst   |  106 +++
 doc/guides/vdpadevs/index.rst |   15 +
 drivers/Makefile  |2 +
 drivers/meson.build   |1 +
 drivers/net/Makefile  |3 -
 drivers/net/ifc/Makefile  |   34 -
 drivers/net/ifc/base/ifcvf.c  |  329 
 drivers/net/ifc/base/ifcvf.h  |  162 
 drivers/net/ifc/base/ifcvf_osdep.h|   52 --
 drivers/net/ifc/ifcvf_vdpa.c  | 1280 -
 drivers/net/ifc/meson.build   |9 -
 drivers/net/ifc/rte_pmd_ifc_version.map   |3 -
 drivers/net/meson.build   |1 -
 drivers/vdpa/Makefile |   14 +
 drivers/vdpa/ifc/Makefile |   34 +
 drivers/vdpa/ifc/base/ifcvf.c |  329 
 drivers/vdpa/ifc/base/ifcvf.h |  162 
 drivers/vdpa/ifc/base/ifcvf_osdep.h   |   52 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 1280 +
 drivers/vdpa/ifc/meson.build  |9 +
 drivers/vdpa/ifc/rte_pmd_ifc_version.map  |3 +
 drivers/vdpa/meson.build  |8 +
 31 files changed, 2152 insertions(+), 1991 deletions(-)
 delete mode 100644 doc/guides/nics/features/ifcvf.ini
 delete mode 100644 doc/guides/nics/ifc.rst
 create mode 100644 doc/guides/vdpadevs/features/default.ini
 create mode 100644 doc/guides/vdpadevs/features/ifcvf.ini
 create mode 100644 doc/guides/vdpadevs/features_overview.rst
 create mode 100644 doc/guides/vdpadevs/ifc.rst
 create mode 100644 doc/guides/vdpadevs/index.rst
 delete mode 100644 drivers/net/ifc/Makefile
 delete mode 100644 drivers/net/ifc/base/ifcvf.c
 delete mode 100644 drivers/net/ifc/base/ifcvf.h
 delete mode 100644 drivers/net/ifc/base/ifcvf_osdep.h
 delete mode 100644 drivers/net/ifc/ifcvf_vdpa.c
 delete mode 100644 drivers/net/ifc/meson.build
 delete mode 100644 drivers/net/ifc/rte_pmd_ifc_version.map
 create mode 100644 drivers/vdpa/Makefile
 create mode 100644 drivers/vdpa/ifc/Makefile
 create mode 100644 drivers/vdpa/ifc/base/ifcvf.c
 create mode 100644 drivers/vdpa/ifc/base/ifcvf.h
 create mode 100644 drivers/vdpa/ifc/base/ifcvf_osdep.h
 create mode 100644 drivers/vdpa/ifc/ifcvf_vdpa.c
 create mode 100644 drivers/vdpa/ifc/meson.build
 create mode 100644 drivers/vdpa/ifc/rte_pmd_ifc_version.map
 create mode 100644 drivers/vdpa/meson.build

-- 
1.8.3.1



[dpdk-dev] [PATCH v1 2/3] doc: add vDPA feature table

2019-12-25 Thread Matan Azrad
Add vDPA devices features table and explanation.

Any vDPA driver can add its own supported features by adding a new ini
file to the features directory in doc/guides/vdpadevs/features.

Signed-off-by: Matan Azrad 
---
 doc/guides/conf.py|  5 +++
 doc/guides/vdpadevs/features/default.ini  | 55 ++
 doc/guides/vdpadevs/features_overview.rst | 65 +++
 doc/guides/vdpadevs/index.rst |  1 +
 4 files changed, 126 insertions(+)
 create mode 100644 doc/guides/vdpadevs/features/default.ini
 create mode 100644 doc/guides/vdpadevs/features_overview.rst

diff --git a/doc/guides/conf.py b/doc/guides/conf.py
index 0892c06..c368fa5 100644
--- a/doc/guides/conf.py
+++ b/doc/guides/conf.py
@@ -401,6 +401,11 @@ def setup(app):
 'Features',
 'Features availability in compression drivers',
 'Feature')
+table_file = dirname(__file__) + '/vdpadevs/overview_feature_table.txt'
+generate_overview_table(table_file, 1,
+'Features',
+'Features availability in vDPA drivers',
+'Feature')
 
 if LooseVersion(sphinx_version) < LooseVersion('1.3.1'):
 print('Upgrade sphinx to version >= 1.3.1 for '
diff --git a/doc/guides/vdpadevs/features/default.ini 
b/doc/guides/vdpadevs/features/default.ini
new file mode 100644
index 000..a3e0bc7
--- /dev/null
+++ b/doc/guides/vdpadevs/features/default.ini
@@ -0,0 +1,55 @@
+;
+; Features of a default vDPA driver.
+;
+; This file defines the features that are valid for inclusion in
+; the other driver files and also the order that they appear in
+; the features table in the documentation. The feature description
+; string should not exceed feature_str_len defined in conf.py.
+;
+[Features]
+csum =
+guest csum   =
+mac  =
+gso  =
+guest tso4   =
+guest tso6   =
+ecn  =
+ufo  =
+host tso4=
+host tso6=
+mrg rxbuf=
+ctrl vq  =
+ctrl rx  =
+any layout   =
+guest announce   =
+mq   =
+version 1=
+log all  =
+protocol features=
+indirect desc=
+event idx=
+mtu  =
+in_order =
+IOMMU platform   =
+packed   =
+proto mq =
+proto log shmfd  =
+proto rarp   =
+proto reply ack  =
+proto slave req  =
+proto crypto session =
+proto host notifier  =
+proto pagefault  =
+Multiprocess aware   =
+BSD nic_uio  =
+Linux UIO=
+Linux VFIO   =
+Other kdrv   =
+ARMv7=
+ARMv8=
+Power8   =
+x86-32   =
+x86-64   =
+Usage doc=
+Design doc   =
+Perf doc =
\ No newline at end of file
diff --git a/doc/guides/vdpadevs/features_overview.rst 
b/doc/guides/vdpadevs/features_overview.rst
new file mode 100644
index 000..c7745b7
--- /dev/null
+++ b/doc/guides/vdpadevs/features_overview.rst
@@ -0,0 +1,65 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+Copyright 2019 Mellanox Technologies, Ltd
+
+Overview of vDPA drivers features
+=
+
+This section explains the supported features that are listed in the table 
below.
+
+  * csum - Device can handle packets with partial checksum.
+  * guest csum - Guest can handle packets with partial checksum.
+  * mac - Device has given MAC address.
+  * gso - Device can handle packets with any GSO type.
+  * guest tso4 - Guest can receive TSOv4.
+  * guest tso6 - Guest can receive TSOv6.
+  * ecn - Device can receive TSO with ECN.
+  * ufo - Device can receive UFO.
+  * host tso4 - Device can receive TSOv4.
+  * host tso6 - Device can receive TSOv6.
+  * mrg rxbuf - Guest can merge receive buffers.
+  * ctrl vq - Control channel is available.
+  * ctrl rx - Control channel RX mode support.
+  * any layout - Device can handle any descriptor layout.
+  * guest announce - Guest can send gratuitous packets.
+  * mq - Device supports Receive Flow Steering.
+  * version 1 - v1.0 compliant.
+  * log all - Device can log all write descriptors (live migration).
+  * protocol features - Protocol features negotiation support.
+  * indirect desc - Indirect buffer descriptors support.
+  * event idx - Support for avail_idx and used_idx fields.
+  * mtu - Host can advise the guest with its maximum supported MTU.
+  * in_order - Device can use descriptors in ring order.
+  * IOMMU platform - Device supports IOMMU addresses.
+  * packed - Device supports packed virtio queues.
+  * proto mq - Support for querying the number of queues.
+  * proto log shmfd - Guest supports setting the log base.
+  * proto rarp - Host can broadcast a fake RARP after live migration.
+  * proto reply ack

[dpdk-dev] [PATCH v1 1/3] drivers: introduce vDPA class

2019-12-25 Thread Matan Azrad
The vDPA (vhost data path acceleration) drivers provide support for
the vDPA operations introduced by the rte_vhost library.

Any driver which provides the vDPA operations should be moved/added to
the vdpa class under drivers/vdpa/.

Create the general files for vDPA class in drivers and in documentation.

Signed-off-by: Matan Azrad 
---
 doc/guides/index.rst  |  1 +
 doc/guides/vdpadevs/index.rst | 13 +
 drivers/Makefile  |  2 ++
 drivers/meson.build   |  1 +
 drivers/vdpa/Makefile |  8 
 drivers/vdpa/meson.build  |  8 
 6 files changed, 33 insertions(+)
 create mode 100644 doc/guides/vdpadevs/index.rst
 create mode 100644 drivers/vdpa/Makefile
 create mode 100644 drivers/vdpa/meson.build

diff --git a/doc/guides/index.rst b/doc/guides/index.rst
index 8a1601b..988c6ea 100644
--- a/doc/guides/index.rst
+++ b/doc/guides/index.rst
@@ -19,6 +19,7 @@ DPDK documentation
bbdevs/index
cryptodevs/index
compressdevs/index
+   vdpadevs/index
eventdevs/index
rawdevs/index
mempool/index
diff --git a/doc/guides/vdpadevs/index.rst b/doc/guides/vdpadevs/index.rst
new file mode 100644
index 000..d69dc91
--- /dev/null
+++ b/doc/guides/vdpadevs/index.rst
@@ -0,0 +1,13 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+Copyright 2019 Mellanox Technologies, Ltd
+
+vDPA Device Drivers
+===
+
+The following is a list of vDPA (vhost data path acceleration) device drivers,
+which can be used from an application through vhost API.
+
+.. toctree::
+:maxdepth: 2
+:numbered:
+
diff --git a/drivers/Makefile b/drivers/Makefile
index 7d5da5d..46374ca 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -18,6 +18,8 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_QAT) += common/qat
 DEPDIRS-common/qat := bus mempool
 DIRS-$(CONFIG_RTE_LIBRTE_COMPRESSDEV) += compress
 DEPDIRS-compress := bus mempool
+DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += vdpa
+DEPDIRS-vdpa := common bus mempool
 DIRS-$(CONFIG_RTE_LIBRTE_EVENTDEV) += event
 DEPDIRS-event := common bus mempool net
 DIRS-$(CONFIG_RTE_LIBRTE_RAWDEV) += raw
diff --git a/drivers/meson.build b/drivers/meson.build
index 32d68aa..d271667 100644
--- a/drivers/meson.build
+++ b/drivers/meson.build
@@ -13,6 +13,7 @@ dpdk_driver_classes = ['common',
   'raw', # depends on common, bus and net.
   'crypto',  # depends on common, bus and mempool (net in future).
   'compress', # depends on common, bus, mempool.
+  'vdpa',# depends on common, bus and mempool.
   'event',   # depends on common, bus, mempool and net.
   'baseband'] # depends on common and bus.
 
diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile
new file mode 100644
index 000..82a2b70
--- /dev/null
+++ b/drivers/vdpa/Makefile
@@ -0,0 +1,8 @@
+#   SPDX-License-Identifier: BSD-3-Clause
+#   Copyright 2019 Mellanox Technologies, Ltd
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# DIRS-$() += 
+
+include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/drivers/vdpa/meson.build b/drivers/vdpa/meson.build
new file mode 100644
index 000..a839ff5
--- /dev/null
+++ b/drivers/vdpa/meson.build
@@ -0,0 +1,8 @@
+#   SPDX-License-Identifier: BSD-3-Clause
+#   Copyright 2019 Mellanox Technologies, Ltd
+
+drivers = []
+std_deps = ['bus_pci', 'kvargs']
+std_deps += ['vhost']
+config_flag_fmt = 'RTE_LIBRTE_@0@_PMD'
+driver_name_fmt = 'rte_pmd_@0@'
-- 
1.8.3.1



[dpdk-dev] [PATCH v1 3/3] drivers: move ifc driver to the vDPA class

2019-12-25 Thread Matan Azrad
A new vDPA class was recently introduced.

IFC driver implements the vDPA operations, hence it should be moved to
the vDPA class.

Move it.

Signed-off-by: Matan Azrad 
---
 MAINTAINERS  |6 +-
 doc/guides/nics/features/ifcvf.ini   |8 -
 doc/guides/nics/ifc.rst  |  106 ---
 doc/guides/nics/index.rst|1 -
 doc/guides/vdpadevs/features/ifcvf.ini   |8 +
 doc/guides/vdpadevs/ifc.rst  |  106 +++
 doc/guides/vdpadevs/index.rst|1 +
 drivers/net/Makefile |3 -
 drivers/net/ifc/Makefile |   34 -
 drivers/net/ifc/base/ifcvf.c |  329 
 drivers/net/ifc/base/ifcvf.h |  162 
 drivers/net/ifc/base/ifcvf_osdep.h   |   52 --
 drivers/net/ifc/ifcvf_vdpa.c | 1280 --
 drivers/net/ifc/meson.build  |9 -
 drivers/net/ifc/rte_pmd_ifc_version.map  |3 -
 drivers/net/meson.build  |1 -
 drivers/vdpa/Makefile|6 +
 drivers/vdpa/ifc/Makefile|   34 +
 drivers/vdpa/ifc/base/ifcvf.c|  329 
 drivers/vdpa/ifc/base/ifcvf.h|  162 
 drivers/vdpa/ifc/base/ifcvf_osdep.h  |   52 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c| 1280 ++
 drivers/vdpa/ifc/meson.build |9 +
 drivers/vdpa/ifc/rte_pmd_ifc_version.map |3 +
 drivers/vdpa/meson.build |2 +-
 25 files changed, 1994 insertions(+), 1992 deletions(-)
 delete mode 100644 doc/guides/nics/features/ifcvf.ini
 delete mode 100644 doc/guides/nics/ifc.rst
 create mode 100644 doc/guides/vdpadevs/features/ifcvf.ini
 create mode 100644 doc/guides/vdpadevs/ifc.rst
 delete mode 100644 drivers/net/ifc/Makefile
 delete mode 100644 drivers/net/ifc/base/ifcvf.c
 delete mode 100644 drivers/net/ifc/base/ifcvf.h
 delete mode 100644 drivers/net/ifc/base/ifcvf_osdep.h
 delete mode 100644 drivers/net/ifc/ifcvf_vdpa.c
 delete mode 100644 drivers/net/ifc/meson.build
 delete mode 100644 drivers/net/ifc/rte_pmd_ifc_version.map
 create mode 100644 drivers/vdpa/ifc/Makefile
 create mode 100644 drivers/vdpa/ifc/base/ifcvf.c
 create mode 100644 drivers/vdpa/ifc/base/ifcvf.h
 create mode 100644 drivers/vdpa/ifc/base/ifcvf_osdep.h
 create mode 100644 drivers/vdpa/ifc/ifcvf_vdpa.c
 create mode 100644 drivers/vdpa/ifc/meson.build
 create mode 100644 drivers/vdpa/ifc/rte_pmd_ifc_version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index 9b5c80f..87abf60 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -682,9 +682,9 @@ F: doc/guides/nics/features/iavf*.ini
 Intel ifc
 M: Xiao Wang 
 T: git://dpdk.org/next/dpdk-next-net-intel
-F: drivers/net/ifc/
-F: doc/guides/nics/ifc.rst
-F: doc/guides/nics/features/ifc*.ini
+F: drivers/vdpa/ifc/
+F: doc/guides/vdpadevs/ifc.rst
+F: doc/guides/vdpadevs/features/ifcvf.ini
 
 Intel ice
 M: Qiming Yang 
diff --git a/doc/guides/nics/features/ifcvf.ini 
b/doc/guides/nics/features/ifcvf.ini
deleted file mode 100644
index ef1fc47..000
--- a/doc/guides/nics/features/ifcvf.ini
+++ /dev/null
@@ -1,8 +0,0 @@
-;
-; Supported features of the 'ifcvf' vDPA driver.
-;
-; Refer to default.ini for the full list of available PMD features.
-;
-[Features]
-x86-32   = Y
-x86-64   = Y
diff --git a/doc/guides/nics/ifc.rst b/doc/guides/nics/ifc.rst
deleted file mode 100644
index 12a2a34..000
--- a/doc/guides/nics/ifc.rst
+++ /dev/null
@@ -1,106 +0,0 @@
-..  SPDX-License-Identifier: BSD-3-Clause
-Copyright(c) 2018 Intel Corporation.
-
-IFCVF vDPA driver
-=
-
-The IFCVF vDPA (vhost data path acceleration) driver provides support for the
-Intel FPGA 100G VF (IFCVF). IFCVF's datapath is virtio ring compatible, it
-works as a HW vhost backend which can send/receive packets to/from virtio
-directly by DMA. Besides, it supports dirty page logging and device state
-report/restore, this driver enables its vDPA functionality.
-
-
-Pre-Installation Configuration
---
-
-Config File Options
-~~~
-
-The following option can be modified in the ``config`` file.
-
-- ``CONFIG_RTE_LIBRTE_IFC_PMD`` (default ``y`` for linux)
-
-  Toggle compilation of the ``librte_pmd_ifc`` driver.
-
-
-IFCVF vDPA Implementation
--
-
-IFCVF's vendor ID and device ID are same as that of virtio net pci device,
-with its specific subsystem vendor ID and device ID. To let the device be
-probed by IFCVF driver, adding "vdpa=1" parameter helps to specify that this
-device is to be used in vDPA mode, rather than polling mode, virtio pmd will
-skip when it detects this message. If no this parameter specified, device
-will not be used as a vDPA device, and it will be driven by virtio pmd.
-
-Different VF devices serve different virtio frontends which are in different
-VMs, so each VF needs to have its own DMA address translation service. During
-the 

Re: [dpdk-dev] [PATCH] raw/ntb: fix write memory barrier issue

2019-12-25 Thread Wu, Jingjing



> -Original Message-
> From: Li, Xiaoyun 
> Sent: Wednesday, December 4, 2019 11:19 PM
> To: Wu, Jingjing 
> Cc: dev@dpdk.org; Li, Xiaoyun ; sta...@dpdk.org
> Subject: [PATCH] raw/ntb: fix write memory barrier issue
> 
> All buffers and ring info should be written before tail register update.
> This patch relocates the write memory barrier before updating tail register
> to avoid potential issues.
> 
> Fixes: 11b5c7daf019 ("raw/ntb: add enqueue and dequeue functions")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Xiaoyun Li 
Acked-by: Jingjing Wu 
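
The fix is about store ordering: every buffer and ring-info write must be
visible to the device before the tail (doorbell) register is written. As a
generic sketch of that ordering (placeholder names, not the ntb driver's
actual symbols):

  #include <stdint.h>
  #include <rte_atomic.h>
  #include <rte_io.h>

  static inline void
  ring_submit(volatile void *tail_reg, uint32_t new_tail)
  {
      /* ... all buffer and ring-info stores happen before this point ... */

      rte_wmb();                        /* order those stores before the doorbell */
      rte_write32(new_tail, tail_reg);  /* tail register update last */
  }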


Re: [dpdk-dev] [PATCH v2] net/i40e: fix TSO pkt exceeds allowed buf size issue

2019-12-25 Thread Zhang, Qi Z
HI Xiaoyun:

Overall looks good to me, some minor comments inline


> -Original Message-
> From: Li, Xiaoyun 
> Sent: Wednesday, December 25, 2019 4:56 PM
> To: Zhang, Qi Z ; Xing, Beilei ;
> Ye, Xiaolong ; Loftus, Ciara ;
> dev@dpdk.org
> Cc: Li, Xiaoyun ; sta...@dpdk.org
> Subject: [PATCH v2] net/i40e: fix TSO pkt exceeds allowed buf size issue
> 
> Hardware limits that max buffer size per tx descriptor should be (16K-1)B. So
> when TSO enabled, the mbuf data size may exceed the limit and cause
> malicious behaviour to the NIC. This patch fixes this issue by using more tx

Behavior

> descs for this kind of large buffer.
> 
> Fixes: 4861cde46116 ("i40e: new poll mode driver")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Xiaoyun Li 
> ---
> v2:
>  * Each pkt can have several segments so the needed tx descs should sum
>  * all segments up.
> ---
>  drivers/net/i40e/i40e_rxtx.c | 44 +++-
>  1 file changed, 43 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index
> 17dc8c78f..ce95d8c20 100644
> --- a/drivers/net/i40e/i40e_rxtx.c
> +++ b/drivers/net/i40e/i40e_rxtx.c
> @@ -989,6 +989,23 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union
> i40e_tx_offload tx_offload)
>   return ctx_desc;
>  }
> 
> +/* HW requires that Tx buffer size ranges from 1B up to (16K-1)B. */
> +#define I40E_MAX_DATA_PER_TXD(16 * 1024 - 1)

Since this is limited by the 14-bit buffer size field in the Tx descriptor.

Is it better to reuse the existing macro to define the max buf size?

#define I40E_MAX_DATA_PER_TXD \
I40E_TXD_QW1_TX_BUF_SZ_MASK >> I40E_TXD_QW1_TX_BUF_SZ_SHIFT

Regards
Qi

> +/* Calculate the number of TX descriptors needed for each pkt */ static
> +inline uint16_t i40e_calc_pkt_desc(struct rte_mbuf *tx_pkt) {
> + struct rte_mbuf *txd = tx_pkt;
> + uint16_t count = 0;
> +
> + while (txd != NULL) {
> + count += DIV_ROUND_UP(txd->data_len,
> I40E_MAX_DATA_PER_TXD);
> + txd = txd->next;
> + }
> +
> + return count;
> +}
> +
>  uint16_t
>  i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
> { @@ -1046,8 +1063,15 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf
> **tx_pkts, uint16_t nb_pkts)
>* The number of descriptors that must be allocated for
>* a packet equals to the number of the segments of that
>* packet plus 1 context descriptor if needed.
> +  * Recalculate the needed tx descs when TSO enabled in case
> +  * the mbuf data size exceeds max data size that hw allows
> +  * per tx desc.
>*/
> - nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
> + if (ol_flags & PKT_TX_TCP_SEG)
> + nb_used = (uint16_t)(i40e_calc_pkt_desc(tx_pkt) +
> +  nb_ctx);
> + else
> + nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
>   tx_last = (uint16_t)(tx_id + nb_used - 1);
> 
>   /* Circular ring */
> @@ -1160,6 +1184,24 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf
> **tx_pkts, uint16_t nb_pkts)
>   slen = m_seg->data_len;
>   buf_dma_addr = rte_mbuf_data_iova(m_seg);
> 
> + while ((ol_flags & PKT_TX_TCP_SEG) &&
> + unlikely(slen > I40E_MAX_DATA_PER_TXD)) {
> + txd->buffer_addr =
> + rte_cpu_to_le_64(buf_dma_addr);
> + txd->cmd_type_offset_bsz =
> + i40e_build_ctob(td_cmd,
> + td_offset, I40E_MAX_DATA_PER_TXD,
> + td_tag);
> +
> + buf_dma_addr += I40E_MAX_DATA_PER_TXD;
> + slen -= I40E_MAX_DATA_PER_TXD;
> +
> + txe->last_id = tx_last;
> + tx_id = txe->next_id;
> + txe = txn;
> + txd = &txr[tx_id];
> + txn = &sw_ring[txe->next_id];
> + }
>   PMD_TX_LOG(DEBUG, "mbuf: %p, TDD[%u]:\n"
>   "buf_dma_addr: %#"PRIx64";\n"
>   "td_cmd: %#x;\n"
> --
> 2.17.1



Re: [dpdk-dev] [PATCH v2] net/i40e: fix TSO pkt exceeds allowed buf size issue

2019-12-25 Thread Li, Xiaoyun
Sure. Will fix them in v3. Thanks.

> -Original Message-
> From: Zhang, Qi Z
> Sent: Thursday, December 26, 2019 09:51
> To: Li, Xiaoyun ; Xing, Beilei ; 
> Ye,
> Xiaolong ; Loftus, Ciara ;
> dev@dpdk.org
> Cc: sta...@dpdk.org
> Subject: RE: [PATCH v2] net/i40e: fix TSO pkt exceeds allowed buf size issue
> 
> HI Xiaoyun:
> 
>   Overall looks good to me, some minor comments inline
> 
> 
> > -Original Message-
> > From: Li, Xiaoyun 
> > Sent: Wednesday, December 25, 2019 4:56 PM
> > To: Zhang, Qi Z ; Xing, Beilei
> > ; Ye, Xiaolong ; Loftus,
> > Ciara ; dev@dpdk.org
> > Cc: Li, Xiaoyun ; sta...@dpdk.org
> > Subject: [PATCH v2] net/i40e: fix TSO pkt exceeds allowed buf size
> > issue
> >
> > Hardware limits that max buffer size per tx descriptor should be
> > (16K-1)B. So when TSO enabled, the mbuf data size may exceed the limit
> > and cause malicious behaviour to the NIC. This patch fixes this issue
> > by using more tx
> 
> Behavior
> 
> > descs for this kind of large buffer.
> >
> > Fixes: 4861cde46116 ("i40e: new poll mode driver")
> > Cc: sta...@dpdk.org
> >
> > Signed-off-by: Xiaoyun Li 
> > ---
> > v2:
> >  * Each pkt can have several segments so the needed tx descs should
> > sum
> >  * all segments up.
> > ---
> >  drivers/net/i40e/i40e_rxtx.c | 44
> > +++-
> >  1 file changed, 43 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/net/i40e/i40e_rxtx.c
> > b/drivers/net/i40e/i40e_rxtx.c index
> > 17dc8c78f..ce95d8c20 100644
> > --- a/drivers/net/i40e/i40e_rxtx.c
> > +++ b/drivers/net/i40e/i40e_rxtx.c
> > @@ -989,6 +989,23 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union
> > i40e_tx_offload tx_offload)
> > return ctx_desc;
> >  }
> >
> > +/* HW requires that Tx buffer size ranges from 1B up to (16K-1)B. */
> > +#define I40E_MAX_DATA_PER_TXD  (16 * 1024 - 1)
> 
> Since this is limited by the 14-bit buffer size field in the Tx descriptor.
> 
> Is it better to reuse the existing macro to define the max buf size?
> 
> #define I40E_MAX_DATA_PER_TXD \
>   I40E_TXD_QW1_TX_BUF_SZ_MASK >>
> I40E_TXD_QW1_TX_BUF_SZ_SHIFT
> 
> Regards
> Qi
> 
> > +/* Calculate the number of TX descriptors needed for each pkt */
> > +static inline uint16_t i40e_calc_pkt_desc(struct rte_mbuf *tx_pkt) {
> > +   struct rte_mbuf *txd = tx_pkt;
> > +   uint16_t count = 0;
> > +
> > +   while (txd != NULL) {
> > +   count += DIV_ROUND_UP(txd->data_len,
> > I40E_MAX_DATA_PER_TXD);
> > +   txd = txd->next;
> > +   }
> > +
> > +   return count;
> > +}
> > +
> >  uint16_t
> >  i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t
> > nb_pkts) { @@ -1046,8 +1063,15 @@ i40e_xmit_pkts(void *tx_queue,
> > struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
> >  * The number of descriptors that must be allocated for
> >  * a packet equals to the number of the segments of that
> >  * packet plus 1 context descriptor if needed.
> > +* Recalculate the needed tx descs when TSO enabled in case
> > +* the mbuf data size exceeds max data size that hw allows
> > +* per tx desc.
> >  */
> > -   nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
> > +   if (ol_flags & PKT_TX_TCP_SEG)
> > +   nb_used = (uint16_t)(i40e_calc_pkt_desc(tx_pkt) +
> > +nb_ctx);
> > +   else
> > +   nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
> > tx_last = (uint16_t)(tx_id + nb_used - 1);
> >
> > /* Circular ring */
> > @@ -1160,6 +1184,24 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf
> > **tx_pkts, uint16_t nb_pkts)
> > slen = m_seg->data_len;
> > buf_dma_addr = rte_mbuf_data_iova(m_seg);
> >
> > +   while ((ol_flags & PKT_TX_TCP_SEG) &&
> > +   unlikely(slen > I40E_MAX_DATA_PER_TXD)) {
> > +   txd->buffer_addr =
> > +   rte_cpu_to_le_64(buf_dma_addr);
> > +   txd->cmd_type_offset_bsz =
> > +   i40e_build_ctob(td_cmd,
> > +   td_offset, I40E_MAX_DATA_PER_TXD,
> > +   td_tag);
> > +
> > +   buf_dma_addr += I40E_MAX_DATA_PER_TXD;
> > +   slen -= I40E_MAX_DATA_PER_TXD;
> > +
> > +   txe->last_id = tx_last;
> > +   tx_id = txe->next_id;
> > +   txe = txn;
> > +   txd = &txr[tx_id];
> > +   txn = &sw_ring[txe->next_id];
> > +   }
> > PMD_TX_LOG(DEBUG, "mbuf: %p, TDD[%u]:\n"
> > "buf_dma_addr: %#"PRIx64";\n"
> > "td_cmd: %#x;\n"
> > --
> > 2.17.1



[dpdk-dev] Cannot write fdir filter successfully

2019-12-25 Thread Qi Zhang
Hi,

I would like to use the flow director (on 82599) to steer packets to
individual queues. However, I couldn't write a flow filter rule to the NIC.
I have tried both the perfect filter and the signature one. Unfortunately,
neither of them could work.

The error is:

fdir_write_perfect_filter_82599(): Timeout writing flow director filter, or

fdir_add_signature_filter_82599(): Timeout writing flow director filter.

The OS on my server is ubuntu 16.04, the DPDK version is 18.11

the filter configure I am using is as follows:

struct rte_eth_fdir_filter ff;

memset(&ff, 0, sizeof(struct rte_eth_fdir_filter));

ff.input.flow.tcp4_flow.ip.dst_ip = 0xA040EC7

ff.input.flow.tcp4_flow.dst_port = 0x1A0A

ff.action.report_status = RTE_ETH_FDIR_REPORT_ID;

ff.action.behavior = RTE_ETH_FDIR_ACCEPT;

ff.action.rx_queue = 1;

ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
RTE_ETH_FILTER_ADD, &ff);



I am not sure where the problem is. Could you help me out? Thanks.
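
One thing worth double-checking (a port-configuration sketch only, not a
confirmed diagnosis of the timeout): flow director has to be enabled and
given field masks in rte_eth_conf before rte_eth_dev_configure()/start(),
otherwise programming filters can fail. Illustrative values:

  #include <rte_ethdev.h>

  /* Sketch: enable perfect-match flow director with masks covering the
   * fields used by the rule (dst IP and dst port in this case). */
  static const struct rte_eth_conf port_conf = {
      .fdir_conf = {
          .mode = RTE_FDIR_MODE_PERFECT,
          .pballoc = RTE_FDIR_PBALLOC_64K,
          .mask = {
              .ipv4_mask = { .dst_ip = 0xFFFFFFFF },
              .dst_port_mask = 0xFFFF,
          },
      },
  };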


[dpdk-dev] [PATCH] net/ixgbe: fix blocking system events

2019-12-25 Thread taox . zhu
From: Zhu Tao 

The IXGBE link status task used the rte alarm thread in the old
implementation. Sometimes the ixgbe link status task takes up to 9
seconds, which severely affects any task in the system that depends on
the rte alarm thread, such as interrupt or hotplug event handling. So
replace it with an independent thread which has the same thread
affinity settings as the rte interrupt thread.
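
For reference, EAL already provides a helper that spawns a control thread
off the data-path lcores, with the same affinity as the EAL interrupt/control
threads; a minimal sketch of using it (placeholder names, not the code from
this patch, which builds its own task queue on top):

  #include <pthread.h>
  #include <rte_lcore.h>

  static void *
  link_task_loop(void *arg)
  {
      /* ... wait on a condition variable and run queued link-status work ... */
      (void)arg;
      return NULL;
  }

  static int
  start_link_task_thread(void *dev)
  {
      pthread_t tid;

      /* rte_ctrl_thread_create() keeps the thread off the data-path lcores,
       * matching the affinity of the EAL interrupt thread. */
      return rte_ctrl_thread_create(&tid, "ixgbe-link-task", NULL,
                                    link_task_loop, dev);
  }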

Fixes: 0408f47b ("net/ixgbe: fix busy polling while fiber link update")
Cc: sta...@dpdk.org

Signed-off-by: Zhu Tao 
---
 drivers/net/ixgbe/ixgbe_ethdev.c | 184 +--
 drivers/net/ixgbe/ixgbe_ethdev.h |  32 +++
 2 files changed, 210 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 2c6fd0f..f0b387d 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -378,6 +379,9 @@ static int ixgbe_dev_udp_tunnel_port_del(struct rte_eth_dev 
*dev,
 struct rte_eth_udp_tunnel *udp_tunnel);
 static int ixgbe_filter_restore(struct rte_eth_dev *dev);
 static void ixgbe_l2_tunnel_conf(struct rte_eth_dev *dev);
+static int ixgbe_task_thread_init(struct rte_eth_dev *dev);
+static void ixgbe_task_thread_uninit(struct rte_eth_dev *dev);
+
 
 /*
  * Define VF Stats MACRO for Non "cleared on read" register
@@ -1069,6 +1073,171 @@ struct rte_ixgbe_xstats_name_off {
 }
 
 /*
+ * Add a task to task queue tail.
+ */
+int ixgbe_add_task(struct rte_eth_dev *dev, ixgbe_task_cb_fn task_cb)
+{
+   struct ixgbe_adapter *ad = dev->data->dev_private;
+   struct ixgbe_task *task;
+
+   if (ad->task_status == IXGBE_TASK_THREAD_RUNNING) {
+   task = rte_zmalloc("ixgbe", sizeof(struct ixgbe_task), 0);
+   if (task == NULL)
+   return -ENOMEM;
+
+   task->arg = dev;
+   task->task_cb = task_cb;
+   task->status = IXGBE_TASK_READY;
+
+   pthread_mutex_lock(&ad->task_lock);
+   TAILQ_INSERT_TAIL(&ad->task_head, task, next);
+   pthread_cond_signal(&ad->task_cond);
+   pthread_mutex_unlock(&ad->task_lock);
+   } else {
+   return -EPERM;  /* Operation not permitted */
+   }
+
+   return 0;
+}
+
+/*
+ * Sync cancel a task with all @task_cb be exit.
+ */
+int ixgbe_cancel_task(struct rte_eth_dev *dev, ixgbe_task_cb_fn task_cb)
+{
+   struct ixgbe_adapter *ad = dev->data->dev_private;
+   struct ixgbe_task *task, *ttask;
+   int i, executing;
+#define DELAY_TIMEOUT_LOG   2000   // 2s
+#define DELAY_TIMEOUT_MAX   10000  // 10s
+
+   for (i = 0; i < DELAY_TIMEOUT_MAX; i++) {
+   executing = 0;
+   if (ad->task_status == IXGBE_TASK_THREAD_RUNNING) {
+   pthread_mutex_lock(&ad->task_lock);
+   TAILQ_FOREACH_SAFE(task, &ad->task_head, next, ttask) {
+   if (task->task_cb == task_cb) {
+   if (task->status == IXGBE_TASK_RUNNING) 
{
+   executing++;
+   } else {
+   TAILQ_REMOVE(&ad->task_head, 
task, next);
+   rte_free(task);
+   }
+   }
+   }
+   pthread_mutex_unlock(&ad->task_lock);
+
+   if (executing) {
+   if (i > DELAY_TIMEOUT_LOG && (i % 1000 == 0)) {
+   PMD_DRV_LOG(WARNING,
+   "Cannel task time wait 
%ds!", i / 1000);
+   }
+
+   rte_delay_us_sleep(1000);   // 1ms
+   continue;
+   }
+   }
+   break;
+   }
+
+   if (i == DELAY_TIMEOUT_MAX)
+   return -EBUSY;
+
+   return 0;
+}
+
+/*
+ * Task main thread. Loop until state is set to IXGBE_TASK_THREAD_EXIT.
+ * For each task, set the status to IXGBE_TASK_RUNNING before execution,
+ * execute and then be dequeue.
+ */
+static void *ixgbe_task_handler(void *args)
+{
+   struct ixgbe_adapter *ad =
+   ((struct rte_eth_dev *)args)->data->dev_private;
+   struct ixgbe_task *task;
+
+   PMD_INIT_LOG(DEBUG, "ixgbe task thread created");
+   while (ad->task_status) {
+   pthread_mutex_lock(&ad->task_lock);
+   if (TAILQ_EMPTY(&ad->task_head)) {
+   pthread_cond_wait(&ad->task_cond, &ad->task_lock);
+   pthread_mutex_unlock(&ad->task_lock);
+   continue;
+   }
+
+   /* pop first task and run it */
+   task 

[dpdk-dev] [PATCH] net/ice: fix TSO pkt exceeds allowed buf size issue

2019-12-25 Thread Xiaoyun Li
Hardware limits that max buffer size per tx descriptor should be
(16K-1)B. So when TSO enabled, the mbuf data size may exceed the
limit and cause malicious behavior to the NIC. This patch fixes
this issue by using more tx descs for this kind of large buffer.

Fixes: 17c7d0f9d6a4 ("net/ice: support basic Rx/Tx")
Cc: sta...@dpdk.org

Signed-off-by: Xiaoyun Li 
---
 drivers/net/ice/ice_rxtx.c | 59 ++
 1 file changed, 54 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index b4f5367c5..c3d549ef4 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -2421,6 +2421,24 @@ ice_set_tso_ctx(struct rte_mbuf *mbuf, union 
ice_tx_offload tx_offload)
return ctx_desc;
 }
 
+/* HW requires that TX buffer size ranges from 1B up to (16K-1)B. */
+#define ICE_MAX_DATA_PER_TXD \
+   (ICE_TXD_QW1_TX_BUF_SZ_M >> ICE_TXD_QW1_TX_BUF_SZ_S)
+/* Calculate the number of TX descriptors needed for each pkt */
+static inline uint16_t
+ice_calc_pkt_desc(struct rte_mbuf *tx_pkt)
+{
+   struct rte_mbuf *txd = tx_pkt;
+   uint16_t count = 0;
+
+   while (txd != NULL) {
+   count += DIV_ROUND_UP(txd->data_len, ICE_MAX_DATA_PER_TXD);
+   txd = txd->next;
+   }
+
+   return count;
+}
+
 uint16_t
 ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
@@ -2440,6 +2458,7 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 
uint16_t nb_pkts)
uint32_t td_offset = 0;
uint32_t td_tag = 0;
uint16_t tx_last;
+   uint16_t slen;
uint64_t buf_dma_addr;
uint64_t ol_flags;
union ice_tx_offload tx_offload = {0};
@@ -2471,8 +2490,15 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 
uint16_t nb_pkts)
/* The number of descriptors that must be allocated for
 * a packet equals to the number of the segments of that
 * packet plus the number of context descriptor if needed.
+* Recalculate the needed tx descs when TSO enabled in case
+* the mbuf data size exceeds max data size that hw allows
+* per tx desc.
 */
-   nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
+   if (ol_flags & PKT_TX_TCP_SEG)
+   nb_used = (uint16_t)(ice_calc_pkt_desc(tx_pkt) +
+nb_ctx);
+   else
+   nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
tx_last = (uint16_t)(tx_id + nb_used - 1);
 
/* Circular ring */
@@ -2562,15 +2588,38 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf 
**tx_pkts, uint16_t nb_pkts)
txe->mbuf = m_seg;
 
/* Setup TX Descriptor */
+   slen = m_seg->data_len;
buf_dma_addr = rte_mbuf_data_iova(m_seg);
+
+   while ((ol_flags & PKT_TX_TCP_SEG) &&
+   unlikely(slen > ICE_MAX_DATA_PER_TXD)) {
+   txd->buffer_addr =
+   rte_cpu_to_le_64(buf_dma_addr);
+   txd->cmd_type_offset_bsz =
+   rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DATA |
+   ((uint64_t)td_cmd << ICE_TXD_QW1_CMD_S) |
+   ((uint64_t)td_offset << ICE_TXD_QW1_OFFSET_S) |
+   ((uint64_t)ICE_MAX_DATA_PER_TXD <<
+ICE_TXD_QW1_TX_BUF_SZ_S) |
+   ((uint64_t)td_tag << ICE_TXD_QW1_L2TAG1_S));
+
+   buf_dma_addr += ICE_MAX_DATA_PER_TXD;
+   slen -= ICE_MAX_DATA_PER_TXD;
+
+   txe->last_id = tx_last;
+   tx_id = txe->next_id;
+   txe = txn;
+   txd = &txr[tx_id];
+   txn = &sw_ring[txe->next_id];
+   }
+
txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr);
txd->cmd_type_offset_bsz =
rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DATA |
-   ((uint64_t)td_cmd  << ICE_TXD_QW1_CMD_S) |
+   ((uint64_t)td_cmd << ICE_TXD_QW1_CMD_S) |
((uint64_t)td_offset << ICE_TXD_QW1_OFFSET_S) |
-   ((uint64_t)m_seg->data_len  <<
-ICE_TXD_QW1_TX_BUF_SZ_S) |
-   ((uint64_t)td_tag  << ICE_TXD_QW1_L2TAG1_S));
+   ((uint64_t)slen << ICE_TXD_QW1_TX_BUF_SZ_S) |
+   ((uint64_t)td_tag << ICE_TXD_QW1_L2TAG1_S));
 
txe->last_id = tx_l

[dpdk-dev] [PATCH v3] net/i40e: fix TSO pkt exceeds allowed buf size issue

2019-12-25 Thread Xiaoyun Li
Hardware limits that max buffer size per tx descriptor should be
(16K-1)B. So when TSO enabled, the mbuf data size may exceed the
limit and cause malicious behavior to the NIC. This patch fixes
this issue by using more tx descs for this kind of large buffer.

Fixes: 4861cde46116 ("i40e: new poll mode driver")
Cc: sta...@dpdk.org

Signed-off-by: Xiaoyun Li 
---
v3:
 * Reused the existing macros to define I40E_MAX_DATA_PER_TXD
v2:
 * Each pkt can have several segments so the needed tx descs should sum
 * all segments up.
---
 drivers/net/i40e/i40e_rxtx.c | 45 +++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 17dc8c78f..bbdba39b3 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -989,6 +989,24 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union 
i40e_tx_offload tx_offload)
return ctx_desc;
 }
 
+/* HW requires that Tx buffer size ranges from 1B up to (16K-1)B. */
+#define I40E_MAX_DATA_PER_TXD \
+   (I40E_TXD_QW1_TX_BUF_SZ_MASK >> I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
+/* Calculate the number of TX descriptors needed for each pkt */
+static inline uint16_t
+i40e_calc_pkt_desc(struct rte_mbuf *tx_pkt)
+{
+   struct rte_mbuf *txd = tx_pkt;
+   uint16_t count = 0;
+
+   while (txd != NULL) {
+   count += DIV_ROUND_UP(txd->data_len, I40E_MAX_DATA_PER_TXD);
+   txd = txd->next;
+   }
+
+   return count;
+}
+
 uint16_t
 i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
@@ -1046,8 +1064,15 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf 
**tx_pkts, uint16_t nb_pkts)
 * The number of descriptors that must be allocated for
 * a packet equals to the number of the segments of that
 * packet plus 1 context descriptor if needed.
+* Recalculate the needed tx descs when TSO enabled in case
+* the mbuf data size exceeds max data size that hw allows
+* per tx desc.
 */
-   nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
+   if (ol_flags & PKT_TX_TCP_SEG)
+   nb_used = (uint16_t)(i40e_calc_pkt_desc(tx_pkt) +
+nb_ctx);
+   else
+   nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
tx_last = (uint16_t)(tx_id + nb_used - 1);
 
/* Circular ring */
@@ -1160,6 +1185,24 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf 
**tx_pkts, uint16_t nb_pkts)
slen = m_seg->data_len;
buf_dma_addr = rte_mbuf_data_iova(m_seg);
 
+   while ((ol_flags & PKT_TX_TCP_SEG) &&
+   unlikely(slen > I40E_MAX_DATA_PER_TXD)) {
+   txd->buffer_addr =
+   rte_cpu_to_le_64(buf_dma_addr);
+   txd->cmd_type_offset_bsz =
+   i40e_build_ctob(td_cmd,
+   td_offset, I40E_MAX_DATA_PER_TXD,
+   td_tag);
+
+   buf_dma_addr += I40E_MAX_DATA_PER_TXD;
+   slen -= I40E_MAX_DATA_PER_TXD;
+
+   txe->last_id = tx_last;
+   tx_id = txe->next_id;
+   txe = txn;
+   txd = &txr[tx_id];
+   txn = &sw_ring[txe->next_id];
+   }
PMD_TX_LOG(DEBUG, "mbuf: %p, TDD[%u]:\n"
"buf_dma_addr: %#"PRIx64";\n"
"td_cmd: %#x;\n"
-- 
2.17.1



[dpdk-dev] [PATCH v2] net/ice: fix TSO pkt exceeds allowed buf size issue

2019-12-25 Thread Xiaoyun Li
Hardware limits that max buffer size per tx descriptor should be
(16K-1)B. So when TSO enabled, the mbuf data size may exceed the
limit and cause malicious behavior to the NIC. This patch fixes
this issue by using more tx descs for this kind of large buffer.

Fixes: 17c7d0f9d6a4 ("net/ice: support basic Rx/Tx")
Cc: sta...@dpdk.org

Signed-off-by: Xiaoyun Li 
---
v2:
 * Fixed several typos.
---
 drivers/net/ice/ice_rxtx.c | 58 ++
 1 file changed, 53 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index b4f5367c5..ccc2c9339 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -2421,6 +2421,24 @@ ice_set_tso_ctx(struct rte_mbuf *mbuf, union 
ice_tx_offload tx_offload)
return ctx_desc;
 }
 
+/* HW requires that TX buffer size ranges from 1B up to (16K-1)B. */
+#define ICE_MAX_DATA_PER_TXD \
+   (ICE_TXD_QW1_TX_BUF_SZ_M >> ICE_TXD_QW1_TX_BUF_SZ_S)
+/* Calculate the number of TX descriptors needed for each pkt */
+static inline uint16_t
+ice_calc_pkt_desc(struct rte_mbuf *tx_pkt)
+{
+   struct rte_mbuf *txd = tx_pkt;
+   uint16_t count = 0;
+
+   while (txd != NULL) {
+   count += DIV_ROUND_UP(txd->data_len, ICE_MAX_DATA_PER_TXD);
+   txd = txd->next;
+   }
+
+   return count;
+}
+
 uint16_t
 ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
@@ -2440,6 +2458,7 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 
uint16_t nb_pkts)
uint32_t td_offset = 0;
uint32_t td_tag = 0;
uint16_t tx_last;
+   uint16_t slen;
uint64_t buf_dma_addr;
uint64_t ol_flags;
union ice_tx_offload tx_offload = {0};
@@ -2471,8 +2490,15 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 
uint16_t nb_pkts)
/* The number of descriptors that must be allocated for
 * a packet equals to the number of the segments of that
 * packet plus the number of context descriptor if needed.
+* Recalculate the needed tx descs when TSO enabled in case
+* the mbuf data size exceeds max data size that hw allows
+* per tx desc.
 */
-   nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
+   if (ol_flags & PKT_TX_TCP_SEG)
+   nb_used = (uint16_t)(ice_calc_pkt_desc(tx_pkt) +
+nb_ctx);
+   else
+   nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
tx_last = (uint16_t)(tx_id + nb_used - 1);
 
/* Circular ring */
@@ -2562,15 +2588,37 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf 
**tx_pkts, uint16_t nb_pkts)
txe->mbuf = m_seg;
 
/* Setup TX Descriptor */
+   slen = m_seg->data_len;
buf_dma_addr = rte_mbuf_data_iova(m_seg);
+
+   while ((ol_flags & PKT_TX_TCP_SEG) &&
+   unlikely(slen > ICE_MAX_DATA_PER_TXD)) {
+   txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr);
+   txd->cmd_type_offset_bsz =
+   rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DATA |
+   ((uint64_t)td_cmd << ICE_TXD_QW1_CMD_S) |
+   ((uint64_t)td_offset << ICE_TXD_QW1_OFFSET_S) |
+   ((uint64_t)ICE_MAX_DATA_PER_TXD <<
+ICE_TXD_QW1_TX_BUF_SZ_S) |
+   ((uint64_t)td_tag << ICE_TXD_QW1_L2TAG1_S));
+
+   buf_dma_addr += ICE_MAX_DATA_PER_TXD;
+   slen -= ICE_MAX_DATA_PER_TXD;
+
+   txe->last_id = tx_last;
+   tx_id = txe->next_id;
+   txe = txn;
+   txd = &tx_ring[tx_id];
+   txn = &sw_ring[txe->next_id];
+   }
+
txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr);
txd->cmd_type_offset_bsz =
rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DATA |
-   ((uint64_t)td_cmd  << ICE_TXD_QW1_CMD_S) |
+   ((uint64_t)td_cmd << ICE_TXD_QW1_CMD_S) |
((uint64_t)td_offset << ICE_TXD_QW1_OFFSET_S) |
-   ((uint64_t)m_seg->data_len  <<
-ICE_TXD_QW1_TX_BUF_SZ_S) |
-   ((uint64_t)td_tag  << ICE_TXD_QW1_L2TAG1_S));
+   ((uint64_t)slen << ICE_TXD_QW1_TX_BUF_SZ_S) |
+   ((uint64_t)td_tag << ICE_TXD_QW1_L2TAG1_S));
 
txe->last_id = tx_last;