Re: [dpdk-dev] [PATCH v7 25/27] net/i40e: set/clear VF stats from PF

2017-01-06 Thread Lu, Wenzhuo
Hi Jingjing, Qi,


> -Original Message-
> From: Wu, Jingjing
> Sent: Friday, January 6, 2017 9:25 AM
> To: Lu, Wenzhuo; dev@dpdk.org
> Cc: Zhang, Qi Z
> Subject: RE: [dpdk-dev] [PATCH v7 25/27] net/i40e: set/clear VF stats from PF
> 
> 
> 
> > -Original Message-
> > From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Wenzhuo Lu
> > Sent: Tuesday, January 3, 2017 2:55 PM
> > To: dev@dpdk.org
> > Cc: Zhang, Qi Z 
> > Subject: [dpdk-dev] [PATCH v7 25/27] net/i40e: set/clear VF stats from
> > PF
> >
> > From: Qi Zhang 
> >
> > This patch add support to get/clear VF statistics from PF side.
> > Two APIs are added:
> > rte_pmd_i40e_get_vf_stats.
> > rte_pmd_i40e_reset_vf_stats.
> >
> > Signed-off-by: Qi Zhang 
> > ---
> >  drivers/net/i40e/i40e_ethdev.c| 81
> > +++
> >  drivers/net/i40e/rte_pmd_i40e.h   | 41 
> >  drivers/net/i40e/rte_pmd_i40e_version.map |  2 +
> >  3 files changed, 124 insertions(+)
> >
> > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > b/drivers/net/i40e/i40e_ethdev.c index 47e03d6..be45cfa 100644
> > --- a/drivers/net/i40e/i40e_ethdev.c
> > +++ b/drivers/net/i40e/i40e_ethdev.c
> > @@ -10480,3 +10480,84 @@ int rte_pmd_i40e_set_vf_vlan_filter(uint8_t
> > port, uint16_t vlan_id,
> >
> > return ret;
> >  }
> > +
> > +int
> > +rte_pmd_i40e_get_vf_stats(uint8_t port,
> > + uint16_t vf_id,
> > + struct rte_eth_stats *stats)
> > +{
> > +   struct rte_eth_dev *dev;
> > +   struct rte_eth_dev_info dev_info;
> > +   struct i40e_pf *pf;
> > +   struct i40e_vsi *vsi;
> > +   int ret = 0;
> > +
> > +   RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
> > +
> > +   dev = &rte_eth_devices[port];
> > +   rte_eth_dev_info_get(port, &dev_info);
> > +
> > +   if (vf_id >= dev_info.max_vfs)
> > +   return -EINVAL;
> > +
> > +   pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
> > +
> > +   if (vf_id > pf->vf_num - 1 || !pf->vfs) {
> > +   PMD_DRV_LOG(ERR, "Invalid argument.");
> > +   return -EINVAL;
> > +   }
> > +
> > +   vsi = pf->vfs[vf_id].vsi;
> > +   if (!vsi)
> > +   return -EINVAL;
> > +
> > +   i40e_update_vsi_stats(vsi);
> > +
> > +   stats->ipackets = vsi->eth_stats.rx_unicast +
> > +   vsi->eth_stats.rx_multicast +
> > +   vsi->eth_stats.rx_broadcast;
> > +   stats->opackets = vsi->eth_stats.tx_unicast +
> > +   vsi->eth_stats.tx_multicast +
> > +   vsi->eth_stats.tx_broadcast;
> > +   stats->ibytes   = vsi->eth_stats.rx_bytes;
> > +   stats->obytes   = vsi->eth_stats.tx_bytes;
> > +   stats->ierrors  = vsi->eth_stats.rx_discards;
> > +   stats->oerrors  = vsi->eth_stats.tx_errors +
> > +vsi->eth_stats.tx_discards;
> > +
> > +   return ret;
> 
> It looks ret is not changed in this func at all.
> 
> > +}
> > +
> > +int
> > +rte_pmd_i40e_reset_vf_stats(uint8_t port,
> > +   uint16_t vf_id)
> > +{
> > +   struct rte_eth_dev *dev;
> > +   struct rte_eth_dev_info dev_info;
> > +   struct i40e_pf *pf;
> > +   struct i40e_vsi *vsi;
> > +   int ret = 0;
> > +
> > +   RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
> > +
> > +   dev = &rte_eth_devices[port];
> > +   rte_eth_dev_info_get(port, &dev_info);
> > +
> > +   if (vf_id >= dev_info.max_vfs)
> > +   return -EINVAL;
> > +
> > +   pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
> > +
> > +   if (vf_id > pf->vf_num - 1 || !pf->vfs) {
> > +   PMD_DRV_LOG(ERR, "Invalid argument.");
> > +   return -EINVAL;
> > +   }
> > +
> > +   vsi = pf->vfs[vf_id].vsi;
> > +   if (!vsi)
> > +   return -EINVAL;
> > +
> > +   vsi->offset_loaded = false;
> > +   i40e_update_vsi_stats(vsi);
> > +
> > +   return ret;
> Same comment as above.
I'll change it to 'return 0'. The same as above.



[dpdk-dev] [PATCH v2 3/5] net/qede: fix PF fastpath status block index

2017-01-06 Thread Rasesh Mody
From: Harish Patil 

Allocate double the number of fastpath status block index
since the PF RX/TX queues are not sharing the status block.
This is an interim solution till other parts of the code
is modified to handle the same.

Fixes: f1e4b6c0acee ("net/qede: fix status block index for VF queues")

Signed-off-by: Harish Patil 
---
 drivers/net/qede/qede_rxtx.c |   14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/net/qede/qede_rxtx.c b/drivers/net/qede/qede_rxtx.c
index aebe8cb..f20881c 100644
--- a/drivers/net/qede/qede_rxtx.c
+++ b/drivers/net/qede/qede_rxtx.c
@@ -431,13 +431,15 @@ int qede_alloc_fp_resc(struct qede_dev *qdev)
struct ecore_dev *edev = &qdev->edev;
struct qede_fastpath *fp;
uint32_t num_sbs;
-   int rc, i;
+   uint16_t i;
+   uint16_t sb_idx;
+   int rc;
 
if (IS_VF(edev))
ecore_vf_get_num_sbs(ECORE_LEADING_HWFN(edev), &num_sbs);
else
-   num_sbs = (ecore_cxt_get_proto_cid_count
- (ECORE_LEADING_HWFN(edev), PROTOCOLID_ETH, NULL)) / 2;
+   num_sbs = ecore_cxt_get_proto_cid_count
+ (ECORE_LEADING_HWFN(edev), PROTOCOLID_ETH, NULL);
 
if (num_sbs == 0) {
DP_ERR(edev, "No status blocks available\n");
@@ -455,7 +457,11 @@ int qede_alloc_fp_resc(struct qede_dev *qdev)
 
for (i = 0; i < QEDE_QUEUE_CNT(qdev); i++) {
fp = &qdev->fp_array[i];
-   if (qede_alloc_mem_sb(qdev, fp->sb_info, i % num_sbs)) {
+   if (IS_VF(edev))
+   sb_idx = i % num_sbs;
+   else
+   sb_idx = i;
+   if (qede_alloc_mem_sb(qdev, fp->sb_info, sb_idx)) {
qede_free_fp_arrays(qdev);
return -ENOMEM;
}
-- 
1.7.10.3



[dpdk-dev] [PATCH v2 1/5] net/qede: fix scatter-gather issue

2017-01-06 Thread Rasesh Mody
From: Harish Patil 

 - Make qede_process_sg_pkts() inline and add unlikely check
 - Fix mbuf segment chaining logic in qede_process_sg_pkts()
 - Change qede_encode_sg_bd() to return total segments required
 - Fix first TX buffer descriptor's length
 - Replace repeatitive code using a macro

Fixes: bec0228816c0 ("net/qede: support scatter gather")

Signed-off-by: Harish Patil 
---
 drivers/net/qede/qede_rxtx.c |  139 --
 drivers/net/qede/qede_rxtx.h |4 --
 2 files changed, 65 insertions(+), 78 deletions(-)

diff --git a/drivers/net/qede/qede_rxtx.c b/drivers/net/qede/qede_rxtx.c
index 814d384..ecff5bc 100644
--- a/drivers/net/qede/qede_rxtx.c
+++ b/drivers/net/qede/qede_rxtx.c
@@ -810,39 +810,28 @@ static inline uint32_t 
qede_rx_cqe_to_tunn_pkt_type(uint16_t flags)
return RTE_PTYPE_UNKNOWN;
 }
 
-
-int qede_process_sg_pkts(void *p_rxq,  struct rte_mbuf *rx_mb,
-int num_segs, uint16_t pkt_len)
+static inline int
+qede_process_sg_pkts(void *p_rxq,  struct rte_mbuf *rx_mb,
+uint8_t num_segs, uint16_t pkt_len)
 {
struct qede_rx_queue *rxq = p_rxq;
struct qede_dev *qdev = rxq->qdev;
struct ecore_dev *edev = &qdev->edev;
-   uint16_t sw_rx_index, cur_size;
-
register struct rte_mbuf *seg1 = NULL;
register struct rte_mbuf *seg2 = NULL;
+   uint16_t sw_rx_index;
+   uint16_t cur_size;
 
seg1 = rx_mb;
while (num_segs) {
-   cur_size = pkt_len > rxq->rx_buf_size ?
-   rxq->rx_buf_size : pkt_len;
-   if (!cur_size) {
-   PMD_RX_LOG(DEBUG, rxq,
-  "SG packet, len and num BD mismatch\n");
+   cur_size = pkt_len > rxq->rx_buf_size ? rxq->rx_buf_size :
+   pkt_len;
+   if (unlikely(!cur_size)) {
+   PMD_RX_LOG(ERR, rxq, "Length is 0 while %u BDs"
+  " left for mapping jumbo\n", num_segs);
qede_recycle_rx_bd_ring(rxq, qdev, num_segs);
return -EINVAL;
}
-
-   if (qede_alloc_rx_buffer(rxq)) {
-   uint8_t index;
-
-   PMD_RX_LOG(DEBUG, rxq, "Buffer allocation failed\n");
-   index = rxq->port_id;
-   rte_eth_devices[index].data->rx_mbuf_alloc_failed++;
-   rxq->rx_alloc_errors++;
-   return -ENOMEM;
-   }
-
sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
seg2 = rxq->sw_rx_ring[sw_rx_index].mbuf;
qede_rx_bd_ring_consume(rxq);
@@ -852,16 +841,9 @@ int qede_process_sg_pkts(void *p_rxq,  struct rte_mbuf 
*rx_mb,
seg1 = seg1->next;
num_segs--;
rxq->rx_segs++;
-   continue;
}
-   seg1 = NULL;
-
-   if (pkt_len)
-   PMD_RX_LOG(DEBUG, rxq,
-  "Mapped all BDs of jumbo, but still have %d bytes\n",
-  pkt_len);
 
-   return ECORE_SUCCESS;
+   return 0;
 }
 
 uint16_t
@@ -878,11 +860,16 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, 
uint16_t nb_pkts)
register struct rte_mbuf *rx_mb = NULL;
register struct rte_mbuf *seg1 = NULL;
enum eth_rx_cqe_type cqe_type;
-   uint16_t len, pad, preload_idx, pkt_len, parse_flag;
-   uint8_t csum_flag, num_segs;
+   uint16_t pkt_len; /* Sum of all BD segments */
+   uint16_t len; /* Length of first BD */
+   uint8_t num_segs = 1;
+   uint16_t pad;
+   uint16_t preload_idx;
+   uint8_t csum_flag;
+   uint16_t parse_flag;
enum rss_hash_type htype;
uint8_t tunn_parse_flag;
-   int ret;
+   uint8_t j;
 
hw_comp_cons = rte_le_to_cpu_16(*rxq->hw_cons_ptr);
sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring);
@@ -915,6 +902,7 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, 
uint16_t nb_pkts)
fp_cqe = &cqe->fast_path_regular;
 
len = rte_le_to_cpu_16(fp_cqe->len_on_first_bd);
+   pkt_len = rte_le_to_cpu_16(fp_cqe->pkt_len);
pad = fp_cqe->placement_offset;
assert((len + pad) <= rx_mb->buf_len);
 
@@ -979,25 +967,29 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, 
uint16_t nb_pkts)
rxq->rx_alloc_errors++;
break;
}
-
qede_rx_bd_ring_consume(rxq);
-
if (fp_cqe->bd_num > 1) {
-   pkt_len = rte_le_to_cpu_16(fp_cqe->pkt_len);
+   PMD_RX_LOG(DEBUG, rxq, "Jumbo-over-BD packet: %02x BDs"
+  " len on first: %04x Total Len: %04x\n",
+ 

[dpdk-dev] [PATCH v2 2/5] net/qede: fix minimum buffer size and scatter Rx check

2017-01-06 Thread Rasesh Mody
From: Harish Patil 

 - Fix minimum RX buffer size to 1024B
 - Force enable scatter/gather mode if given RX buf size is lesser than MTU
 - Adjust RX buffer size to cache-line size with overhead included

Fixes: bec0228816c0 ("net/qede: support scatter gather")
Fixes: 2ea6f76aff40 ("qede: add core driver")

Signed-off-by: Harish Patil 
---
 drivers/net/qede/qede_ethdev.c |3 +--
 drivers/net/qede/qede_rxtx.c   |   47 +---
 drivers/net/qede/qede_rxtx.h   |   11 --
 3 files changed, 30 insertions(+), 31 deletions(-)

diff --git a/drivers/net/qede/qede_ethdev.c b/drivers/net/qede/qede_ethdev.c
index b7886f4..0b40d1b 100644
--- a/drivers/net/qede/qede_ethdev.c
+++ b/drivers/net/qede/qede_ethdev.c
@@ -969,8 +969,7 @@ qede_dev_info_get(struct rte_eth_dev *eth_dev,
PMD_INIT_FUNC_TRACE(edev);
 
dev_info->pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
-   dev_info->min_rx_bufsize = (uint32_t)(ETHER_MIN_MTU +
- QEDE_ETH_OVERHEAD);
+   dev_info->min_rx_bufsize = (uint32_t)QEDE_MIN_RX_BUFF_SIZE;
dev_info->max_rx_pktlen = (uint32_t)ETH_TX_MAX_NON_LSO_PKT_LEN;
dev_info->rx_desc_lim = qede_rx_desc_lim;
dev_info->tx_desc_lim = qede_tx_desc_lim;
diff --git a/drivers/net/qede/qede_rxtx.c b/drivers/net/qede/qede_rxtx.c
index ecff5bc..aebe8cb 100644
--- a/drivers/net/qede/qede_rxtx.c
+++ b/drivers/net/qede/qede_rxtx.c
@@ -89,11 +89,11 @@ qede_rx_queue_setup(struct rte_eth_dev *dev, uint16_t 
queue_idx,
 {
struct qede_dev *qdev = dev->data->dev_private;
struct ecore_dev *edev = &qdev->edev;
-   struct rte_eth_dev_data *eth_data = dev->data;
+   struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
struct qede_rx_queue *rxq;
-   uint16_t pkt_len = (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len;
+   uint16_t max_rx_pkt_len;
+   uint16_t bufsz;
size_t size;
-   uint16_t data_size;
int rc;
int i;
 
@@ -127,34 +127,27 @@ qede_rx_queue_setup(struct rte_eth_dev *dev, uint16_t 
queue_idx,
rxq->nb_rx_desc = nb_desc;
rxq->queue_id = queue_idx;
rxq->port_id = dev->data->port_id;
-
-   /* Sanity check */
-   data_size = (uint16_t)rte_pktmbuf_data_room_size(mp) -
-   RTE_PKTMBUF_HEADROOM;
-
-   if (pkt_len > data_size && !dev->data->scattered_rx) {
-   DP_ERR(edev, "MTU %u should not exceed dataroom %u\n",
-  pkt_len, data_size);
-   rte_free(rxq);
-   return -EINVAL;
+   max_rx_pkt_len = (uint16_t)rxmode->max_rx_pkt_len;
+   qdev->mtu = max_rx_pkt_len;
+
+   /* Fix up RX buffer size */
+   bufsz = (uint16_t)rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
+   if ((rxmode->enable_scatter)||
+   (max_rx_pkt_len + QEDE_ETH_OVERHEAD) > bufsz) {
+   if (!dev->data->scattered_rx) {
+   DP_INFO(edev, "Forcing scatter-gather mode\n");
+   dev->data->scattered_rx = 1;
+   }
}
-
if (dev->data->scattered_rx)
-   rxq->rx_buf_size = data_size;
+   rxq->rx_buf_size = bufsz + QEDE_ETH_OVERHEAD;
else
-   rxq->rx_buf_size = pkt_len + QEDE_ETH_OVERHEAD;
-
-   qdev->mtu = pkt_len;
+   rxq->rx_buf_size = qdev->mtu + QEDE_ETH_OVERHEAD;
+   /* Align to cache-line size if needed */
+   rxq->rx_buf_size = QEDE_CEIL_TO_CACHE_LINE_SIZE(rxq->rx_buf_size);
 
-   DP_INFO(edev, "MTU = %u ; RX buffer = %u\n",
-   qdev->mtu, rxq->rx_buf_size);
-
-   if (pkt_len > ETHER_MAX_LEN) {
-   dev->data->dev_conf.rxmode.jumbo_frame = 1;
-   DP_NOTICE(edev, false, "jumbo frame enabled\n");
-   } else {
-   dev->data->dev_conf.rxmode.jumbo_frame = 0;
-   }
+   DP_INFO(edev, "mtu %u mbufsz %u bd_max_bytes %u scatter_mode %d\n",
+   qdev->mtu, bufsz, rxq->rx_buf_size, dev->data->scattered_rx);
 
/* Allocate the parallel driver ring for Rx buffers */
size = sizeof(*rxq->sw_rx_ring) * rxq->nb_rx_desc;
diff --git a/drivers/net/qede/qede_rxtx.h b/drivers/net/qede/qede_rxtx.h
index a95b4ab..9a393e9 100644
--- a/drivers/net/qede/qede_rxtx.h
+++ b/drivers/net/qede/qede_rxtx.h
@@ -51,14 +51,21 @@
((flags) & (PARSING_AND_ERR_FLAGS_TUNNEL8021QTAGEXIST_MASK \
<< PARSING_AND_ERR_FLAGS_TUNNEL8021QTAGEXIST_SHIFT))
 
+#define QEDE_MIN_RX_BUFF_SIZE  (1024)
+#define QEDE_VLAN_TAG_SIZE (4)
+#define QEDE_LLC_SNAP_HDR_LEN  (8)
+
 /* Max supported alignment is 256 (8 shift)
  * minimal alignment shift 6 is optimal for 57xxx HW performance
  */
 #define QEDE_L1_CACHE_SHIFT6
 #define QEDE_RX_ALIGN_SHIFT(RTE_MAX(6, RTE_MIN(8, QEDE_L1_CACHE_SHIFT)))
 #define QEDE_FW_RX_ALIGN_END   (1UL << QEDE_RX_ALIGN_SHIFT)
-
-#define QEDE_

[dpdk-dev] [PATCH v2 5/5] net/qede: convert few DP_NOTICE and DP_INFO to DP_ERR

2017-01-06 Thread Rasesh Mody
From: Rasesh Mody 

Signed-off-by: Rasesh Mody 
---
 drivers/net/qede/base/ecore_mcp.c |2 +-
 drivers/net/qede/qede_ethdev.c|   11 +--
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/net/qede/base/ecore_mcp.c 
b/drivers/net/qede/base/ecore_mcp.c
index bb13828..f634d98 100644
--- a/drivers/net/qede/base/ecore_mcp.c
+++ b/drivers/net/qede/base/ecore_mcp.c
@@ -931,7 +931,7 @@ static void ecore_mcp_send_protocol_stats(struct ecore_hwfn 
*p_hwfn,
hsi_param = DRV_MSG_CODE_STATS_TYPE_LAN;
break;
default:
-   DP_NOTICE(p_hwfn, false, "Invalid protocol type %d\n", type);
+   DP_INFO(p_hwfn, "Invalid protocol type %d\n", type);
return;
}
 
diff --git a/drivers/net/qede/qede_ethdev.c b/drivers/net/qede/qede_ethdev.c
index 6d90c46..c67fbb6 100644
--- a/drivers/net/qede/qede_ethdev.c
+++ b/drivers/net/qede/qede_ethdev.c
@@ -650,7 +650,7 @@ static void qede_vlan_offload_set(struct rte_eth_dev 
*eth_dev, int mask)
qede_vlan_filter_set(eth_dev, 0, 1);
} else {
if (qdev->configured_vlans > 1) { /* Excluding VLAN0 */
-   DP_NOTICE(edev, false,
+   DP_ERR(edev,
  " Please remove existing VLAN filters"
  " before disabling VLAN filtering\n");
/* Signal app that VLAN filtering is still
@@ -684,7 +684,7 @@ static int qede_vlan_filter_set(struct rte_eth_dev *eth_dev,
 
if (on) {
if (qdev->configured_vlans == dev_info->num_vlan_filters) {
-   DP_INFO(edev, "Reached max VLAN filter limit"
+   DP_ERR(edev, "Reached max VLAN filter limit"
  " enabling accept_any_vlan\n");
qede_config_accept_any_vlan(qdev, true);
return 0;
@@ -849,14 +849,13 @@ static int qede_dev_configure(struct rte_eth_dev *eth_dev)
if (edev->num_hwfns > 1) {
if (eth_dev->data->nb_rx_queues < 2 ||
eth_dev->data->nb_tx_queues < 2) {
-   DP_NOTICE(edev, false,
- "100G mode needs min. 2 RX/TX queues\n");
+   DP_ERR(edev, "100G mode needs min. 2 RX/TX queues\n");
return -EINVAL;
}
 
if ((eth_dev->data->nb_rx_queues % 2 != 0) ||
(eth_dev->data->nb_tx_queues % 2 != 0)) {
-   DP_NOTICE(edev, false,
+   DP_ERR(edev,
  "100G mode needs even no. of RX/TX queues\n");
return -EINVAL;
}
@@ -867,7 +866,7 @@ static int qede_dev_configure(struct rte_eth_dev *eth_dev)
eth_dev->data->scattered_rx = 1;
 
if (rxmode->enable_lro == 1) {
-   DP_INFO(edev, "LRO is not supported\n");
+   DP_ERR(edev, "LRO is not supported\n");
return -EINVAL;
}
 
-- 
1.7.10.3



Re: [dpdk-dev] [PATCH v7 14/27] net/i40e: set VF VLAN insertion from PF

2017-01-06 Thread Lu, Wenzhuo
Hi Jingjing, Bernard,


> -Original Message-
> From: Wu, Jingjing
> Sent: Friday, January 6, 2017 8:33 AM
> To: Lu, Wenzhuo; dev@dpdk.org
> Cc: Iremonger, Bernard
> Subject: RE: [dpdk-dev] [PATCH v7 14/27] net/i40e: set VF VLAN insertion from
> PF
> 
> 
> 
> > -Original Message-
> > From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Wenzhuo Lu
> > Sent: Tuesday, January 3, 2017 2:55 PM
> > To: dev@dpdk.org
> > Cc: Iremonger, Bernard 
> > Subject: [dpdk-dev] [PATCH v7 14/27] net/i40e: set VF VLAN insertion
> > from PF
> >
> > From: Bernard Iremonger 
> >
> > Support inserting VF VLAN id from PF.
> > User can call the API on PF to insert a VLAN id to a specific VF.
> >
> > Signed-off-by: Bernard Iremonger 
> > ---
> >  drivers/net/i40e/i40e_ethdev.c| 56
> > +++
> >  drivers/net/i40e/rte_pmd_i40e.h   | 19 +++
> >  drivers/net/i40e/rte_pmd_i40e_version.map |  1 +
> >  3 files changed, 76 insertions(+)
> >
> > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > b/drivers/net/i40e/i40e_ethdev.c index 7ab1c93..31c387d 100644
> > --- a/drivers/net/i40e/i40e_ethdev.c
> > +++ b/drivers/net/i40e/i40e_ethdev.c
> > @@ -10266,3 +10266,59 @@ static void i40e_set_default_mac_addr(struct
> > rte_eth_dev *dev,
> > else
> > return -EINVAL;
> >  }
> > +
> > +int rte_pmd_i40e_set_vf_vlan_insert(uint8_t port, uint16_t vf_id,
> > +   uint16_t vlan_id)
> > +{
> > +   struct rte_eth_dev *dev;
> > +   struct rte_eth_dev_info dev_info;
> > +   struct i40e_pf *pf;
> > +   struct i40e_hw *hw;
> > +   struct i40e_vsi *vsi;
> > +   struct i40e_vsi_context ctxt;
> > +   int ret;
> > +
> > +   RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
> > +
> > +   dev = &rte_eth_devices[port];
> > +   rte_eth_dev_info_get(port, &dev_info);
> 
> It looks dev_info is not used in this function.
I'll delete it.

> 
> > +   pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
> > +   hw = I40E_PF_TO_HW(pf);
> > +
> > +   /**
> > +* return -ENODEV if SRIOV not enabled, VF number not configured
> > +* or no queue assigned.
> > +*/
> > +   if (!hw->func_caps.sr_iov_1_1 || pf->vf_num == 0 ||
> > +   pf->vf_nb_qps == 0)
> > +   return -ENODEV;
> > +
> > +   if (vf_id >= pf->vf_num || !pf->vfs)
> > +   return -EINVAL;
> > +
> > +   if (vlan_id > ETHER_MAX_VLAN_ID)
> > +   return -EINVAL;
> > +
> > +   vsi = pf->vfs[vf_id].vsi;
> > +   if (!vsi)
> > +   return -EINVAL;
> > +
> > +   vsi->info.valid_sections =
> > cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
> > +   vsi->info.pvid = vlan_id;
> > +   if (vlan_id > 0)
> > +   vsi->info.port_vlan_flags |=
> I40E_AQ_VSI_PVLAN_INSERT_PVID;
> > +   else
> > +   vsi->info.port_vlan_flags &=
> > ~I40E_AQ_VSI_PVLAN_INSERT_PVID;
> So, Pvid is used here for insert. Does it has any relationship with vlan anti-
> spoof patch?
> If so, it's better to consider how to deal with that.
It's vlan insertion not filtering. So I think not related.

> 
> Thanks
> Jingjing


[dpdk-dev] [PATCH v2 4/5] net/qede: fix per queue stats/xstats

2017-01-06 Thread Rasesh Mody
From: Rasesh Mody 

If value of number of rxq/txq is diffrent than
RTE_ETHDEV_QUEUE_STAT_CNTRS, limit per queue
stats/xstats to minimum of the two.

Fixes: 7634c5f91569 ("net/qede: add queue statistics")

Signed-off-by: Rasesh Mody 
---
 drivers/net/qede/qede_ethdev.c |   32 +---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/drivers/net/qede/qede_ethdev.c b/drivers/net/qede/qede_ethdev.c
index 0b40d1b..6d90c46 100644
--- a/drivers/net/qede/qede_ethdev.c
+++ b/drivers/net/qede/qede_ethdev.c
@@ -1162,6 +1162,7 @@ qede_get_stats(struct rte_eth_dev *eth_dev, struct 
rte_eth_stats *eth_stats)
struct ecore_dev *edev = &qdev->edev;
struct ecore_eth_stats stats;
unsigned int i = 0, j = 0, qid;
+   unsigned int rxq_stat_cntrs, txq_stat_cntrs;
struct qede_tx_queue *txq;
 
qdev->ops->get_vport_stats(edev, &stats);
@@ -1195,6 +1196,17 @@ qede_get_stats(struct rte_eth_dev *eth_dev, struct 
rte_eth_stats *eth_stats)
eth_stats->oerrors = stats.tx_err_drop_pkts;
 
/* Queue stats */
+   rxq_stat_cntrs = RTE_MIN(QEDE_RSS_COUNT(qdev),
+  RTE_ETHDEV_QUEUE_STAT_CNTRS);
+   txq_stat_cntrs = RTE_MIN(QEDE_TSS_COUNT(qdev),
+  RTE_ETHDEV_QUEUE_STAT_CNTRS);
+   if ((rxq_stat_cntrs != QEDE_RSS_COUNT(qdev)) ||
+   (txq_stat_cntrs != QEDE_TSS_COUNT(qdev)))
+   DP_VERBOSE(edev, ECORE_MSG_DEBUG,
+  "Not all the queue stats will be displayed. Set"
+  " RTE_ETHDEV_QUEUE_STAT_CNTRS config param"
+  " appropriately and retry.\n");
+
for (qid = 0; qid < QEDE_QUEUE_CNT(qdev); qid++) {
if (qdev->fp_array[qid].type & QEDE_FASTPATH_RX) {
eth_stats->q_ipackets[i] =
@@ -1213,7 +1225,11 @@ qede_get_stats(struct rte_eth_dev *eth_dev, struct 
rte_eth_stats *eth_stats)
rx_alloc_errors));
i++;
}
+   if (i == rxq_stat_cntrs)
+   break;
+   }
 
+   for (qid = 0; qid < QEDE_QUEUE_CNT(qdev); qid++) {
if (qdev->fp_array[qid].type & QEDE_FASTPATH_TX) {
txq = qdev->fp_array[(qid)].txqs[0];
eth_stats->q_opackets[j] =
@@ -1223,13 +1239,17 @@ qede_get_stats(struct rte_eth_dev *eth_dev, struct 
rte_eth_stats *eth_stats)
  xmit_pkts)));
j++;
}
+   if (j == txq_stat_cntrs)
+   break;
}
 }
 
 static unsigned
 qede_get_xstats_count(struct qede_dev *qdev) {
return RTE_DIM(qede_xstats_strings) +
-   (RTE_DIM(qede_rxq_xstats_strings) * QEDE_RSS_COUNT(qdev));
+   (RTE_DIM(qede_rxq_xstats_strings) *
+RTE_MIN(QEDE_RSS_COUNT(qdev),
+RTE_ETHDEV_QUEUE_STAT_CNTRS));
 }
 
 static int
@@ -1239,6 +1259,7 @@ qede_get_xstats_names(__rte_unused struct rte_eth_dev 
*dev,
struct qede_dev *qdev = dev->data->dev_private;
const unsigned int stat_cnt = qede_get_xstats_count(qdev);
unsigned int i, qid, stat_idx = 0;
+   unsigned int rxq_stat_cntrs;
 
if (xstats_names != NULL) {
for (i = 0; i < RTE_DIM(qede_xstats_strings); i++) {
@@ -1249,7 +1270,9 @@ qede_get_xstats_names(__rte_unused struct rte_eth_dev 
*dev,
stat_idx++;
}
 
-   for (qid = 0; qid < QEDE_RSS_COUNT(qdev); qid++) {
+   rxq_stat_cntrs = RTE_MIN(QEDE_RSS_COUNT(qdev),
+RTE_ETHDEV_QUEUE_STAT_CNTRS);
+   for (qid = 0; qid < rxq_stat_cntrs; qid++) {
for (i = 0; i < RTE_DIM(qede_rxq_xstats_strings); i++) {
snprintf(xstats_names[stat_idx].name,
sizeof(xstats_names[stat_idx].name),
@@ -1273,6 +1296,7 @@ qede_get_xstats(struct rte_eth_dev *dev, struct 
rte_eth_xstat *xstats,
struct ecore_eth_stats stats;
const unsigned int num = qede_get_xstats_count(qdev);
unsigned int i, qid, stat_idx = 0;
+   unsigned int rxq_stat_cntrs;
 
if (n < num)
return num;
@@ -1285,7 +1309,9 @@ qede_get_xstats(struct rte_eth_dev *dev, struct 
rte_eth_xstat *xstats,
stat_idx++;
}
 
-   for (qid = 0; qid < QEDE_QUEUE_CNT(qdev); qid++) {
+   rxq_stat_cntrs = RTE_MIN(QEDE_RSS_COUNT(qdev),
+RTE_ETHDEV_QUEUE_STAT_CNTRS);
+   for (qid = 0; qid < rxq_stat_cntrs; qid++) {
if (qdev->fp_array[qid].type & QEDE_FASTPATH_RX) {
for (i = 0; i < RTE_DIM(qede_rxq_xstats_strings); i++) {
xstats[stat_idx].value = *(uint64_t *)(
-- 
1.7.10.3



Re: [dpdk-dev] [PATCH v3] net/mlx5: add support for ConnectX-5 NICs

2017-01-06 Thread Adrien Mazarguil
On Thu, Jan 05, 2017 at 04:49:31PM -0800, Yongseok Koh wrote:
> Add PCI device ID for ConnectX-5 and enable multi-packet send for PF and VF
> along with changing documentation and release note.
> 
> Signed-off-by: Yongseok Koh 

Acked-by: Adrien Mazarguil 

Thanks!

-- 
Adrien Mazarguil
6WIND


Re: [dpdk-dev] [PATCH v7 03/27] net/i40e: set VF MAC anti-spoofing from PF

2017-01-06 Thread Lu, Wenzhuo
Hi Jingjing,

> -Original Message-
> From: Wu, Jingjing
> Sent: Friday, January 6, 2017 8:33 AM
> To: Lu, Wenzhuo; dev@dpdk.org
> Cc: Lu, Wenzhuo
> Subject: RE: [dpdk-dev] [PATCH v7 03/27] net/i40e: set VF MAC anti-spoofing
> from PF
> 
> > +
> > +   vsi->info.valid_sections =
> > cpu_to_le16(I40E_AQ_VSI_PROP_SECURITY_VALID);
> > +   if (on)
> > +   vsi->info.sec_flags |=
> > I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK;
> > +   else
> > +   vsi->info.sec_flags &=
> > ~I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK;
> > +
> > +   memset(&ctxt, 0, sizeof(ctxt));
> > +   (void)rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
> > +   ctxt.seid = vsi->seid;
> > +
> > +   hw = I40E_VSI_TO_HW(vsi);
> > +   ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
> > +   if (ret != I40E_SUCCESS)
> > +   PMD_DRV_LOG(ERR, "Failed to update VSI params");
> 
> If fails, will you revert the info in vsi struct?
Will not. Just leverage the existing behavior. I think it has some good side as 
user should not try it again and again if not success.

> 
> > +
> > +   return ret;
> 
> Please return eth dev lib error code, but not I40E_XXX
Yes, will change it.



Re: [dpdk-dev] [PATCH v3] net/mlx5: add support for ConnectX-5 NICs

2017-01-06 Thread Thomas Monjalon
2017-01-06 09:50, Adrien Mazarguil:
> On Thu, Jan 05, 2017 at 04:49:31PM -0800, Yongseok Koh wrote:
> > Add PCI device ID for ConnectX-5 and enable multi-packet send for PF and VF
> > along with changing documentation and release note.
> > 
> > Signed-off-by: Yongseok Koh 
> 
> Acked-by: Adrien Mazarguil 

You'll need to update the website:
http://dpdk.org/browse/tools/dpdk-web/tree/doc/nics.html#n83


Re: [dpdk-dev] XL710 with i40e driver drops packets on RX even on a small rates.

2017-01-06 Thread Martin Weiser
Hello,

just to let you know we were finally able to resolve the issue. It seems
that the affected boards had a firmware issue with PCIe x8 v3.
When we forced the PCI slots to run at x8 v2 the issue disappeared for
Test 1 and Test 2. Test 3 still produced missed packets but probably due
to the reduced PCIe x8 v2 bandwidth.
We then found out that there exists a BIOS/firmware update for these
boards which was issued by Supermicro in November ... unfortunately
there are no changenotes whatsoever.
But lo and behold this update seems to include a fix for exactly this
issue since now the XL710 is working as expected with PCIe x8 v3.

Best regards,
Martin


On 04.01.17 13:33, Martin Weiser wrote:
> Hello,
>
> I have performed some more thorough testing on 3 different machines to
> illustrate the strange results with XL710.
> Please note that all 3 systems were able to forward the traffic of Test
> 1 and Test 2 without packet loss when a 82599ES NIC was installed in the
> same PCI slot as the XL710 in the tests below.
>
> Here is the test setup and the test results:
>
>
> ## Test traffic
>
> In all tests the t-rex traffic generator was used to generate traffic on
> a XL710 card with the following parameters:
>
> ### Test 1
>
> ./t-rex-64 -f cap2/imix_1518.yaml -c 4 -d 60 -m 25 --flip
>
> This resulted in a 60 second run with ~1.21 Gbps traffic on each of the
> two interfaces with ~10 packets per
> second on each interface.
>
> ### Test 2
>
> ./t-rex-64 -f cap2/imix_1518.yaml -c 4 -d 60 -m 100 --flip
>
> This resulted in a 60 second run with ~4.85 Gbps traffic on each of the
> two interfaces with ~40 packets per
> second on each interface.
>
> ### Test 3
>
> ./t-rex-64 -f cap2/imix_1518.yaml -c 4 -d 60 -m 400 --flip
>
> This resulted in a 60 second run with ~19.43 Gbps traffic on each of the
> two interfaces with ~160 packets per
> second on each interface.
>
>
>
> ## DPDK
>
> On all systems a vanilla DPDK v16.11 testpmd was used with the following
> parameters (PCI IDs differed between systems):
>
> ./build/app/testpmd -l 1,2 -w :06:00.0 -w :06:00.1 -- -i
>
>
>
> ## System 1
>
> * Board: Supermicro X10SDV-TP8F
> * CPU:
> Architecture:  x86_64
> CPU op-mode(s):32-bit, 64-bit
> Byte Order:Little Endian
> CPU(s):8
> On-line CPU(s) list:   0-7
> Thread(s) per core:2
> Core(s) per socket:4
> Socket(s): 1
> NUMA node(s):  1
> Vendor ID: GenuineIntel
> CPU family:6
> Model: 86
> Model name:Intel(R) Xeon(R) CPU D-1518 @ 2.20GHz
> Stepping:  3
> CPU MHz:   800.250
> CPU max MHz:   2200.
> CPU min MHz:   800.
> BogoMIPS:  4399.58
> Virtualization:VT-x
> L1d cache: 32K
> L1i cache: 32K
> L2 cache:  256K
> L3 cache:  6144K
> NUMA node0 CPU(s): 0-7
> Flags: fpu vme de pse tsc msr pae mce cx8 apic
> sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht
> tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts
> rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq
> dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid
> dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx
> f16c rdrand lahf_lm abm 3dnowprefetch epb intel_pt tpr_shadow vnmi
> flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms
> invpcid rtm cqm rdseed adx smap xsaveopt cqm_llc cqm_occup_llc
> cqm_mbm_total cqm_mbm_local dtherm arat pln pts
> * Memory channels: 2
> * Memory: 2 * 8192 MB DDR4 @ 2133 MHz
> * NIC firmware: FW 5.0 API 1.5 NVM 05.00.04 eetrack 80002505
> * i40e version: 1.4.25-k
> * OS: Ubuntu 16.04.1 LTS
> * Kernel: 4.4.0-57-generic
> * Kernel parameters: isolcpus=1,2,3,5,6,7 default_hugepagesz=1G
> hugepagesz=1G hugepages=1
>
> ### Test 1
>
> Mostly no packet loss. Sometimes ~10 packets missed of ~60 on each
> interface when testpmd was not started in
> interactive mode.
>
> ### Test 2
>
> 100-300 packets of ~2400 missed on each interface.
>
> ### Test 3
>
> 4000-5000 packets of ~9600 missed on each interface.
>
>
>
> ## System 2
>
> * Board: Supermicro X10SDV-7TP8F
> * CPU:
> Architecture:  x86_64
> CPU op-mode(s):32-bit, 64-bit
> Byte Order:Little Endian
> CPU(s):32
> On-line CPU(s) list:   0-31
> Thread(s) per core:2
> Core(s) per socket:16
> Socket(s): 1
> NUMA node(s):  1
> Vendor ID: GenuineIntel
> CPU family:6
> Model: 86
> Mod

Re: [dpdk-dev] XL710 with i40e driver drops packets on RX even on a small rates.

2017-01-06 Thread Zhang, Helin
Very good to know that!
Congratulations!

/Helin

-Original Message-
From: Martin Weiser [mailto:martin.wei...@allegro-packets.com] 
Sent: Friday, January 6, 2017 5:17 PM
To: dev@dpdk.org; Ilya Maximets 
Cc: Zhang, Helin ; Wu, Jingjing 
Subject: Re: [dpdk-dev] XL710 with i40e driver drops packets on RX even on a 
small rates.

Hello,

just to let you know we were finally able to resolve the issue. It seems that 
the affected boards had a firmware issue with PCIe x8 v3.
When we forced the PCI slots to run at x8 v2 the issue disappeared for Test 1 
and Test 2. Test 3 still produced missed packets but probably due to the 
reduced PCIe x8 v2 bandwidth.
We then found out that there exists a BIOS/firmware update for these boards 
which was issued by Supermicro in November ... unfortunately there are no 
changenotes whatsoever.
But lo and behold this update seems to include a fix for exactly this issue 
since now the XL710 is working as expected with PCIe x8 v3.

Best regards,
Martin


On 04.01.17 13:33, Martin Weiser wrote:
> Hello,
>
> I have performed some more thorough testing on 3 different machines to 
> illustrate the strange results with XL710.
> Please note that all 3 systems were able to forward the traffic of 
> Test
> 1 and Test 2 without packet loss when a 82599ES NIC was installed in 
> the same PCI slot as the XL710 in the tests below.
>
> Here is the test setup and the test results:
>
>
> ## Test traffic
>
> In all tests the t-rex traffic generator was used to generate traffic 
> on a XL710 card with the following parameters:
>
> ### Test 1
>
> ./t-rex-64 -f cap2/imix_1518.yaml -c 4 -d 60 -m 25 --flip
>
> This resulted in a 60 second run with ~1.21 Gbps traffic on each of 
> the two interfaces with ~10 packets per second on each interface.
>
> ### Test 2
>
> ./t-rex-64 -f cap2/imix_1518.yaml -c 4 -d 60 -m 100 --flip
>
> This resulted in a 60 second run with ~4.85 Gbps traffic on each of 
> the two interfaces with ~40 packets per second on each interface.
>
> ### Test 3
>
> ./t-rex-64 -f cap2/imix_1518.yaml -c 4 -d 60 -m 400 --flip
>
> This resulted in a 60 second run with ~19.43 Gbps traffic on each of 
> the two interfaces with ~160 packets per second on each interface.
>
>
>
> ## DPDK
>
> On all systems a vanilla DPDK v16.11 testpmd was used with the 
> following parameters (PCI IDs differed between systems):
>
> ./build/app/testpmd -l 1,2 -w :06:00.0 -w :06:00.1 -- -i
>
>
>
> ## System 1
>
> * Board: Supermicro X10SDV-TP8F
> * CPU:
> Architecture:  x86_64
> CPU op-mode(s):32-bit, 64-bit
> Byte Order:Little Endian
> CPU(s):8
> On-line CPU(s) list:   0-7
> Thread(s) per core:2
> Core(s) per socket:4
> Socket(s): 1
> NUMA node(s):  1
> Vendor ID: GenuineIntel
> CPU family:6
> Model: 86
> Model name:Intel(R) Xeon(R) CPU D-1518 @ 2.20GHz
> Stepping:  3
> CPU MHz:   800.250
> CPU max MHz:   2200.
> CPU min MHz:   800.
> BogoMIPS:  4399.58
> Virtualization:VT-x
> L1d cache: 32K
> L1i cache: 32K
> L2 cache:  256K
> L3 cache:  6144K
> NUMA node0 CPU(s): 0-7
> Flags: fpu vme de pse tsc msr pae mce cx8 apic
> sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss 
> ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs 
> bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni 
> pclmulqdq
> dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm 
> pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes 
> xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb intel_pt 
> tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle 
> avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap xsaveopt cqm_llc 
> cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm arat pln pts
> * Memory channels: 2
> * Memory: 2 * 8192 MB DDR4 @ 2133 MHz
> * NIC firmware: FW 5.0 API 1.5 NVM 05.00.04 eetrack 80002505
> * i40e version: 1.4.25-k
> * OS: Ubuntu 16.04.1 LTS
> * Kernel: 4.4.0-57-generic
> * Kernel parameters: isolcpus=1,2,3,5,6,7 default_hugepagesz=1G 
> hugepagesz=1G hugepages=1
>
> ### Test 1
>
> Mostly no packet loss. Sometimes ~10 packets missed of ~60 on each 
> interface when testpmd was not started in interactive mode.
>
> ### Test 2
>
> 100-300 packets of ~2400 missed on each interface.
>
> ### Test 3
>
> 4000-5000 packets of ~9600 missed on each interface.
>
>
>
> ## System 2
>
> * Board: Supermicro X10SDV-7TP8F
> * CPU:
> Architecture:  x86_64
> CPU op-mode(s):32-bit, 64-bit
> Byte Order:Li

Re: [dpdk-dev] [PATCH v5 00/20] Decouple ethdev from PCI device

2017-01-06 Thread Andrew Rybchenko

On 01/03/2017 03:20 PM, Ferruh Yigit wrote:

On 12/25/2016 10:33 PM, Thomas Monjalon wrote:

2016-12-23 16:57, Jan Blunck:

This repost addresses the review comments of Thomas Monjalon to completely
remove the ethdev helper to further decrease the coupling of the ethdev and
the eal layers. This required me to squash together all patches using the
rte_eth_dev_to_pci() helper into "Decouple from PCI device" patch. As
discussed privately I'll keep the PCI information in rte_eth_dev_info
untouched.

Applied with some trivial fixes, thanks


I rebased these changes into next-net tree. And need to update some sfc
and nfp patches [1] there.

Andrew, Alejandro,

Can you please review your driver in the latest next-net tree?

Thanks,
ferruh

[1]
nfp:
net/nfp: add Rx interrupts

sfc:
net/sfc: support link status change interrupt
net/sfc: interrupts support sufficient for event queue init
net/sfc: implement driver operation to init device on attach
net/sfc: libefx-based PMD stub sufficient to build and init


Ferruh,

thanks that you care about it. I've reviewed and tested it. It looks and 
works fine for me.


Andrew.


[dpdk-dev] [PATCH v3 1/6] ethdev: fix port data mismatched in multiple process model

2017-01-06 Thread Yuanhan Liu
Assume we have two virtio ports, 00:03.0 and 00:04.0. The first one is
managed by the kernel driver, while the later one is managed by DPDK.

Now we start the primary process. 00:03.0 will be skipped by DPDK virtio
PMD driver (since it's being used by the kernel). 00:04.0 would be
successfully initiated by DPDK virtio PMD (if nothing abnormal happens).
After that, we would get a port id 0, and all the related info needed
by virtio (virtio_hw) is stored at rte_eth_dev_data[0].

Then we start the secondary process. As usual, 00:03.0 will be firstly
probed. It firstly tries to get a local eth_dev structure for it (by
rte_eth_dev_allocate):

port_id = rte_eth_dev_find_free_port();
...

eth_dev = &rte_eth_devices[port_id];
eth_dev->data = &rte_eth_dev_data[port_id];
...

return eth_dev;

Since it's a first PCI device, port_id will be 0. eth_dev->data would
then point to rte_eth_dev_data[0]. And here things start going wrong,
as rte_eth_dev_data[0] actually stores the virtio_hw for 00:04.0.

That said, in the secondary process, DPDK will continue to drive PCI
device 00.03.0 (despite the fact it's been managed by kernel), with
the info from PCI device 00:04.0. Which is wrong.

The fix is to attach the port already registered by the primary process:
iterate the rte_eth_dev_data[], and get the port id who's PCI ID matches
the current PCI device.

This would let us maintain same port ID for the same PCI device, keeping
the chance of referencing to wrong data minimal.

Fixes: af75078fece3 ("first public release")

Cc: sta...@dpdk.org
Cc: Thomas Monjalon 
Cc: Bruce Richardson 
Cc: Ferruh Yigit 
Signed-off-by: Yuanhan Liu 
---

v3: - do not move rte_eth_dev_data_alloc to pci_probe
- rename eth_dev_attach to eth_dev_attach_secondary
- introduce eth_dev_init() for common eth_dev struct initiation
- move comment block inside the "if" block
---
 lib/librte_ether/rte_ethdev.c | 77 ++-
 1 file changed, 68 insertions(+), 9 deletions(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index fde8112..c3e65f1 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -189,6 +189,21 @@ struct rte_eth_dev *
return RTE_MAX_ETHPORTS;
 }
 
+static void
+eth_dev_init(struct rte_eth_dev *eth_dev, uint8_t port_id, const char *name)
+{
+   eth_dev->data = &rte_eth_dev_data[port_id];
+   eth_dev->attached = DEV_ATTACHED;
+   eth_dev_last_created_port = port_id;
+   nb_ports++;
+
+   if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+   snprintf(eth_dev->data->name, sizeof(eth_dev->data->name),
+"%s", name);
+   eth_dev->data->port_id = port_id;
+   }
+}
+
 struct rte_eth_dev *
 rte_eth_dev_allocate(const char *name)
 {
@@ -211,12 +226,41 @@ struct rte_eth_dev *
}
 
eth_dev = &rte_eth_devices[port_id];
-   eth_dev->data = &rte_eth_dev_data[port_id];
-   snprintf(eth_dev->data->name, sizeof(eth_dev->data->name), "%s", name);
-   eth_dev->data->port_id = port_id;
-   eth_dev->attached = DEV_ATTACHED;
-   eth_dev_last_created_port = port_id;
-   nb_ports++;
+   eth_dev_init(eth_dev, port_id, name);
+
+   return eth_dev;
+}
+
+/*
+ * Attach to a port already registered by the primary process, which
+ * makes sure that the same device would have the same port id both
+ * in the primary and secondary process.
+ */
+static struct rte_eth_dev *
+eth_dev_attach_secondary(const char *name)
+{
+   uint8_t i;
+   struct rte_eth_dev *eth_dev;
+
+   if (rte_eth_dev_data == NULL)
+   rte_eth_dev_data_alloc();
+
+   for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+   if (strcmp(rte_eth_dev_data[i].name, name) == 0)
+   break;
+   }
+   if (i == RTE_MAX_ETHPORTS) {
+   RTE_PMD_DEBUG_TRACE(
+   "device %s is not driven by the primary process\n",
+   name);
+   return NULL;
+   }
+
+   RTE_ASSERT(eth_dev->data->port_id == i);
+
+   eth_dev = &rte_eth_devices[i];
+   eth_dev_init(eth_dev, i, NULL);
+
return eth_dev;
 }
 
@@ -246,9 +290,24 @@ struct rte_eth_dev *
rte_eal_pci_device_name(&pci_dev->addr, ethdev_name,
sizeof(ethdev_name));
 
-   eth_dev = rte_eth_dev_allocate(ethdev_name);
-   if (eth_dev == NULL)
-   return -ENOMEM;
+   if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+   eth_dev = rte_eth_dev_allocate(ethdev_name);
+   if (eth_dev == NULL)
+   return -ENOMEM;
+   } else {
+   eth_dev = eth_dev_attach_secondary(ethdev_name);
+   if (eth_dev == NULL) {
+   /*
+* if we failed to attach a device, it means
+* the device is skipped, due t

[dpdk-dev] [PATCH v3 0/6] net/virtio: fix several multiple process issues

2017-01-06 Thread Yuanhan Liu
v3: - fixed several comments from Thomas regarding to eth_dev
- updated the release note and nic features matrix

v2: - fixed virtio 1.0 multiple process support
- fixed the case when few virtio net devices are managed by DPDK
  while few others are handled by Linux kernel.

This patch series fixes few crash issues regarding to multiple process
model. In my limited fuzzy test, now it works for both virtio 0.95 and
1.0, as well as for the case some virtio-net devices are managed by
kernel device while some others are managed by DPDK.

---
Maintaining the multiple process support is not an easy task -- you
have to be very mindful while coding -- what kind of stuff should
and should not be in shared memory. Otherwise, it's very likely the
multiple process model will be broken.

A typical example is the ops pointer, a pointer to a set of function
pointers.  Normally, it's a pointer stored in a read-only data section
of the application:

static const struct virtio_pci_ops legacy_ops = {
...,
}

The pointer, of course, may vary in different process space. If,
however, we store the pointer into shared memory, we could only
have one value for it.  Setting it from process A and accessing
it from process B would likely lead to an illegal memory access.
As a result, crash happens.

The fix is to keep those addresses locally, in a new struct,
virtio_hw_internal. By that, each process maintains it's own
version of the pointer (from its own process space). Thus,
everything would work as expected.


---
Yuanhan Liu (6):
  ethdev: fix port data mismatched in multiple process model
  net/virtio: fix wrong Rx/Tx method for secondary process
  net/virtio: store PCI operators pointer locally
  net/virtio: store IO port info locally
  net/virtio: fix multiple process support
  net/virtio: remove dead structure field

 doc/guides/nics/features/virtio.ini |  1 +
 doc/guides/rel_notes/release_17_02.rst  |  5 ++
 drivers/net/virtio/virtio_ethdev.c  | 69 +---
 drivers/net/virtio/virtio_pci.c | 81 +
 drivers/net/virtio/virtio_pci.h | 25 --
 drivers/net/virtio/virtio_user_ethdev.c |  5 +-
 drivers/net/virtio/virtqueue.h  |  2 +-
 lib/librte_ether/rte_ethdev.c   | 77 +++
 8 files changed, 204 insertions(+), 61 deletions(-)

-- 
1.9.0



[dpdk-dev] [PATCH v3 2/6] net/virtio: fix wrong Rx/Tx method for secondary process

2017-01-06 Thread Yuanhan Liu
If the primary enables the vector Rx/Tx path, the current code would
let the secondary always choose the non vector Rx/Tx path. This results
to a Rx/Tx method mismatch between primary and secondary process. Werid
errors then may happen, something like:

PMD: virtio_xmit_pkts() tx: virtqueue_enqueue error: -14

Fix it by choosing the correct Rx/Tx callbacks for the secondary process.
That is, use vector path if it's given.

Fixes: 8d8393fb1861 ("virtio: pick simple Rx/Tx")

Cc: sta...@dpdk.org
Signed-off-by: Yuanhan Liu 
---

v2: fix a checkpatch warning: use {} consistently
---
 drivers/net/virtio/virtio_ethdev.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 079fd6c..ef37ad1 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1304,7 +1304,12 @@ static int virtio_dev_xstats_get_names(struct 
rte_eth_dev *dev,
eth_dev->tx_pkt_burst = &virtio_xmit_pkts;
 
if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
-   rx_func_get(eth_dev);
+   if (hw->use_simple_rxtx) {
+   eth_dev->tx_pkt_burst = virtio_xmit_pkts_simple;
+   eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
+   } else {
+   rx_func_get(eth_dev);
+   }
return 0;
}
 
-- 
1.9.0



[dpdk-dev] [PATCH v3 4/6] net/virtio: store IO port info locally

2017-01-06 Thread Yuanhan Liu
Like vtpci_ops, the rte_pci_ioport has to store in local memory. This
is basically for the rte_pci_device field is allocated from process
local memory, but not from shared memory.

Signed-off-by: Yuanhan Liu 
---
 drivers/net/virtio/virtio_pci.c | 49 ++---
 drivers/net/virtio/virtio_pci.h |  3 ++-
 2 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index b1f2e18..d1e9c05 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -92,17 +92,17 @@
while (length > 0) {
if (length >= 4) {
size = 4;
-   rte_eal_pci_ioport_read(&hw->io, dst, size,
+   rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size,
VIRTIO_PCI_CONFIG(hw) + offset);
*(uint32_t *)dst = rte_be_to_cpu_32(*(uint32_t *)dst);
} else if (length >= 2) {
size = 2;
-   rte_eal_pci_ioport_read(&hw->io, dst, size,
+   rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size,
VIRTIO_PCI_CONFIG(hw) + offset);
*(uint16_t *)dst = rte_be_to_cpu_16(*(uint16_t *)dst);
} else {
size = 1;
-   rte_eal_pci_ioport_read(&hw->io, dst, size,
+   rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size,
VIRTIO_PCI_CONFIG(hw) + offset);
}
 
@@ -111,7 +111,7 @@
length -= size;
}
 #else
-   rte_eal_pci_ioport_read(&hw->io, dst, length,
+   rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, length,
VIRTIO_PCI_CONFIG(hw) + offset);
 #endif
 }
@@ -131,16 +131,16 @@
if (length >= 4) {
size = 4;
tmp.u32 = rte_cpu_to_be_32(*(const uint32_t *)src);
-   rte_eal_pci_ioport_write(&hw->io, &tmp.u32, size,
+   rte_eal_pci_ioport_write(VTPCI_IO(hw), &tmp.u32, size,
VIRTIO_PCI_CONFIG(hw) + offset);
} else if (length >= 2) {
size = 2;
tmp.u16 = rte_cpu_to_be_16(*(const uint16_t *)src);
-   rte_eal_pci_ioport_write(&hw->io, &tmp.u16, size,
+   rte_eal_pci_ioport_write(VTPCI_IO(hw), &tmp.u16, size,
VIRTIO_PCI_CONFIG(hw) + offset);
} else {
size = 1;
-   rte_eal_pci_ioport_write(&hw->io, src, size,
+   rte_eal_pci_ioport_write(VTPCI_IO(hw), src, size,
VIRTIO_PCI_CONFIG(hw) + offset);
}
 
@@ -149,7 +149,7 @@
length -= size;
}
 #else
-   rte_eal_pci_ioport_write(&hw->io, src, length,
+   rte_eal_pci_ioport_write(VTPCI_IO(hw), src, length,
 VIRTIO_PCI_CONFIG(hw) + offset);
 #endif
 }
@@ -159,7 +159,7 @@
 {
uint32_t dst;
 
-   rte_eal_pci_ioport_read(&hw->io, &dst, 4, VIRTIO_PCI_HOST_FEATURES);
+   rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 4, 
VIRTIO_PCI_HOST_FEATURES);
return dst;
 }
 
@@ -171,7 +171,7 @@
"only 32 bit features are allowed for legacy virtio!");
return;
}
-   rte_eal_pci_ioport_write(&hw->io, &features, 4,
+   rte_eal_pci_ioport_write(VTPCI_IO(hw), &features, 4,
 VIRTIO_PCI_GUEST_FEATURES);
 }
 
@@ -180,14 +180,14 @@
 {
uint8_t dst;
 
-   rte_eal_pci_ioport_read(&hw->io, &dst, 1, VIRTIO_PCI_STATUS);
+   rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_STATUS);
return dst;
 }
 
 static void
 legacy_set_status(struct virtio_hw *hw, uint8_t status)
 {
-   rte_eal_pci_ioport_write(&hw->io, &status, 1, VIRTIO_PCI_STATUS);
+   rte_eal_pci_ioport_write(VTPCI_IO(hw), &status, 1, VIRTIO_PCI_STATUS);
 }
 
 static void
@@ -201,7 +201,7 @@
 {
uint8_t dst;
 
-   rte_eal_pci_ioport_read(&hw->io, &dst, 1, VIRTIO_PCI_ISR);
+   rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_ISR);
return dst;
 }
 
@@ -211,8 +211,10 @@
 {
uint16_t dst;
 
-   rte_eal_pci_ioport_write(&hw->io, &vec, 2, VIRTIO_MSI_CONFIG_VECTOR);
-   rte_eal_pci_ioport_read(&hw->io, &dst, 2, VIRTIO_MSI_CONFIG_VECTOR);
+   rte_eal_pci_ioport_write(VTPCI_IO(hw), &vec, 2,
+VIRTIO_MSI_CONFIG_VECTOR);
+   rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 2,
+   VIRTIO_MSI_CONFIG_VECTOR);
return dst;
 }
 
@@ -221,8 +223,9 @@
 {
uint16_t dst;
 
-   rte_eal_pci_ioport_write(&hw->io, &queue_id, 2, VIRTIO_PCI_QUEUE_SEL);
-   rte

[dpdk-dev] [PATCH v3 3/6] net/virtio: store PCI operators pointer locally

2017-01-06 Thread Yuanhan Liu
We used to store the vtpci_ops at virtio_hw structure. The struct,
however, is stored in shared memory. That means only one value is
allowed. For the multiple process model, however, the address of
vtpci_ops should be different among different processes.

Take virtio PMD as example, the vtpci_ops is set by the primary
process, based on its own process space. If we access that address
from the secondary process, that would be an illegal memory access,
A crash then might happen.

To make the multiple process model work, we need store the vtpci_ops
in local memory but not in a shared memory. This is what the patch
does: a local virtio_hw_internal array of size RTE_MAX_ETHPORTS is
allocated. This new structure is used to store all these kind of
info in a non-shared memory. Current, we have:

- vtpci_ops

- rte_pci_ioport

- virtio pci mapped memory, such as common_cfg.

The later two will be done in coming patches. Later patches would also
set them correctly for secondary process, so that the multiple process
model could work.

Signed-off-by: Yuanhan Liu 
---
 drivers/net/virtio/virtio_ethdev.c  |  9 ++---
 drivers/net/virtio/virtio_pci.c | 26 +-
 drivers/net/virtio/virtio_pci.h | 17 -
 drivers/net/virtio/virtio_user_ethdev.c |  3 ++-
 drivers/net/virtio/virtqueue.h  |  2 +-
 5 files changed, 38 insertions(+), 19 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index ef37ad1..5567aa2 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -152,6 +152,8 @@ struct rte_virtio_xstats_name_off {
 #define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \
sizeof(rte_virtio_txq_stat_strings[0]))
 
+struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];
+
 static int
 virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
int *dlen, int pkt_num)
@@ -360,7 +362,7 @@ struct rte_virtio_xstats_name_off {
 * Read the virtqueue size from the Queue Size field
 * Always power of 2 and if 0 virtqueue does not exist
 */
-   vq_size = hw->vtpci_ops->get_queue_num(hw, vtpci_queue_idx);
+   vq_size = VTPCI_OPS(hw)->get_queue_num(hw, vtpci_queue_idx);
PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size);
if (vq_size == 0) {
PMD_INIT_LOG(ERR, "virtqueue does not exist");
@@ -519,7 +521,7 @@ struct rte_virtio_xstats_name_off {
}
}
 
-   if (hw->vtpci_ops->setup_queue(hw, vq) < 0) {
+   if (VTPCI_OPS(hw)->setup_queue(hw, vq) < 0) {
PMD_INIT_LOG(ERR, "setup_queue failed");
return -EINVAL;
}
@@ -1114,7 +1116,7 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev 
*dev,
req_features);
 
/* Read device(host) feature bits */
-   host_features = hw->vtpci_ops->get_features(hw);
+   host_features = VTPCI_OPS(hw)->get_features(hw);
PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64,
host_features);
 
@@ -1322,6 +1324,7 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev 
*dev,
return -ENOMEM;
}
 
+   hw->port_id = eth_dev->data->port_id;
pci_dev = eth_dev->pci_dev;
 
if (pci_dev) {
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index 9b47165..b1f2e18 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -537,14 +537,14 @@
 vtpci_read_dev_config(struct virtio_hw *hw, size_t offset,
  void *dst, int length)
 {
-   hw->vtpci_ops->read_dev_cfg(hw, offset, dst, length);
+   VTPCI_OPS(hw)->read_dev_cfg(hw, offset, dst, length);
 }
 
 void
 vtpci_write_dev_config(struct virtio_hw *hw, size_t offset,
   const void *src, int length)
 {
-   hw->vtpci_ops->write_dev_cfg(hw, offset, src, length);
+   VTPCI_OPS(hw)->write_dev_cfg(hw, offset, src, length);
 }
 
 uint64_t
@@ -557,7 +557,7 @@
 * host all support.
 */
features = host_features & hw->guest_features;
-   hw->vtpci_ops->set_features(hw, features);
+   VTPCI_OPS(hw)->set_features(hw, features);
 
return features;
 }
@@ -565,9 +565,9 @@
 void
 vtpci_reset(struct virtio_hw *hw)
 {
-   hw->vtpci_ops->set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
+   VTPCI_OPS(hw)->set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
/* flush status write */
-   hw->vtpci_ops->get_status(hw);
+   VTPCI_OPS(hw)->get_status(hw);
 }
 
 void
@@ -580,21 +580,21 @@
 vtpci_set_status(struct virtio_hw *hw, uint8_t status)
 {
if (status != VIRTIO_CONFIG_STATUS_RESET)
-   status |= hw->vtpci_ops->get_status(hw);
+   status |= VTPCI_OPS(hw)->get_status(hw);
 
-   hw->vtpci_ops->set_status(hw, status);
+   VTPCI_OPS(hw)->set_statu

[dpdk-dev] [PATCH v3 5/6] net/virtio: fix multiple process support

2017-01-06 Thread Yuanhan Liu
The introduce of virtio 1.0 support brings yet another set of ops, badly,
it's not handled correctly, that it breaks the multiple process support.

The issue is the data/function pointer may vary from different processes,
and the old used to do one time set (for primary process only). That
said, the function pointer the secondary process saw is actually from the
primary process space. Accessing it could likely result to a crash.

Kudos to the last patches, we now be able to maintain those info that may
vary among different process locally, meaning every process could have its
own copy for each of them, with the correct value set. And this is what
this patch does:

- remap the PCI (IO port for legacy device and memory map for modern
  device)

- set vtpci_ops correctly

After that, multiple process would work like a charm. (At least, it
passed my fuzzy test)

Fixes: b8f04520ad71 ("virtio: use PCI ioport API")
Fixes: d5bbeefca826 ("virtio: introduce PCI implementation structure")
Fixes: 6ba1f63b5ab0 ("virtio: support specification 1.0")

Cc: sta...@dpdk.org
Cc: Juho Snellman 
Cc: Yaron Illouz 
Reported-by: Juho Snellman 
Reported-by: Yaron Illouz 
Signed-off-by: Yuanhan Liu 
---

v3: - update release note and nic matrix

v2: - fixed PCI remap, so that virtio 1.0 also works
---
 doc/guides/nics/features/virtio.ini |  1 +
 doc/guides/rel_notes/release_17_02.rst  |  5 
 drivers/net/virtio/virtio_ethdev.c  | 53 +++--
 drivers/net/virtio/virtio_pci.c |  4 +--
 drivers/net/virtio/virtio_pci.h |  4 +++
 drivers/net/virtio/virtio_user_ethdev.c |  2 +-
 6 files changed, 64 insertions(+), 5 deletions(-)

diff --git a/doc/guides/nics/features/virtio.ini 
b/doc/guides/nics/features/virtio.ini
index 41830c1..f5de291 100644
--- a/doc/guides/nics/features/virtio.ini
+++ b/doc/guides/nics/features/virtio.ini
@@ -22,3 +22,4 @@ ARMv8= Y
 x86-32   = Y
 x86-64   = Y
 Usage doc= Y
+Multiprocess aware   = Y
diff --git a/doc/guides/rel_notes/release_17_02.rst 
b/doc/guides/rel_notes/release_17_02.rst
index 3b65038..a4260de 100644
--- a/doc/guides/rel_notes/release_17_02.rst
+++ b/doc/guides/rel_notes/release_17_02.rst
@@ -54,6 +54,11 @@ Resolved Issues
Also, make sure to start the actual text at the margin.
=
 
+   * **fixed virtio multiple process support.**
+
+ Fixed few regressions introduced in recent releases that break the virtio
+ multiple process support.
+
 
 EAL
 ~~~
diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 5567aa2..19d4348 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1289,6 +1289,49 @@ static int virtio_dev_xstats_get_names(struct 
rte_eth_dev *dev,
 }
 
 /*
+ * Remap the PCI device again (IO port map for legacy device and
+ * memory map for modern device), so that the secondary process
+ * could have the PCI initiated correctly.
+ */
+static int
+virtio_remap_pci(struct rte_pci_device *pci_dev, struct virtio_hw *hw)
+{
+   if (hw->modern) {
+   /*
+* We don't have to re-parse the PCI config space, since
+* rte_eal_pci_map_device() makes sure the mapped address
+* in secondary process would equal to the one mapped in
+* the primary process: error will be returned if that
+* requirement is not met.
+*
+* That said, we could simply reuse all cap pointers
+* (such as dev_cfg, common_cfg, etc.) parsed from the
+* primary process, which is stored in shared memory.
+*/
+   if (rte_eal_pci_map_device(pci_dev)) {
+   PMD_INIT_LOG(DEBUG, "failed to map pci device!");
+   return -1;
+   }
+   } else {
+   if (rte_eal_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0)
+   return -1;
+   }
+
+   return 0;
+}
+
+static void
+virtio_set_vtpci_ops(struct rte_pci_device *pci_dev, struct virtio_hw *hw)
+{
+   if (pci_dev == NULL)
+   VTPCI_OPS(hw) = &virtio_user_ops;
+   else if (hw->modern)
+   VTPCI_OPS(hw) = &modern_ops;
+   else
+   VTPCI_OPS(hw) = &legacy_ops;
+}
+
+/*
  * This function is based on probe() function in virtio_pci.c
  * It returns 0 on success.
  */
@@ -1296,7 +1339,7 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev 
*dev,
 eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 {
struct virtio_hw *hw = eth_dev->data->dev_private;
-   struct rte_pci_device *pci_dev;
+   struct rte_pci_device *pci_dev = eth_dev->pci_dev;
uint32_t dev_flags = RTE_ETH_DEV_DETACHABLE;
int ret;
 
@@ -1306,6 +1349,13 @@ static int virtio_dev_xstats_get_names(struct 
rte_eth_dev *dev,
eth_dev->tx_pkt_

[dpdk-dev] [PATCH v3 6/6] net/virtio: remove dead structure field

2017-01-06 Thread Yuanhan Liu
Actually, virtio_hw->dev is not used since the beginning when it's
introduced. Remove it.

Signed-off-by: Yuanhan Liu 
---
 drivers/net/virtio/virtio_pci.c | 2 --
 drivers/net/virtio/virtio_pci.h | 1 -
 2 files changed, 3 deletions(-)

diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index f5754e5..fbdb5b7 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -730,8 +730,6 @@
 vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw,
   uint32_t *dev_flags)
 {
-   hw->dev = dev;
-
/*
 * Try if we can succeed reading virtio pci caps, which exists
 * only on modern pci device. If failed, we fallback to legacy
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index 511a1c8..4235bef 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -258,7 +258,6 @@ struct virtio_hw {
uint32_tnotify_off_multiplier;
uint8_t *isr;
uint16_t*notify_base;
-   struct rte_pci_device *dev;
struct virtio_pci_common_cfg *common_cfg;
struct virtio_net_config *dev_cfg;
void*virtio_user_dev;
-- 
1.9.0



Re: [dpdk-dev] [PATCH v2 1/3] crypto/aesni_gcm: fix J0 padding bytes for GCM

2017-01-06 Thread De Lara Guarch, Pablo


> -Original Message-
> From: Azarewicz, PiotrX T
> Sent: Monday, January 02, 2017 8:51 AM
> To: Azarewicz, PiotrX T; Kusztal, ArkadiuszX; dev@dpdk.org
> Cc: Trahe, Fiona; De Lara Guarch, Pablo; Griffin, John; Jain, Deepak K;
> Doherty, Declan; Kusztal, ArkadiuszX
> Subject: RE: [dpdk-dev] [PATCH v2 1/3] crypto/aesni_gcm: fix J0 padding
> bytes for GCM
> 
> Hi Arek,
> 
> > > Subject: [dpdk-dev] [PATCH v2 1/3] crypto/aesni_gcm: fix J0 padding
> > > bytes for GCM
> > >
> > > This commit fixes pre-counter block (J0) padding by clearing four most
> > > significant bytes before setting initial counter value.
> > >
> > > Fixes: b2bb3597470c ("crypto/aesni_gcm: move pre-counter block to
> > > driver")
> > >
> > > Signed-off-by: Arek Kusztal 

This should be CC'd to the stable subtree. I will do it for you this time.

Thanks,
Pablo


Re: [dpdk-dev] [PATCH v2 1/3] crypto/aesni_gcm: fix J0 padding bytes for GCM

2017-01-06 Thread De Lara Guarch, Pablo
CC'ing stable mailing list.

> -Original Message-
> From: Kusztal, ArkadiuszX
> Sent: Friday, December 23, 2016 8:25 AM
> To: dev@dpdk.org
> Cc: Trahe, Fiona; De Lara Guarch, Pablo; Griffin, John; Jain, Deepak K;
> Doherty, Declan; Kusztal, ArkadiuszX
> Subject: [PATCH v2 1/3] crypto/aesni_gcm: fix J0 padding bytes for GCM
> 
> This commit fixes pre-counter block (J0) padding by clearing
> four most significant bytes before setting initial counter value.
> 
> Fixes: b2bb3597470c ("crypto/aesni_gcm: move pre-counter block to
> driver")
> 
> Signed-off-by: Arek Kusztal 
> ---
>  drivers/crypto/aesni_gcm/aesni_gcm_pmd.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c
> b/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c
> index dba5e15..af3d60f 100644
> --- a/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c
> +++ b/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c
> @@ -40,6 +40,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
> 
>  #include "aesni_gcm_pmd_private.h"
> 
> @@ -241,7 +242,8 @@ process_gcm_crypto_op(struct aesni_gcm_qp *qp,
> struct rte_crypto_sym_op *op,
>* to set BE LSB to 1, driver expects that 16B is allocated
>*/
>   if (op->cipher.iv.length == 12) {
> - op->cipher.iv.data[15] = 1;
> + uint32_t *iv_padd = (uint32_t *)&op->cipher.iv.data[12];
> + *iv_padd = rte_bswap32(1);
>   }
> 
>   if (op->auth.aad.length != 12 && op->auth.aad.length != 8 &&
> --
> 2.1.0



Re: [dpdk-dev] [PATCH v5 01/12] eal/bus: introduce bus abstraction

2017-01-06 Thread Shreyansh Jain

On Wednesday 04 January 2017 03:22 AM, Thomas Monjalon wrote:

2016-12-26 18:53, Shreyansh Jain:

+DPDK_17.02 {
+   global:
+
+   rte_bus_list;
+   rte_eal_bus_add_device;
+   rte_eal_bus_add_driver;
+   rte_eal_bus_get;
+   rte_eal_bus_dump;
+   rte_eal_bus_register;
+   rte_eal_bus_insert_device;
+   rte_eal_bus_remove_device;
+   rte_eal_bus_remove_driver;
+   rte_eal_bus_unregister;


I think the prefix can be just rte_bus_ instead of rte_eal_bus_.


Ok. I will change these.
I thought as these symbols are part of librte_eal/*, naming should 
reflect that.





+/** Double linked list of buses */
+TAILQ_HEAD(rte_bus_list, rte_bus);
+
+/* Global Bus list */
+extern struct rte_bus_list rte_bus_list;


Why the bus list is public?


I will revisit this. When I made it public, intention was that some
element external to EAL (e.g. drivers/bus) might require to iterate
over this list. But, as of now I don't think any such user/case exists
(except test*).




+/**
+ * A structure describing a generic bus.
+ */
+struct rte_bus {
+   TAILQ_ENTRY(rte_bus) next;   /**< Next bus object in linked list */
+   struct rte_driver_list driver_list;
+/**< List of all drivers on bus */
+   struct rte_device_list device_list;
+/**< List of all devices on bus */
+   const char *name;/**< Name of the bus */
+};


I am not convinced we should link a generic bus to drivers and devices.
What do you think of having rte_pci_bus being a rte_bus and linking
with rte_pci_device and rte_pci_driver lists?


This is different from what I had in mind.
You are saying:

 Class: rte_bus
  `-> No object instantiated for this class
 Class: rte_pci_bus inheriting rte_bus
  `-> object instantiated for this class.

Here, rte_bus is being treated as an abstract class which is only 
inherited and rte_pci_bus is the base class which is instantiated.


And I was thinking:

 Class: rte_bus
  `-> Object: pci_bus (defined in */eal/eal_pci.c)

Here, rte_bus is that base class which is instantiated.

I agree that what you are suggesting is inline with current model:
 rte_device -> abstract class (no one instantiates it)
  `-> rte_pci_device -> Base class which inherits rte_device and
is instantiated.

When I choose not to create rte_pci_bus, it was because I didn't want 
another indirection in form of rte_bus->rte_pci_bus->object.
There were no 'non-generic' bus functions which were only applicable for 
rte_pci_bus. Eventually, rte_pci_bus ended up being a direct inheritance 
of rte_bus.




I'm thinking to something like that:

struct rte_bus {
TAILQ_ENTRY(rte_bus) next;
const char *name;
rte_bus_scan_t scan;
rte_bus_match_t match;
};
struct rte_pci_bus {
struct rte_bus bus;
struct rte_pci_driver_list pci_drivers;
struct rte_pci_device_list pci_devices;
};


if we go by rte_bus->rte_pci_bus->(instance of rte_pci_bus), above is 
fine. Though, I am in favor of rte_bus->(instance of rte_bus for PCI) 
because I don't see any 'non-generic' information in rte_pci_bus which 
can't be put in rte_bus.





+/** Helper for Bus registration. The constructor has higher priority than
+ * PMD constructors
+ */
+#define RTE_REGISTER_BUS(nm, bus) \
+static void __attribute__((constructor(101), used)) businitfn_ ##nm(void) \
+{\
+   (bus).name = RTE_STR(nm);\
+   rte_eal_bus_register(&bus); \
+}


By removing the lists from rte_bus as suggested above, do you still need
a priority for this constructor?


I think yes.
Even if we have rte_pci_bus as a class, object of rte_bus should be part 
of Bus list *before* registration of a driver (because, driver 
registration searches for bus by name).


(This is assuming that no global PCI/VDEV/XXX bus object is created 
which is directly used within all PCI specific bus operations).


There was another suggestion on list which was to check for existence of 
bus _within_ each driver registration and create/instantiate an object 
in case no bus is registered. I didn't like the approach so I didn't use 
it. From David [1], and me [2]


[1] http://dpdk.org/ml/archives/dev/2016-December/051689.html
[2] http://dpdk.org/ml/archives/dev/2016-December/051698.html





 struct rte_device {
TAILQ_ENTRY(rte_device) next; /**< Next device */
+   struct rte_bus *bus;  /**< Device connected to this bus */
const struct rte_driver *driver;/**< Associated driver */
int numa_node;/**< NUMA node connection */
struct rte_devargs *devargs;  /**< Device user arguments */
@@ -148,6 +149,7 @@ void rte_eal_device_remove(struct rte_device *dev);
  */
 struct rte_driver {
TAILQ_ENTRY(rte_driver) next;  /**< Next in list. */
+   struct rte_bus *bus;   /**< Bus serviced by this driver */
const char *name;   /**< Drive

Re: [dpdk-dev] [PATCH v2 3/3] crypto/qat: fix iv size in PMD capabilities

2017-01-06 Thread De Lara Guarch, Pablo
CC'ing stable mailing list.

> -Original Message-
> From: Kusztal, ArkadiuszX
> Sent: Friday, December 23, 2016 8:25 AM
> To: dev@dpdk.org
> Cc: Trahe, Fiona; De Lara Guarch, Pablo; Griffin, John; Jain, Deepak K;
> Doherty, Declan; Kusztal, ArkadiuszX
> Subject: [PATCH v2 3/3] crypto/qat: fix iv size in PMD capabilities
> 
> This patch sets iv size in qat PMD to 12 bytes to be
> conformant with nist SP800-38D.
> 
> Fixes: 26c2e4ad5ad4 ("cryptodev: add capabilities discovery")
> 
> Signed-off-by: Arek Kusztal 
> ---
>  drivers/crypto/qat/qat_crypto.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/crypto/qat/qat_crypto.c
> b/drivers/crypto/qat/qat_crypto.c
> index fa78c60..0b714ad 100644
> --- a/drivers/crypto/qat/qat_crypto.c
> +++ b/drivers/crypto/qat/qat_crypto.c
> @@ -303,8 +303,8 @@ static const struct rte_cryptodev_capabilities
> qat_pmd_capabilities[] = {
>   .increment = 8
>   },
>   .iv_size = {
> - .min = 16,
> - .max = 16,
> + .min = 12,
> + .max = 12,
>   .increment = 0
>   }
>   }, }
> --
> 2.1.0



Re: [dpdk-dev] [PATCH v2 2/3] crypto/aesni_gcm: fix iv size in PMD capabilities

2017-01-06 Thread De Lara Guarch, Pablo
CC'ing stable mailing list.

> -Original Message-
> From: Kusztal, ArkadiuszX
> Sent: Friday, December 23, 2016 8:25 AM
> To: dev@dpdk.org
> Cc: Trahe, Fiona; De Lara Guarch, Pablo; Griffin, John; Jain, Deepak K;
> Doherty, Declan; Kusztal, ArkadiuszX
> Subject: [PATCH v2 2/3] crypto/aesni_gcm: fix iv size in PMD capabilities
> 
> This patch sets iv size in aesni gcm PMD to 12 bytes to be
> conformant with nist SP800-38D.
> 
> Fixes: eec136f3c54f ("aesni_gcm: add driver for AES-GCM crypto
> operations")
> 
> Signed-off-by: Arek Kusztal 
> ---
>  drivers/crypto/aesni_gcm/aesni_gcm_pmd_ops.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/crypto/aesni_gcm/aesni_gcm_pmd_ops.c
> b/drivers/crypto/aesni_gcm/aesni_gcm_pmd_ops.c
> index e824d4b..c51f82a 100644
> --- a/drivers/crypto/aesni_gcm/aesni_gcm_pmd_ops.c
> +++ b/drivers/crypto/aesni_gcm/aesni_gcm_pmd_ops.c
> @@ -77,8 +77,8 @@ static const struct rte_cryptodev_capabilities
> aesni_gcm_pmd_capabilities[] = {
>   .increment = 0
>   },
>   .iv_size = {
> - .min = 16,
> - .max = 16,
> + .min = 12,
> + .max = 12,
>   .increment = 0
>   }
>   }, }
> --
> 2.1.0



Re: [dpdk-dev] [PATCH v5 04/12] eal: integrate bus scan and probe with EAL

2017-01-06 Thread Shreyansh Jain

On Wednesday 04 January 2017 03:16 AM, Thomas Monjalon wrote:

2016-12-26 18:53, Shreyansh Jain:

--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -844,6 +845,9 @@ rte_eal_init(int argc, char **argv)
if (rte_eal_intr_init() < 0)
rte_panic("Cannot init interrupt-handling thread\n");

+   if (rte_eal_bus_scan())
+   rte_panic("Cannot scan the buses for devices\n");


Yes, definitely. Just one scan functions which scan registered bus.


@@ -884,6 +888,9 @@ rte_eal_init(int argc, char **argv)
if (rte_eal_pci_probe())
rte_panic("Cannot probe PCI\n");

+   if (rte_eal_bus_probe())
+   rte_panic("Cannot probe devices\n");
+
if (rte_eal_dev_init() < 0)
rte_panic("Cannot init pmd devices\n");


What is the benefit of initializing (probe) a device outside of the scan?
Currently, it is done in two steps, so you are keeping the same behaviour.


Yes, only for compatibility to existing model of two-step process.
Ideally, only a single step is enough (init->probe).

During the discussion in [1] also this point was raised - at that time
for VDEV and applicability to PCI.

[1] http://dpdk.org/ml/archives/dev/2016-December/051306.html

If you want, I can merge these two. I postponed it because 1) it is an
independent change and should really impact bus and 2) I was not sure
of dependency of init *before* pthread_create for all workers.



I imagine a model where the scan function decide to initialize the
device and can require some help from a callback to make this decision.
So the whitelist/blacklist policy can be implemented with callbacks at
the scan level and possibly the responsibility of the application.
Note that the callback model would be a change for a next release.



Agree. But, that is not really part of Bus patches - isn't it? Or, you
want to change that with this series?




Re: [dpdk-dev] [PATCH v2 0/3] Fix iv sizes in crypto drivers capabilities

2017-01-06 Thread De Lara Guarch, Pablo


> -Original Message-
> From: Kusztal, ArkadiuszX
> Sent: Friday, December 23, 2016 8:25 AM
> To: dev@dpdk.org
> Cc: Trahe, Fiona; De Lara Guarch, Pablo; Griffin, John; Jain, Deepak K;
> Doherty, Declan; Kusztal, ArkadiuszX
> Subject: [PATCH v2 0/3] Fix iv sizes in crypto drivers capabilities
> 
> This patchset fixes iv (initialization vector) size values in qat
> and aesni gcm pmds to be conformant with nist SP800-38D.
> 
> v2:
> - added missing signed-off-by line
> 
> Arek Kusztal (3):
>   crypto/aesni_gcm: fix J0 padding bytes for GCM
>   crypto/aesni_gcm: fix iv size in PMD capabilities
>   crypto/qat: fix iv size in PMD capabilities
> 
>  drivers/crypto/aesni_gcm/aesni_gcm_pmd.c | 4 +++-
>  drivers/crypto/aesni_gcm/aesni_gcm_pmd_ops.c | 4 ++--
>  drivers/crypto/qat/qat_crypto.c  | 4 ++--
>  3 files changed, 7 insertions(+), 5 deletions(-)
> 
> --
> 2.1.0

Applied to dpdk-next-crypto.
Thanks,

Pablo


Re: [dpdk-dev] [PATCH] pmdinfogen: Resolve coverity scan forward null issue

2017-01-06 Thread Thomas Monjalon
2017-01-05 14:22, Neil Horman:
> From: Neil Horman 
> 
> Coverity issue 139593 reports a forward null dereference from a for loop
> that works with a variable previously tested for null that had no error
> handling or condition to prevent it.  Pretty obvious fix below.
> 
> Signed-off-by: Neil Horman 

Coverity issue: 139593
Fixes: 98b0fdb0ffc6 ("pmdinfogen: add buildtools and pmdinfogen utility")

Applied, thanks


Re: [dpdk-dev] [PATCH v7 14/27] net/i40e: set VF VLAN insertion from PF

2017-01-06 Thread Iremonger, Bernard
Hi  Jinqjing, Wenzhuo,

> -Original Message-
> From: Lu, Wenzhuo
> Sent: Friday, January 6, 2017 8:20 AM
> To: Wu, Jingjing ; dev@dpdk.org
> Cc: Iremonger, Bernard 
> Subject: RE: [dpdk-dev] [PATCH v7 14/27] net/i40e: set VF VLAN insertion
> from PF
> 
> Hi Jingjing, Bernard,
> 
> 
> > -Original Message-
> > From: Wu, Jingjing
> > Sent: Friday, January 6, 2017 8:33 AM
> > To: Lu, Wenzhuo; dev@dpdk.org
> > Cc: Iremonger, Bernard
> > Subject: RE: [dpdk-dev] [PATCH v7 14/27] net/i40e: set VF VLAN
> > insertion from PF
> >
> >
> >
> > > -Original Message-
> > > From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Wenzhuo Lu
> > > Sent: Tuesday, January 3, 2017 2:55 PM
> > > To: dev@dpdk.org
> > > Cc: Iremonger, Bernard 
> > > Subject: [dpdk-dev] [PATCH v7 14/27] net/i40e: set VF VLAN insertion
> > > from PF
> > >
> > > From: Bernard Iremonger 
> > >
> > > Support inserting VF VLAN id from PF.
> > > User can call the API on PF to insert a VLAN id to a specific VF.
> > >
> > > Signed-off-by: Bernard Iremonger 
> > > ---
> > >  drivers/net/i40e/i40e_ethdev.c| 56
> > > +++
> > >  drivers/net/i40e/rte_pmd_i40e.h   | 19 +++
> > >  drivers/net/i40e/rte_pmd_i40e_version.map |  1 +
> > >  3 files changed, 76 insertions(+)
> > >
> > > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > > b/drivers/net/i40e/i40e_ethdev.c index 7ab1c93..31c387d 100644
> > > --- a/drivers/net/i40e/i40e_ethdev.c
> > > +++ b/drivers/net/i40e/i40e_ethdev.c
> > > @@ -10266,3 +10266,59 @@ static void
> > > i40e_set_default_mac_addr(struct rte_eth_dev *dev,
> > >   else
> > >   return -EINVAL;
> > >  }
> > > +
> > > +int rte_pmd_i40e_set_vf_vlan_insert(uint8_t port, uint16_t vf_id,
> > > + uint16_t vlan_id)
> > > +{
> > > + struct rte_eth_dev *dev;
> > > + struct rte_eth_dev_info dev_info;
> > > + struct i40e_pf *pf;
> > > + struct i40e_hw *hw;
> > > + struct i40e_vsi *vsi;
> > > + struct i40e_vsi_context ctxt;
> > > + int ret;
> > > +
> > > + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
> > > +
> > > + dev = &rte_eth_devices[port];
> > > + rte_eth_dev_info_get(port, &dev_info);
> >
> > It looks dev_info is not used in this function.
> I'll delete it.

It was intended to use dev_info have a check on max_vfs.

if (vf_id >= dev_info.max_vfs)
  return -EINVAL;

Should this check be added instead of deleting dev_info ?
 
> >
> > > + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
> > > + hw = I40E_PF_TO_HW(pf);
> > > +
> > > + /**
> > > +  * return -ENODEV if SRIOV not enabled, VF number not configured
> > > +  * or no queue assigned.
> > > +  */
> > > + if (!hw->func_caps.sr_iov_1_1 || pf->vf_num == 0 ||
> > > + pf->vf_nb_qps == 0)
> > > + return -ENODEV;
> > > +
> > > + if (vf_id >= pf->vf_num || !pf->vfs)
> > > + return -EINVAL;
> > > +
> > > + if (vlan_id > ETHER_MAX_VLAN_ID)
> > > + return -EINVAL;
> > > +
> > > + vsi = pf->vfs[vf_id].vsi;
> > > + if (!vsi)
> > > + return -EINVAL;
> > > +
> > > + vsi->info.valid_sections =
> > > cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
> > > + vsi->info.pvid = vlan_id;
> > > + if (vlan_id > 0)
> > > + vsi->info.port_vlan_flags |=
> > I40E_AQ_VSI_PVLAN_INSERT_PVID;
> > > + else
> > > + vsi->info.port_vlan_flags &=
> > > ~I40E_AQ_VSI_PVLAN_INSERT_PVID;
> > So, Pvid is used here for insert. Does it has any relationship with
> > vlan anti- spoof patch?
> > If so, it's better to consider how to deal with that.
> It's vlan insertion not filtering. So I think not related.
> 
> >
> > Thanks
> > Jingjing

Regards,

Bernard.



Re: [dpdk-dev] [PATCH v7 17/27] net/i40e: set VF VLAN filter from PF

2017-01-06 Thread Iremonger, Bernard
Hi Jinqjing,

> -Original Message-
> From: Wu, Jingjing
> Sent: Friday, January 6, 2017 12:37 AM
> To: Lu, Wenzhuo ; dev@dpdk.org
> Cc: Iremonger, Bernard 
> Subject: RE: [dpdk-dev] [PATCH v7 17/27] net/i40e: set VF VLAN filter from
> PF
> 
> 
> 
> > -Original Message-
> > From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Wenzhuo Lu
> > Sent: Tuesday, January 3, 2017 2:55 PM
> > To: dev@dpdk.org
> > Cc: Iremonger, Bernard 
> > Subject: [dpdk-dev] [PATCH v7 17/27] net/i40e: set VF VLAN filter from
> > PF
> >
> > From: Bernard Iremonger 
> >
> > add rte_pmd_i40e_set_vf_vlan_filter API.
> > User can call the API on PF to enable/disable a set of VF's VLAN filters.
> >
> > Signed-off-by: Bernard Iremonger 
> > ---
> >  drivers/net/i40e/i40e_ethdev.c| 52
> > +++
> >  drivers/net/i40e/rte_pmd_i40e.h   | 22 +
> >  drivers/net/i40e/rte_pmd_i40e_version.map |  1 +
> >  3 files changed, 75 insertions(+)
> >
> > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > b/drivers/net/i40e/i40e_ethdev.c index 4d2fb20..47e03d6 100644
> > --- a/drivers/net/i40e/i40e_ethdev.c
> > +++ b/drivers/net/i40e/i40e_ethdev.c
> > @@ -10428,3 +10428,55 @@ int rte_pmd_i40e_set_vf_vlan_tag(uint8_t
> > port, uint16_t vf_id, uint8_t on)
> >
> > return ret;
> >  }
> > +
> > +int rte_pmd_i40e_set_vf_vlan_filter(uint8_t port, uint16_t vlan_id,
> > +   uint64_t vf_mask, uint8_t on) {
> > +   struct rte_eth_dev *dev;
> > +   struct rte_eth_dev_info dev_info;
> > +   struct i40e_pf *pf;
> > +   uint16_t pool_idx;
> > +   int ret = I40E_SUCCESS;
> > +
> > +   RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
> > +
> > +   dev = &rte_eth_devices[port];
> > +   rte_eth_dev_info_get(port, &dev_info);
> > +
> > +   if (vlan_id > ETHER_MAX_VLAN_ID)
> > +   return -EINVAL;
> > +
> > +   if (on > 1)
> > +   return -EINVAL;
> > +
> > +   pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
> > +   if ((pf->flags & I40E_FLAG_VMDQ) == 0) {
> > +   PMD_INIT_LOG(ERR, "Firmware doesn't support VMDQ");
> > +   return -ENOTSUP;
> > +   }
> > +
> > +   if (!pf->vmdq) {
> > +   PMD_INIT_LOG(INFO, "VMDQ not configured");
> > +   return -ENOTSUP;
> > +   }
> > +
> > +   for (pool_idx = 0;
> > +pool_idx < ETH_64_POOLS &&
> > +pool_idx < pf->nb_cfg_vmdq_vsi &&
> > +ret == I40E_SUCCESS;
> > +pool_idx++) {
> > +   if (vf_mask & ((uint64_t)(1ULL << pool_idx))) {
> > +   if (on)
> > +   ret = i40e_vsi_add_vlan(pf-
> >vmdq[pool_idx].vsi,
> > +   vlan_id);
> > +   else
> > +   ret = i40e_vsi_delete_vlan(
> > +   pf->vmdq[pool_idx].vsi, vlan_id);
> > +   }
> > +   }
> 
> The head is saying "set VF VLAN filter", why do you deal with VMDQ VSIs?

This API is modelled on the rte_pmd_ixgbe_set_vf_vlan_filter(uint8_t port, 
uint16_t vlan_id,   uint64_t vf_mask, uint8_t on)  which seems to use VMDq. 
More investigation is needed here.

Regards,

Bernard.



Re: [dpdk-dev] [PATCH v5 05/12] eal: add probe and remove support for rte_driver

2017-01-06 Thread Shreyansh Jain

On Wednesday 04 January 2017 03:35 AM, Thomas Monjalon wrote:

2016-12-26 18:53, Shreyansh Jain:

--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -152,6 +162,8 @@ struct rte_driver {
struct rte_bus *bus;   /**< Bus serviced by this driver */
const char *name;   /**< Driver name. */
const char *alias;  /**< Driver alias. */
+   driver_probe_t *probe; /**< Probe the device */
+   driver_remove_t *remove;   /**< Remove/hotplugging the device */
 };


If I understand well, this probe function does neither scan nor match.
So it could be named init.


Current model is:

After scanning for devices and populating bus->device_list,
Bus probe does:
 `-> bus->match()
 `-> rte_driver->probe() for matched driver

For PCI drivers, '.probe = rte_eal_pci_probe'.

For example, igb_ethdev.c:

--->8---
static struct eth_driver rte_igb_pmd = {
.pci_drv = {
.driver = {
.probe = rte_eal_pci_probe,
.remove = rte_eal_pci_remove,
},
...
--->8---



I think the probe (init) and remove ops must be specific to the bus.
We can have them in rte_bus, and as an example, the pci implementation
would call the pci probe and remove ops of rte_pci_driver.


So,
---
After scanning for devices (bus->scan()):
Bus probe (rte_eal_bus_probe()):
 `-> bus->match()
 `-> bus->init() - a new fn rte_bus_pci_init()
 -> which calls rte_eal_pci_probe()
 -> and rte_pci_driver->probe()

and remove rte_driver probe and remove callbacks because they are now
redundant. (they were added in bus patches itself)
---

Is the above correct understanding of your statement?

Somehow I don't remember why I didn't do this in first place - it seems
to be better option than introducing a rte_driver->probe()/remove()
layer. I will change it (and think again why I rejected this idea in
first place). Thanks.



Please use rte_ prefix in public headers.



I am assuming you are referring to driver_probe_t/driver_remove_t.


Re: [dpdk-dev] [PATCH v7 00/17] net/i40e: consistent filter API

2017-01-06 Thread Ferruh Yigit
On 1/6/2017 5:27 AM, Beilei Xing wrote:
> The patch set depends on Adrien's Generic flow API(rte_flow).
> 
> The patches mainly finish following functions:
> 1) Store and restore all kinds of filters.
> 2) Parse all kinds of filters.
> 3) Add flow validate function.
> 4) Add flow create function.
> 5) Add flow destroy function.
> 6) Add flow flush function.
> 
> v7 changes:
>  Separate filter related code from eth_i40e_dev_init().
>  Change struct i40e_flow to ret_flow.
> 
> v6 changes:
>  Change functions' name to be more readable.
>  Add comments for parse_pattern functions to list supported rules.
>  Add comments for parse_action functions to list supported actions.
>  Add ETHTYPE check when parsing ethertype pattern.
> 
> v5 changes:
>  Change some local variable name.
>  Add removing i40e_flow_list during device unint.
>  Fix compile error when gcc compile option isn't '-O0'.
> 
> v4 changes:
>  Change I40E_TCI_MASK with 0x to align with testpmd.
>  Modidy the stats show when restoring filters.
> 
> v3 changes:
>  Set the related cause pointer to a non-NULL value when error happens.
>  Change return value when error happens.
>  Modify filter_del parameter with key.
>  Malloc filter after checking when delete a filter.
>  Delete meaningless initialization.
>  Add return value when there's error.
>  Change global variable definition.
>  Modify some function declaration.
> 
> v2 changes:
>  Add i40e_flow.c, all flow ops are implemented in the file.
>  Change the whole implementation of all parse flow functions.
>  Update error info for all flow ops.
>  Add flow_list to store flows created.
> 
> Beilei Xing (17):
>   net/i40e: store ethertype filter
>   net/i40e: store tunnel filter
>   net/i40e: store flow director filter
>   net/i40e: restore ethertype filter
>   net/i40e: restore tunnel filter
>   net/i40e: restore flow director filter
>   net/i40e: add flow validate function
>   net/i40e: parse flow director filter
>   net/i40e: parse tunnel filter
>   net/i40e: add flow create function
>   net/i40e: add flow destroy function
>   net/i40e: destroy ethertype filter
>   net/i40e: destroy tunnel filter
>   net/i40e: destroy flow directory filter
>   net/i40e: add flow flush function
>   net/i40e: flush ethertype filters
>   net/i40e: flush tunnel filters
> 
>  drivers/net/i40e/Makefile  |2 +
>  drivers/net/i40e/i40e_ethdev.c |  594 +++--
>  drivers/net/i40e/i40e_ethdev.h |  178 
>  drivers/net/i40e/i40e_fdir.c   |  140 ++-
>  drivers/net/i40e/i40e_flow.c   | 1849 
> 
>  5 files changed, 2692 insertions(+), 71 deletions(-)
>  create mode 100644 drivers/net/i40e/i40e_flow.c
> 
> Acked-by: Jingjing Wu 
> 

Series applied to dpdk-next-net/master, thanks.

(extern moved into header file)




Re: [dpdk-dev] [PATCH v5 04/12] eal: integrate bus scan and probe with EAL

2017-01-06 Thread Shreyansh Jain

On Friday 06 January 2017 04:08 PM, Shreyansh Jain wrote:

On Wednesday 04 January 2017 03:16 AM, Thomas Monjalon wrote:

2016-12-26 18:53, Shreyansh Jain:

--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -844,6 +845,9 @@ rte_eal_init(int argc, char **argv)
if (rte_eal_intr_init() < 0)
rte_panic("Cannot init interrupt-handling thread\n");

+   if (rte_eal_bus_scan())
+   rte_panic("Cannot scan the buses for devices\n");


Yes, definitely. Just one scan functions which scan registered bus.


@@ -884,6 +888,9 @@ rte_eal_init(int argc, char **argv)
if (rte_eal_pci_probe())
rte_panic("Cannot probe PCI\n");

+   if (rte_eal_bus_probe())
+   rte_panic("Cannot probe devices\n");
+
if (rte_eal_dev_init() < 0)
rte_panic("Cannot init pmd devices\n");


What is the benefit of initializing (probe) a device outside of the scan?
Currently, it is done in two steps, so you are keeping the same
behaviour.


Yes, only for compatibility to existing model of two-step process.
Ideally, only a single step is enough (init->probe).

During the discussion in [1] also this point was raised - at that time
for VDEV and applicability to PCI.

[1] http://dpdk.org/ml/archives/dev/2016-December/051306.html

If you want, I can merge these two. I postponed it because 1) it is an
independent change and should really impact bus and 2) I was not sure
of dependency of init *before* pthread_create for all workers.


I forgot _not_ in above - rephrasing:

If you want, I can merge these two. I postponed it because 1) it is an
independent change and should _not_ really impact bus and 2) I was not 
sure of dependency of init *before* pthread_create for all workers.






I imagine a model where the scan function decide to initialize the
device and can require some help from a callback to make this decision.
So the whitelist/blacklist policy can be implemented with callbacks at
the scan level and possibly the responsibility of the application.
Note that the callback model would be a change for a next release.



Agree. But, that is not really part of Bus patches - isn't it? Or, you
want to change that with this series?







Re: [dpdk-dev] [PATCH v7 26/27] net/i40e: fix segmentation fault in close

2017-01-06 Thread Iremonger, Bernard
Hi Jingjing,

> -Original Message-
> From: Wu, Jingjing
> Sent: Friday, January 6, 2017 1:29 AM
> To: Lu, Wenzhuo ; dev@dpdk.org
> Cc: Iremonger, Bernard ; sta...@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH v7 26/27] net/i40e: fix segmentation fault in
> close
> 
> 
> 
> > -Original Message-
> > From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Wenzhuo Lu
> > Sent: Tuesday, January 3, 2017 2:55 PM
> > To: dev@dpdk.org
> > Cc: Iremonger, Bernard ;
> sta...@dpdk.org
> > Subject: [dpdk-dev] [PATCH v7 26/27] net/i40e: fix segmentation fault
> > in close
> >
> > From: Bernard Iremonger 
> >
> > The vsi's have already been released, so the second call to
> > i40e_vsi_release results in a segmentation fault.
> > The second call to i40e_vsi_release has been removed.
> Where is the first call to release vmdq vsi?

All of the VSI's are released in the call to i40e_vsi_release(pf->main_vsi) at 
line 1895.
This function is recursive and release all the VSI's.

There is still a VSI address in pf->vmdq[i].vsi  but calling 
i40e_vsi_release(pf->vmdq[i].vsi);
Results in a segmentation fault.

> 
> Thanks
> Jingjing
> >
> > Fixes: 3cb446b4aeb2 ("i40e: free vmdq vsi when closing")
> >
> > CC: sta...@dpdk.org
> >
> > Signed-off-by: Bernard Iremonger 
> > ---
> >  drivers/net/i40e/i40e_ethdev.c | 4 +++-
> >  1 file changed, 3 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > b/drivers/net/i40e/i40e_ethdev.c index be45cfa..0b7c366 100644
> > --- a/drivers/net/i40e/i40e_ethdev.c
> > +++ b/drivers/net/i40e/i40e_ethdev.c
> > @@ -1882,7 +1882,6 @@ static inline void i40e_GLQF_reg_init(struct
> > i40e_hw
> > *hw)
> > i40e_vsi_release(pf->main_vsi);
> >
> > for (i = 0; i < pf->nb_cfg_vmdq_vsi; i++) {
> > -   i40e_vsi_release(pf->vmdq[i].vsi);
> > pf->vmdq[i].vsi = NULL;
> > }
> >
> > @@ -4137,6 +4136,9 @@ enum i40e_status_code
> > if (!vsi)
> > return I40E_SUCCESS;
> >
> > +   if (!vsi->adapter)
> > +   return I40E_ERR_BAD_PTR;
> > +
> > user_param = vsi->user_param;
> >
> > pf = I40E_VSI_TO_PF(vsi);
> > --
> > 1.9.3
Regards,

Bernard.


Re: [dpdk-dev] [PATCH v5 08/12] eal/pci: generalize args of PCI scan/match towards RTE device/driver

2017-01-06 Thread Shreyansh Jain

On Wednesday 04 January 2017 03:43 AM, Thomas Monjalon wrote:

2016-12-26 18:54, Shreyansh Jain:

PCI scan and match now work on rte_device/rte_driver rather than PCI
specific objects. These functions can now be plugged to the generic
bus callbacks for scanning and matching devices/drivers.


These sentences looks weird :)
PCI functions must work with PCI objects, it's simpler.
However I agree to register PCI scan, match, init and remove functions
with the generic rte_bus. Then the rte_device object is casted into
rte_pci_device inside these functions.



Ok. I will rephrase.


[dpdk-dev] [PATCH] examples/ip_pipeline: check vlan and mpls params

2017-01-06 Thread Jyoti, Anand B
The attached patch checks VLAN IDs and MPLS label for max value and also checks 
the max number of supported MPLS labels to avoid array overflow in the CLI 
command line parameters.

Regards,
Anand B Jyoti




Re: [dpdk-dev] [PATCH v7 10/27] net/i40e: set VF MAC from PF support

2017-01-06 Thread Ferruh Yigit
On 1/6/2017 12:32 AM, Wu, Jingjing wrote:
> 
> 
>> -Original Message-
>> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Wenzhuo Lu
>> Sent: Tuesday, January 3, 2017 2:55 PM
>> To: dev@dpdk.org
>> Cc: Yigit, Ferruh 
>> Subject: [dpdk-dev] [PATCH v7 10/27] net/i40e: set VF MAC from PF support
>>
>> From: Ferruh Yigit 
>>
>> Support setting VF MAC address from PF.
>> User can call the API on PF to set a specific VF's MAC address.
>>
>> This will remove all existing MAC filters.
>>
>> Signed-off-by: Ferruh Yigit 
>> ---
>>  drivers/net/i40e/i40e_ethdev.c| 42
>> +++
>>  drivers/net/i40e/rte_pmd_i40e.h   | 19 ++
>>  drivers/net/i40e/rte_pmd_i40e_version.map |  1 +
>>  3 files changed, 62 insertions(+)
>>
>> diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
>> index 9d050c8..758b574 100644
>> --- a/drivers/net/i40e/i40e_ethdev.c
>> +++ b/drivers/net/i40e/i40e_ethdev.c
>> @@ -10198,3 +10198,45 @@ static void i40e_set_default_mac_addr(struct
>> rte_eth_dev *dev,
>>
>>  return ret;
>>  }
>> +
>> +int
>> +rte_pmd_i40e_set_vf_mac_addr(uint8_t port, uint16_t vf_id,
>> + struct ether_addr *mac_addr)
>> +{
>> +struct rte_eth_dev_info dev_info;
>> +struct i40e_mac_filter *f;
>> +struct rte_eth_dev *dev;
>> +struct i40e_pf_vf *vf;
>> +struct i40e_vsi *vsi;
>> +struct i40e_pf *pf;
>> +void *temp;
>> +
>> +if (i40e_validate_mac_addr((u8 *)mac_addr) != I40E_SUCCESS)
>> +return -EINVAL;
>> +
>> +RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
>> +
>> +dev = &rte_eth_devices[port];
>> +rte_eth_dev_info_get(port, &dev_info);
>> +
>> +if (vf_id >= dev_info.max_vfs)
>> +return -EINVAL;
>> +
>> +pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
>> +
>> +if (vf_id > pf->vf_num - 1 || !pf->vfs)
>> +return -EINVAL;
>> +
>> +vf = &pf->vfs[vf_id];
>> +vsi = vf->vsi;
>> +if (!vsi)
>> +return -EINVAL;
>> +
>> +ether_addr_copy(mac_addr, &vf->mac_addr);
> 
> Only store the mac address in vf struct?
> Are you supposing the API is called before VF is initialized?

Yes.
PF should set the VF MAC before VF initialized.

If PF sets the VF MAC after VF already initialized, new MAC address
won't be effective until next VF initialization.

> If so, it's better to comment it.

Good idea, I will.

> 
> Thanks
> Jingjing
> 



Re: [dpdk-dev] [PATCH v2 2/3] lib/librte_cryptodev: functions for new performance test application

2017-01-06 Thread De Lara Guarch, Pablo


> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Slawomir
> Mrozowicz
> Sent: Thursday, January 05, 2017 4:50 PM
> To: dev@dpdk.org
> Cc: Mrozowicz, SlawomirX; Doherty, Declan; Kerlin, Marcin
> Subject: [dpdk-dev] [PATCH v2 2/3] lib/librte_cryptodev: functions for new
> performance test application
> 
> This patch adds helper functions for new performance application.
> Application can be used to measute throughput and latency of
> cryptography operation performed by crypto device.
> 
> Signed-off-by: Declan Doherty 
> Signed-off-by: Slawomir Mrozowicz 
> Signed-off-by: Marcin Kerlin 

Hi Slawomir,

You should change the title of this patch to something like: "cryptodev: add 
functions for...".
Also, maybe it would be better to say "add functions to retrieve device info",
since you are not saying much with the current title.

I have seen also that this is based on the SGL code, so this patch didn't apply 
cleanly.
Next time, specify it in the email the dependencies of the patch.

Thanks,
Pablo





Re: [dpdk-dev] [PATCH v3 1/6] ethdev: fix port data mismatched in multiple process model

2017-01-06 Thread Thomas Monjalon
2017-01-06 18:16, Yuanhan Liu:
> +static void
> +eth_dev_init(struct rte_eth_dev *eth_dev, uint8_t port_id, const char *name)
> +{
> + eth_dev->data = &rte_eth_dev_data[port_id];
> + eth_dev->attached = DEV_ATTACHED;
> + eth_dev_last_created_port = port_id;
> + nb_ports++;
> +
> + if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> + snprintf(eth_dev->data->name, sizeof(eth_dev->data->name),
> +  "%s", name);
> + eth_dev->data->port_id = port_id;
> + }

Why not keeping eth_dev->data filling in rte_eth_dev_allocate?

> +}
> +
>  struct rte_eth_dev *
>  rte_eth_dev_allocate(const char *name)
>  {
> @@ -211,12 +226,41 @@ struct rte_eth_dev *
>   }
>  
>   eth_dev = &rte_eth_devices[port_id];

Why not moving this line in eth_dev_init?

> - eth_dev->data = &rte_eth_dev_data[port_id];
> - snprintf(eth_dev->data->name, sizeof(eth_dev->data->name), "%s", name);
> - eth_dev->data->port_id = port_id;
> - eth_dev->attached = DEV_ATTACHED;
> - eth_dev_last_created_port = port_id;
> - nb_ports++;
> + eth_dev_init(eth_dev, port_id, name);
> +
> + return eth_dev;
> +}

[...]
> +/*
> + * Attach to a port already registered by the primary process, which
> + * makes sure that the same device would have the same port id both
> + * in the primary and secondary process.
> + */
> +static struct rte_eth_dev *
> +eth_dev_attach_secondary(const char *name)

OK, good description

[...]
> - eth_dev = rte_eth_dev_allocate(ethdev_name);
> - if (eth_dev == NULL)
> - return -ENOMEM;
> + if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> + eth_dev = rte_eth_dev_allocate(ethdev_name);
> + if (eth_dev == NULL)
> + return -ENOMEM;

You could merge here the rest of primary init below.

> + } else {
> + eth_dev = eth_dev_attach_secondary(ethdev_name);
> + if (eth_dev == NULL) {
> + /*
> +  * if we failed to attach a device, it means
> +  * the device is skipped, due to some errors.
> +  * Take virtio-net device as example, it could
> +  * due to the device is managed by virtio-net
> +  * kernel driver.  For such case, we return a
> +  * positive value, to let EAL skip it as well.
> +  */

I'm not sure we need an example here.
Is the virtio case special?

nit: "it could due" looks to be a typo

> + return 1;
> + }
> + }
>  
>   if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
>   eth_dev->data->dev_private = rte_zmalloc("ethdev private 
> structure",




Re: [dpdk-dev] [PATCH v2 1/5] net/qede: fix scatter-gather issue

2017-01-06 Thread Ferruh Yigit
On 1/6/2017 8:16 AM, Rasesh Mody wrote:
> From: Harish Patil 
> 
>  - Make qede_process_sg_pkts() inline and add unlikely check
>  - Fix mbuf segment chaining logic in qede_process_sg_pkts()
>  - Change qede_encode_sg_bd() to return total segments required
>  - Fix first TX buffer descriptor's length
>  - Replace repeatitive code using a macro
> 
> Fixes: bec0228816c0 ("net/qede: support scatter gather")
> 
> Signed-off-by: Harish Patil 

Series applied to dpdk-next-net/master, thanks.



Re: [dpdk-dev] [PATCH v3] net/mlx5: add support for ConnectX-5 NICs

2017-01-06 Thread Ferruh Yigit
On 1/6/2017 8:50 AM, Adrien Mazarguil wrote:
> On Thu, Jan 05, 2017 at 04:49:31PM -0800, Yongseok Koh wrote:
>> Add PCI device ID for ConnectX-5 and enable multi-packet send for PF and VF
>> along with changing documentation and release note.
>>
>> Signed-off-by: Yongseok Koh 
> 
> Acked-by: Adrien Mazarguil 

Applied to dpdk-next-net/master, thanks.



Re: [dpdk-dev] [PATCH v5 04/12] eal: integrate bus scan and probe with EAL

2017-01-06 Thread Thomas Monjalon
2017-01-06 17:30, Shreyansh Jain:
> On Friday 06 January 2017 04:08 PM, Shreyansh Jain wrote:
> > On Wednesday 04 January 2017 03:16 AM, Thomas Monjalon wrote:
> >> 2016-12-26 18:53, Shreyansh Jain:
> >>> --- a/lib/librte_eal/linuxapp/eal/eal.c
> >>> +++ b/lib/librte_eal/linuxapp/eal/eal.c
> >>> @@ -844,6 +845,9 @@ rte_eal_init(int argc, char **argv)
> >>> if (rte_eal_intr_init() < 0)
> >>> rte_panic("Cannot init interrupt-handling thread\n");
> >>>
> >>> +   if (rte_eal_bus_scan())
> >>> +   rte_panic("Cannot scan the buses for devices\n");
> >>
> >> Yes, definitely. Just one scan functions which scan registered bus.
> >>
> >>> @@ -884,6 +888,9 @@ rte_eal_init(int argc, char **argv)
> >>> if (rte_eal_pci_probe())
> >>> rte_panic("Cannot probe PCI\n");
> >>>
> >>> +   if (rte_eal_bus_probe())
> >>> +   rte_panic("Cannot probe devices\n");
> >>> +
> >>> if (rte_eal_dev_init() < 0)
> >>> rte_panic("Cannot init pmd devices\n");
> >>
> >> What is the benefit of initializing (probe) a device outside of the scan?
> >> Currently, it is done in two steps, so you are keeping the same
> >> behaviour.
> >
> > Yes, only for compatibility to existing model of two-step process.
> > Ideally, only a single step is enough (init->probe).
> >
> > During the discussion in [1] also this point was raised - at that time
> > for VDEV and applicability to PCI.
> >
> > [1] http://dpdk.org/ml/archives/dev/2016-December/051306.html
> >
> > If you want, I can merge these two. I postponed it because 1) it is an
> > independent change and should really impact bus and 2) I was not sure
> > of dependency of init *before* pthread_create for all workers.
> 
> I forgot _not_ in above - rephrasing:
> 
> If you want, I can merge these two. I postponed it because 1) it is an
> independent change and should _not_ really impact bus and 2) I was not 
> sure of dependency of init *before* pthread_create for all workers.

I'm OK with your approach.

> >> I imagine a model where the scan function decide to initialize the
> >> device and can require some help from a callback to make this decision.
> >> So the whitelist/blacklist policy can be implemented with callbacks at
> >> the scan level and possibly the responsibility of the application.
> >> Note that the callback model would be a change for a next release.
> >
> > Agree. But, that is not really part of Bus patches - isn't it? Or, you
> > want to change that with this series?

No it is not the scope of this series.
Please could you add it in the cover letter as a next step?
Thanks


Re: [dpdk-dev] [PATCH v5 2/6] net/mlx5: support basic flow items and actions

2017-01-06 Thread Ferruh Yigit
On 1/4/2017 6:42 PM, Adrien Mazarguil wrote:
> Hi Ferruh,
> 
> On Wed, Jan 04, 2017 at 05:49:46PM +, Ferruh Yigit wrote:
>> Hi Nelio,
>>
>> A quick question.
> 
> I'll reply since it's related to the API.
> 
>> On 12/29/2016 3:15 PM, Nelio Laranjeiro wrote:
>>> Introduce initial software for rte_flow rules.
>>>
>>> VLAN, VXLAN are still not supported.
>>>
>>> Signed-off-by: Nelio Laranjeiro 
>>> Acked-by: Adrien Mazarguil 
>>
>> <...>
>>
>>> +static int
>>> +priv_flow_validate(struct priv *priv,
>>> +  const struct rte_flow_attr *attr,
>>> +  const struct rte_flow_item items[],
>>> +  const struct rte_flow_action actions[],
>>> +  struct rte_flow_error *error,
>>> +  struct mlx5_flow *flow)
>>> +{
>>> +   const struct mlx5_flow_items *cur_item = mlx5_flow_items;
>>
>> <...>
>>
>>> +   for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
>> <...>
>>> +   }
>>> +   for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
>> <...>
>>> +   }
>>
>> Is it guarantied in somewhere that items or actions terminated with
>> TYPE_END?
> 
> Yes, since it's now the only way to terminate items/actions lists [1][2].
> There used to be a "max" value in the original draft but it seemed redundant
> and proved annoying to use, and was therefore dropped.
> 
> END items/actions behave like a NUL terminator for C strings. They are
> likewise defined with value 0 for convenience.

At least it is good idea to set END values to 0, but still if user not
set it, most probably this will crash the app.

Although most probably this kind of error will be detected easily in
development phase, still it would be nice to return an error instead of
crashing when user provide wrong input.

> 
>> And these fields are direct inputs from user.
>> Is there a way to verify user provided values are with TYPE_END terminated?
> 
> No, applications must check for its presence (they normally add it
> themselves) before feeding these lists to PMDs. I think that's safe enough.
> 
> Note the testpmd flow command does not allow entering a flow rule without
> "end" tokens in both lists, there is no way around this restriction.
> 
> [1] http://dpdk.org/doc/guides/prog_guide/rte_flow.html#matching-pattern
> [2] http://dpdk.org/doc/guides/prog_guide/rte_flow.html#actions
> 



Re: [dpdk-dev] [PATCH RFC 0/2] Allow vectorized Rx with 4096 desc ring size on Intel NICs.

2017-01-06 Thread Ferruh Yigit
On 1/6/2017 2:29 AM, Wu, Jingjing wrote:
> 
> 
>> -Original Message-
>> From: Yigit, Ferruh
>> Sent: Wednesday, January 4, 2017 1:25 AM
>> To: Thomas Monjalon ; Ilya Maximets
>> 
>> Cc: dev@dpdk.org; Zhang, Helin ; Ananyev, Konstantin
>> ; Wu, Jingjing ;
>> Heetae Ahn ; Richardson, Bruce
>> ; Lu, Wenzhuo 
>> Subject: Re: [dpdk-dev] [PATCH RFC 0/2] Allow vectorized Rx with 4096 desc 
>> ring
>> size on Intel NICs.
>>
>> On 1/2/2017 3:40 PM, Thomas Monjalon wrote:
>>> 2016-12-27 08:03, Ilya Maximets:
 Hello.
 Ferruh, Thomas, is there a chance for this to be accepted to 17.02?
 Maybe I should resend this patch-set without 'RFC' tag?
>>>
>>> Yes it should be integrated in 17.02.
>>> Ferruh, any news?
>>>
>>
>> I was waiting for a review from driver maintainers
> 
> The patch looks good me, both ixgbe and i40e.

Acked-by: Jingjing Wu 

Series applied to dpdk-next-net/master, thanks.

> 
> Thanks, Ilya.
> 
> Another idea is like, now the memory zone can be freed now.
> So maybe later, we don't need to reserve a maximum size for rx ring, and can 
> change it when number of desc are changed.
> 




Re: [dpdk-dev] [PATCH v3 1/6] ethdev: fix port data mismatched in multiple process model

2017-01-06 Thread Yuanhan Liu
On Fri, Jan 06, 2017 at 02:12:48PM +0100, Thomas Monjalon wrote:
> 2017-01-06 18:16, Yuanhan Liu:
> > +static void
> > +eth_dev_init(struct rte_eth_dev *eth_dev, uint8_t port_id, const char 
> > *name)
> > +{
> > +   eth_dev->data = &rte_eth_dev_data[port_id];
> > +   eth_dev->attached = DEV_ATTACHED;
> > +   eth_dev_last_created_port = port_id;
> > +   nb_ports++;
> > +
> > +   if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> > +   snprintf(eth_dev->data->name, sizeof(eth_dev->data->name),
> > +"%s", name);
> > +   eth_dev->data->port_id = port_id;
> > +   }
> 
> Why not keeping eth_dev->data filling in rte_eth_dev_allocate?

I could do that.

> 
> > +}
> > +
> >  struct rte_eth_dev *
> >  rte_eth_dev_allocate(const char *name)
> >  {
> > @@ -211,12 +226,41 @@ struct rte_eth_dev *
> > }
> >  
> > eth_dev = &rte_eth_devices[port_id];
> 
> Why not moving this line in eth_dev_init?

Ditto.

> 
> > -   eth_dev->data = &rte_eth_dev_data[port_id];
> > -   snprintf(eth_dev->data->name, sizeof(eth_dev->data->name), "%s", name);
> > -   eth_dev->data->port_id = port_id;
> > -   eth_dev->attached = DEV_ATTACHED;
> > -   eth_dev_last_created_port = port_id;
> > -   nb_ports++;
> > +   eth_dev_init(eth_dev, port_id, name);
> > +
> > +   return eth_dev;
> > +}
> 
> [...]
> > +/*
> > + * Attach to a port already registered by the primary process, which
> > + * makes sure that the same device would have the same port id both
> > + * in the primary and secondary process.
> > + */
> > +static struct rte_eth_dev *
> > +eth_dev_attach_secondary(const char *name)
> 
> OK, good description
> 
> [...]
> > -   eth_dev = rte_eth_dev_allocate(ethdev_name);
> > -   if (eth_dev == NULL)
> > -   return -ENOMEM;
> > +   if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> > +   eth_dev = rte_eth_dev_allocate(ethdev_name);
> > +   if (eth_dev == NULL)
> > +   return -ENOMEM;
> 
> You could merge here the rest of primary init below.

Oh, right.

> 
> > +   } else {
> > +   eth_dev = eth_dev_attach_secondary(ethdev_name);
> > +   if (eth_dev == NULL) {
> > +   /*
> > +* if we failed to attach a device, it means
> > +* the device is skipped, due to some errors.
> > +* Take virtio-net device as example, it could
> > +* due to the device is managed by virtio-net
> > +* kernel driver.  For such case, we return a
> > +* positive value, to let EAL skip it as well.
> > +*/
> 
> I'm not sure we need an example here.
> Is the virtio case special?

Not quite sure, likely not. I was thinking a detailed example helps
people to understand better. I could remove it if you think it's
not necessary.

> nit: "it could due" looks to be a typo

Oops.

--yliu


Re: [dpdk-dev] [PATCH v5 01/12] eal/bus: introduce bus abstraction

2017-01-06 Thread Thomas Monjalon
2017-01-06 16:01, Shreyansh Jain:
> On Wednesday 04 January 2017 03:22 AM, Thomas Monjalon wrote:
> > 2016-12-26 18:53, Shreyansh Jain:
> >> +/**
> >> + * A structure describing a generic bus.
> >> + */
> >> +struct rte_bus {
> >> +  TAILQ_ENTRY(rte_bus) next;   /**< Next bus object in linked list */
> >> +  struct rte_driver_list driver_list;
> >> +   /**< List of all drivers on bus */
> >> +  struct rte_device_list device_list;
> >> +   /**< List of all devices on bus */
> >> +  const char *name;/**< Name of the bus */
> >> +};
> >
> > I am not convinced we should link a generic bus to drivers and devices.
> > What do you think of having rte_pci_bus being a rte_bus and linking
> > with rte_pci_device and rte_pci_driver lists?
> 
> This is different from what I had in mind.
> You are saying:
> 
>   Class: rte_bus
>`-> No object instantiated for this class
>   Class: rte_pci_bus inheriting rte_bus
>`-> object instantiated for this class.
> 
> Here, rte_bus is being treated as an abstract class which is only 
> inherited and rte_pci_bus is the base class which is instantiated.
> 
> And I was thinking:
> 
>   Class: rte_bus
>`-> Object: pci_bus (defined in */eal/eal_pci.c)
> 
> Here, rte_bus is that base class which is instantiated.
> 
> I agree that what you are suggesting is inline with current model:
>   rte_device -> abstract class (no one instantiates it)
>`-> rte_pci_device -> Base class which inherits rte_device and
>  is instantiated.

Yes

> When I choose not to create rte_pci_bus, it was because I didn't want 
> another indirection in form of rte_bus->rte_pci_bus->object.
> There were no 'non-generic' bus functions which were only applicable for 
> rte_pci_bus. Eventually, rte_pci_bus ended up being a direct inheritance 
> of rte_bus.
> 
> > I'm thinking to something like that:
> >
> > struct rte_bus {
> > TAILQ_ENTRY(rte_bus) next;
> > const char *name;
> > rte_bus_scan_t scan;
> > rte_bus_match_t match;
> > };
> > struct rte_pci_bus {
> > struct rte_bus bus;
> > struct rte_pci_driver_list pci_drivers;
> > struct rte_pci_device_list pci_devices;
> > };
> 
> if we go by rte_bus->rte_pci_bus->(instance of rte_pci_bus), above is 
> fine. Though, I am in favor of rte_bus->(instance of rte_bus for PCI) 
> because I don't see any 'non-generic' information in rte_pci_bus which 
> can't be put in rte_bus.

The lists of drivers and devices are specific to the bus.
Your proposal was to list them as generic rte_driver/rte_device and
cast them. I'm just saying we can directly declare them with the right type,
e.g. rte_pci_driver/rte_pci_device.

In the same logic, the functions probe/remove are specifics for the bus and
should be declared in rte_pci_driver instead of the generic rte_driver.


> >> +/** Helper for Bus registration. The constructor has higher priority than
> >> + * PMD constructors
> >> + */
> >> +#define RTE_REGISTER_BUS(nm, bus) \
> >> +static void __attribute__((constructor(101), used)) businitfn_ ##nm(void) 
> >> \
> >> +{\
> >> +  (bus).name = RTE_STR(nm);\
> >> +  rte_eal_bus_register(&bus); \
> >> +}
> >
> > By removing the lists from rte_bus as suggested above, do you still need
> > a priority for this constructor?
> 
> I think yes.
> Even if we have rte_pci_bus as a class, object of rte_bus should be part 
> of Bus list *before* registration of a driver (because, driver 
> registration searches for bus by name).
> 
> (This is assuming that no global PCI/VDEV/XXX bus object is created 
> which is directly used within all PCI specific bus operations).
> 
> There was another suggestion on list which was to check for existence of 
> bus _within_ each driver registration and create/instantiate an object 
> in case no bus is registered. I didn't like the approach so I didn't use 
> it. From David [1], and me [2]
> 
> [1] http://dpdk.org/ml/archives/dev/2016-December/051689.html
> [2] http://dpdk.org/ml/archives/dev/2016-December/051698.html

OK, we can keep your approach of prioritize bus registrations.
If we see an issue later, we could switch to a bus registration done
when a first driver is registered on the bus.


> >>  struct rte_device {
> >>TAILQ_ENTRY(rte_device) next; /**< Next device */
> >> +  struct rte_bus *bus;  /**< Device connected to this bus */
> >>const struct rte_driver *driver;/**< Associated driver */
> >>int numa_node;/**< NUMA node connection */
> >>struct rte_devargs *devargs;  /**< Device user arguments */
> >> @@ -148,6 +149,7 @@ void rte_eal_device_remove(struct rte_device *dev);
> >>   */
> >>  struct rte_driver {
> >>TAILQ_ENTRY(rte_driver) next;  /**< Next in list. */
> >> +  struct rte_bus *bus;   /**< Bus serviced by this driver */
> >>const char *name;   /**< Driver name. */
> >>const char *alias;  /**< Driver alias. */
> >>  

Re: [dpdk-dev] [PATCH v5 05/12] eal: add probe and remove support for rte_driver

2017-01-06 Thread Thomas Monjalon
2017-01-06 17:14, Shreyansh Jain:
> On Wednesday 04 January 2017 03:35 AM, Thomas Monjalon wrote:
> > 2016-12-26 18:53, Shreyansh Jain:
> >> --- a/lib/librte_eal/common/include/rte_dev.h
> >> +++ b/lib/librte_eal/common/include/rte_dev.h
> >> @@ -152,6 +162,8 @@ struct rte_driver {
> >>struct rte_bus *bus;   /**< Bus serviced by this driver */
> >>const char *name;   /**< Driver name. */
> >>const char *alias;  /**< Driver alias. */
> >> +  driver_probe_t *probe; /**< Probe the device */
> >> +  driver_remove_t *remove;   /**< Remove/hotplugging the device */
> >>  };
> >
> > If I understand well, this probe function does neither scan nor match.
> > So it could be named init.
> 
> Current model is:
> 
> After scanning for devices and populating bus->device_list,
> Bus probe does:
>   `-> bus->match()
>   `-> rte_driver->probe() for matched driver
> 
> For PCI drivers, '.probe = rte_eal_pci_probe'.
> 
> For example, igb_ethdev.c:
> 
> --->8---
> static struct eth_driver rte_igb_pmd = {
>  .pci_drv = {
>  .driver = {
>  .probe = rte_eal_pci_probe,
>  .remove = rte_eal_pci_remove,
>  },
> ...
> --->8---

Yes
I'm just having some doubts about the naming "probe" compared to "init".
And yes I know I was advocating to unify naming to "probe" recently :)
I would like to be sure it is not confusing for anyone.
Do you agree that "init" refers to global driver initialization and
"probe" refers to instantiating a device?

If yes, the comment could be changed from "Probe the device" to
"Check and instantiate a device".

> > I think the probe (init) and remove ops must be specific to the bus.
> > We can have them in rte_bus, and as an example, the pci implementation
> > would call the pci probe and remove ops of rte_pci_driver.

I do not understand clearly what I was saying here :/

> So,
> ---
> After scanning for devices (bus->scan()):
> Bus probe (rte_eal_bus_probe()):
>   `-> bus->match()
>   `-> bus->init() - a new fn rte_bus_pci_init()

I suggest the naming bus->probe().
It is currently implemented in rte_eal_pci_probe_one_driver().

>   -> which calls rte_eal_pci_probe()

Not needed here, this function is converted into the PCI match function.

>   -> and rte_pci_driver->probe()

Yes, bus->probe() makes some processing and calls rte_pci_driver->probe().


> and remove rte_driver probe and remove callbacks because they are now
> redundant. (they were added in bus patches itself)
> ---
> 
> Is the above correct understanding of your statement?

I think we just need to move probe/remove in rte_pci_driver.

> Somehow I don't remember why I didn't do this in first place - it seems
> to be better option than introducing a rte_driver->probe()/remove()
> layer. I will change it (and think again why I rejected this idea in
> first place). Thanks.

Thanks


Re: [dpdk-dev] [PATCH] doc: fix a link in contribution guide

2017-01-06 Thread Mcnamara, John
> -Original Message-
> From: Yongseok Koh [mailto:ys...@mellanox.com]
> Sent: Friday, January 6, 2017 1:23 AM
> To: Mcnamara, John 
> Cc: dev@dpdk.org; Yongseok Koh 
> Subject: [PATCH] doc: fix a link in contribution guide
> 
> A referenced document in the Linux Kernel has been moved to a sub-
> directory.
> 

Hi Yongseok,

Thanks for that. It is interesting to see kernel community move to RST/Sphinx.


> 
>  The DPDK development process is modelled (loosely) on the Linux Kernel
> development model so it is worth reading the  Linux kernel guide on
> submitting patches:
> -`How to Get Your Change Into the Linux Kernel 
> `_.
> +`How to Get Your Change Into the Linux Kernel
>  patches.rst>`_.


It would be better to use the Html rendered link here:

https://www.kernel.org/doc/html/latest/process/submitting-patches.html


>  The rationale for many of the DPDK guidelines is explained in greater
> detail in the kernel guidelines.
> 
> 
> @@ -192,7 +192,7 @@ Here are some guidelines for the body of a commit
> message:
>git commit --signoff # or -s
> 
>The purpose of the signoff is explained in the
> -  `Developer's Certificate of Origin
> `_
> +  `Developer's Certificate of Origin
> +  + t>`_

And here:

https://www.kernel.org/doc/html/latest/process/submitting-patches.html#developer-s-certificate-of-origin-1-1

John



Re: [dpdk-dev] [PATCH v4 5/5] ethtool: dispaly firmware version

2017-01-06 Thread Remy Horton


On 05/01/2017 01:31, Yang, Qiming wrote:
[..]

+   if (strcmp(drvinfo->driver, "net_ixgbe") == 0)


Do you need this check. I think it is not good idea to add this kind
of checks into ethtool app. Why not just print "%d.%d.%d %#X, major,
minor, patch, etrack" for all cases ?
> Qiming: because I want to keep the format same with kernel version 
ethtool.


My feeling is that if we're going to have driver-specific output, then 
the task of constructing the string should be pushed down into the PMDs. 
It would also solve the headache of trying to standardise, as Thomas has 
mentioned..


..Remy


[dpdk-dev] [PATCH] examples/ip_pipeline: check vlan and mpls params

2017-01-06 Thread Jyoti, Anand B
diff --git a/examples/ip_pipeline/pipeline/pipeline_routing.c 
b/examples/ip_pipeline/pipeline/pipeline_routing.c
index 3aadbf9..b7faf5b 100644
--- a/examples/ip_pipeline/pipeline/pipeline_routing.c
+++ b/examples/ip_pipeline/pipeline/pipeline_routing.c
@@ -494,6 +494,26 @@ app_pipeline_routing_add_route(struct app_params *app,
/* data */
if (data->port_id >= p->n_ports_out)
return -1;
+
+   /* Valid range of VLAN tags 12 bits */
+   if(data->flags & PIPELINE_ROUTING_ROUTE_QINQ)
+   if((data->l2.qinq.svlan & 0xF000) ||
+   (data->l2.qinq.cvlan & 0xF000))
+   return -1;
+
+   /* Max number of MPLS labels supported */
+   if(data->flags & PIPELINE_ROUTING_ROUTE_MPLS){
+   uint32_t i;
+
+   if(data->l2.mpls.n_labels >
+   PIPELINE_ROUTING_MPLS_LABELS_MAX)
+   return -1;
+
+   /* Max MPLS label value 20 bits */
+   for(i = 0; i < data->l2.mpls.n_labels; i++)
+   if(data->l2.mpls.labels[i] & 0xFFF0)
+   return -1;
+   }
}
break;




Re: [dpdk-dev] [PATCH 01/18] net/ixgbe: store SYN filter

2017-01-06 Thread Ferruh Yigit
On 12/22/2016 9:48 AM, Zhao1, Wei wrote:
> Hi, Yigit
> 
>> -Original Message-
>> From: Yigit, Ferruh
>> Sent: Wednesday, December 21, 2016 12:56 AM
>> To: Zhao1, Wei ; dev@dpdk.org
>> Cc: Lu, Wenzhuo 
>> Subject: Re: [dpdk-dev] [PATCH 01/18] net/ixgbe: store SYN filter
>>
>> On 12/2/2016 10:42 AM, Wei Zhao wrote:
>>> From: wei zhao1 
>>>
>>> Add support for storing SYN filter in SW.
>>
>> Do you think does it makes more clear to refer as TCP SYN filter? Or SYN 
>> filter
>> is clear enough?
>>
> 
> Ok, I will change to " TCP SYN filter " to make it more clear
> 
>>>
>>> Signed-off-by: Wenzhuo Lu 
>>> Signed-off-by: wei zhao1 
>>
>> Can you please update sign-off to your actual name?
>>
> 
> Ok, I will change to " Signed-off-by: Wei Zhao "
> 
>>> ---
>>>  drivers/net/ixgbe/ixgbe_ethdev.c | 12 ++--
>>> drivers/net/ixgbe/ixgbe_ethdev.h |  2 ++
>>>  2 files changed, 12 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c
>>> b/drivers/net/ixgbe/ixgbe_ethdev.c
>>> index edc9b22..7f10cca 100644
>>> --- a/drivers/net/ixgbe/ixgbe_ethdev.c
>>> +++ b/drivers/net/ixgbe/ixgbe_ethdev.c
>>> @@ -1287,6 +1287,8 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev)
>>> memset(filter_info->fivetuple_mask, 0,
>>>sizeof(uint32_t) * IXGBE_5TUPLE_ARRAY_SIZE);
>>>
>>> +   /* initialize SYN filter */
>>> +   filter_info->syn_info = 0;
>>
>> can it be an option to memset all filter_info? (and of course move list init
>> after memset)
>>
> 
> Maybe, change to the following code?
> 
> memset(filter_info, 0, sizeof(struct ixgbe_filter_info)); 
> TAILQ_INIT(&filter_info->fivetuple_list);
> 
> But that wiil mix /* initialize ether type filter */ and /* initialize 5tuple 
> filter list */ two process together,
> Because  struct ixgbe_filter_info store two type info of ether and 5tuple.

I see filter info consist of different filter types, but as far as I can
see they are not used before this memset, so what is the problem of
cleaning all struct?

Currently memset a sub-set of struct, and assigns zero to other field
explicitly, and rest remains undefined and unused. I am suggesting
memset whole structure and get rid of zero assignment.

> So, not to change ?
> 
> struct ixgbe_filter_info {
>   uint8_t ethertype_mask;  /* Bit mask for every used ethertype filter */
>   /* store used ethertype filters*/
>   struct ixgbe_ethertype_filter ethertype_filters[IXGBE_MAX_ETQF_FILTERS];
>   /* Bit mask for every used 5tuple filter */
>   uint32_t fivetuple_mask[IXGBE_5TUPLE_ARRAY_SIZE];
>   struct ixgbe_5tuple_filter_list fivetuple_list;
>   /* store the SYN filter info */
>   uint32_t syn_info;
> };
> 
> 
<...>


Re: [dpdk-dev] [PATCH v2 01/18] net/ixgbe: store SYN filter

2017-01-06 Thread Ferruh Yigit
On 12/30/2016 7:52 AM, Wei Zhao wrote:
> Add support for storing SYN filter in SW.

I think you have mentioned in previous review that you will update this
to "TCP SYN", I would like to remind in case it is missed.

> 
> Signed-off-by: Wenzhuo Lu 
> Signed-off-by: Wei Zhao 
> ---
> 
> v2:
> --synqf assignment location change
> ---
<...>


Re: [dpdk-dev] [PATCH v2 02/18] net/ixgbe: store flow director filter

2017-01-06 Thread Ferruh Yigit
On 12/30/2016 7:52 AM, Wei Zhao wrote:
> Add support for storing flow director filter in SW.
> 
> Signed-off-by: Wenzhuo Lu 
> Signed-off-by: Wei Zhao 
> ---
> 
> v2:
> --add a fdir initialization function in device start process
> ---
<...>
> @@ -1276,6 +1278,9 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev)
>  
>   /* initialize SYN filter */
>   filter_info->syn_info = 0;
> + /* initialize flow director filter list & hash */
> + ixgbe_fdir_filter_init(eth_dev);
> +
>   return 0;
>  }
>  
> @@ -1284,6 +1289,9 @@ eth_ixgbe_dev_uninit(struct rte_eth_dev *eth_dev)
>  {
>   struct rte_pci_device *pci_dev;
>   struct ixgbe_hw *hw;
> + struct ixgbe_hw_fdir_info *fdir_info =
> + IXGBE_DEV_PRIVATE_TO_FDIR_INFO(eth_dev->data->dev_private);
> + struct ixgbe_fdir_filter *fdir_filter;
>  
>   PMD_INIT_FUNC_TRACE();
>  
> @@ -1317,9 +1325,56 @@ eth_ixgbe_dev_uninit(struct rte_eth_dev *eth_dev)
>   rte_free(eth_dev->data->hash_mac_addrs);
>   eth_dev->data->hash_mac_addrs = NULL;
>  
> + /* remove all the fdir filters & hash */
> + if (fdir_info->hash_map)
> + rte_free(fdir_info->hash_map);
> + if (fdir_info->hash_handle)
> + rte_hash_free(fdir_info->hash_handle);
> +
> + while ((fdir_filter = TAILQ_FIRST(&fdir_info->fdir_list))) {
> + TAILQ_REMOVE(&fdir_info->fdir_list,
> +  fdir_filter,
> +  entries);
> + rte_free(fdir_filter);
> + }
> +

What do you think extracting these into a function as done in init() ?

<...>


Re: [dpdk-dev] [PATCH v2 10/18] net/ixgbe: flush all the filters

2017-01-06 Thread Ferruh Yigit
On 12/30/2016 7:53 AM, Wei Zhao wrote:
> Add support for flush all the filters in SW.
> 
> Signed-off-by: Wenzhuo Lu 
> Signed-off-by: Wei Zhao 
> 
> ---
> 
> v2:
> --change flush filter function call relationship
> ---
>  drivers/net/ixgbe/ixgbe_ethdev.c | 118 
> ++-
>  drivers/net/ixgbe/ixgbe_ethdev.h |   9 +++
>  drivers/net/ixgbe/ixgbe_fdir.c   |  24 
>  drivers/net/ixgbe/ixgbe_pf.c |   1 +
>  4 files changed, 150 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c 
> b/drivers/net/ixgbe/ixgbe_ethdev.c
> index d68de65..0de1318 100644
> --- a/drivers/net/ixgbe/ixgbe_ethdev.c
> +++ b/drivers/net/ixgbe/ixgbe_ethdev.c
> @@ -61,6 +61,8 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
>  
>  #include "ixgbe_logs.h"
>  #include "base/ixgbe_api.h"
> @@ -386,7 +388,8 @@ static int ixgbe_dev_udp_tunnel_port_del(struct 
> rte_eth_dev *dev,
>struct rte_eth_udp_tunnel *udp_tunnel);
>  static int ixgbe_filter_restore(struct rte_eth_dev *dev);
>  static void ixgbe_l2_tunnel_conf(struct rte_eth_dev *dev);
> -
> +static int ixgbe_flow_flush(struct rte_eth_dev *dev,
> + struct rte_flow_error *error);

I think it is good idea to move all rte_flow related code into a new
file, as done in i40e (i40e_flow.c)? What do you think, can it be done?

<...>



Re: [dpdk-dev] [PATCH v2 11/18] net/ixgbe: parse n-tuple filter

2017-01-06 Thread Ferruh Yigit
On 12/30/2016 7:53 AM, Wei Zhao wrote:
> Add rule validate function and check if the rule is a n-tuple rule,
> and get the n-tuple info.
> 
> Signed-off-by: Wei Zhao 
> Signed-off-by: Wenzhuo Lu 
> 
> ---
> 
> v2:add new error set function
> ---
>  drivers/net/ixgbe/ixgbe_ethdev.c | 414 
> ++-
>  1 file changed, 409 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c 
> b/drivers/net/ixgbe/ixgbe_ethdev.c
> index 0de1318..198cc4b 100644
> --- a/drivers/net/ixgbe/ixgbe_ethdev.c
> +++ b/drivers/net/ixgbe/ixgbe_ethdev.c
> @@ -388,6 +388,24 @@ static int ixgbe_dev_udp_tunnel_port_del(struct 
> rte_eth_dev *dev,
>struct rte_eth_udp_tunnel *udp_tunnel);

<...>

> +
> +/**
> + * Parse the rule to see if it is a n-tuple rule.
> + * And get the n-tuple filter info BTW.
> + */

It would be nice to comment here valid/expected pattern values
(spec/mask/last). Otherwise it is hard to decode from code also it is
good to document intention, so makes easy if there is any defect.

Also valid actions.

> +static int
> +cons_parse_ntuple_filter(const struct rte_flow_attr *attr,
> +  const struct rte_flow_item pattern[],
> +  const struct rte_flow_action actions[],
> +  struct rte_eth_ntuple_filter *filter,
> +  struct rte_flow_error *error)
> +{
> + const struct rte_flow_item *item;
> + const struct rte_flow_action *act;
> + const struct rte_flow_item_ipv4 *ipv4_spec;
> + const struct rte_flow_item_ipv4 *ipv4_mask;
> + const struct rte_flow_item_tcp *tcp_spec;
> + const struct rte_flow_item_tcp *tcp_mask;
> + const struct rte_flow_item_udp *udp_spec;
> + const struct rte_flow_item_udp *udp_mask;
> + uint32_t index;
> +
> + if (!pattern) {
> + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM_NUM,
> +NULL, "NULL pattern.");
> + return -rte_errno;
> + }
> +
> + /* parse pattern */
> + index = 0;
> +
> + /* the first not void item can be MAC or IPv4 */
> + NEXT_ITEM_OF_PATTERN(item, pattern, index);
> +
> + if (item->type != RTE_FLOW_ITEM_TYPE_ETH &&
> + item->type != RTE_FLOW_ITEM_TYPE_IPV4) {
> + rte_flow_error_set(error, EINVAL,
> + RTE_FLOW_ERROR_TYPE_ITEM,
> + item, "Not supported by ntuple filter");
> + return -rte_errno;
> + }
> + /* Skip Ethernet */
> + if (item->type == RTE_FLOW_ITEM_TYPE_ETH) {
> + /*Not supported last point for range*/
> + if (item->last) {
> + rte_flow_error_set(error, EINVAL,
> + RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> + item, "Not supported last point for range");
> + return -rte_errno;
> +
> + }
> + /* if the first item is MAC, the content should be NULL */
> + if (item->spec || item->mask) {
> + rte_flow_error_set(error, EINVAL,
> + RTE_FLOW_ERROR_TYPE_ITEM,
> + item, "Not supported by ntuple filter");
> + return -rte_errno;
> + }
> + /* check if the next not void item is IPv4 */
> + index++;
> + NEXT_ITEM_OF_PATTERN(item, pattern, index);
> + if (item->type != RTE_FLOW_ITEM_TYPE_IPV4) {
> + rte_flow_error_set(error,
> + EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
> + item, "Not supported by ntuple filter");

Wrong indentation.

> + return -rte_errno;
> + }
> + }
> +
> + /* get the IPv4 info */
> + if (!item->spec || !item->mask) {
> + rte_flow_error_set(error, EINVAL,
> + RTE_FLOW_ERROR_TYPE_ITEM,
> + item, "Invalid ntuple mask");
> + return -rte_errno;
> + }
> + /*Not supported last point for range*/
> + if (item->last) {
> + rte_flow_error_set(error, EINVAL,
> + RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> + item, "Not supported last point for range");
> + return -rte_errno;
> +
> + }
> +
> + ipv4_mask = (const struct rte_flow_item_ipv4 *)item->mask;
> + /**
> +  * Only support src & dst addresses, protocol,
> +  * others should be masked.
> +  */
> + if (ipv4_mask->hdr.version_ihl ||
> + ipv4_mask->hdr.type_of_service ||
> + ipv4_mask->hdr.total_length ||
> + ipv4_mask->hdr.packet_id ||
> + ipv4_mask->hdr.fragment_offset ||
> + ipv4_mask->hdr.time_to_live ||
> + ipv4_mask->hdr.hdr_checksum) {
> + rte_flow_error_set(error,
> + EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
> + 

Re: [dpdk-dev] [PATCH v15 0/8] add Tx preparation

2017-01-06 Thread Avi Kivity

On 01/04/2017 09:41 PM, Thomas Monjalon wrote:

2016-12-23 19:40, Tomasz Kulasek:

v15 changes:
  - marked rte_eth_tx_prepare api as experimental
  - improved doxygen comments for nb_seg_max and nb_mtu_seg_max fields
  - removed unused "uint8_t tx_prepare" declaration from testpmd

No you didn't remove this useless declaration. I did it for you.

This feature is now applied! Thanks and congratulations :)



Congrats and thanks!  This will allow us to remove some hacks from seastar.


Re: [dpdk-dev] [PATCH v2 12/18] net/ixgbe: parse ethertype filter

2017-01-06 Thread Ferruh Yigit
On 12/30/2016 7:53 AM, Wei Zhao wrote:
> check if the rule is a ethertype rule, and get the ethertype info.
> Signed-off-by: Wei Zhao 
> Signed-off-by: Wenzhuo Lu 
> 
> ---
> 
> v2:add new error set function
> ---

<...>

> +static int
> +ixgbe_parse_ethertype_filter(const struct rte_flow_attr *attr,
> +  const struct rte_flow_item pattern[],
> +  const struct rte_flow_action actions[],
> +  struct rte_eth_ethertype_filter *filter,
> +  struct rte_flow_error *error)
> +{
> + int ret;
> +
> + ret = cons_parse_ethertype_filter(attr, pattern,
> + actions, filter, error);
> +
> + if (ret)
> + return ret;
> +
> + /* Ixgbe doesn't support MAC address. */
> + if (filter->flags & RTE_ETHTYPE_FLAGS_MAC) {
> + memset(filter, 0, sizeof(struct rte_eth_ethertype_filter));

Is memset required for error cases, if so is other error cases also
require it?

> + rte_flow_error_set(error, EINVAL,
> + RTE_FLOW_ERROR_TYPE_ITEM,
> + NULL, "Not supported by ethertype filter");
> + return -rte_errno;
> + }
> +
> + if (filter->queue >= IXGBE_MAX_RX_QUEUE_NUM)
> + return -rte_errno;
> +
> + if (filter->ether_type == ETHER_TYPE_IPv4 ||
> + filter->ether_type == ETHER_TYPE_IPv6) {
> + PMD_DRV_LOG(ERR, "unsupported ether_type(0x%04x) in"
> + " ethertype filter.", filter->ether_type);

Not sure about this log here, specially it is in ERR level.
This function is returning error, and public API will return an error,
if we want to notify user with a log, should app do this as library
(here) should do this? More comment welcome?

btw, should rte_flow_error_set() used here (and below) ?

> + return -rte_errno;
> + }
> +
> + if (filter->flags & RTE_ETHTYPE_FLAGS_MAC) {

Isn't this duplicate with above check?

> + PMD_DRV_LOG(ERR, "mac compare is unsupported.");
> + return -rte_errno;
> + }
> +
> + if (filter->flags & RTE_ETHTYPE_FLAGS_DROP) {

Just to double check, isn't drop action by ether filters?

> + PMD_DRV_LOG(ERR, "drop option is unsupported.");
> + return -rte_errno;
> + }
> +
> + return 0;
> +}
> +
> +/**
<...>


Re: [dpdk-dev] [PATCH v2 13/18] net/ixgbe: parse TCP SYN filter

2017-01-06 Thread Ferruh Yigit
On 12/30/2016 7:53 AM, Wei Zhao wrote:
> check if the rule is a TCP SYN rule, and get the SYN info.
> 
> Signed-off-by: Wei Zhao 
> Signed-off-by: Wenzhuo Lu 
> 
> ---
> 
> v2:add new error set function
> ---

<...>

> +static int
> +ixgbe_parse_syn_filter(const struct rte_flow_attr *attr,
> +  const struct rte_flow_item pattern[],
> +  const struct rte_flow_action actions[],
> +  struct rte_eth_syn_filter *filter,
> +  struct rte_flow_error *error)
> +{
> + int ret;
> +
> + ret = cons_parse_syn_filter(attr, pattern,
> + actions, filter, error);
> +

> + if (ret)
> + return ret;
> +
> + return 0;

"return ret;" will do the same.
Or remove ret completely perhaps.

> +}
> +
<...>


[dpdk-dev] [PATCH] examples/ip_pipeline: check vlan and mpls params

2017-01-06 Thread Jyoti, Anand B
>From e346e359ed9c5e8261f09f93629bff56d7c10a11 Mon Sep 17 00:00:00 2001
From: "Jyoti, Anand B" 
Date: Fri, 6 Jan 2017 08:40:55 +0530
Subject: [PATCH] examples/ip_pipeline: check vlan and mpls params

This commit add to CLI command check for the following errors
1. svlan and cvlan IDs greater than 12 bits
2. mpls ID greater than 20 bits
3. max number of supported mpls labels to avoid array overflow

It prevents running CLI commands with invalid parameters.

Signed-off-by: Jyoti, Anand B 
Acked-by: Cristian Dumitrescu 
---
 examples/ip_pipeline/pipeline/pipeline_routing.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/examples/ip_pipeline/pipeline/pipeline_routing.c 
b/examples/ip_pipeline/pipeline/pipeline_routing.c
index 3aadbf9..3deaff9 100644
--- a/examples/ip_pipeline/pipeline/pipeline_routing.c
+++ b/examples/ip_pipeline/pipeline/pipeline_routing.c
@@ -494,6 +494,26 @@ app_pipeline_routing_add_route(struct app_params *app,
/* data */
if (data->port_id >= p->n_ports_out)
return -1;
+
+   /* Valid range of VLAN tags 12 bits */
+   if (data->flags & PIPELINE_ROUTING_ROUTE_QINQ)
+   if ((data->l2.qinq.svlan & 0xF000) ||
+   (data->l2.qinq.cvlan & 0xF000))
+   return -1;
+
+   /* Max number of MPLS labels supported */
+   if (data->flags & PIPELINE_ROUTING_ROUTE_MPLS) {
+   uint32_t i;
+
+   if (data->l2.mpls.n_labels >
+   PIPELINE_ROUTING_MPLS_LABELS_MAX)
+   return -1;
+
+   /* Max MPLS label value 20 bits */
+   for (i = 0; i < data->l2.mpls.n_labels; i++)
+   if (data->l2.mpls.labels[i] & 0xFFF0)
+   return -1;
+   }
}
break;
 
-- 
2.7.4




Re: [dpdk-dev] [PATCH v2 16/18] net/ixgbe: create consistent filter

2017-01-06 Thread Ferruh Yigit
On 12/30/2016 7:53 AM, Wei Zhao wrote:
> This patch adds a function to create the flow directory filter.
> 
> Signed-off-by: Wei Zhao 
> Signed-off-by: Wenzhuo Lu 
> 
> ---
> 
> v2:
> --add new error set function
> ---

<...>

>  
> +struct ixgbe_flow {
> + enum rte_filter_type filter_type;
> + void *rule;
> +};

It is possible to rename this struct to rte_flow, to prevent casting.
Although functionally there is no difference.

> +
>  /*
>   * Structure to store private data for each driver instance (for each port).
>   */
> 



[dpdk-dev] [PATCH] net/bnxt: Add support for new PCI IDs

2017-01-06 Thread Ajit Khaparde
Add suupport for PCI IDs which was removed as a part of
commit 953158857f194991 ("net/bnxt: remove support for few devices")

Signed-off-by: Ajit Khaparde 
---
 drivers/net/bnxt/bnxt_ethdev.c | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 7518b6b..b0cfed9 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -63,19 +63,34 @@ static const char bnxt_version[] =
 #define BROADCOM_DEV_ID_57302 0x16c9
 #define BROADCOM_DEV_ID_57304_PF 0x16ca
 #define BROADCOM_DEV_ID_57304_VF 0x16cb
+#define BROADCOM_DEV_ID_57417_MF 0x16cc
 #define BROADCOM_DEV_ID_NS2 0x16cd
+#define BROADCOM_DEV_ID_57311 0x16ce
+#define BROADCOM_DEV_ID_57312 0x16cf
 #define BROADCOM_DEV_ID_57402 0x16d0
 #define BROADCOM_DEV_ID_57404 0x16d1
 #define BROADCOM_DEV_ID_57406_PF 0x16d2
 #define BROADCOM_DEV_ID_57406_VF 0x16d3
 #define BROADCOM_DEV_ID_57402_MF 0x16d4
 #define BROADCOM_DEV_ID_57407_RJ45 0x16d5
+#define BROADCOM_DEV_ID_57412 0x16d6
+#define BROADCOM_DEV_ID_57414 0x16d7
+#define BROADCOM_DEV_ID_57416_RJ45 0x16d8
+#define BROADCOM_DEV_ID_57417_RJ45 0x16d9
 #define BROADCOM_DEV_ID_5741X_VF 0x16dc
+#define BROADCOM_DEV_ID_57412_MF 0x16de
+#define BROADCOM_DEV_ID_57314 0x16df
+#define BROADCOM_DEV_ID_57317_RJ45 0x16e0
 #define BROADCOM_DEV_ID_5731X_VF 0x16e1
+#define BROADCOM_DEV_ID_57417_SFP 0x16e2
+#define BROADCOM_DEV_ID_57416_SFP 0x16e3
+#define BROADCOM_DEV_ID_57317_SFP 0x16e4
 #define BROADCOM_DEV_ID_57404_MF 0x16e7
 #define BROADCOM_DEV_ID_57406_MF 0x16e8
 #define BROADCOM_DEV_ID_57407_SFP 0x16e9
 #define BROADCOM_DEV_ID_57407_MF 0x16ea
+#define BROADCOM_DEV_ID_57414_MF 0x16ec
+#define BROADCOM_DEV_ID_57416_MF 0x16ee
 
 static struct rte_pci_id bnxt_pci_id_map[] = {
{ RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57301) },
@@ -95,6 +110,21 @@ static struct rte_pci_id bnxt_pci_id_map[] = {
{ RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57407_MF) },
{ RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_5741X_VF) },
{ RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_5731X_VF) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57314) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57417_MF) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57311) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57312) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57412) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57414) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57416_RJ45) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57417_RJ45) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57412_MF) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57317_RJ45) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57417_SFP) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57416_SFP) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57317_SFP) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57414_MF) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57416_MF) },
{ .vendor_id = 0, /* sentinel */ },
 };
 
-- 
2.10.1 (Apple Git-78)



[dpdk-dev] [PATCH v2] doc: fix a link in contribution guide

2017-01-06 Thread Yongseok Koh
A referenced document in the Linux Kernel has been moved to a
sub-directory. And kernel community has moved to RST/Sphinx. The links are
replaced with HTML rendered links.

Signed-off-by: Yongseok Koh 
---
 doc/guides/contributing/patches.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/guides/contributing/patches.rst 
b/doc/guides/contributing/patches.rst
index c7006c191..193889a89 100644
--- a/doc/guides/contributing/patches.rst
+++ b/doc/guides/contributing/patches.rst
@@ -7,7 +7,7 @@ This document outlines the guidelines for submitting code to 
DPDK.
 
 The DPDK development process is modelled (loosely) on the Linux Kernel 
development model so it is worth reading the
 Linux kernel guide on submitting patches:
-`How to Get Your Change Into the Linux Kernel 
`_.
+`How to Get Your Change Into the Linux Kernel 
`_.
 The rationale for many of the DPDK guidelines is explained in greater detail 
in the kernel guidelines.
 
 
@@ -192,7 +192,7 @@ Here are some guidelines for the body of a commit message:
   git commit --signoff # or -s
 
   The purpose of the signoff is explained in the
-  `Developer's Certificate of Origin 
`_
+  `Developer's Certificate of Origin 
`_
   section of the Linux kernel guidelines.
 
   .. Note::
-- 
2.11.0



[dpdk-dev] [PATCH v2 0/5] Elastic Flow Distributor

2017-01-06 Thread Pablo de Lara
EFD is a distributor library that uses perfect hashing to determine a
target/value for a given incoming flow key. It has the following advantages:
first, because it uses perfect hashing it does not store the key itself and
hence lookup performance is not dependent on the key size. Second, the
target/value can be any arbitrary value hence the system designer and/or
operator can better optimize service rates and inter-cluster network traffic
locating.  Third, since the storage requirement is much smaller than a
hash-based flow table (i.e. better fit for CPU cache), EFD can scale to millions
of flow keys. Finally, with current optimized library implementation performance
is fully scalable with number of CPU cores.

The basic idea of EFD is when a given key is to be inserted, a family of hash
functions is searched until the correct hash function that maps the input key to
the correct value is found. However, rather than explicitly storing all keys and
their associated values, EFD stores only indices of hash functions that map keys
to values, and thereby consumes much less space than conventional  flow-based
tables. The lookup operation is very simple, similar to computational-based
scheme, given an input key the lookup operation is reduced to hashing that key
with the correct hash function.

Intuitively, finding a hash function that maps each of a large number (millions)
of input keys to the correct output value is effectively impossible, as a result
EFD, breaks the problem into smaller pieces (divide and conquer). EFD divides
the entire input key set into many small groups. Each group consists of
approximately 20-28 keys (a configurable parameter for the library), then, for
each small group, a brute force search to find a hash function that produces the
correct outputs for each key in the group.
It should be mentioned that since in the online lookup table for EFD doesn’t
store the key itself, the size of the EFD table is independent of the key size
and hence EFD lookup performance which is almost constant irrespective of the
length of the key which is a highly desirable feature especially for longer
keys.

Library code is included in the patch, plus an sample application that shows
how the library can be used.

RFC for this library was already sent:
http://dpdk.org/ml/archives/dev/2016-October/049238.html

For more information on the library, check out the following document: 
https://github.com/pablodelara/perfect_hash_flow_distributor/blob/master/EFD_description.pdf

Changes in v2:

- Added documentation for library and sample app
- Fixed checkpatch errors/warnings
- Added functional and performance tests
- Made key size variable at runtime
- Made code multi-architecture compatible at runtime


Pablo de Lara (5):
  efd: new Elastic Flow Distributor library
  app/test: add EFD functional and perf tests
  examples/flow_distributor: sample app to demonstrate EFD usage
  doc: add EFD library section in Programmers guide
  doc: add flow distributor guide

 MAINTAINERS   |9 +
 app/test/Makefile |3 +
 app/test/test_efd.c   |  494 
 app/test/test_efd_perf.c  |  407 ++
 config/common_base|5 +
 doc/api/doxy-api-index.md |3 +-
 doc/api/doxy-api.conf |1 +
 doc/api/examples.dox  |4 +
 doc/guides/prog_guide/efd_lib.rst |  413 ++
 doc/guides/prog_guide/img/efd_i1.svg  |   78 ++
 doc/guides/prog_guide/img/efd_i10.svg | 1272 +++
 doc/guides/prog_guide/img/efd_i11.svg |  413 ++
 doc/guides/prog_guide/img/efd_i12.svg |  590 +
 doc/guides/prog_guide/img/efd_i13.svg | 1407 +
 doc/guides/prog_guide/img/efd_i2.svg  |  209 +++
 doc/guides/prog_guide/img/efd_i3.svg  |  420 ++
 doc/guides/prog_guide/img/efd_i4.svg  |  364 ++
 doc/guides/prog_guide/img/efd_i5.svg  |  578 +
 doc/guides/prog_guide/img/efd_i6.svg  |  413 ++
 doc/guides/prog_guide/img/efd_i8.svg  |  776 
 doc/guides/prog_guide/img/efd_i9.svg  |  271 
 doc/guides/prog_guide/index.rst   |1 +
 doc/guides/rel_notes/release_17_02.rst|   15 +
 doc/guides/sample_app_ug/flow_distributor.rst |  492 +++
 doc/guides/sample_app_ug/img/flow_distributor.svg |  417 ++
 doc/guides/sample_app_ug/index.rst|1 +
 examples/Makefile |1 +
 examples/flow_distributor/Makefile|   44 +
 examples/flow_distributor/distributor/Makefile|   57 +
 examples/flow_distributor/distributor/args.c  |  200 +++
 examples/flow_distributor/distributor/args.h  |   39 +
 examples/flo

[dpdk-dev] [PATCH v2 2/5] app/test: add EFD functional and perf tests

2017-01-06 Thread Pablo de Lara
Signed-off-by: Byron Marohn 
Signed-off-by: Karla Saur 
Signed-off-by: Saikrishna Edupuganti 
Signed-off-by: Pablo de Lara 
---
 MAINTAINERS  |   1 +
 app/test/Makefile|   3 +
 app/test/test_efd.c  | 494 +++
 app/test/test_efd_perf.c | 407 ++
 4 files changed, 905 insertions(+)
 create mode 100644 app/test/test_efd.c
 create mode 100644 app/test/test_efd_perf.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 9c60d67..d812962 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -532,6 +532,7 @@ EFD
 M: Byron Marohn 
 M: Pablo de Lara Guarch 
 F: lib/librte_efd/
+F: app/test/test_efd*
 
 Hashes
 M: Bruce Richardson 
diff --git a/app/test/Makefile b/app/test/Makefile
index 5be023a..5adb26d 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -123,6 +123,9 @@ SRCS-y += test_logs.c
 SRCS-y += test_memcpy.c
 SRCS-y += test_memcpy_perf.c
 
+SRCS-$(CONFIG_RTE_LIBRTE_EFD) += test_efd.c
+SRCS-$(CONFIG_RTE_LIBRTE_EFD) += test_efd_perf.c
+
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash.c
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_thash.c
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_perf.c
diff --git a/app/test/test_efd.c b/app/test/test_efd.c
new file mode 100644
index 000..1cc8608
--- /dev/null
+++ b/app/test/test_efd.c
@@ -0,0 +1,494 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "test.h"
+
+#define EFD_TEST_KEY_LEN 8
+#define TABLE_SIZE (1 << 21)
+#define ITERATIONS 3
+static unsigned int test_socket_id;
+
+/* 5-tuple key type */
+struct flow_key {
+   uint32_t ip_src;
+   uint32_t ip_dst;
+   uint16_t port_src;
+   uint16_t port_dst;
+   uint8_t proto;
+} __attribute__((packed));
+/*
+ * Print out result of unit test efd operation.
+ */
+#if defined(UNIT_TEST_EFD_VERBOSE)
+
+static void print_key_info(const char *msg, const struct flow_key *key,
+   efd_value_t val)
+{
+   const uint8_t *p = (const uint8_t *) key;
+   unsigned int i;
+
+   printf("%s key:0x", msg);
+   for (i = 0; i < sizeof(struct flow_key); i++)
+   printf("%02X", p[i]);
+
+   printf(" @ val %d\n", val);
+}
+#else
+
+static void print_key_info(__attribute__((unused)) const char *msg,
+   __attribute__((unused)) const struct flow_key *key,
+   __attribute__((unused)) efd_value_t val)
+{
+}
+#endif
+
+/* Keys used by unit test functions */
+static struct flow_key keys[5] = {
+   {
+   .ip_src = IPv4(0x03, 0x02, 0x01, 0x00),
+   .ip_dst = IPv4(0x07, 0x06, 0x05, 0x04),
+   .port_src = 0x0908,
+   .port_dst = 0x0b0a,
+   .proto = 0x0c,
+   },
+   {
+   .ip_src = IPv4(0x13, 0x12, 0x11, 0x10),
+   .ip_dst = IPv4(0x17, 0x16, 0x15, 0x14),
+   .port_src = 0x1918,
+   .port_dst = 0x1b1a,
+   .proto = 0x1c,
+   },
+   {
+   .ip_src = IPv4(0x23, 0x22, 0x21, 0x20),
+   .ip_dst = IPv4(0x27, 0x26, 0x25, 0x24),
+   .port_src = 0x2928,
+   .port_dst = 0x2b2a,
+   .proto = 0x2c,
+   },
+   {
+   .ip_src = IPv4(0x33, 0x32, 0x31, 0x30

[dpdk-dev] [PATCH v2 1/5] efd: new Elastic Flow Distributor library

2017-01-06 Thread Pablo de Lara
Elastic Flow Distributor (EFD) is a distributor library that uses
perfect hashing to determine a target/value for a given incoming flow key.
It has the following advantages:

- First, because it uses perfect hashing, it does not store
  the key itself and hence lookup performance is not dependent
  on the key size.

- Second, the target/value can be any arbitrary value hence
  the system designer and/or operator can better optimize service rates
  and inter-cluster network traffic locating.

- Third, since the storage requirement is much smaller than a hash-based
  flow table (i.e. better fit for CPU cache), EFD can scale to
  millions of flow keys.
  Finally, with current optimized library implementation performance
  is fully scalable with number of CPU cores.

Signed-off-by: Byron Marohn 
Signed-off-by: Pablo de Lara 
Signed-off-by: Saikrishna Edupuganti 
---
 MAINTAINERS |5 +
 config/common_base  |5 +
 doc/api/doxy-api-index.md   |3 +-
 doc/api/doxy-api.conf   |1 +
 doc/guides/rel_notes/release_17_02.rst  |   12 +
 lib/Makefile|1 +
 lib/librte_eal/common/include/rte_log.h |1 +
 lib/librte_efd/Makefile |   56 ++
 lib/librte_efd/rte_efd.c| 1354 +++
 lib/librte_efd/rte_efd.h|  294 +++
 lib/librte_efd/rte_efd_version.map  |   12 +
 mk/rte.app.mk   |1 +
 12 files changed, 1744 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_efd/Makefile
 create mode 100644 lib/librte_efd/rte_efd.c
 create mode 100644 lib/librte_efd/rte_efd.h
 create mode 100644 lib/librte_efd/rte_efd_version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index 9645c9b..9c60d67 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -528,6 +528,11 @@ F: app/test/test_acl.*
 F: examples/l3fwd-acl/
 F: doc/guides/sample_app_ug/l3_forward_access_ctrl.rst
 
+EFD
+M: Byron Marohn 
+M: Pablo de Lara Guarch 
+F: lib/librte_efd/
+
 Hashes
 M: Bruce Richardson 
 M: Pablo de Lara 
diff --git a/config/common_base b/config/common_base
index 8e9dcfa..869d8fb 100644
--- a/config/common_base
+++ b/config/common_base
@@ -467,6 +467,11 @@ CONFIG_RTE_LIBRTE_HASH=y
 CONFIG_RTE_LIBRTE_HASH_DEBUG=n
 
 #
+# Compile librte_efd
+#
+CONFIG_RTE_LIBRTE_EFD=y
+
+#
 # Compile librte_jobstats
 #
 CONFIG_RTE_LIBRTE_JOBSTATS=y
diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
index 72d59b2..0d34e2f 100644
--- a/doc/api/doxy-api-index.md
+++ b/doc/api/doxy-api-index.md
@@ -90,7 +90,8 @@ There are many libraries, so their headers may be grouped by 
topics:
   [frag/reass] (@ref rte_ip_frag.h),
   [LPM IPv4 route] (@ref rte_lpm.h),
   [LPM IPv6 route] (@ref rte_lpm6.h),
-  [ACL](@ref rte_acl.h)
+  [ACL](@ref rte_acl.h),
+  [EFD](@ref rte_efd.h)
 
 - **QoS**:
   [metering]   (@ref rte_meter.h),
diff --git a/doc/api/doxy-api.conf b/doc/api/doxy-api.conf
index b340fcf..6892315 100644
--- a/doc/api/doxy-api.conf
+++ b/doc/api/doxy-api.conf
@@ -40,6 +40,7 @@ INPUT   = doc/api/doxy-api-index.md \
   lib/librte_compat \
   lib/librte_cryptodev \
   lib/librte_distributor \
+  lib/librte_efd \
   lib/librte_ether \
   lib/librte_hash \
   lib/librte_ip_frag \
diff --git a/doc/guides/rel_notes/release_17_02.rst 
b/doc/guides/rel_notes/release_17_02.rst
index 180af82..5023038 100644
--- a/doc/guides/rel_notes/release_17_02.rst
+++ b/doc/guides/rel_notes/release_17_02.rst
@@ -53,6 +53,18 @@ New Features
   information.
 
 
+* **Added Elastic Flow Distributor library (rte_efd).**
+
+  This new library uses perfect hashing to determine a target/value for a
+  given incoming flow key.
+
+  It does not store the key itself for lookup operations, and therefore,
+  lookup performance is not dependent on the key size. Also, the target/value
+  can be any arbitrary value (8 bits by default). Finally, the storage 
requirement
+  is much smaller than a hash-based flow table and therefore, it can better 
fit for
+  CPU cache, being able to scale to millions of flow keys.
+
+
 Resolved Issues
 ---
 
diff --git a/lib/Makefile b/lib/Makefile
index 990f23a..9a41188 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -43,6 +43,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_ETHER) += librte_ether
 DIRS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += librte_cryptodev
 DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
 DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
+DIRS-$(CONFIG_RTE_LIBRTE_EFD) += librte_efd
 DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm
 DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl
 DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net
diff --git a/lib/librte_eal/common/include/rte_log.h 
b/lib/librte_eal/common/include/rt

[dpdk-dev] [PATCH v2 3/5] examples/flow_distributor: sample app to demonstrate EFD usage

2017-01-06 Thread Pablo de Lara
This new sample app, based on the client/server sample app,
shows the user an scenario using the EFD library.
It consists of:

- A front-end server which has an EFD table that stores the
  node id for each flow key, which will distribute the incoming
  packets to the different nodes

- A back-end node, which has a hash table where node checks,
  after reading packets coming from the server, whether the packet
  is meant to be used in such node, in which case it will be TXed,
  or not, in which case, packet will be dropped.

Signed-off-by: Pablo de Lara 
Signed-off-by: Saikrishna Edupuganti 
---
 MAINTAINERS|   1 +
 doc/api/examples.dox   |   4 +
 examples/Makefile  |   1 +
 examples/flow_distributor/Makefile |  44 +++
 examples/flow_distributor/distributor/Makefile |  57 
 examples/flow_distributor/distributor/args.c   | 200 
 examples/flow_distributor/distributor/args.h   |  39 +++
 examples/flow_distributor/distributor/init.c   | 371 ++
 examples/flow_distributor/distributor/init.h   |  76 +
 examples/flow_distributor/distributor/main.c   | 362 +
 examples/flow_distributor/node/Makefile|  48 +++
 examples/flow_distributor/node/node.c  | 417 +
 examples/flow_distributor/shared/common.h  |  99 ++
 13 files changed, 1719 insertions(+)
 create mode 100644 examples/flow_distributor/Makefile
 create mode 100644 examples/flow_distributor/distributor/Makefile
 create mode 100644 examples/flow_distributor/distributor/args.c
 create mode 100644 examples/flow_distributor/distributor/args.h
 create mode 100644 examples/flow_distributor/distributor/init.c
 create mode 100644 examples/flow_distributor/distributor/init.h
 create mode 100644 examples/flow_distributor/distributor/main.c
 create mode 100644 examples/flow_distributor/node/Makefile
 create mode 100644 examples/flow_distributor/node/node.c
 create mode 100644 examples/flow_distributor/shared/common.h

diff --git a/MAINTAINERS b/MAINTAINERS
index d812962..b124f6e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -533,6 +533,7 @@ M: Byron Marohn 
 M: Pablo de Lara Guarch 
 F: lib/librte_efd/
 F: app/test/test_efd*
+F: examples/flow_distributor/
 
 Hashes
 M: Bruce Richardson 
diff --git a/doc/api/examples.dox b/doc/api/examples.dox
index 1626852..c13e574 100644
--- a/doc/api/examples.dox
+++ b/doc/api/examples.dox
@@ -52,6 +52,10 @@
 @example load_balancer/init.c
 @example load_balancer/main.c
 @example load_balancer/runtime.c
+@example flow_distributor/distributor/args.c
+@example flow_distributor/distributor/init.c
+@example flow_distributor/distributor/main.c
+@example flow_distributor/node/node.c
 @example multi_process/client_server_mp/mp_client/client.c
 @example multi_process/client_server_mp/mp_server/args.c
 @example multi_process/client_server_mp/mp_server/init.c
diff --git a/examples/Makefile b/examples/Makefile
index d49c7f2..b404982 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -45,6 +45,7 @@ DIRS-y += dpdk_qat
 endif
 DIRS-y += ethtool
 DIRS-y += exception_path
+DIRS-$(CONFIG_RTE_LIBRTE_EFD) += flow_distributor
 DIRS-y += helloworld
 DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += ip_pipeline
 ifeq ($(CONFIG_RTE_LIBRTE_LPM),y)
diff --git a/examples/flow_distributor/Makefile 
b/examples/flow_distributor/Makefile
new file mode 100644
index 000..d085e49
--- /dev/null
+++ b/examples/flow_distributor/Makefile
@@ -0,0 +1,44 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# * Neither the name of Intel Corporation nor the names of its
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CA

[dpdk-dev] [PATCH v2 5/5] doc: add flow distributor guide

2017-01-06 Thread Pablo de Lara
Signed-off-by: Sameh Gobriel 
Signed-off-by: Pablo de Lara 
---
 MAINTAINERS   |   1 +
 doc/guides/sample_app_ug/flow_distributor.rst | 492 ++
 doc/guides/sample_app_ug/img/flow_distributor.svg | 417 ++
 doc/guides/sample_app_ug/index.rst|   1 +
 4 files changed, 911 insertions(+)
 create mode 100644 doc/guides/sample_app_ug/flow_distributor.rst
 create mode 100644 doc/guides/sample_app_ug/img/flow_distributor.svg

diff --git a/MAINTAINERS b/MAINTAINERS
index 66e9466..0d3b247 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -535,6 +535,7 @@ F: lib/librte_efd/
 F: doc/guides/prog_guide/efd_lib.rst
 F: app/test/test_efd*
 F: examples/flow_distributor/
+F: doc/guides/sample_app_ug/flow_distributor.rst
 
 Hashes
 M: Bruce Richardson 
diff --git a/doc/guides/sample_app_ug/flow_distributor.rst 
b/doc/guides/sample_app_ug/flow_distributor.rst
new file mode 100644
index 000..39820f0
--- /dev/null
+++ b/doc/guides/sample_app_ug/flow_distributor.rst
@@ -0,0 +1,492 @@
+..  BSD LICENSE
+Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+* Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+* Neither the name of Intel Corporation nor the names of its
+contributors may be used to endorse or promote products derived
+from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Flow Distributor Sample Application
+===
+
+This sample application demonstrates the use of EFD library as a flow-level
+load balancer, for more information about the EFD Library please refer to the
+DPDK programmer's guide.
+
+This sample application is a variant of the :ref:`client-server sample 
application `
+where a specific target node is specified for every and each flow
+(not in a round-robin fashion as the original load balancing sample 
application).
+
+Overview
+
+
+The architecture of the EFD flow-based load balancer sample application is
+presented in the following figure.
+
+.. _figure_efd_sample_app_overview:
+
+.. figure:: img/flow_distributor.*
+
+   Using EFD as a Flow-Level Load Balancer
+
+As shown in figure, the sample application consists of a front end node
+(distributor) using the EFD library to create a load-balancing table for flows,
+for each flow a target backend worker node is specified. The EFD table does not
+store the flow key (unlike a regular hash table), and hence, it can
+individually load-balance millions of flows (number of targets * maximum number
+of flows fit in a flow table per target) while still fitting in CPU cache.
+
+It should be noted that although they are referred to as nodes, the frontend
+distributor and worker nodes are processes running on the same platform.
+
+Front-end Distributor
+~
+
+Upon initializing, the frontend distributor node (process) creates a flow
+distributor table (based on the EFD library) which is populated with flow
+information and its intended target node.
+
+The sample application assigns a specific target node_id (process) for each of
+the IP destination addresses as follows:
+
+.. code-block:: c
+
+node_id = i % num_nodes; /* Target node id is generated */
+ip_dst = rte_cpu_to_be_32(i); /* Specific ip destination address is
+ assigned to this target node */
+
+then the pair of  is inserted into the flow distribution table.
+
+The main loop of the the distributor node receives a burst of packets, then for
+each packet a flow key (IP destination address) is extracted. The flow
+distributor table is looked up and the target node id is returned.  Packets are
+then enq

[dpdk-dev] [PATCH] net/enic: add support for TSO

2017-01-06 Thread John Daley
The enic TSO implementation requires that the length of the Eth/IP/TCP
headers be passed to the NIC. Other than that, it's just a matter of
setting the mss and offload mode on a per packet basis.

In TSO mode, IP and TCP checksums are offloaded even if not requested
with mb->ol_flags.

Signed-off-by: John Daley 
---
 drivers/net/enic/enic_ethdev.c |  3 +-
 drivers/net/enic/enic_rxtx.c   | 93 --
 2 files changed, 83 insertions(+), 13 deletions(-)

diff --git a/drivers/net/enic/enic_ethdev.c b/drivers/net/enic/enic_ethdev.c
index e5ceb98..c3ba2aa 100644
--- a/drivers/net/enic/enic_ethdev.c
+++ b/drivers/net/enic/enic_ethdev.c
@@ -475,7 +475,8 @@ static void enicpmd_dev_info_get(struct rte_eth_dev 
*eth_dev,
DEV_TX_OFFLOAD_VLAN_INSERT |
DEV_TX_OFFLOAD_IPV4_CKSUM  |
DEV_TX_OFFLOAD_UDP_CKSUM   |
-   DEV_TX_OFFLOAD_TCP_CKSUM;
+   DEV_TX_OFFLOAD_TCP_CKSUM   |
+   DEV_TX_OFFLOAD_TCP_TSO;
device_info->default_rxconf = (struct rte_eth_rxconf) {
.rx_free_thresh = ENIC_DEFAULT_RX_FREE_THRESH
};
diff --git a/drivers/net/enic/enic_rxtx.c b/drivers/net/enic/enic_rxtx.c
index f762a26..ed2b721 100644
--- a/drivers/net/enic/enic_rxtx.c
+++ b/drivers/net/enic/enic_rxtx.c
@@ -37,6 +37,9 @@
 #include "enic_compat.h"
 #include "rq_enet_desc.h"
 #include "enic.h"
+#include 
+#include 
+#include 
 
 #define RTE_PMD_USE_PREFETCH
 
@@ -129,6 +132,60 @@ enic_cq_rx_desc_n_bytes(struct cq_desc *cqd)
CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK;
 }
 
+/* Find the offset to L5. This is needed by enic TSO implementation.
+ * Return 0 if not a TCP packet or can't figure out the length.
+ */
+static inline uint8_t tso_header_len(struct rte_mbuf *mbuf)
+{
+   struct ether_hdr *eh;
+   struct vlan_hdr *vh;
+   struct ipv4_hdr *ip4;
+   struct ipv6_hdr *ip6;
+   struct tcp_hdr *th;
+   uint8_t hdr_len;
+   uint16_t ether_type;
+
+   /* offset past Ethernet header */
+   eh = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
+   ether_type = eh->ether_type;
+   hdr_len = sizeof(struct ether_hdr);
+   if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_VLAN)) {
+   vh = rte_pktmbuf_mtod_offset(mbuf, struct vlan_hdr *, hdr_len);
+   ether_type = vh->eth_proto;
+   hdr_len += sizeof(struct vlan_hdr);
+   }
+
+   /* offset past IP header */
+   switch (rte_be_to_cpu_16(ether_type)) {
+   case ETHER_TYPE_IPv4:
+   ip4 = rte_pktmbuf_mtod_offset(mbuf, struct ipv4_hdr *, hdr_len);
+   if (ip4->next_proto_id != IPPROTO_TCP)
+   return 0;
+   hdr_len += (ip4->version_ihl & 0xf) * 4;
+   break;
+   case ETHER_TYPE_IPv6:
+   ip6 = rte_pktmbuf_mtod_offset(mbuf, struct ipv6_hdr *, hdr_len);
+   if (ip6->proto != IPPROTO_TCP)
+   return 0;
+   hdr_len += sizeof(struct ipv6_hdr);
+   break;
+   default:
+   return 0;
+   }
+
+   if ((hdr_len + sizeof(struct tcp_hdr)) > mbuf->pkt_len)
+   return 0;
+
+   /* offset past TCP header */
+   th = rte_pktmbuf_mtod_offset(mbuf, struct tcp_hdr *, hdr_len);
+   hdr_len += (th->data_off >> 4) * 4;
+
+   if (hdr_len > mbuf->pkt_len)
+   return 0;
+
+   return hdr_len;
+}
+
 static inline uint8_t
 enic_cq_rx_check_err(struct cq_desc *cqd)
 {
@@ -462,10 +519,12 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf 
**tx_pkts,
struct vnic_wq_buf *buf;
unsigned int desc_count;
struct wq_enet_desc *descs, *desc_p, desc_tmp;
-   uint16_t mss;
+   uint16_t mss = 0;
uint8_t vlan_tag_insert;
uint8_t eop;
uint64_t bus_addr;
+   uint8_t offload_mode;
+   uint16_t header_len;
 
enic_cleanup_wq(enic, wq);
wq_desc_avail = vnic_wq_desc_avail(wq);
@@ -487,7 +546,6 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf 
**tx_pkts,
pkt_len = tx_pkt->pkt_len;
data_len = tx_pkt->data_len;
ol_flags = tx_pkt->ol_flags;
-   mss = 0;
vlan_id = 0;
vlan_tag_insert = 0;
bus_addr = (dma_addr_t)
@@ -497,13 +555,17 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf 
**tx_pkts,
desc_p = descs + head_idx;
 
eop = (data_len == pkt_len);
-
-   if (ol_flags & ol_flags_mask) {
-   if (ol_flags & PKT_TX_VLAN_PKT) {
-   vlan_tag_insert = 1;
-   vlan_id = tx_pkt->vlan_tci;
+   offload_mode = WQ_ENET_OFFLOAD_MODE_CSUM;
+   header_len = 0;
+
+   if (tx_pkt->tso_segsz) {
+   header_len = tso_header_len(tx_pkt);
+   if (