[dpdk-dev] [PATCH RFC 0/6] support of QinQ stripping and insertion of i40e

2015-05-05 Thread Helin Zhang
As i40e hardware can be reconfigured to support QinQ stripping and
insertion, this patch set is to enable that with the update of
'struct rte_mbuf', and testpmd commands.
Note that the Vector-PMD will be updated later.

Helin Zhang (6):
  mbuf: update mbuf structure for QinQ support
  i40e: reconfigure the hardware to support QinQ stripping/insertion
  i40e: support of QinQ stripping/insertion in RX/TX
  ethdev: add QinQ offload capability flags
  i40e: update of offload capability flags
  app/testpmd: support of QinQ stripping and insertion

 app/test-pmd/cmdline.c| 78 +---
 app/test-pmd/config.c | 23 +-
 app/test-pmd/flowgen.c|  8 ++--
 app/test-pmd/macfwd.c |  5 ++-
 app/test-pmd/macswap.c|  5 ++-
 app/test-pmd/rxonly.c |  5 ++-
 app/test-pmd/testpmd.h|  6 ++-
 app/test-pmd/txonly.c | 10 +++--
 app/test/packet_burst_generator.c |  4 +-
 lib/librte_ether/rte_ethdev.h | 28 ++--
 lib/librte_ether/rte_ether.h  |  4 +-
 lib/librte_mbuf/rte_mbuf.h| 22 +++--
 lib/librte_pmd_e1000/em_rxtx.c|  8 ++--
 lib/librte_pmd_e1000/igb_rxtx.c   |  8 ++--
 lib/librte_pmd_enic/enic_ethdev.c |  2 +-
 lib/librte_pmd_enic/enic_main.c   |  2 +-
 lib/librte_pmd_fm10k/fm10k_rxtx.c |  2 +-
 lib/librte_pmd_i40e/i40e_ethdev.c | 50 +
 lib/librte_pmd_i40e/i40e_ethdev_vf.c  | 13 ++
 lib/librte_pmd_i40e/i40e_rxtx.c   | 85 +++
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 11 +++--
 lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c |  6 +--
 22 files changed, 297 insertions(+), 88 deletions(-)

-- 
1.9.3



[dpdk-dev] [PATCH RFC 1/6] mbuf: update mbuf structure for QinQ support

2015-05-05 Thread Helin Zhang
To support QinQ, 'vlan_tci' should be replaced by 'vlan_tci0' and
'vlan_tci1'. Also new offload flags of 'PKT_RX_QINQ_PKT' and
'PKT_TX_QINQ_PKT' should be added.

Signed-off-by: Helin Zhang 
---
 app/test-pmd/flowgen.c|  2 +-
 app/test-pmd/macfwd.c |  2 +-
 app/test-pmd/macswap.c|  2 +-
 app/test-pmd/rxonly.c |  2 +-
 app/test-pmd/txonly.c |  2 +-
 app/test/packet_burst_generator.c |  4 ++--
 lib/librte_ether/rte_ether.h  |  4 ++--
 lib/librte_mbuf/rte_mbuf.h| 22 +++---
 lib/librte_pmd_e1000/em_rxtx.c|  8 
 lib/librte_pmd_e1000/igb_rxtx.c   |  8 
 lib/librte_pmd_enic/enic_ethdev.c |  2 +-
 lib/librte_pmd_enic/enic_main.c   |  2 +-
 lib/librte_pmd_fm10k/fm10k_rxtx.c |  2 +-
 lib/librte_pmd_i40e/i40e_rxtx.c   |  8 
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 11 +--
 lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c |  6 +++---
 16 files changed, 51 insertions(+), 36 deletions(-)

diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
index 72016c9..f24b00c 100644
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -207,7 +207,7 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
pkt->nb_segs= 1;
pkt->pkt_len= pkt_size;
pkt->ol_flags   = ol_flags;
-   pkt->vlan_tci   = vlan_tci;
+   pkt->vlan_tci0  = vlan_tci;
pkt->l2_len = sizeof(struct ether_hdr);
pkt->l3_len = sizeof(struct ipv4_hdr);
pkts_burst[nb_pkt]  = pkt;
diff --git a/app/test-pmd/macfwd.c b/app/test-pmd/macfwd.c
index 035e5eb..590b613 100644
--- a/app/test-pmd/macfwd.c
+++ b/app/test-pmd/macfwd.c
@@ -120,7 +120,7 @@ pkt_burst_mac_forward(struct fwd_stream *fs)
mb->ol_flags = ol_flags;
mb->l2_len = sizeof(struct ether_hdr);
mb->l3_len = sizeof(struct ipv4_hdr);
-   mb->vlan_tci = txp->tx_vlan_id;
+   mb->vlan_tci0 = txp->tx_vlan_id;
}
nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx);
fs->tx_packets += nb_tx;
diff --git a/app/test-pmd/macswap.c b/app/test-pmd/macswap.c
index 6729849..c355399 100644
--- a/app/test-pmd/macswap.c
+++ b/app/test-pmd/macswap.c
@@ -122,7 +122,7 @@ pkt_burst_mac_swap(struct fwd_stream *fs)
mb->ol_flags = ol_flags;
mb->l2_len = sizeof(struct ether_hdr);
mb->l3_len = sizeof(struct ipv4_hdr);
-   mb->vlan_tci = txp->tx_vlan_id;
+   mb->vlan_tci0 = txp->tx_vlan_id;
}
nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx);
fs->tx_packets += nb_tx;
diff --git a/app/test-pmd/rxonly.c b/app/test-pmd/rxonly.c
index ac56090..aa2cf7f 100644
--- a/app/test-pmd/rxonly.c
+++ b/app/test-pmd/rxonly.c
@@ -159,7 +159,7 @@ pkt_burst_receive(struct fwd_stream *fs)
   mb->hash.fdir.hash, mb->hash.fdir.id);
}
if (ol_flags & PKT_RX_VLAN_PKT)
-   printf(" - VLAN tci=0x%x", mb->vlan_tci);
+   printf(" - VLAN tci=0x%x", mb->vlan_tci0);
if (is_encapsulation) {
struct ipv4_hdr *ipv4_hdr;
struct ipv6_hdr *ipv6_hdr;
diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index ca32c85..4a2827f 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -266,7 +266,7 @@ pkt_burst_transmit(struct fwd_stream *fs)
pkt->nb_segs = tx_pkt_nb_segs;
pkt->pkt_len = tx_pkt_length;
pkt->ol_flags = ol_flags;
-   pkt->vlan_tci  = vlan_tci;
+   pkt->vlan_tci0  = vlan_tci;
pkt->l2_len = sizeof(struct ether_hdr);
pkt->l3_len = sizeof(struct ipv4_hdr);
pkts_burst[nb_pkt] = pkt;
diff --git a/app/test/packet_burst_generator.c 
b/app/test/packet_burst_generator.c
index b46eed7..959644c 100644
--- a/app/test/packet_burst_generator.c
+++ b/app/test/packet_burst_generator.c
@@ -270,7 +270,7 @@ nomore_mbuf:
pkt->l2_len = eth_hdr_size;

if (ipv4) {
-   pkt->vlan_tci  = ETHER_TYPE_IPv4;
+   pkt->vlan_tci0  = ETHER_TYPE_IPv4;
pkt->l3_len = sizeof(struct ipv4_hdr);

if (vlan_enabled)
@@ -278,7 +278,7 @@ nomore_mbuf:
else
pkt->ol_flags = PKT_RX_IPV4_HDR;
} else {
-   pkt->vlan_tci  = ETHER_TYPE_IPv6;
+   pkt->vlan_tci0  = ETHER_TYPE_IPv6;
pkt->l3_len = sizeof(struct ipv6_hdr);

if (vlan_enabled)
diff --git a/lib/librte_ether/r

[dpdk-dev] [PATCH RFC 2/6] i40e: reconfigure the hardware to support QinQ stripping/insertion

2015-05-05 Thread Helin Zhang
Reconfiguration is needed to support QinQ stripping and insertion,
as hardware does not support them by default.

Signed-off-by: Helin Zhang 
---
 lib/librte_pmd_i40e/i40e_ethdev.c | 48 +++
 1 file changed, 48 insertions(+)

diff --git a/lib/librte_pmd_i40e/i40e_ethdev.c 
b/lib/librte_pmd_i40e/i40e_ethdev.c
index 43762f2..9b4bf06 100644
--- a/lib/librte_pmd_i40e/i40e_ethdev.c
+++ b/lib/librte_pmd_i40e/i40e_ethdev.c
@@ -211,6 +211,7 @@ static int i40e_dev_filter_ctrl(struct rte_eth_dev *dev,
void *arg);
 static void i40e_configure_registers(struct i40e_hw *hw);
 static void i40e_hw_init(struct i40e_hw *hw);
+static int i40e_config_qinq(struct i40e_hw *hw, struct i40e_vsi *vsi);

 static const struct rte_pci_id pci_id_i40e_map[] = {
 #define RTE_PCI_DEV_ID_DECL_I40E(vend, dev) {RTE_PCI_DEVICE(vend, dev)},
@@ -3055,6 +3056,7 @@ i40e_vsi_setup(struct i40e_pf *pf,
 * macvlan filter which is expected and cannot be removed.
 */
i40e_update_default_filter_setting(vsi);
+   i40e_config_qinq(hw, vsi);
} else if (type == I40E_VSI_SRIOV) {
memset(&ctxt, 0, sizeof(ctxt));
/**
@@ -3095,6 +3097,8 @@ i40e_vsi_setup(struct i40e_pf *pf,
 * Since VSI is not created yet, only configure parameter,
 * will add vsi below.
 */
+
+   i40e_config_qinq(hw, vsi);
} else if (type == I40E_VSI_VMDQ2) {
memset(&ctxt, 0, sizeof(ctxt));
/*
@@ -5714,3 +5718,47 @@ i40e_configure_registers(struct i40e_hw *hw)
"0x%"PRIx32, reg_table[i].val, reg_table[i].addr);
}
 }
+
+#define I40E_VSI_TSR(_i)(0x00050800 + ((_i) * 4))
+#define I40E_VSI_TSR_QINQ_CONFIG0xc030
+#define I40E_VSI_L2TAGSTXVALID(_i)  (0x00042800 + ((_i) * 4))
+#define I40E_VSI_L2TAGSTXVALID_QINQ 0xab
+static int
+i40e_config_qinq(struct i40e_hw *hw, struct i40e_vsi *vsi)
+{
+   uint32_t reg;
+   int ret;
+
+   if (vsi->vsi_id >= I40E_MAX_NUM_VSIS) {
+   PMD_DRV_LOG(ERR, "VSI ID exceeds the maximum");
+   return -EINVAL;
+   }
+
+   /* Configure for double VLAN RX stripping */
+   reg = I40E_READ_REG(hw, I40E_VSI_TSR(vsi->vsi_id));
+   if ((reg & I40E_VSI_TSR_QINQ_CONFIG) != I40E_VSI_TSR_QINQ_CONFIG) {
+   reg |= I40E_VSI_TSR_QINQ_CONFIG;
+   ret = i40e_aq_debug_write_register(hw,
+   I40E_VSI_TSR(vsi->vsi_id), reg, NULL);
+   if (ret < 0) {
+   PMD_DRV_LOG(ERR, "Failed to update VSI_TSR[%d]",
+   vsi->vsi_id);
+   return I40E_ERR_CONFIG;
+   }
+   }
+
+   /* Configure for double VLAN TX insertion */
+   reg = I40E_READ_REG(hw, I40E_VSI_L2TAGSTXVALID(vsi->vsi_id));
+   if ((reg & 0xff) != I40E_VSI_L2TAGSTXVALID_QINQ) {
+   reg = I40E_VSI_L2TAGSTXVALID_QINQ;
+   ret = i40e_aq_debug_write_register(hw,
+   I40E_VSI_L2TAGSTXVALID(vsi->vsi_id), reg, NULL);
+   if (ret < 0) {
+   PMD_DRV_LOG(ERR, "Failed to update "
+   "VSI_L2TAGSTXVALID[%d]", vsi->vsi_id);
+   return I40E_ERR_CONFIG;
+   }
+   }
+
+   return 0;
+}
-- 
1.9.3



[dpdk-dev] [PATCH RFC 3/6] i40e: support of QinQ stripping/insertion in RX/TX

2015-05-05 Thread Helin Zhang
To support QinQ stripping and insertion, QinQ L2 tags should be
extracted from RX descriptors and stored in mbuf for RX stripping,
and should be read from mbuf and set correspondingly in TX
descriptors.

Signed-off-by: Helin Zhang 
---
 lib/librte_pmd_i40e/i40e_rxtx.c | 83 +++--
 1 file changed, 55 insertions(+), 28 deletions(-)

diff --git a/lib/librte_pmd_i40e/i40e_rxtx.c b/lib/librte_pmd_i40e/i40e_rxtx.c
index 1fe377c..e8c96af 100644
--- a/lib/librte_pmd_i40e/i40e_rxtx.c
+++ b/lib/librte_pmd_i40e/i40e_rxtx.c
@@ -95,18 +95,41 @@ static uint16_t i40e_xmit_pkts_simple(void *tx_queue,
  struct rte_mbuf **tx_pkts,
  uint16_t nb_pkts);

+static inline void
+i40e_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union i40e_rx_desc *rxdp)
+{
+   if (rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
+   (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) {
+   mb->ol_flags |= PKT_RX_VLAN_PKT;
+   mb->vlan_tci0 =
+   rte_le_to_cpu_16(rxdp->wb.qword0.lo_dword.l2tag1);
+   PMD_RX_LOG(DEBUG, "Descriptor l2tag1: %u",
+   rte_le_to_cpu_16(rxdp->wb.qword0.lo_dword.l2tag1));
+   } else
+   mb->vlan_tci0 = 0;
+#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+   if (rte_le_to_cpu_16(rxdp->wb.qword2.ext_status) &
+   (1 << I40E_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT)) {
+   mb->ol_flags |= PKT_RX_QINQ_PKT;
+   mb->vlan_tci1 = rte_le_to_cpu_16(rxdp->wb.qword2.l2tag2_2);
+   PMD_RX_LOG(DEBUG, "Descriptor l2tag2_1: %u, l2tag2_2: %u",
+   rte_le_to_cpu_16(rxdp->wb.qword2.l2tag2_1),
+   rte_le_to_cpu_16(rxdp->wb.qword2.l2tag2_2));
+   } else
+   mb->vlan_tci1 = 0;
+#endif
+   PMD_RX_LOG(DEBUG, "Mbuf vlan_tci0: %u, vlan_tci1: %u",
+   mb->vlan_tci0, mb->vlan_tci1);
+}
+
 /* Translate the rx descriptor status to pkt flags */
 static inline uint64_t
 i40e_rxd_status_to_pkt_flags(uint64_t qword)
 {
uint64_t flags;

-   /* Check if VLAN packet */
-   flags = qword & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT) ?
-   PKT_RX_VLAN_PKT : 0;
-
/* Check if RSS_HASH */
-   flags |= (((qword >> I40E_RX_DESC_STATUS_FLTSTAT_SHIFT) &
+   flags = (((qword >> I40E_RX_DESC_STATUS_FLTSTAT_SHIFT) &
I40E_RX_DESC_FLTSTAT_RSS_HASH) ==
I40E_RX_DESC_FLTSTAT_RSS_HASH) ? PKT_RX_RSS_HASH : 0;

@@ -697,16 +720,12 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
mb = rxep[j].mbuf;
qword1 = rte_le_to_cpu_64(\
rxdp[j].wb.qword1.status_error_len);
-   rx_status = (qword1 & I40E_RXD_QW1_STATUS_MASK) >>
-   I40E_RXD_QW1_STATUS_SHIFT;
pkt_len = ((qword1 & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
I40E_RXD_QW1_LENGTH_PBUF_SHIFT) - rxq->crc_len;
mb->data_len = pkt_len;
mb->pkt_len = pkt_len;
-   mb->vlan_tci0 = rx_status &
-   (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT) ?
-   rte_le_to_cpu_16(\
-   rxdp[j].wb.qword0.lo_dword.l2tag1) : 0;
+   mb->ol_flags = 0;
+   i40e_rxd_to_vlan_tci(mb, &rxdp[j]);
pkt_flags = i40e_rxd_status_to_pkt_flags(qword1);
pkt_flags |= i40e_rxd_error_to_pkt_flags(qword1);
pkt_flags |= i40e_rxd_ptype_to_pkt_flags(qword1);
@@ -720,7 +739,7 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
if (pkt_flags & PKT_RX_FDIR)
pkt_flags |= i40e_rxd_build_fdir(&rxdp[j], mb);

-   mb->ol_flags = pkt_flags;
+   mb->ol_flags |= pkt_flags;
}

for (j = 0; j < I40E_LOOK_AHEAD; j++)
@@ -946,10 +965,8 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, 
uint16_t nb_pkts)
rxm->pkt_len = rx_packet_len;
rxm->data_len = rx_packet_len;
rxm->port = rxq->port_id;
-
-   rxm->vlan_tci0 = rx_status &
-   (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT) ?
-   rte_le_to_cpu_16(rxd.wb.qword0.lo_dword.l2tag1) : 0;
+   rxm->ol_flags = 0;
+   i40e_rxd_to_vlan_tci(rxm, &rxd);
pkt_flags = i40e_rxd_status_to_pkt_flags(qword1);
pkt_flags |= i40e_rxd_error_to_pkt_flags(qword1);
pkt_flags |= i40e_rxd_ptype_to_pkt_flags(qword1);
@@ -961,7 +978,7 @@ i40e_recv_pkts(void *

[dpdk-dev] [PATCH RFC 4/6] ethdev: add QinQ offload capability flags

2015-05-05 Thread Helin Zhang
As offload capabilities of QinQ stripping and insertion are
supported by some of the supported hardware, the offload capability
flags should be added accordingly.

Signed-off-by: Helin Zhang 
---
 lib/librte_ether/rte_ethdev.h | 28 +++-
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 4648290..1855b2e 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -881,23 +881,25 @@ struct rte_eth_conf {
 /**
  * RX offload capabilities of a device.
  */
-#define DEV_RX_OFFLOAD_VLAN_STRIP  0x0001
-#define DEV_RX_OFFLOAD_IPV4_CKSUM  0x0002
-#define DEV_RX_OFFLOAD_UDP_CKSUM   0x0004
-#define DEV_RX_OFFLOAD_TCP_CKSUM   0x0008
-#define DEV_RX_OFFLOAD_TCP_LRO 0x0010
+#define DEV_RX_OFFLOAD_VLAN_STRIP   0x0001
+#define DEV_RX_OFFLOAD_QINQ_STRIP   0x0002
+#define DEV_RX_OFFLOAD_IPV4_CKSUM   0x0004
+#define DEV_RX_OFFLOAD_UDP_CKSUM0x0008
+#define DEV_RX_OFFLOAD_TCP_CKSUM0x0010
+#define DEV_RX_OFFLOAD_TCP_LRO  0x0020

 /**
  * TX offload capabilities of a device.
  */
-#define DEV_TX_OFFLOAD_VLAN_INSERT 0x0001
-#define DEV_TX_OFFLOAD_IPV4_CKSUM  0x0002
-#define DEV_TX_OFFLOAD_UDP_CKSUM   0x0004
-#define DEV_TX_OFFLOAD_TCP_CKSUM   0x0008
-#define DEV_TX_OFFLOAD_SCTP_CKSUM  0x0010
-#define DEV_TX_OFFLOAD_TCP_TSO 0x0020
-#define DEV_TX_OFFLOAD_UDP_TSO 0x0040
-#define DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM 0x0080 /**< Used for tunneling 
packet. */
+#define DEV_TX_OFFLOAD_VLAN_INSERT  0x0001
+#define DEV_TX_OFFLOAD_QINQ_INSERT  0x0002
+#define DEV_TX_OFFLOAD_IPV4_CKSUM   0x0004
+#define DEV_TX_OFFLOAD_UDP_CKSUM0x0008
+#define DEV_TX_OFFLOAD_TCP_CKSUM0x0010
+#define DEV_TX_OFFLOAD_SCTP_CKSUM   0x0020
+#define DEV_TX_OFFLOAD_TCP_TSO  0x0040
+#define DEV_TX_OFFLOAD_UDP_TSO  0x0080
+#define DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM 0x0100

 struct rte_eth_dev_info {
struct rte_pci_device *pci_dev; /**< Device PCI information. */
-- 
1.9.3



[dpdk-dev] [PATCH RFC 5/6] i40e: update of offload capability flags

2015-05-05 Thread Helin Zhang
As hardware supports QinQ stripping and insertion, the offload
flags of them should be added in both PF and VF sides.

Signed-off-by: Helin Zhang 
---
 lib/librte_pmd_i40e/i40e_ethdev.c|  2 ++
 lib/librte_pmd_i40e/i40e_ethdev_vf.c | 13 +
 2 files changed, 15 insertions(+)

diff --git a/lib/librte_pmd_i40e/i40e_ethdev.c 
b/lib/librte_pmd_i40e/i40e_ethdev.c
index 9b4bf06..a980d83 100644
--- a/lib/librte_pmd_i40e/i40e_ethdev.c
+++ b/lib/librte_pmd_i40e/i40e_ethdev.c
@@ -1529,11 +1529,13 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct 
rte_eth_dev_info *dev_info)
dev_info->max_vfs = dev->pci_dev->max_vfs;
dev_info->rx_offload_capa =
DEV_RX_OFFLOAD_VLAN_STRIP |
+   DEV_RX_OFFLOAD_QINQ_STRIP |
DEV_RX_OFFLOAD_IPV4_CKSUM |
DEV_RX_OFFLOAD_UDP_CKSUM |
DEV_RX_OFFLOAD_TCP_CKSUM;
dev_info->tx_offload_capa =
DEV_TX_OFFLOAD_VLAN_INSERT |
+   DEV_TX_OFFLOAD_QINQ_INSERT |
DEV_TX_OFFLOAD_IPV4_CKSUM |
DEV_TX_OFFLOAD_UDP_CKSUM |
DEV_TX_OFFLOAD_TCP_CKSUM |
diff --git a/lib/librte_pmd_i40e/i40e_ethdev_vf.c 
b/lib/librte_pmd_i40e/i40e_ethdev_vf.c
index a0d808f..c623429 100644
--- a/lib/librte_pmd_i40e/i40e_ethdev_vf.c
+++ b/lib/librte_pmd_i40e/i40e_ethdev_vf.c
@@ -1643,6 +1643,19 @@ i40evf_dev_info_get(struct rte_eth_dev *dev, struct 
rte_eth_dev_info *dev_info)
dev_info->max_rx_pktlen = I40E_FRAME_SIZE_MAX;
dev_info->reta_size = ETH_RSS_RETA_SIZE_64;
dev_info->flow_type_rss_offloads = I40E_RSS_OFFLOAD_ALL;
+   dev_info->rx_offload_capa =
+   DEV_RX_OFFLOAD_VLAN_STRIP |
+   DEV_RX_OFFLOAD_QINQ_STRIP |
+   DEV_RX_OFFLOAD_IPV4_CKSUM |
+   DEV_RX_OFFLOAD_UDP_CKSUM |
+   DEV_RX_OFFLOAD_TCP_CKSUM;
+   dev_info->tx_offload_capa =
+   DEV_TX_OFFLOAD_VLAN_INSERT |
+   DEV_TX_OFFLOAD_QINQ_INSERT |
+   DEV_TX_OFFLOAD_IPV4_CKSUM |
+   DEV_TX_OFFLOAD_UDP_CKSUM |
+   DEV_TX_OFFLOAD_TCP_CKSUM |
+   DEV_TX_OFFLOAD_SCTP_CKSUM;

dev_info->default_rxconf = (struct rte_eth_rxconf) {
.rx_thresh = {
-- 
1.9.3



[dpdk-dev] [PATCH RFC 6/6] app/testpmd: support of QinQ stripping and insertion

2015-05-05 Thread Helin Zhang
As QinQ stripping and insertion have been supported, test commands
should be updated. In detail, "tx_vlan set vlan_id (port_id)" will
be changed to "tx_vlan set (port_id) vlan_id0[, vlan_id1]" to
support both single and double VLAN tag insertion; also VLAN tags
stripped from received packets will be printed in 'rxonly' mode.

Signed-off-by: Helin Zhang 
---
 app/test-pmd/cmdline.c | 78 +-
 app/test-pmd/config.c  | 23 +--
 app/test-pmd/flowgen.c |  8 --
 app/test-pmd/macfwd.c  |  5 +++-
 app/test-pmd/macswap.c |  5 +++-
 app/test-pmd/rxonly.c  |  3 ++
 app/test-pmd/testpmd.h |  6 +++-
 app/test-pmd/txonly.c  | 10 +--
 8 files changed, 120 insertions(+), 18 deletions(-)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index f01db2a..a19d32a 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -304,9 +304,9 @@ static void cmd_help_long_parsed(void *parsed_result,
"rx_vxlan_port rm (udp_port) (port_id)\n"
"Remove an UDP port for VXLAN packet filter on a 
port\n\n"

-   "tx_vlan set vlan_id (port_id)\n"
-   "Set hardware insertion of VLAN ID in packets sent"
-   " on a port.\n\n"
+   "tx_vlan set (port_id) vlan_id0[, vlan_id1]\n"
+   "Set hardware insertion of VLAN IDs (single or 
double VLAN "
+   "depends on the number of VLAN IDs) in packets sent on 
a port.\n\n"

"tx_vlan set pvid port_id vlan_id (on|off)\n"
"Set port based TX VLAN insertion.\n\n"
@@ -2799,8 +2799,8 @@ cmdline_parse_inst_t cmd_rx_vlan_filter = {
 struct cmd_tx_vlan_set_result {
cmdline_fixed_string_t tx_vlan;
cmdline_fixed_string_t set;
-   uint16_t vlan_id;
uint8_t port_id;
+   uint16_t vlan_id;
 };

 static void
@@ -2809,6 +2809,13 @@ cmd_tx_vlan_set_parsed(void *parsed_result,
   __attribute__((unused)) void *data)
 {
struct cmd_tx_vlan_set_result *res = parsed_result;
+   int vlan_offload = rte_eth_dev_get_vlan_offload(res->port_id);
+
+   if (vlan_offload & ETH_VLAN_EXTEND_OFFLOAD) {
+   printf("Error, as QinQ has been enabled.\n");
+   return;
+   }
+
tx_vlan_set(res->port_id, res->vlan_id);
 }

@@ -2828,13 +2835,69 @@ cmdline_parse_token_num_t cmd_tx_vlan_set_portid =
 cmdline_parse_inst_t cmd_tx_vlan_set = {
.f = cmd_tx_vlan_set_parsed,
.data = NULL,
-   .help_str = "enable hardware insertion of a VLAN header with a given "
-   "TAG Identifier in packets sent on a port",
+   .help_str = "enable hardware insertion of a single VLAN header "
+   "with a given TAG Identifier in packets sent on a port",
.tokens = {
(void *)&cmd_tx_vlan_set_tx_vlan,
(void *)&cmd_tx_vlan_set_set,
-   (void *)&cmd_tx_vlan_set_vlanid,
(void *)&cmd_tx_vlan_set_portid,
+   (void *)&cmd_tx_vlan_set_vlanid,
+   NULL,
+   },
+};
+
+/* *** ENABLE HARDWARE INSERTION OF Double VLAN HEADER IN TX PACKETS *** */
+struct cmd_tx_vlan_set_qinq_result {
+   cmdline_fixed_string_t tx_vlan;
+   cmdline_fixed_string_t set;
+   uint8_t port_id;
+   uint16_t vlan_id0;
+   uint16_t vlan_id1;
+};
+
+static void
+cmd_tx_vlan_set_qinq_parsed(void *parsed_result,
+   __attribute__((unused)) struct cmdline *cl,
+   __attribute__((unused)) void *data)
+{
+   struct cmd_tx_vlan_set_qinq_result *res = parsed_result;
+   int vlan_offload = rte_eth_dev_get_vlan_offload(res->port_id);
+
+   if (!(vlan_offload & ETH_VLAN_EXTEND_OFFLOAD)) {
+   printf("Error, as QinQ hasn't been enabled.\n");
+   return;
+   }
+
+   tx_qinq_set(res->port_id, res->vlan_id0, res->vlan_id1);
+}
+
+cmdline_parse_token_string_t cmd_tx_vlan_set_qinq_tx_vlan =
+   TOKEN_STRING_INITIALIZER(struct cmd_tx_vlan_set_qinq_result,
+   tx_vlan, "tx_vlan");
+cmdline_parse_token_string_t cmd_tx_vlan_set_qinq_set =
+   TOKEN_STRING_INITIALIZER(struct cmd_tx_vlan_set_qinq_result,
+   set, "set");
+cmdline_parse_token_num_t cmd_tx_vlan_set_qinq_portid =
+   TOKEN_NUM_INITIALIZER(struct cmd_tx_vlan_set_qinq_result,
+   port_id, UINT8);
+cmdline_parse_token_num_t cmd_tx_vlan_set_qinq_vlanid0 =
+   TOKEN_NUM_INITIALIZER(struct cmd_tx_vlan_set_qinq_result,
+   vlan_id0, UINT16);
+cmdline_parse_token_num_t cmd_tx_vlan_set_qinq_vlanid1 =
+   TOKEN_NUM_INITIALIZER(struct cmd_tx_vlan_set_qinq_result,
+   vlan_id1, UINT16);
+
+cmdline_parse_inst_t cmd_tx_vlan_set_qinq = {
+   .f = cmd_tx_vlan_set_qinq_parsed,
+   .data = NULL,
+   .help_str = "enable hardware insertion of a doubl

[dpdk-dev] GitHub sandbox for the DPDK community

2015-05-05 Thread Wiles, Keith


On 5/4/15, 1:34 PM, "Marc Sune"  wrote:

>
>
>On 01/05/15 20:17, Wiles, Keith wrote:
>>
>> On 5/1/15, 1:09 PM, "Stephen Hemminger" 
>>wrote:
>>
>>> On Fri, 1 May 2015 15:56:32 +
>>> "Wiles, Keith"  wrote:
>>>
 Hi Everyone,

 I believe the DPDK community would benefit from moving to GitHub as
the
 primary DPDK site. http://github.com

 I believe the DPDK community can benefit from being at a very well
known
 world wide site. GitHub seems to have the most eyes of any of the open
 source Git repos today and it appears they have more than twice as
many
 developers. GitHub has a number of features I see as some good
 additions to
 our community using the GitHub organization account type.

 The cost for an organization account is $0 as long as we do not need
 more
 than 5 private repos.
>
>Minor issue:
>
>https://github.com/pricing
>
>Private repos for both users and organizations are not for free in
>github (they've never been afaik). They are in bitbucket, up to 5
>contributors.

Sorry, I could have sworn they gave 5 free, but you are correct we should
not require any private repos as we can use the teams and permissions.

Odd did they always have a cost for private repos for personal accounts? I
was thinking I had gotten 5 free in the beginning, but I have had to move
up to 10 repos about 8 months ago and I forget.
>
>But I don't get how private repositories have any influence in this
>discussion. Private repositories will be owned by companies and not DPDK
>as a community anyway.
>
>marc
>
  10 private repos is $25/month and had other plans
 for more. I do not see us needing more than 5 private repos today and
 the
 only reason I can see having a private repo is to do some prep work on
 the
 repo before making public. Every contributor would need to create a
 GitHub
 personal account, which is at no cost unless you need more than 5
 private
 repos. In both accounts you can have unlimited public repos.


 
https://help.github.com/articles/where-can-i-find-open-source-projects-
to
 -w
 ork-on/

 http://www.sitepoint.com/using-git-open-source-projects/

 - Adding more committers can lead to a security problems for 6Wind (I
 assume).
 - 6Wind appearing to own DPDK.org is not a good message to the
 community.
   - Not assuming 6Wind's dpdk.org site will disappear only where the
 community stores the master repos and how the community interacts with
 the
 master.
 - Permission and access levels in dpdk.org is only one level and we
can
 benefit from having 4 levels and teams as well.
 - The patch process today suffers from timely reviews, which will not
be
 fixed by moving.
   - GitHub has a per pull request discussions area, which gives a
clean
 way to review all discussions on a specific change.
 - The current patch model is clone/modify/commit/send patch set
 - The model with GitHub is fork on GitHub/modify/commit/send pull
 request
 - The patchwork web site is reasonable, but has some draw backs in
 maintaining the site.
   - GitHub manages the patches via pull requests and can be easily
seen
 via a web browser.
   - The down side is you do have to use a web browser to do some work,
 but
 the bulk of the everyday work would be done as it is today.
 - I think we all have a web browser now :-)
 - GitHub has team support and gives a group better control plus
 collaboration is much easier as we have a external location to work.
   - Most companies have some pretty high security level and being able to
 collaborate between two or more companies is very difficult if one
 company
 is hosting the repo behind a firewall.
   - Using GitHub and teams would make collaboration a lot easier or
 collaboration between two or more user accounts as well.
 - GitHub has a Web Page system, which can be customized for the
 community
 needs via a public or private repo.
 - We still need a dpdk.org email list I believe as I did not find one
at
 GitHub.
   - We can also forward GitHub emails to the list.
   - I believe you can reply to an email from GitHub and the email will
 get
 appended to the discussion thread.

>>> In my experience the github pull model causes less review, not more.
>>> It only works if maintainers are motivated to do this as their full
>>>time
>>> job.
>>>
>>> With email, the patches are right in front of developers and easier to
>>> quote
>>> for review comments.
>> We are not getting the eyes on the review today, which means to me it
>>will
>> not matter if we move to GitHub method in the future.

Correct, we need to add a process to require reviews in some way; what
that process is needs to be defined.

>>
>> Personally I am able to see the differences with the GitHub

[dpdk-dev] GitHub sandbox for the DPDK community

2015-05-05 Thread Wiles, Keith
Hi Marc

On 5/4/15, 2:08 PM, "Marc Sune"  wrote:

>
>
>On 02/05/15 15:59, Wiles, Keith wrote:
>>
>> On 5/2/15, 6:40 AM, "Neil Horman"  wrote:
>>
>>> On Fri, May 01, 2015 at 01:36:58PM -0700, Matthew Hall wrote:
 On Fri, May 01, 2015 at 10:59:32PM +0300, Aaro Koskinen wrote:
> Projects like GCC, GLIBC, binutils, busybox, etc or what?
>
> A.
 You'll notice all of these are low-level UNIX hacker sorts of tools
 mostly,
 with the partial exception of busybox. But even that is mainly for
 embedded
 use. It doesn't mean I don't think they're good and useful, but it
does
 limit
 the possible size of the community in my view.

 Since we are talking about how to get the largest widest community
 possible
 for DPDK, it could require doing things a bit differently from how
many
 low-level tools have historically done things.

>>> Why?
>>>
>>> Contributors to GCC: ~600 (based on svn) review
>>> Contributors to glibc : ~300 (based on git) review
>>> Contributors to binutils: ~600
>>> Contributors to busybox: ~300
>>>
>>> Contributors to DPDK: ~125
>> I think the DPDK community can grow the number above and as we move
>>toward
>> VNF/NFV I think it will grow to a much wider group of developers and
>>not a
>> niche project as you stated. We can be much more than some of the above
>> IMHO.
>
>Keith,
>
>Since I didn't really know where to post this, I do it here.
>
>Like you, I think hosting the repository in github is a good idea to
>increase visibility to more developers.
>
>I am not so sure the development workflow can be shifted completely to
>github pull requests; there is a lot of controversy on this.
>
>So I would propose a middle-ground, *if* we think we can make it work:
>
>1) The mailing-list, or mailing-lists, and the github pull requests
>should be synchronized. For this we could set a small cron job or BOT
>that inspects via the github API [*] the existing pull requests and
>emails the new ones to the DPDK mailing list. All pull requests can be
>downloaded as diffs and patches:
>
>https://github.com///pull/.diff
>https://github.com///pull/.patch
>
>[*] https://developer.github.com/v3/
>
>The BOT could even do very basic checkings, such as the discussed "dpdk
>checkpatch" over the PR, and publish automatically comments on the PR
>based on conformance/no conformance of the patch style.
>
>2) Discussion in the PR could be "echoed" by the bot in the mailing
>list, respecting the subject and threading, also via github's API.
>Automatic e-mails by github doesn't seem adequate to be echoed rawly in
>the list.
>
>3) The synchronization needs to happen the other way around too. I am
>not completely sure which is the best way:
>
>a) Open an issue and reference the mailing list (DPDK mailman) for the
>patch and nothing more.
>b) More work but probably better; in a fork for the BOT of the official
>DPDK repository:
>
>i) Make the bot get the patch from the mailing list, create a
>branch, apply on top of current HEAD. If fails, notify the user to
>rebase their patch, informing on top of which version it could not be
>applied
>ii) Issue a pull request "github.com/dpdk_bot/dpdk branch the feature>" -> "github.com/dpdk-conmmunity/dpdk branch master"
>
>
>4) Discussions in the mailing list about a PULL request or a patch sent
>in the mailing list should be recovered by the BOT and echoed in the
>pull request
>
>5) Normal issues: since the current DPDK doesn't have an issue tracker
>(afaik) it is easy. We could simply use that one and echo a _digested_
>version of the comments into the mailing list.
>
>With this approach both "mailing list users" and "github users" should
>be able to work in parallel. Keith; what do you think? It really needs
>work, but I guess it could do the job.
>
>If you like it we could set up a small (parallel) mailing and work with
>your repository to try this "combined" workflow.

To me this seems reasonable and we can work on this with the sandbox.

Need to play with it more, but I do not have an email list someplace to
play and write the bot. Let's talk more about this outside the list and if
someone else is wanting to help please let us know and we can add you to
the discussions.

>
>Marc
>
>p.s. if by chance someone from github is listening reading, a
>functionality similar to this one would be welcome.
>
>>> Now I grant you that dpdk is a newer, much more niche project, but it's
>>> disingenuous to state that we _have_ to do things differently to reach
>>>a
>>> wider
>>> audience.  We can, but it's by no means a prerequisite to gaining a
>>>wider
>>> audience.
>>>
>



[dpdk-dev] GitHub sandbox for the DPDK community

2015-05-05 Thread Wiles, Keith


On 5/4/15, 10:48 AM, "Matthew Hall"  wrote:

>On Mon, May 04, 2015 at 12:43:48PM +, Qiu, Michael wrote:
>> What mail client do you use? I think  mail client supporting thread mode
>> is important for patch review.
>
>Like many UNIX people, I use mutt.
>
>My concern is that, if we're making the widespread adoption, usage, and
>contributions for DPDK dependent on selection or debate of the features
>of 
>various MUAs, I'm not sure that we're looking at this from the right
>angle.
>
>I'm just trying to figure out how to get DPDK in the place where the most
>eyeballs are, rather than trying to drag the eyeballs to the place where
>DPDK 
>is.

+1, I agree with this statement completely and I feel discussions about an
MUA are non-productive and out of scope.
>
>Matthew.



[dpdk-dev] From: Cunming Liang

2015-05-05 Thread Cunming Liang
v7 changes
 - decouple epoll event and intr operation
 - add condition check in the case intr vector is disabled
 - renaming some APIs

v6 changes
 - split rte_intr_wait_rx_pkt into two APIs 'wait' and 'set'.
 - rewrite rte_intr_rx_wait/rte_intr_rx_set.
 - using vector number instead of queue_id as interrupt API params.
 - patch reorder and split.

v5 changes
 - Rebase the patchset onto the HEAD
 - Isolate ethdev from EAL for new-added wait-for-rx interrupt function
 - Export wait-for-rx interrupt function for shared libraries
 - Split-off a new patch file for changed struct rte_intr_handle that
   other patches depend on, to avoid breaking git bisect
 - Change sample application to accommodate EAL function spec change
   accordingly

v4 changes
 - Export interrupt enable/disable functions for shared libraries
 - Adjust position of new-added structure fields and functions to
   avoid breaking ABI

v3 changes
 - Add return value for interrupt enable/disable functions
 - Move spinlock from PMD to L3fwd-power
 - Remove unnecessary variables in e1000_mac_info
 - Fix miscellaneous review comments

v2 changes
 - Fix compilation issue in Makefile for missed header file.
 - Consolidate internal and community review comments of v1 patch set.

The patch series introduces low-latency one-shot rx interrupt into DPDK with
polling and interrupt mode switch control example.

DPDK userspace interrupt notification and handling mechanism is based on UIO
with below limitation:
1) It is designed to handle LSC interrupt only with inefficient suspended
   pthread wakeup procedure (e.g. UIO wakes up LSC interrupt handling thread
   which then wakes up DPDK polling thread). In this way, it introduces
   non-deterministic wakeup latency for DPDK polling thread as well as packet
   latency if it is used to handle Rx interrupt.
2) UIO only supports a single interrupt vector which has to be shared by
   LSC interrupt and interrupts assigned to dedicated rx queues.

This patchset includes below features:
1) Enable one-shot rx queue interrupt in ixgbe PMD(PF & VF) and igb PMD(PF 
only).
2) Build on top of the VFIO mechanism instead of UIO, so it could support
   up to 64 interrupt vectors for rx queue interrupts.
3) Have 1 DPDK polling thread handle per Rx queue interrupt with a dedicated
   VFIO eventfd, which eliminates non-deterministic pthread wakeup latency in
   user space.
4) Demonstrate interrupts control APIs and userspace NAPI-like polling/interrupt
   switch algorithms in L3fwd-power example.

Known limitations:
1) It does not work for UIO because a single interrupt eventfd shared by LSC
   and rx queue interrupt handlers causes a mess.
2) LSC interrupt is not supported by VF driver, so it is by default disabled
   in L3fwd-power now. Feel free to turn it on if you want to support both LSC
   and rx queue interrupts on a PF.

Cunming Liang (10):
  eal/linux: add interrupt vectors support in intr_handle
  eal/linux: add rte_epoll_wait/ctl support
  eal/linux: add API to set rx interrupt event monitor
  eal/bsd: dummy for new intr definition
  eal/linux: fix comments typo on vfio msi
  eal/linux: add interrupt vectors handling on VFIO
  ethdev: add rx intr enable, disable and ctl functions
  ixgbe: enable rx queue interrupts for both PF and VF
  igb: enable rx queue interrupts for PF
  l3fwd-power: enable one-shot rx interrupt and polling/interrupt mode
switch

 examples/l3fwd-power/main.c| 206 --
 .../bsdapp/eal/include/exec-env/rte_interrupts.h   |   6 +
 lib/librte_eal/linuxapp/eal/eal_interrupts.c   | 232 +--
 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c |  12 +
 .../linuxapp/eal/include/exec-env/rte_interrupts.h |  97 +
 lib/librte_eal/linuxapp/eal/rte_eal_version.map|   4 +
 lib/librte_ether/rte_ethdev.c  | 132 +++
 lib/librte_ether/rte_ethdev.h  | 104 +
 lib/librte_ether/rte_ether_version.map |   4 +
 lib/librte_pmd_e1000/e1000_ethdev.h|   3 +
 lib/librte_pmd_e1000/igb_ethdev.c  | 256 +++--
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c| 425 -
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h|   7 +
 13 files changed, 1394 insertions(+), 94 deletions(-)

-- 
1.8.1.4



[dpdk-dev] [PATCH v7 01/10] eal/linux: add interrupt vectors support in intr_handle

2015-05-05 Thread Cunming Liang
The patch adds interrupt vectors support in rte_intr_handle.
'vec_en' is set when interrupt vectors are detected and associated event fds 
are set.
Those event fds are stored in efds[].
'intr_vec' is reserved for device driver to initialize the vector mapping table.
When the event fds are added to a specified epoll instance, 'eptrs' will hold the 
rte_epoll_event object pointer.

Signed-off-by: Danny Zhou 
Signed-off-by: Cunming Liang 
---
v7 changes:
 - add eptrs[], it's used to store the register rte_epoll_event instances.
 - add vec_en, to log the vector capability status.

v6 changes:
 - add mapping table between irq vector number and queue id.

v5 changes:
 - Create this new patch file for changed struct rte_intr_handle that
   other patches depend on, to avoid breaking git bisect.

 lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h 
b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
index 6a159c7..e1f4a7a 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
@@ -38,6 +38,8 @@
 #ifndef _RTE_LINUXAPP_INTERRUPTS_H_
 #define _RTE_LINUXAPP_INTERRUPTS_H_

+#define RTE_MAX_RXTX_INTR_VEC_ID 32
+
 enum rte_intr_handle_type {
RTE_INTR_HANDLE_UNKNOWN = 0,
RTE_INTR_HANDLE_UIO,  /**< uio device handle */
@@ -48,6 +50,8 @@ enum rte_intr_handle_type {
RTE_INTR_HANDLE_MAX
 };

+struct rte_epoll_event;
+
 /** Handle for interrupts. */
 struct rte_intr_handle {
union {
@@ -57,6 +61,12 @@ struct rte_intr_handle {
};
int fd;  /**< interrupt event file descriptor */
enum rte_intr_handle_type type;  /**< handle type */
+   int max_intr;/**< max interrupt requested */
+   int vec_en;  /**< intr vectors enabled */
+   int efds[RTE_MAX_RXTX_INTR_VEC_ID];  /**< intr vectors/efds mapping */
+   struct rte_epoll_event *eptrs[RTE_MAX_RXTX_INTR_VEC_ID];
+/**< intr vector epoll event ptr */
+   int *intr_vec;   /**< intr vector number array */
 };

 #endif /* _RTE_LINUXAPP_INTERRUPTS_H_ */
-- 
1.8.1.4



[dpdk-dev] [PATCH v7 02/10] eal/linux: add rte_epoll_wait/ctl support

2015-05-05 Thread Cunming Liang
The patch adds 'rte_epoll_wait' and 'rte_epoll_ctl' for async event wakeup.
It defines 'struct rte_epoll_event' as the event param.
The 'op' uses the same enum as epoll_wait/ctl does.
The epoll event supports carrying raw user data and registering a callback 
which is executed during wakeup.

Signed-off-by: Cunming Liang 
---
v7 changes
 - split v6[4/8] into two patches, one for epoll event(this one)
   another for rx intr(next patch)
 - introduce rte_epoll_event definition
 - rte_epoll_wait/ctl for more generic RTE epoll API

v6 changes
 - split rte_intr_wait_rx_pkt into two function, wait and set.
 - rewrite rte_intr_rx_wait/rte_intr_rx_set to remove queue visibility on eal.
 - rte_intr_rx_wait to support multiplexing.
 - allow epfd as input to support flexible event fd combination.

 lib/librte_eal/linuxapp/eal/eal_interrupts.c   | 97 ++
 .../linuxapp/eal/include/exec-env/rte_interrupts.h | 66 ++-
 lib/librte_eal/linuxapp/eal/rte_eal_version.map|  3 +
 3 files changed, 165 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c 
b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
index 66deda2..b641745 100644
--- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -69,6 +69,8 @@

 #define EAL_INTR_EPOLL_WAIT_FOREVER (-1)

+static RTE_DEFINE_PER_LCORE(int, _epfd) = -1; /**< epoll fd per thread */
+
 /**
  * union for pipe fds.
  */
@@ -859,3 +861,98 @@ rte_eal_intr_init(void)
return -ret;
 }

+static void
+eal_epoll_process_event(struct epoll_event *evs, int n,
+   struct rte_epoll_event *events)
+{
+   int i;
+   struct rte_epoll_event *rev;
+   for (i = 0; i < n; i++) {
+   rev = (struct rte_epoll_event *)evs[i].data.ptr;
+   if (rev) {
+   events[i].fd= rev->fd;
+   events[i].event = rev->event;
+   events[i].data  = rev->data;
+   if (rev->cb_fun)
+   rev->cb_fun(rev->fd, rev->cb_arg);
+   }
+   }
+}
+
+static inline int
+eal_init_tls_epfd(void)
+{
+   int pfd = epoll_create(255);
+   if (pfd < 0) {
+   RTE_LOG(ERR, EAL,
+   "Cannot create epoll instance\n");
+   return -1;
+   }
+   return pfd;
+}
+
+int
+rte_intr_tls_epfd(void)
+{
+   if (RTE_PER_LCORE(_epfd) == -1)
+   RTE_PER_LCORE(_epfd) = eal_init_tls_epfd();
+
+   return RTE_PER_LCORE(_epfd);
+}
+
+int
+rte_epoll_wait(int epfd, struct rte_epoll_event *events,
+  int maxevents, int timeout)
+{
+   struct epoll_event evs[maxevents];
+   int rc;
+
+   if (!events) {
+   RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n");
+   return -1;
+   }
+
+   /* using per thread epoll fd */
+   if (epfd == RTE_EPOLL_PER_THREAD)
+   epfd = rte_intr_tls_epfd();
+
+   rc = epoll_wait(epfd, evs, maxevents, timeout);
+   if (likely(rc > 0))
+   /* epoll_wait has at least one fd ready to read */
+   eal_epoll_process_event(evs, rc, events);
+   else if (rc < 0) {
+   /* epoll_wait fail */
+   RTE_LOG(ERR, EAL, "epoll_wait returns with fail %s\n",
+   strerror(errno));
+   rc = -1;
+   }
+
+   return rc;
+}
+
+int
+rte_epoll_ctl(int epfd, int op, int fd,
+ struct rte_epoll_event *event)
+{
+   struct epoll_event ev;
+
+   if (!event) {
+   RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n");
+   return -1;
+   }
+
+   /* using per thread epoll fd */
+   if (epfd == RTE_EPOLL_PER_THREAD)
+   epfd = rte_intr_tls_epfd();
+
+   event->fd   = fd;  /* ignore fd in rev */
+   ev.data.ptr = (void *)event;
+   ev.events   = event->event;
+   if (epoll_ctl(epfd, op, fd, &ev) < 0) {
+   RTE_LOG(ERR, EAL, "Error op %d fd %d epoll_ctl, %s\n",
+   op, fd, strerror(errno));
+   return -1;
+   }
+
+   return 0;
+}
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h 
b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
index e1f4a7a..af405cf 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
@@ -50,7 +50,19 @@ enum rte_intr_handle_type {
RTE_INTR_HANDLE_MAX
 };

-struct rte_epoll_event;
+#define RTE_INTR_EVENT_ADD 1UL
+#define RTE_INTR_EVENT_DEL 2UL
+
+typedef void (*rte_intr_event_cb_t)(int fd, void *arg);
+
+/** interrupt epoll event obj, taken by epoll_event.ptr */
+struct rte_epoll_event {
+   int fd;   /**< OUT: event fd */
+   uint32_t event;   /**< event type */
+   void *data;   

[dpdk-dev] [PATCH v7 03/10] eal/linux: add API to set rx interrupt event monitor

2015-05-05 Thread Cunming Liang
The patch adds 'rte_intr_rx_ctl' to add or delete interrupt vector events 
monitor on specified epoll instance.

Signed-off-by: Cunming Liang 
---
v7 changes
 - rename rte_intr_rx_set to rte_intr_rx_ctl.
 - rte_intr_rx_ctl uses rte_epoll_ctl to register epoll event instance.
 - the intr rx event instance includes a intr process callback.

v6 changes
 - split rte_intr_wait_rx_pkt into two function, wait and set.
 - rewrite rte_intr_rx_wait/rte_intr_rx_set to remove queue visibility on eal.
 - rte_intr_rx_wait to support multiplexing.
 - allow epfd as input to support flexible event fd combination.

 lib/librte_eal/linuxapp/eal/eal_interrupts.c   | 95 ++
 .../linuxapp/eal/include/exec-env/rte_interrupts.h | 23 ++
 lib/librte_eal/linuxapp/eal/rte_eal_version.map|  1 +
 3 files changed, 119 insertions(+)

diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c 
b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
index b641745..1090d7b 100644
--- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -862,6 +862,35 @@ rte_eal_intr_init(void)
 }

 static void
+eal_intr_proc_rxtx_intr(int fd, struct rte_intr_handle *intr_handle)
+{
+   union rte_intr_read_buffer buf;
+   int bytes_read = 1;
+
+   if (intr_handle->type != RTE_INTR_HANDLE_VFIO_MSIX) {
+   RTE_LOG(ERR, EAL, "intr type should be VFIO_MSIX\n");
+   return;
+   }
+
+#ifdef VFIO_PRESENT
+   bytes_read = sizeof(buf.vfio_intr_count);
+#endif
+
+   /**
+* read out to clear the ready-to-be-read flag
+* for epoll_wait.
+*/
+   bytes_read = read(fd, &buf, bytes_read);
+   if (bytes_read < 0)
+   RTE_LOG(ERR, EAL, "Error reading from file "
+   "descriptor %d: %s\n", fd,
+   strerror(errno));
+   else if (bytes_read == 0)
+   RTE_LOG(ERR, EAL, "Read nothing from file "
+   "descriptor %d\n", fd);
+}
+
+static void
 eal_epoll_process_event(struct epoll_event *evs, int n,
struct rte_epoll_event *events)
 {
@@ -956,3 +985,69 @@ rte_epoll_ctl(int epfd, int op, int fd,

return 0;
 }
+
+int
+rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd,
+   int op, unsigned int vec, void *data, int socket)
+{
+   struct rte_epoll_event *rev;
+   int epfd_op;
+   int rc = 0;
+
+   if (!intr_handle || vec >= RTE_MAX_RXTX_INTR_VEC_ID ||
+   !intr_handle->vec_en) {
+   RTE_LOG(ERR, EAL, "Wrong intr vector number.\n");
+   return -1;
+   }
+
+   if (socket == SOCKET_ID_ANY)
+   socket = rte_socket_id();
+
+   switch (op) {
+   case RTE_INTR_EVENT_ADD:
+   epfd_op = EPOLL_CTL_ADD;
+   if (intr_handle->eptrs[vec] != NULL) {
+   RTE_LOG(ERR, EAL, "Event already been added.\n");
+   return -1;
+   }
+
+   /* new event */
+   rev = rte_zmalloc_socket("eptrs", sizeof(*rev),
+RTE_CACHE_LINE_SIZE, socket);
+   if (rev == NULL) {
+   RTE_LOG(ERR, EAL, "event obj alloc fail\n");
+   return -1;
+   }
+
+   /* attach to intr vector fd */
+   rev->fd = intr_handle->efds[vec];
+   rev->event  = EPOLLIN | EPOLLPRI | EPOLLET;
+   rev->data   = data;
+   rev->cb_fun = (rte_intr_event_cb_t)eal_intr_proc_rxtx_intr;
+   rev->cb_arg = (void *)intr_handle;
+
+   rc = rte_epoll_ctl(epfd, epfd_op, rev->fd, rev);
+   if (!rc)
+   intr_handle->eptrs[vec] = rev;
+   else
+   rte_free(rev);
+
+   break;
+   case RTE_INTR_EVENT_DEL:
+   epfd_op = EPOLL_CTL_DEL;
+   if (intr_handle->eptrs[vec] != NULL) {
+   rev = intr_handle->eptrs[vec];
+   rc = rte_epoll_ctl(epfd, epfd_op, rev->fd, rev);
+   if (!rc) {
+   rte_free(rev);
+   intr_handle->eptrs[vec] = NULL;
+   }
+   }
+   break;
+   default:
+   RTE_LOG(ERR, EAL, "event op type mismatch\n");
+   rc = -1;
+   }
+
+   return rc;
+}
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h 
b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
index af405cf..3d9f6d7 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
@@ -133,4 +133,27 @@ rte_epoll_ctl(int epfd, int op, int fd,
 int
 rte_intr_tls_epfd(void);

+/**
+ * @param intr_handle
+ *   Pointer to the interrupt handle.
+ * @param epfd
+ *   Ep

[dpdk-dev] [PATCH v7 04/10] eal/bsd: dummy for new intr definition

2015-05-05 Thread Cunming Liang
To make bsd compiling happy with new intr changes.

Signed-off-by: Cunming Liang 
---
v7 changes
 - remove stub 'linux only' function from source file

 lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h 
b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h
index 87a9cf6..f7fb6af 100644
--- a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h
+++ b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h
@@ -38,6 +38,8 @@
 #ifndef _RTE_LINUXAPP_INTERRUPTS_H_
 #define _RTE_LINUXAPP_INTERRUPTS_H_

+#define RTE_MAX_RXTX_INTR_VEC_ID 32
+
 enum rte_intr_handle_type {
RTE_INTR_HANDLE_UNKNOWN = 0,
RTE_INTR_HANDLE_UIO,  /**< uio device handle */
@@ -49,6 +51,10 @@ enum rte_intr_handle_type {
 struct rte_intr_handle {
int fd;  /**< file descriptor */
enum rte_intr_handle_type type;  /**< handle type */
+   int max_intr;/**< max interrupt requested */
+   int vec_en;  /**< intr vectors enabled */
+   int efds[RTE_MAX_RXTX_INTR_VEC_ID]; /**< intr vectors/efds mapping */
+   uint16_t *intr_vec;   /**< intr vector number array */
 };

 #endif /* _RTE_LINUXAPP_INTERRUPTS_H_ */
-- 
1.8.1.4



[dpdk-dev] [PATCH v7 05/10] eal/linux: fix comments typo on vfio msi

2015-05-05 Thread Cunming Liang
Signed-off-by: Danny Zhou 
Signed-off-by: Cunming Liang 
---
 lib/librte_eal/linuxapp/eal/eal_interrupts.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c 
b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
index 1090d7b..178a88e 100644
--- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -219,7 +219,7 @@ vfio_disable_intx(struct rte_intr_handle *intr_handle) {
return 0;
 }

-/* enable MSI-X interrupts */
+/* enable MSI interrupts */
 static int
 vfio_enable_msi(struct rte_intr_handle *intr_handle) {
int len, ret;
@@ -265,7 +265,7 @@ vfio_enable_msi(struct rte_intr_handle *intr_handle) {
return 0;
 }

-/* disable MSI-X interrupts */
+/* disable MSI interrupts */
 static int
 vfio_disable_msi(struct rte_intr_handle *intr_handle) {
struct vfio_irq_set *irq_set;
-- 
1.8.1.4



[dpdk-dev] [PATCH v7 06/10] eal/linux: add interrupt vectors handling on VFIO

2015-05-05 Thread Cunming Liang
This patch does below:
 - Create VFIO eventfds for each interrupt vector
 - Assign per interrupt vector's eventfd to VFIO by ioctl

Signed-off-by: Danny Zhou 
Signed-off-by: Cunming Liang 
---
v7 changes
 - cleanup unnecessary code change
 - split event and intr operation to other patches

 lib/librte_eal/linuxapp/eal/eal_interrupts.c | 36 
 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c   | 12 ++
 2 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c 
b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
index 178a88e..dfe857e 100644
--- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -128,6 +128,9 @@ static pthread_t intr_thread;
 #ifdef VFIO_PRESENT

 #define IRQ_SET_BUF_LEN  (sizeof(struct vfio_irq_set) + sizeof(int))
+/* irq set buffer length for queue interrupts and LSC interrupt */
+#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
+ sizeof(int) * (RTE_MAX_RXTX_INTR_VEC_ID + 1))

 /* enable legacy (INTx) interrupts */
 static int
@@ -293,8 +296,8 @@ vfio_disable_msi(struct rte_intr_handle *intr_handle) {
 /* enable MSI-X interrupts */
 static int
 vfio_enable_msix(struct rte_intr_handle *intr_handle) {
-   int len, ret;
-   char irq_set_buf[IRQ_SET_BUF_LEN];
+   int len, ret, max_intr;
+   char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
struct vfio_irq_set *irq_set;
int *fd_ptr;

@@ -302,12 +305,19 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {

irq_set = (struct vfio_irq_set *) irq_set_buf;
irq_set->argsz = len;
-   irq_set->count = 1;
+   if ((!intr_handle->max_intr) ||
+   (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID))
+   max_intr = RTE_MAX_RXTX_INTR_VEC_ID + 1;
+   else
+   max_intr = intr_handle->max_intr;
+
+   irq_set->count = max_intr;
irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | 
VFIO_IRQ_SET_ACTION_TRIGGER;
irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
irq_set->start = 0;
fd_ptr = (int *) &irq_set->data;
-   *fd_ptr = intr_handle->fd;
+   memcpy(fd_ptr, intr_handle->efds, sizeof(intr_handle->efds));
+   fd_ptr[max_intr - 1] = intr_handle->fd;

ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

@@ -317,22 +327,6 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
return -1;
}

-   /* manually trigger interrupt to enable it */
-   memset(irq_set, 0, len);
-   len = sizeof(struct vfio_irq_set);
-   irq_set->argsz = len;
-   irq_set->count = 1;
-   irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
-   irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
-   irq_set->start = 0;
-
-   ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
-
-   if (ret) {
-   RTE_LOG(ERR, EAL, "Error triggering MSI-X interrupts for fd 
%d\n",
-   intr_handle->fd);
-   return -1;
-   }
return 0;
 }

@@ -340,7 +334,7 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
 static int
 vfio_disable_msix(struct rte_intr_handle *intr_handle) {
struct vfio_irq_set *irq_set;
-   char irq_set_buf[IRQ_SET_BUF_LEN];
+   char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
int len, ret;

len = sizeof(struct vfio_irq_set);
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c 
b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
index aea1fb1..387f54c 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -308,6 +308,18 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int 
vfio_dev_fd)
case VFIO_PCI_MSIX_IRQ_INDEX:
internal_config.vfio_intr_mode = RTE_INTR_MODE_MSIX;
dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX;
+   for (i = 0; i < RTE_MAX_RXTX_INTR_VEC_ID; i++) {
+   fd = eventfd(0, 0);
+   if (fd < 0) {
+   RTE_LOG(ERR, EAL,
+   "cannot setup eventfd,"
+   "error %i (%s)\n",
+   errno, strerror(errno));
+   return -1;
+   }
+   dev->intr_handle.efds[i] = fd;
+   }
+   dev->intr_handle.vec_en = 1;
break;
case VFIO_PCI_MSI_IRQ_INDEX:
internal_config.vfio_intr_mode = RTE_INTR_MODE_MSI;
-- 
1.8.1.4



[dpdk-dev] [PATCH v7 07/10] ethdev: add rx intr enable, disable and ctl functions

2015-05-05 Thread Cunming Liang
The patch adds two dev_ops functions to enable and disable rx queue interrupts.
In addition, it adds rte_eth_dev_rx_intr_ctl/rx_intr_ctl_q to support per port or 
per queue rx intr event set.

Signed-off-by: Danny Zhou 
Signed-off-by: Cunming Liang 
---
v7 changes
 - remove rx_intr_vec_get
 - add rx_intr_ctl and rx_intr_ctl_q

v6 changes
 - add rx_intr_vec_get to retrieve the vector num of the queue.

v5 changes
 - Rebase the patchset onto the HEAD

v4 changes
 - Export interrupt enable/disable functions for shared libraries
 - Put new functions at the end of eth_dev_ops to avoid breaking ABI

v3 changes
 - Add return value for interrupt enable/disable functions

 lib/librte_ether/rte_ethdev.c  | 132 +
 lib/librte_ether/rte_ethdev.h  | 104 ++
 lib/librte_ether/rte_ether_version.map |   4 +
 3 files changed, 240 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 024fe8b..cdde14c 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -3281,6 +3281,138 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
}
rte_spinlock_unlock(&rte_eth_dev_cb_lock);
 }
+
+int
+rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data)
+{
+   uint32_t vec;
+   struct rte_eth_dev *dev;
+   struct rte_intr_handle *intr_handle;
+   uint16_t qid;
+   int rc;
+
+   if (!rte_eth_dev_is_valid_port(port_id)) {
+   PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+   return -ENODEV;
+   }
+
+   dev = &rte_eth_devices[port_id];
+   if (dev == NULL) {
+   PMD_DEBUG_TRACE("Invalid port device\n");
+   return -ENODEV;
+   }
+
+   intr_handle = &dev->pci_dev->intr_handle;
+   if (!intr_handle->intr_vec) {
+   PMD_DEBUG_TRACE("RX Intr vector unset\n");
+   return -EPERM;
+   }
+
+   for (qid = 0; qid < dev->data->nb_rx_queues; qid++) {
+   if (intr_handle->intr_vec[qid] < 0) {
+   PMD_DEBUG_TRACE("RX Intr vector invalid on %d\n", qid);
+   continue;
+   }
+
+   vec = intr_handle->intr_vec[qid];
+   rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec,
+data, rte_eth_dev_socket_id(port_id));
+   if (rc) {
+   PMD_DEBUG_TRACE("p %d q %d rx ctl error"
+   " op %d epfd %d vec %u\n",
+   port_id, qid, op, epfd, vec);
+   }
+   }
+
+   return 0;
+}
+
+int
+rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id,
+ int epfd, int op, void *data)
+{
+   uint32_t vec;
+   struct rte_eth_dev *dev;
+   struct rte_intr_handle *intr_handle;
+   int rc;
+
+   if (!rte_eth_dev_is_valid_port(port_id)) {
+   PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+   return -ENODEV;
+   }
+
+   dev = &rte_eth_devices[port_id];
+   if (dev == NULL) {
+   PMD_DEBUG_TRACE("Invalid port device\n");
+   return -ENODEV;
+   }
+
+   if (queue_id >= dev->data->nb_rx_queues) {
+   PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id);
+   return -EINVAL;
+   }
+
+   intr_handle = &dev->pci_dev->intr_handle;
+   if (!intr_handle->intr_vec || intr_handle->intr_vec[queue_id] < 0) {
+   PMD_DEBUG_TRACE("RX Intr vector unset on %d\n", rx_queue_id);
+   return -EPERM;
+   }
+
+   vec = intr_handle->intr_vec[queue_id];
+   rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec,
+data, rte_eth_dev_socket_id(port_id));
+   if (rc) {
+   PMD_DEBUG_TRACE("p %d q %d rx ctl error"
+   " op %d epfd %d vec %u\n",
+   port_id, queue_id, op, epfd, vec);
+   return rc;
+   }
+
+   return 0;
+}
+
+int
+rte_eth_dev_rx_intr_enable(uint8_t port_id,
+  uint16_t queue_id)
+{
+   struct rte_eth_dev *dev;
+
+   if (!rte_eth_dev_is_valid_port(port_id)) {
+   PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+   return -ENODEV;
+   }
+
+   dev = &rte_eth_devices[port_id];
+   if (dev == NULL) {
+   PMD_DEBUG_TRACE("Invalid port device\n");
+   return -ENODEV;
+   }
+
+   FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_enable, -ENOTSUP);
+   return (*dev->dev_ops->rx_queue_intr_enable)(dev, queue_id);
+}
+
+int
+rte_eth_dev_rx_intr_disable(uint8_t port_id,
+   uint16_t queue_id)
+{
+   struct rte_eth_dev *dev;
+
+   if (!rte_eth_dev_is_valid_port(port_id)) {
+   PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+

[dpdk-dev] [PATCH v7 08/10] ixgbe: enable rx queue interrupts for both PF and VF

2015-05-05 Thread Cunming Liang
The patch does below things for ixgbe PF and VF:
- Setup NIC to generate MSI-X interrupts
- Set the IVAR register to map interrupt causes to vectors
- Implement interrupt enable/disable functions

Signed-off-by: Danny Zhou 
Signed-off-by: Yong Liu 
Signed-off-by: Cunming Liang 
---
v7 changes
 - add condition check when intr vector is not enabled

v6 changes
 - fill queue-vector mapping table

v5 changes
 - Rebase the patchset onto the HEAD

v3 changes
 - Remove spinlock from PMD

v2 changes
 - Consolidate review comments related to coding style

 lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 425 +++-
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h |   7 +
 2 files changed, 428 insertions(+), 4 deletions(-)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c 
b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
index 366aa45..ee0e10b 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
@@ -82,6 +82,9 @@
  */
 #define IXGBE_FC_LO 0x40

+/* Default minimum inter-interrupt interval for EITR configuration */
+#define IXGBE_MIN_INTER_INTERRUPT_INTERVAL_DEFAULT 0x79E
+
 /* Timer value included in XOFF frames. */
 #define IXGBE_FC_PAUSE 0x680

@@ -171,6 +174,7 @@ static int ixgbe_dev_rss_reta_query(struct rte_eth_dev *dev,
uint16_t reta_size);
 static void ixgbe_dev_link_status_print(struct rte_eth_dev *dev);
 static int ixgbe_dev_lsc_interrupt_setup(struct rte_eth_dev *dev);
+static int ixgbe_dev_rxq_interrupt_setup(struct rte_eth_dev *dev);
 static int ixgbe_dev_interrupt_get_status(struct rte_eth_dev *dev);
 static int ixgbe_dev_interrupt_action(struct rte_eth_dev *dev);
 static void ixgbe_dev_interrupt_handler(struct rte_intr_handle *handle,
@@ -183,11 +187,14 @@ static void ixgbe_dcb_init(struct ixgbe_hw *hw,struct 
ixgbe_dcb_config *dcb_conf

 /* For Virtual Function support */
 static int eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev);
+static int ixgbevf_dev_interrupt_get_status(struct rte_eth_dev *dev);
+static int ixgbevf_dev_interrupt_action(struct rte_eth_dev *dev);
 static int  ixgbevf_dev_configure(struct rte_eth_dev *dev);
 static int  ixgbevf_dev_start(struct rte_eth_dev *dev);
 static void ixgbevf_dev_stop(struct rte_eth_dev *dev);
 static void ixgbevf_dev_close(struct rte_eth_dev *dev);
 static void ixgbevf_intr_disable(struct ixgbe_hw *hw);
+static void ixgbevf_intr_enable(struct ixgbe_hw *hw);
 static void ixgbevf_dev_stats_get(struct rte_eth_dev *dev,
struct rte_eth_stats *stats);
 static void ixgbevf_dev_stats_reset(struct rte_eth_dev *dev);
@@ -197,6 +204,15 @@ static void ixgbevf_vlan_strip_queue_set(struct 
rte_eth_dev *dev,
uint16_t queue, int on);
 static void ixgbevf_vlan_offload_set(struct rte_eth_dev *dev, int mask);
 static void ixgbevf_set_vfta_all(struct rte_eth_dev *dev, bool on);
+static void ixgbevf_dev_interrupt_handler(struct rte_intr_handle *handle,
+   void *param);
+static int ixgbevf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev,
+   uint16_t queue_id);
+static int ixgbevf_dev_rx_queue_intr_disable(struct rte_eth_dev *dev,
+uint16_t queue_id);
+static void ixgbevf_set_ivar_map(struct ixgbe_hw *hw, int8_t direction,
+uint8_t queue, uint8_t msix_vector);
+static void ixgbevf_configure_msix(struct rte_eth_dev *dev);

 /* For Eth VMDQ APIs support */
 static int ixgbe_uc_hash_table_set(struct rte_eth_dev *dev, struct
@@ -214,6 +230,14 @@ static int ixgbe_mirror_rule_set(struct rte_eth_dev *dev,
 static int ixgbe_mirror_rule_reset(struct rte_eth_dev *dev,
uint8_t rule_id);

+static int ixgbe_dev_rx_queue_intr_enable(struct rte_eth_dev *dev,
+   uint16_t queue_id);
+static int ixgbe_dev_rx_queue_intr_disable(struct rte_eth_dev *dev,
+   uint16_t queue_id);
+static void ixgbe_set_ivar_map(struct ixgbe_hw *hw, int8_t direction,
+   uint8_t queue, uint8_t msix_vector);
+static void ixgbe_configure_msix(struct rte_eth_dev *dev);
+
 static int ixgbe_set_queue_rate_limit(struct rte_eth_dev *dev,
uint16_t queue_idx, uint16_t tx_rate);
 static int ixgbe_set_vf_rate_limit(struct rte_eth_dev *dev, uint16_t vf,
@@ -262,7 +286,7 @@ static int ixgbevf_dev_set_mtu(struct rte_eth_dev *dev, 
uint16_t mtu);
  */
 #define UPDATE_VF_STAT(reg, last, cur) \
 {   \
-   u32 latest = IXGBE_READ_REG(hw, reg);   \
+   uint32_t latest = IXGBE_READ_REG(hw, reg);   \
cur += latest - last;   \
last = latest;  \
 }
@@ -343,6 +367,8 @@ static const struct eth_dev_ops ixgbe_eth_dev_ops = {
.tx_queue_start   = ixgbe_dev_tx_queue_start,
.tx_queue_stop= ixgbe_dev_tx_queue_stop,
.rx_queu

[dpdk-dev] [PATCH v7 09/10] igb: enable rx queue interrupts for PF

2015-05-05 Thread Cunming Liang
The patch does below for igb PF:
- Setup NIC to generate MSI-X interrupts
- Set the IVAR register to map interrupt causes to vectors
- Implement interrupt enable/disable functions

Signed-off-by: Danny Zhou 
Signed-off-by: Cunming Liang 
---
v7 changes
 - add condition check when intr vector is not enabled

v6 changes
 - fill queue-vector mapping table

v5 changes
 - Rebase the patchset onto the HEAD

v3 changes
 - Remove unnecessary variables in e1000_mac_info
 - Remove spinlock from PMD

v2 changes
 - Consolidate review comments related to coding style

 lib/librte_pmd_e1000/e1000_ethdev.h |   3 +
 lib/librte_pmd_e1000/igb_ethdev.c   | 256 
 2 files changed, 234 insertions(+), 25 deletions(-)

diff --git a/lib/librte_pmd_e1000/e1000_ethdev.h 
b/lib/librte_pmd_e1000/e1000_ethdev.h
index c451faa..13c4cad 100644
--- a/lib/librte_pmd_e1000/e1000_ethdev.h
+++ b/lib/librte_pmd_e1000/e1000_ethdev.h
@@ -108,6 +108,9 @@
ETH_RSS_IPV6_TCP_EX | \
ETH_RSS_IPV6_UDP_EX)

+/* maximum number of other interrupts besides Rx & Tx interrupts */
+#define E1000_MAX_OTHER_INTR   1
+
 /* structure for interrupt relative data */
 struct e1000_interrupt {
uint32_t flags;
diff --git a/lib/librte_pmd_e1000/igb_ethdev.c 
b/lib/librte_pmd_e1000/igb_ethdev.c
index 4415155..d7ec696 100644
--- a/lib/librte_pmd_e1000/igb_ethdev.c
+++ b/lib/librte_pmd_e1000/igb_ethdev.c
@@ -96,6 +96,7 @@ static int  eth_igb_flow_ctrl_get(struct rte_eth_dev *dev,
 static int  eth_igb_flow_ctrl_set(struct rte_eth_dev *dev,
struct rte_eth_fc_conf *fc_conf);
 static int eth_igb_lsc_interrupt_setup(struct rte_eth_dev *dev);
+static int eth_igb_rxq_interrupt_setup(struct rte_eth_dev *dev);
 static int eth_igb_interrupt_get_status(struct rte_eth_dev *dev);
 static int eth_igb_interrupt_action(struct rte_eth_dev *dev);
 static void eth_igb_interrupt_handler(struct rte_intr_handle *handle,
@@ -194,6 +195,16 @@ static int eth_igb_filter_ctrl(struct rte_eth_dev *dev,
 enum rte_filter_op filter_op,
 void *arg);

+static int eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev,
+   uint16_t queue_id);
+static int eth_igb_rx_queue_intr_disable(struct rte_eth_dev *dev,
+   uint16_t queue_id);
+static void eth_igb_assign_msix_vector(struct e1000_hw *hw, int8_t direction,
+   uint8_t queue, uint8_t msix_vector);
+static void eth_igb_configure_msix_intr(struct rte_eth_dev *dev);
+static void eth_igb_write_ivar(struct e1000_hw *hw, uint8_t msix_vector,
+   uint8_t index, uint8_t offset);
+
 /*
  * Define VF Stats MACRO for Non "cleared on read" register
  */
@@ -253,6 +264,8 @@ static const struct eth_dev_ops eth_igb_ops = {
.vlan_tpid_set= eth_igb_vlan_tpid_set,
.vlan_offload_set = eth_igb_vlan_offload_set,
.rx_queue_setup   = eth_igb_rx_queue_setup,
+   .rx_queue_intr_enable = eth_igb_rx_queue_intr_enable,
+   .rx_queue_intr_disable = eth_igb_rx_queue_intr_disable,
.rx_queue_release = eth_igb_rx_queue_release,
.rx_queue_count   = eth_igb_rx_queue_count,
.rx_descriptor_done   = eth_igb_rx_descriptor_done,
@@ -463,6 +476,7 @@ eth_igb_dev_init(struct rte_eth_dev *eth_dev)
struct e1000_filter_info *filter_info =
E1000_DEV_PRIVATE_TO_FILTER_INFO(eth_dev->data->dev_private);
uint32_t ctrl_ext;
+   struct rte_eth_dev_info dev_info;

pci_dev = eth_dev->pci_dev;
eth_dev->dev_ops = &eth_igb_ops;
@@ -584,6 +598,23 @@ eth_igb_dev_init(struct rte_eth_dev *eth_dev)
 eth_dev->data->port_id, pci_dev->id.vendor_id,
 pci_dev->id.device_id);

+   /* set max interrupt vfio request */
+   memset(&dev_info, 0, sizeof(dev_info));
+   eth_igb_infos_get(eth_dev, &dev_info);
+
+   if (pci_dev->intr_handle.vec_en) {
+   pci_dev->intr_handle.max_intr = dev_info.max_rx_queues +
+   E1000_MAX_OTHER_INTR;
+   pci_dev->intr_handle.intr_vec =
+   rte_zmalloc("intr_vec",
+   dev_info.max_rx_queues * sizeof(int), 0);
+   if (pci_dev->intr_handle.intr_vec == NULL) {
+   PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues"
+" intr_vec\n", dev_info.max_rx_queues);
+   return -ENOMEM;
+   }
+   }
+
rte_intr_callback_register(&(pci_dev->intr_handle),
eth_igb_interrupt_handler, (void *)eth_dev);

@@ -752,7 +783,7 @@ eth_igb_start(struct rte_eth_dev *dev)
 {
struct e1000_hw *hw =
E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-   int ret, i, mask;
+   int ret, mask;
uint32_t ctrl_ext;

PMD_INIT_FUNC_TRA

[dpdk-dev] [PATCH v7 10/10] l3fwd-power: enable one-shot rx interrupt and polling/interrupt mode switch

2015-05-05 Thread Cunming Liang
Demonstrate how to handle per rx queue interrupt in a NAPI-like
implementation in userspace. The DPDK polling thread mainly works in
polling mode and switches to interrupt mode only if no packet has
been received in recent polls.
Userspace interrupt notification generally takes a lot more cycles
than kernel, so one-shot interrupt is used here to guarantee minimum
overhead and the DPDK polling thread returns to polling mode immediately
once it receives an interrupt notification for an incoming packet.

Signed-off-by: Danny Zhou 
Signed-off-by: Cunming Liang 
---
v7 changes
 - using new APIs
 - demo multiple port/queue pair wait on the same epoll instance

v6 changes
 - Split event fd add and wait

v5 changes
 - Change invoked function name and parameter to accommodate EAL change

v3 changes
 - Add spinlock to ensure thread safe when accessing interrupt mask
   register

v2 changes
 - Remove unused function which is for debug purpose

 examples/l3fwd-power/main.c | 206 +++-
 1 file changed, 164 insertions(+), 42 deletions(-)

diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c
index bb0b66f..08b36e0 100644
--- a/examples/l3fwd-power/main.c
+++ b/examples/l3fwd-power/main.c
@@ -74,12 +74,14 @@
 #include 
 #include 
 #include 
+#include 
+#include 

 #define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1

 #define MAX_PKT_BURST 32

-#define MIN_ZERO_POLL_COUNT 5
+#define MIN_ZERO_POLL_COUNT 10

 /* around 100ms at 2 Ghz */
 #define TIMER_RESOLUTION_CYCLES   200000000ULL
@@ -155,6 +157,9 @@ static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
 /* ethernet addresses of ports */
 static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];

+/* per-port spinlocks */
+static rte_spinlock_t locks[RTE_MAX_ETHPORTS];
+
 /* mask of enabled ports */
 static uint32_t enabled_port_mask = 0;
 /* Ports set in promiscuous mode off by default. */
@@ -187,6 +192,9 @@ struct lcore_rx_queue {
 #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS
 #define MAX_RX_QUEUE_PER_PORT 128

+#define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16
+
+
 #define MAX_LCORE_PARAMS 1024
 struct lcore_params {
uint8_t port_id;
@@ -213,7 +221,7 @@ static uint16_t nb_lcore_params = 
sizeof(lcore_params_array_default) /

 static struct rte_eth_conf port_conf = {
.rxmode = {
-   .mq_mode= ETH_MQ_RX_RSS,
+   .mq_mode = ETH_MQ_RX_RSS,
.max_rx_pkt_len = ETHER_MAX_LEN,
.split_hdr_size = 0,
.header_split   = 0, /**< Header Split disabled */
@@ -225,11 +233,14 @@ static struct rte_eth_conf port_conf = {
.rx_adv_conf = {
.rss_conf = {
.rss_key = NULL,
-   .rss_hf = ETH_RSS_IP,
+   .rss_hf = ETH_RSS_UDP,
},
},
.txmode = {
-   .mq_mode = ETH_DCB_NONE,
+   .mq_mode = ETH_MQ_TX_NONE,
+   },
+   .intr_conf = {
+   .rxq = 1, /**< rxq interrupt feature enabled */
},
 };

@@ -401,19 +412,22 @@ power_timer_cb(__attribute__((unused)) struct rte_timer 
*tim,
/* accumulate total execution time in us when callback is invoked */
sleep_time_ratio = (float)(stats[lcore_id].sleep_time) /
(float)SCALING_PERIOD;
-
/**
 * check whether need to scale down frequency a step if it sleep a lot.
 */
-   if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD)
-   rte_power_freq_down(lcore_id);
+   if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) {
+   if (rte_power_freq_down)
+   rte_power_freq_down(lcore_id);
+   }
else if ( (unsigned)(stats[lcore_id].nb_rx_processed /
-   stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST)
+   stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) {
/**
 * scale down a step if average packet per iteration less
 * than expectation.
 */
-   rte_power_freq_down(lcore_id);
+   if (rte_power_freq_down)
+   rte_power_freq_down(lcore_id);
+   }

/**
 * initialize another timer according to current frequency to ensure
@@ -706,22 +720,20 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid,

 }

-#define SLEEP_GEAR1_THRESHOLD100
-#define SLEEP_GEAR2_THRESHOLD1000
+#define MINIMUM_SLEEP_TIME 1
+#define SUSPEND_THRESHOLD  300

 static inline uint32_t
 power_idle_heuristic(uint32_t zero_rx_packet_count)
 {
-   /* If zero count is less than 100, use it as the sleep time in us */
-   if (zero_rx_packet_count < SLEEP_GEAR1_THRESHOLD)
-   return zero_rx_packet_count;
-   /* If zero count is less than 1000, sleep time should be 100 us */
-   else if ((zero_rx_packet_count >= SLEEP_GEAR1_

[dpdk-dev] [PATCH v7 00/10] Interrupt mode PMD

2015-05-05 Thread Cunming Liang
v7 changes
 - decouple epoll event and intr operation
 - add condition check in the case intr vector is disabled
 - renaming some APIs

v6 changes
 - split rte_intr_wait_rx_pkt into two APIs 'wait' and 'set'.
 - rewrite rte_intr_rx_wait/rte_intr_rx_set.
 - using vector number instead of queue_id as interrupt API params.
 - patch reorder and split.

v5 changes
 - Rebase the patchset onto the HEAD
 - Isolate ethdev from EAL for new-added wait-for-rx interrupt function
 - Export wait-for-rx interrupt function for shared libraries
 - Split-off a new patch file for changed struct rte_intr_handle that
   other patches depend on, to avoid breaking git bisect
 - Change sample application to accommodate EAL function spec change
   accordingly

v4 changes
 - Export interrupt enable/disable functions for shared libraries
 - Adjust position of new-added structure fields and functions to
   avoid breaking ABI

v3 changes
 - Add return value for interrupt enable/disable functions
 - Move spinlock from PMD to L3fwd-power
 - Remove unnecessary variables in e1000_mac_info
 - Fix miscellaneous review comments

v2 changes
 - Fix compilation issue in Makefile for missed header file.
 - Consolidate internal and community review comments of v1 patch set.

The patch series introduce low-latency one-shot rx interrupt into DPDK with
polling and interrupt mode switch control example.

DPDK userspace interrupt notification and handling mechanism is based on UIO
with below limitation:
1) It is designed to handle LSC interrupt only with inefficient suspended
   pthread wakeup procedure (e.g. UIO wakes up LSC interrupt handling thread
   which then wakes up DPDK polling thread). In this way, it introduces
   non-deterministic wakeup latency for DPDK polling thread as well as packet
   latency if it is used to handle Rx interrupt.
2) UIO only supports a single interrupt vector which has to be shared by
   LSC interrupt and interrupts assigned to dedicated rx queues.

This patchset includes below features:
1) Enable one-shot rx queue interrupt in ixgbe PMD(PF & VF) and igb PMD(PF 
only).
2) Build on top of the VFIO mechanism instead of UIO, so it could support
   up to 64 interrupt vectors for rx queue interrupts.
3) Have 1 DPDK polling thread handle per Rx queue interrupt with a dedicated
   VFIO eventfd, which eliminates non-deterministic pthread wakeup latency in
   user space.
4) Demonstrate interrupts control APIs and userspace NAPI-like polling/interrupt
   switch algorithms in L3fwd-power example.

Known limitations:
1) It does not work for UIO because a single interrupt eventfd shared by LSC
   and rx queue interrupt handlers causes a mess.
2) LSC interrupt is not supported by VF driver, so it is by default disabled
   in L3fwd-power now. Feel free to turn it on if you want to support both LSC
   and rx queue interrupts on a PF.

Cunming Liang (10):
  eal/linux: add interrupt vectors support in intr_handle
  eal/linux: add rte_epoll_wait/ctl support
  eal/linux: add API to set rx interrupt event monitor
  eal/bsd: dummy for new intr definition
  eal/linux: fix comments typo on vfio msi
  eal/linux: add interrupt vectors handling on VFIO
  ethdev: add rx intr enable, disable and ctl functions
  ixgbe: enable rx queue interrupts for both PF and VF
  igb: enable rx queue interrupts for PF
  l3fwd-power: enable one-shot rx interrupt and polling/interrupt mode
switch

 examples/l3fwd-power/main.c| 206 --
 .../bsdapp/eal/include/exec-env/rte_interrupts.h   |   6 +
 lib/librte_eal/linuxapp/eal/eal_interrupts.c   | 232 +--
 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c |  12 +
 .../linuxapp/eal/include/exec-env/rte_interrupts.h |  97 +
 lib/librte_eal/linuxapp/eal/rte_eal_version.map|   4 +
 lib/librte_ether/rte_ethdev.c  | 132 +++
 lib/librte_ether/rte_ethdev.h  | 104 +
 lib/librte_ether/rte_ether_version.map |   4 +
 lib/librte_pmd_e1000/e1000_ethdev.h|   3 +
 lib/librte_pmd_e1000/igb_ethdev.c  | 256 +++--
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c| 425 -
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h|   7 +
 13 files changed, 1394 insertions(+), 94 deletions(-)

-- 
1.8.1.4



[dpdk-dev] [PATCH v6 1/2] Simplify the ifdefs in rte.app.mk.

2015-05-05 Thread Olivier MATZ
Hi Keith,

On 05/04/2015 06:50 PM, Wiles, Keith wrote:
>
>
> Sent from my iPhone
>
>> On May 4, 2015, at 8:27 AM, Olivier MATZ  wrote:
>>
>>
>>
>>> On 05/04/2015 04:36 PM, Wiles, Keith wrote:
>>>
>>>
 On 5/4/15, 2:18 AM, "Olivier MATZ"  wrote:

 Hi Keith,

> On 05/01/2015 04:22 PM, Keith Wiles wrote:
> Trying to simplify the ifdefs in rte.app.mk to make the code
> more readable and maintainable by moving LDLIBS variable to use
> the same style as LDLIBS-y being used in the rest of the code.
>
> Added a new variable called EXTRA_LDLIBS to be used by example apps
> instead of using LDLIBS directly. The new internal variable _LDLIBS
> should not be used outside of the rte.app.mk file. The makefiles
> can still use LDLIBS, but I would suggest using EXTRA_LDLIBS instead.

 Why are you suggesting to change LIBS to EXTRA_LIBS?
>>>
>>> Hi Olivier,
>>>
>>> I do not change LIBS to EXTRA_LIBS as I did not touch those variables.
>>>
>>> I did add EXTRA_LDLIBS and let LDLIBS as it was in the patch. I also
>>> created LDLIBS-y as an internal variable. Did I miss your point here?
>>
>> In your previous mail, you say "The makefiles can still use LDLIBS,
>> but I would suggest using EXTRA_LDLIBS instead."
>>
>> The question is: why are you suggesting that?
>>
>> And in the patch you are submitting, you are replacing LDLIBS
>> by EXTRA_LDLIBS in examples/dpdk_qat/Makefile and
>> examples/vm_power_manager/Makefile.
>>
>
> I thought use the extra variable was the right way in those make files. Could 
> have left them using LDLIBS but does it make any difference?

Why do you feel that changing to EXTRA_* is the right way?
Are you seeing a problem with the current use of LDLIBS?

Like I said previously (and Bruce agreed on that), using EXTRA_*
should be avoided in Makefiles because it prevents appending
something to the variable from the command line.


Regards,
Olivier


>
>> Regards,
>> Olivier
>>
>>
>>
>>>
>>> ++Keith
>>>
 We discussed in a previous thread that EXTRA_* variables should
 (as much as possible) be kept empty in Makefiles as it allows a
 user to append things in them.

 By the way, it would be easier to follow the different versions
 of your patches if you add "--in-reply-to " in your
 git-send-email command, as described in http://dpdk.org/dev

 Regards,
 Olivier


>
> Signed-off-by: Keith Wiles 
> ---
> examples/dpdk_qat/Makefile |   4 +-
> examples/vm_power_manager/Makefile |   2 +-
> mk/rte.app.mk  | 242
> +
> 3 files changed, 63 insertions(+), 185 deletions(-)
>
> diff --git a/examples/dpdk_qat/Makefile b/examples/dpdk_qat/Makefile
> index f1e06a1..90ca1d3 100644
> --- a/examples/dpdk_qat/Makefile
> +++ b/examples/dpdk_qat/Makefile
> @@ -77,8 +77,8 @@ else
> ICP_LIBRARY_PATH = $(ICP_ROOT)/build/libicp_qa_al.a
> endif
>
> -LDLIBS += -L$(ICP_ROOT)/build
> -LDLIBS += $(ICP_LIBRARY_PATH) \
> +EXTRA_LDLIBS += -L$(ICP_ROOT)/build
> +EXTRA_LDLIBS += $(ICP_LIBRARY_PATH) \
>  -lz \
>  -losal \
>  -ladf_proxy \
> diff --git a/examples/vm_power_manager/Makefile
> b/examples/vm_power_manager/Makefile
> index 113dbc4..8fb78d4 100644
> --- a/examples/vm_power_manager/Makefile
> +++ b/examples/vm_power_manager/Makefile
> @@ -48,7 +48,7 @@ SRCS-y += channel_monitor.c
> CFLAGS += -O3 -I$(RTE_SDK)/lib/librte_power/
> CFLAGS += $(WERROR_FLAGS)
>
> -LDLIBS += -lvirt
> +EXTRA_LDLIBS += -lvirt
>
> # workaround for a gcc bug with noreturn attribute
> # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
> index 62a76ae..b8030d2 100644
> --- a/mk/rte.app.mk
> +++ b/mk/rte.app.mk
> @@ -1,7 +1,7 @@
> #   BSD LICENSE
> #
> -#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> -#   Copyright(c) 2014 6WIND S.A.
> +#   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
> +#   Copyright(c) 2014-2015 6WIND S.A.
> #   All rights reserved.
> #
> #   Redistribution and use in source and binary forms, with or without
> @@ -51,7 +51,7 @@ LDSCRIPT = $(RTE_LDSCRIPT)
> endif
>
> # default path for libs
> -LDLIBS += -L$(RTE_SDK_BIN)/lib
> +_LDLIBS-y += -L$(RTE_SDK_BIN)/lib
>
> #
> # Include libraries depending on config if NO_AUTOLIBS is not set
> @@ -59,215 +59,93 @@ LDLIBS += -L$(RTE_SDK_BIN)/lib
> #
> ifeq ($(NO_AUTOLIBS),)
>
> -LDLIBS += --whole-archive
> +_LDLIBS-y += --whole-archive
>
> -ifeq ($(CONFIG_RTE_BUILD_COMBINE_LIBS),y)
> -LDLIBS += -l$(RTE_LIBNAME)
> -endif
> +_LDLIBS-$(CONFIG_RTE_BUILD_COMBINE_LIBS)+= -l$(RTE_LIBNAME)
>
> ifeq ($(CONFIG_RTE_BUILD_COMBI

[dpdk-dev] Issues with rte_hash_crc.h when compiling with C++

2015-05-05 Thread Pavel Odintsov
Hello!

Could anybody help me with this issue? :(

This file makes wide use of enum forward declarations, which are completely
incompatible with C++ and need some rewriting.

On Wed, Apr 29, 2015 at 3:17 PM, Pavel Odintsov
 wrote:
> Hello!
>
> I have C++ application compiles and works nice. But when I include
> rte_hash_crc.h header everything goes away.
>
>   CC main.o
> In file included from
> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/rte_cpuflags.h:46:0,
>  from
> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/rte_hash_crc.h:48,
>  from /root/interceptor/main.cpp:25:
> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/generic/rte_cpuflags.h:50:6:
> error: use of enum 'rte_cpu_flag_t' without previous declaration
>  enum rte_cpu_flag_t;
>   ^
> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/generic/rte_cpuflags.h:55:6:
> error: use of enum 'cpu_register_t' without previous declaration
>  enum cpu_register_t;
>   ^
> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/generic/rte_cpuflags.h:79:35:
> error: uninitialized const 'cpu_feature_table' [-fpermissive]
>  static const struct feature_entry cpu_feature_table[];
>^
> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/generic/rte_cpuflags.h:64:8:
> note: 'const struct feature_entry' has no user-provided default
> constructor
>  struct feature_entry {
> ^
> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/generic/rte_cpuflags.h:65:11:
> note: and the implicitly-defined constructor does not initialize
> 'uint32_t feature_entry::leaf'
>   uint32_t leaf;/**< cpuid leaf */
>^
> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/generic/rte_cpuflags.h:79:53:
> error: storage size of 'cpu_feature_table' isn't known
>  static const struct feature_entry cpu_feature_table[];
>  ^
> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/generic/rte_cpuflags.h:101:31:
> error: use of enum 'rte_cpu_flag_t' without previous declaration
>  rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature);
>^
> In file included from
> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/rte_hash_crc.h:48:0,
>  from /root/interceptor/main.cpp:25:
> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/rte_cpuflags.h:
> In function 'int rte_cpu_get_flag_enabled(rte_cpu_flag_t)':
> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/rte_cpuflags.h:278:53:
> error: conflicting declaration of C function 'int
> rte_cpu_get_flag_enabled(rte_cpu_flag_t)'
>  rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
>  ^
> In file included from
> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/rte_cpuflags.h:46:0,
>  from
> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/rte_hash_crc.h:48,
>  from /root/interceptor/main.cpp:25:
> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/generic/rte_cpuflags.h:101:1:
> note: previous declaration 'int rte_cpu_get_flag_enabled(int)'
>  rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature);
>  ^
> /usr/src/dpdk-2.0.0/mk/internal/rte.compile-pre.mk:145: recipe for
> target 'main.o' failed
> make[1]: *** [main.o] Error 1
> /usr/src/dpdk-2.0.0/mk/rte.extapp.mk:42: recipe for target 'all' failed
> make: *** [all] Error 2
>
> I prepared my environment with this manual:
> http://www.stableit.ru/2015/04/how-to-code-for-dpdk-with-c.html
>
> Could anybody help me with this header file and C++?
>
> --
> Sincerely yours, Pavel Odintsov



-- 
Sincerely yours, Pavel Odintsov


[dpdk-dev] Issues with rte_hash_crc.h when compiling with C++

2015-05-05 Thread Pavel Odintsov
I fixed this issue with very dirty hacks.

I commented out the lines mentioned here in the file
/usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/generic/rte_cpuflags.h:

//enum rte_cpu_flag_t;
//enum cpu_register_t;
// static const struct feature_entry cpu_feature_table[];
//static inline int
//rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature);

And everything compiled OK. But I thought C++ tests and compatibility
are a must for dpdk.

On Tue, May 5, 2015 at 10:57 AM, Pavel Odintsov
 wrote:
> Hello!
>
> Could anybody help me with this issue? :(
>
> In this file widely used enum forward declarations which completely
> incompatible with C++ and need some rewrite.
>
> On Wed, Apr 29, 2015 at 3:17 PM, Pavel Odintsov
>  wrote:
>> Hello!
>>
>> I have C++ application compiles and works nice. But when I include
>> rte_hash_crc.h header everything goes away.
>>
>>   CC main.o
>> In file included from
>> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/rte_cpuflags.h:46:0,
>>  from
>> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/rte_hash_crc.h:48,
>>  from /root/interceptor/main.cpp:25:
>> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/generic/rte_cpuflags.h:50:6:
>> error: use of enum 'rte_cpu_flag_t' without previous declaration
>>  enum rte_cpu_flag_t;
>>   ^
>> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/generic/rte_cpuflags.h:55:6:
>> error: use of enum 'cpu_register_t' without previous declaration
>>  enum cpu_register_t;
>>   ^
>> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/generic/rte_cpuflags.h:79:35:
>> error: uninitialized const 'cpu_feature_table' [-fpermissive]
>>  static const struct feature_entry cpu_feature_table[];
>>^
>> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/generic/rte_cpuflags.h:64:8:
>> note: 'const struct feature_entry' has no user-provided default
>> constructor
>>  struct feature_entry {
>> ^
>> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/generic/rte_cpuflags.h:65:11:
>> note: and the implicitly-defined constructor does not initialize
>> 'uint32_t feature_entry::leaf'
>>   uint32_t leaf;/**< cpuid leaf */
>>^
>> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/generic/rte_cpuflags.h:79:53:
>> error: storage size of 'cpu_feature_table' isn't known
>>  static const struct feature_entry cpu_feature_table[];
>>  ^
>> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/generic/rte_cpuflags.h:101:31:
>> error: use of enum 'rte_cpu_flag_t' without previous declaration
>>  rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature);
>>^
>> In file included from
>> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/rte_hash_crc.h:48:0,
>>  from /root/interceptor/main.cpp:25:
>> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/rte_cpuflags.h:
>> In function 'int rte_cpu_get_flag_enabled(rte_cpu_flag_t)':
>> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/rte_cpuflags.h:278:53:
>> error: conflicting declaration of C function 'int
>> rte_cpu_get_flag_enabled(rte_cpu_flag_t)'
>>  rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
>>  ^
>> In file included from
>> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/rte_cpuflags.h:46:0,
>>  from
>> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/rte_hash_crc.h:48,
>>  from /root/interceptor/main.cpp:25:
>> /usr/src/dpdk-2.0.0/x86_64-native-linuxapp-gcc/include/generic/rte_cpuflags.h:101:1:
>> note: previous declaration 'int rte_cpu_get_flag_enabled(int)'
>>  rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature);
>>  ^
>> /usr/src/dpdk-2.0.0/mk/internal/rte.compile-pre.mk:145: recipe for
>> target 'main.o' failed
>> make[1]: *** [main.o] Error 1
>> /usr/src/dpdk-2.0.0/mk/rte.extapp.mk:42: recipe for target 'all' failed
>> make: *** [all] Error 2
>>
>> I prepared my environment with this manual:
>> http://www.stableit.ru/2015/04/how-to-code-for-dpdk-with-c.html
>>
>> Could anybody help me with this header file and C++?
>>
>> --
>> Sincerely yours, Pavel Odintsov
>
>
>
> --
> Sincerely yours, Pavel Odintsov



-- 
Sincerely yours, Pavel Odintsov


[dpdk-dev] [PATCH] scripts: fix relpath.sh output when build dir is a symlink

2015-05-05 Thread Olivier Matz
The script relpath.sh returns the relative path of the first directory
from the second directory. It is used to generate relative symlinks,
which can be useful if the build directory is embedded in the dpdk
directory: the whole dpdk can be moved without breaking the links,
which is helpful for an installation.

In case the build directory is a symlink, the script was not generating
the proper relative path. Fix this by calling "readlink -f" on the
arguments.

Signed-off-by: Olivier Matz 
---
 scripts/relpath.sh | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/scripts/relpath.sh b/scripts/relpath.sh
index 00030e5..7d2f48f 100755
--- a/scripts/relpath.sh
+++ b/scripts/relpath.sh
@@ -43,8 +43,13 @@ if [ $# -ne 2 ]; then
exit 1
 fi

-REL1=${1#/}
-REL2=${2#/}
+# get the real absolute path, dereferencing symlinks
+ABS1=$(readlink -f $1)
+ABS2=$(readlink -f $2)
+
+# remove leading slash
+REL1=${ABS1#/}
+REL2=${ABS2#/}

 left1=${REL1%%/*}
 right1=${REL1#*/}
-- 
2.1.4



[dpdk-dev] [PATCH] scripts: fix relpath.sh output when build dir is a symlink

2015-05-05 Thread Olivier MATZ
Hi Simon,

On 05/05/2015 11:00 AM, Olivier Matz wrote:
> The script relpath.sh returns the relative path of the first directory
> from the second directory. It is used to generate relative symlinks,
> which can be useful if the build directory is embedded in the dpdk
> directory: the whole dpdk can be moved without breaking the links,
> which is helpful for an installation.
>
> In case the build directory is a symlink, the script was not generating
> the proper relative path. Fix this by calling "readlink -f" on the
> arguments.
>
> Signed-off-by: Olivier Matz 

Can you have a try with this patch?

Regards,
Olivier




[dpdk-dev] [PATCH] scripts: fix relpath.sh output when build dir is a symlink

2015-05-05 Thread Simon Kågström
On 2015-05-05 11:00, Olivier MATZ wrote:
> On 05/05/2015 11:00 AM, Olivier Matz wrote:
>> The script relpath.sh returns the relative path of the first directory
>> from the second directory. It is used to generate relative symlinks,
>> which can be useful if the build directory is embedded in the dpdk
>> directory: the whole dpdk can be moved without breaking the links,
>> which is helpful for an installation.
>>
>> In case the build directory is a symlink, the script was not generating
>> the proper relative path. Fix this by calling "readlink -f" on the
>> arguments.
> 
> Can you have a try with this patch?

Yes, this fixes my build issue, thanks!

Verified-by: Simon Kagstrom 

// Simon


[dpdk-dev] [PATCH 1/3] lib: set LDLIBS for each library

2015-05-05 Thread Gonzalez Monroy, Sergio
On 04/05/2015 08:55, Olivier MATZ wrote:
> Hi Sergio,
>
> On 04/15/2015 11:30 AM, Sergio Gonzalez Monroy wrote:
>> This patch introduces a new LDLIBS variable to be set per library.
>> Its purpose is to specify the library's dependent libraries to
>> be explicitly linked against.
>>
>> Given the circular dependencies between eal, malloc, mempool and ring,
>> we work around it by not linking eal against its dependent DPDK libraries.
>> Therefore, eal will not have proper DT_NEEDED entries (ie. no DT_NEEDED
>> entries for librte_malloc and librte_mempool).
>>
>> This means that any application that links against eal, needs to be
>> certain of linking against malloc, mempool and ring too, to prevent
>> a case where the application does not directly use mempool (therefore
>> no DT_NEEDED entry). In such case, the application will fail to start as
>> eal does not have a DT_NEEDED entry for mempool either.
>>
>> Signed-off-by: Sergio Gonzalez Monroy 
>> ---
>> [...]
>> --- a/lib/librte_ip_frag/Makefile
>> +++ b/lib/librte_ip_frag/Makefile
>> @@ -41,6 +41,8 @@ EXPORT_MAP := rte_ipfrag_version.map
>>   
>>   LIBABIVER := 1
>>   
>> +LDLIBS += -lrte_eal -lrte_malloc -lethdev
>> +
>>   #source files
>>   SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ipv4_fragmentation.c
>>   SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ipv6_fragmentation.c
>> @@ -55,5 +57,6 @@ SYMLINK-$(CONFIG_RTE_LIBRTE_IP_FRAG)-include += 
>> rte_ip_frag.h
>>   
>>   # this library depends on rte_ether
>>   DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_mempool lib/librte_ether
>> +DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_malloc lib/librte_mbuf
>>   
>>   include $(RTE_SDK)/mk/rte.lib.mk
> It seems that in the rest of the patch LDLIBS and DEPDIRS-y are
> often similar, but that's not the case here. Can you confirm it's
> because librte_ip_frag only uses inlined functions from librte_mempool
> and librte_mbuf?
>
I have rebased the code a few times, so I may have missed some info :)
I did try to update DEPDIRS with required info but it was all manual work.

As you mention, LDLIBS and DEPDIRS are mostly similar but in DEPDIRS we 
also specify  dirs
that would install required headers such as ether, mempool and mbuf.

By the way, I just realized that -lethdev is not required. It is not an 
issue because we link
with --as-needed so only required libs would have DT_NEEDED entries (if 
you do ldd on the
shared library you would not see a dependency for ethdev).

I could have added eal too as DEPDIRS, but it is already implicit by 
having mempool and malloc.

> Did you use a specific scripting method to find the good value for
> LDLIBS or did you do it manually? Is there a way to check that the
> values are correct?
>
I did add '-z defs' to LDLIBS (except for eal) so the linker would 
complain if any of the
symbols was not resolved at build time. With this flag we can check that 
all the required libs
are specified. If we specify libs that are not required, the linker 
would ignore them anyway.

So initially I set LDLIBS to the values of DEPDIRS, then checked that if 
some libraries were still
required, finally updating proper LDLIBS. To answer your question, I 
guess all was manual work.

My first patch did add this flag by default when linking libs but I 
removed it as we decided
to leave eal without proper DT_NEEDED entries.

> Thanks,
> Olivier
>
>
I'll be sending a v2 with corrected commit for the last patch and I'll 
remove -lether from
librte_ip_frag.

Let me know if you have any other comments/suggestions.

Thanks for the review!
Sergio

>> diff --git a/lib/librte_ivshmem/Makefile b/lib/librte_ivshmem/Makefile
>> index 16defdb..fab6f5f 100644
>> --- a/lib/librte_ivshmem/Makefile
>> +++ b/lib/librte_ivshmem/Makefile
>> @@ -40,6 +40,8 @@ EXPORT_MAP := rte_ivshmem_version.map
>>   
>>   LIBABIVER := 1
>>   
>> +LDLIBS += -lrte_mempool
>> +
>>   # all source are stored in SRCS-y
>>   SRCS-$(CONFIG_RTE_LIBRTE_IVSHMEM) := rte_ivshmem.c
>>   
>> diff --git a/lib/librte_jobstats/Makefile b/lib/librte_jobstats/Makefile
>> index 136a448..04589d4 100644
>> --- a/lib/librte_jobstats/Makefile
>> +++ b/lib/librte_jobstats/Makefile
>> @@ -41,6 +41,8 @@ EXPORT_MAP := rte_jobstats_version.map
>>   
>>   LIBABIVER := 1
>>   
>> +LDLIBS += -lrte_eal
>> +
>>   # all source are stored in SRCS-y
>>   SRCS-$(CONFIG_RTE_LIBRTE_JOBSTATS) := rte_jobstats.c
>>   
>> diff --git a/lib/librte_kni/Makefile b/lib/librte_kni/Makefile
>> index 7107832..504ecf7 100644
>> --- a/lib/librte_kni/Makefile
>> +++ b/lib/librte_kni/Makefile
>> @@ -40,6 +40,8 @@ EXPORT_MAP := rte_kni_version.map
>>   
>>   LIBABIVER := 1
>>   
>> +LDLIBS += -lrte_eal -lrte_malloc -lethdev
>> +
>>   # all source are stored in SRCS-y
>>   SRCS-$(CONFIG_RTE_LIBRTE_KNI) := rte_kni.c
>>   
>> diff --git a/lib/librte_kvargs/Makefile b/lib/librte_kvargs/Makefile
>> index 87b09f2..173e1ac 100644
>> --- a/lib/librte_kvargs/Makefile
>> +++ b/lib/librte_kvargs/Makefile
>> @@ -42,6 +42,8 @@ EXPORT_MAP := rte_kvar

[dpdk-dev] [PATCH RFC 1/6] mbuf: update mbuf structure for QinQ support

2015-05-05 Thread Ananyev, Konstantin
Hi Helin,

> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Helin Zhang
> Sent: Tuesday, May 05, 2015 3:32 AM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH RFC 1/6] mbuf: update mbuf structure for QinQ 
> support
> 
> To support QinQ, 'vlan_tci' should be replaced by 'vlan_tci0' and
> 'vlan_tci1'. Also new offload flags of 'PKT_RX_QINQ_PKT' and
> 'PKT_TX_QINQ_PKT' should be added.
> 
> Signed-off-by: Helin Zhang 
> ---
>  app/test-pmd/flowgen.c|  2 +-
>  app/test-pmd/macfwd.c |  2 +-
>  app/test-pmd/macswap.c|  2 +-
>  app/test-pmd/rxonly.c |  2 +-
>  app/test-pmd/txonly.c |  2 +-
>  app/test/packet_burst_generator.c |  4 ++--
>  lib/librte_ether/rte_ether.h  |  4 ++--
>  lib/librte_mbuf/rte_mbuf.h| 22 +++---
>  lib/librte_pmd_e1000/em_rxtx.c|  8 
>  lib/librte_pmd_e1000/igb_rxtx.c   |  8 
>  lib/librte_pmd_enic/enic_ethdev.c |  2 +-
>  lib/librte_pmd_enic/enic_main.c   |  2 +-
>  lib/librte_pmd_fm10k/fm10k_rxtx.c |  2 +-
>  lib/librte_pmd_i40e/i40e_rxtx.c   |  8 
>  lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 11 +--
>  lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c |  6 +++---
>  16 files changed, 51 insertions(+), 36 deletions(-)
> 
> diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
> index 72016c9..f24b00c 100644
> --- a/app/test-pmd/flowgen.c
> +++ b/app/test-pmd/flowgen.c
> @@ -207,7 +207,7 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
>   pkt->nb_segs= 1;
>   pkt->pkt_len= pkt_size;
>   pkt->ol_flags   = ol_flags;
> - pkt->vlan_tci   = vlan_tci;
> + pkt->vlan_tci0  = vlan_tci;
>   pkt->l2_len = sizeof(struct ether_hdr);
>   pkt->l3_len = sizeof(struct ipv4_hdr);
>   pkts_burst[nb_pkt]  = pkt;
> diff --git a/app/test-pmd/macfwd.c b/app/test-pmd/macfwd.c
> index 035e5eb..590b613 100644
> --- a/app/test-pmd/macfwd.c
> +++ b/app/test-pmd/macfwd.c
> @@ -120,7 +120,7 @@ pkt_burst_mac_forward(struct fwd_stream *fs)
>   mb->ol_flags = ol_flags;
>   mb->l2_len = sizeof(struct ether_hdr);
>   mb->l3_len = sizeof(struct ipv4_hdr);
> - mb->vlan_tci = txp->tx_vlan_id;
> + mb->vlan_tci0 = txp->tx_vlan_id;
>   }
>   nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx);
>   fs->tx_packets += nb_tx;
> diff --git a/app/test-pmd/macswap.c b/app/test-pmd/macswap.c
> index 6729849..c355399 100644
> --- a/app/test-pmd/macswap.c
> +++ b/app/test-pmd/macswap.c
> @@ -122,7 +122,7 @@ pkt_burst_mac_swap(struct fwd_stream *fs)
>   mb->ol_flags = ol_flags;
>   mb->l2_len = sizeof(struct ether_hdr);
>   mb->l3_len = sizeof(struct ipv4_hdr);
> - mb->vlan_tci = txp->tx_vlan_id;
> + mb->vlan_tci0 = txp->tx_vlan_id;
>   }
>   nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx);
>   fs->tx_packets += nb_tx;
> diff --git a/app/test-pmd/rxonly.c b/app/test-pmd/rxonly.c
> index ac56090..aa2cf7f 100644
> --- a/app/test-pmd/rxonly.c
> +++ b/app/test-pmd/rxonly.c
> @@ -159,7 +159,7 @@ pkt_burst_receive(struct fwd_stream *fs)
>  mb->hash.fdir.hash, mb->hash.fdir.id);
>   }
>   if (ol_flags & PKT_RX_VLAN_PKT)
> - printf(" - VLAN tci=0x%x", mb->vlan_tci);
> + printf(" - VLAN tci=0x%x", mb->vlan_tci0);
>   if (is_encapsulation) {
>   struct ipv4_hdr *ipv4_hdr;
>   struct ipv6_hdr *ipv6_hdr;
> diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
> index ca32c85..4a2827f 100644
> --- a/app/test-pmd/txonly.c
> +++ b/app/test-pmd/txonly.c
> @@ -266,7 +266,7 @@ pkt_burst_transmit(struct fwd_stream *fs)
>   pkt->nb_segs = tx_pkt_nb_segs;
>   pkt->pkt_len = tx_pkt_length;
>   pkt->ol_flags = ol_flags;
> - pkt->vlan_tci  = vlan_tci;
> + pkt->vlan_tci0  = vlan_tci;
>   pkt->l2_len = sizeof(struct ether_hdr);
>   pkt->l3_len = sizeof(struct ipv4_hdr);
>   pkts_burst[nb_pkt] = pkt;
> diff --git a/app/test/packet_burst_generator.c 
> b/app/test/packet_burst_generator.c
> index b46eed7..959644c 100644
> --- a/app/test/packet_burst_generator.c
> +++ b/app/test/packet_burst_generator.c
> @@ -270,7 +270,7 @@ nomore_mbuf:
>   pkt->l2_len = eth_hdr_size;
> 
>   if (ipv4) {
> - pkt->vlan_tci  = ETHER_TYPE_IPv4;
> + pkt->vlan_tci0  = ETHER_TYPE_IPv4;
>   pkt->l3_len = sizeof(struct ipv4_hdr);
> 
>   if (vlan_enabled)
> @@ -278,7 +278,7 @@ nomore_mbuf

[dpdk-dev] [PATCH 1/3] lib: set LDLIBS for each library

2015-05-05 Thread Olivier MATZ
Hi,

On 05/05/2015 11:21 AM, Gonzalez Monroy, Sergio wrote:
> On 04/05/2015 08:55, Olivier MATZ wrote:
>> Hi Sergio,
>>
>> On 04/15/2015 11:30 AM, Sergio Gonzalez Monroy wrote:
>>> This patch introduces a new LDLIBS variable to be set per library.
>>> Its purpose is to specify the library's dependent libraries to
>>> be explicitly linked against.
>>>
>>> Given the circular dependencies between eal, malloc, mempool and ring,
>>> we work around it by not linking eal against its dependent DPDK
>>> libraries.
>>> Therefore, eal will not have proper DT_NEEDED entries (ie. no DT_NEEDED
>>> entries for librte_malloc and librte_mempool).
>>>
>>> This means that any application that links against eal, needs to be
>>> certain of linking against malloc, mempool and ring too, to prevent
>>> a case where the application does not directly use mempool (therefore
>>> no DT_NEEDED entry). In such case, the application will fail to start as
>>> eal does not have a DT_NEEDED entry for mempool either.
>>>
>>> Signed-off-by: Sergio Gonzalez Monroy 
>>> ---
>>> [...]
>>> --- a/lib/librte_ip_frag/Makefile
>>> +++ b/lib/librte_ip_frag/Makefile
>>> @@ -41,6 +41,8 @@ EXPORT_MAP := rte_ipfrag_version.map
>>>   LIBABIVER := 1
>>> +LDLIBS += -lrte_eal -lrte_malloc -lethdev
>>> +
>>>   #source files
>>>   SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ipv4_fragmentation.c
>>>   SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ipv6_fragmentation.c
>>> @@ -55,5 +57,6 @@ SYMLINK-$(CONFIG_RTE_LIBRTE_IP_FRAG)-include +=
>>> rte_ip_frag.h
>>>   # this library depends on rte_ether
>>>   DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_mempool
>>> lib/librte_ether
>>> +DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_malloc
>>> lib/librte_mbuf
>>>   include $(RTE_SDK)/mk/rte.lib.mk
>> It seems that in the rest of the patch LDLIBS and DEPDIRS-y are
>> often similar, but that's not the case here. Can you confirm it's
>> because librte_ip_frag only uses inlined functions from librte_mempool
>> and librte_mbuf?
>>
> I have rebased the code a few times, so I may have missed some info :)
> I did try to update DEPDIRS with required info but it was all manual work.
>
> As you mention, LDLIBS and DEPDIRS are mostly similar but in DEPDIRS we
> also specify  dirs
> that would install required headers such as ether, mempool and mbuf.
>
> By the way, I just realized that -lethdev is not required. It is not an
> issue because we link
> with --as-needed so only required libs would have DT_NEEDED entries (if
> you do ldd on the
> shared library you would not see a dependency for ethdev).
>
> I could have added eal too as DEPDIRS, but it is already implicit by
> having mempool and malloc.
>
>> Did you use a specific scripting method to find the good value for
>> LDLIBS or did you do it manually? Is there a way to check that the
>> values are correct?
>>
> I did add '-z defs' to LDLIBS (except for eal) so the linker would
> complain if any of the
> symbols were not resolved at build time. With this flag we can check that
> all the required libs
> are specified. If we specify libs that are not required, the linker
> would ignore them anyway.
>
> So initially I set LDLIBS to the values of DEPDIRS, then checked that if
> some libraries were still
> required, finally updating proper LDLIBS. To answer your question, I
> guess all was manual work.
>
> My first patch did add this flag by default when linking libs but I
> removed as we decided
> to leave eal without proper DT_NEEDED entries.
>
>> Thanks,
>> Olivier
>>
>>
> I'll be sending a v2 with corrected commit for the last patch and I'll
> remove -lethdev from
> librte_ip_frag.
>
> Let me know if you have any other comments/suggestions.

The rest of the patch looks good to me. The commit log describes
pretty well what changes are done, but maybe adding some words
about why we need this change would be even better, as the cover letter
won't appear in git history.

Regards,
Olivier



>
> Thanks for the review!
> Sergio
>
>>> diff --git a/lib/librte_ivshmem/Makefile b/lib/librte_ivshmem/Makefile
>>> index 16defdb..fab6f5f 100644
>>> --- a/lib/librte_ivshmem/Makefile
>>> +++ b/lib/librte_ivshmem/Makefile
>>> @@ -40,6 +40,8 @@ EXPORT_MAP := rte_ivshmem_version.map
>>>   LIBABIVER := 1
>>> +LDLIBS += -lrte_mempool
>>> +
>>>   # all source are stored in SRCS-y
>>>   SRCS-$(CONFIG_RTE_LIBRTE_IVSHMEM) := rte_ivshmem.c
>>> diff --git a/lib/librte_jobstats/Makefile b/lib/librte_jobstats/Makefile
>>> index 136a448..04589d4 100644
>>> --- a/lib/librte_jobstats/Makefile
>>> +++ b/lib/librte_jobstats/Makefile
>>> @@ -41,6 +41,8 @@ EXPORT_MAP := rte_jobstats_version.map
>>>   LIBABIVER := 1
>>> +LDLIBS += -lrte_eal
>>> +
>>>   # all source are stored in SRCS-y
>>>   SRCS-$(CONFIG_RTE_LIBRTE_JOBSTATS) := rte_jobstats.c
>>> diff --git a/lib/librte_kni/Makefile b/lib/librte_kni/Makefile
>>> index 7107832..504ecf7 100644
>>> --- a/lib/librte_kni/Makefile
>>> +++ b/lib/librte_kni/Makefile
>>> @@ -40,6 +40,8 @@ EXPORT_MAP := rte_kni_version.m

[dpdk-dev] [PATCH] kni: fix compilation issue on kernel 3.19

2015-05-05 Thread De Lara Guarch, Pablo


> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Pablo de Lara
> Sent: Monday, May 04, 2015 10:46 PM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH] kni: fix compilation issue on kernel 3.19
> 
> Due to commit c0371da6 in kernel 3.19, which removed msg_iov
> and msg_iovlen from struct msghdr, DPDK would not build.
> 
> This patch makes use of struct iov_iter, which has references
> to those two variables.
> 
> Reported-by: Thomas Monjalon 
> Signed-off-by: Pablo de Lara 

Self-NACK. Kernel 4.0 has other changes that require other fixes, incompatible 
with this patch.
Will send a v2 compatible with both kernels 3.19 and 4.0


[dpdk-dev] Error about dpdk ovs on AWS VM

2015-05-05 Thread topperxin
Hi list
 I am trying to compile and run DPDK OVS on an AWS VM, and I have run into 
some problems:
 1. I can only set the hugepage size to 2M, not to 1G. The reason is that the 
CPU only has
 the 'pse' flag, which supports only the 2M huge page size, and does not have 
'pdpe1gb', which supports the 1G huge
 page size. So, if I only use the 2M huge page size, can I run DPDK 
OVS?
 2. When I run the command:
  /vswitchd/ovs-vswitchd --dpdk -c 0x1 -n 2 -- unix:$DB_SOCK 
--pidfile --detach

 I will get some Error log, such as below, and then when I want to 
bridge one of the containers NIC as dpdk type, I will get error as below:
=
 ovs-vsctl: Error detected while setting up '1eb915d295254_l'.  See 
ovs-vswitchd log for details.
=

Who can give me some advice on how to debug these errors? thanks a lot.


2015-05-05T11:32:29Z|1|dpdk|INFO|No cuse_dev_name provided - defaulting to 
/dev/vhost-net
EAL: Detected lcore 0 as core 0 on socket 0
EAL: Detected lcore 1 as core 0 on socket 0
EAL: Support maximum 128 logical core(s) by configuration.
EAL: Detected 2 lcore(s)
EAL: VFIO modules not all loaded, skip VFIO support...
EAL: Setting up memory...
EAL: Ask a virtual area of 0xe0 bytes
EAL: Virtual area found at 0x7f61d340 (size = 0xe0)
EAL: Ask a virtual area of 0x20 bytes
EAL: Virtual area found at 0x7f61d300 (size = 0x20)
EAL: Ask a virtual area of 0x3ec0 bytes
EAL: Virtual area found at 0x7f619420 (size = 0x3ec0)
EAL: Ask a virtual area of 0x20 bytes
EAL: Virtual area found at 0x7f6193e0 (size = 0x20)
EAL: Ask a virtual area of 0x20 bytes
EAL: Virtual area found at 0x7f6193a0 (size = 0x20)
EAL: Requesting 512 pages of size 2MB from socket 0
EAL: TSC frequency is ~2793323 KHz
EAL: WARNING: cpu flags constant_tsc=yes nonstop_tsc=no -> using unreliable 
clock cycles !
EAL: Master lcore 0 is ready (tid=d632db40;cpuset=[0])
EAL: PCI device :00:03.0 on NUMA socket -1
EAL:   probe driver: 8086:10ed rte_ixgbevf_pmd
EAL:   PCI memory mapped at 0x7f61d420
EAL:   PCI memory mapped at 0x7f61d4204000
PMD: eth_ixgbevf_dev_init(): port 0 vendorID=0x8086 deviceID=0x10ed 
mac.type=ixgbe_mac_82599_vf
EAL: PCI device :00:04.0 on NUMA socket -1
EAL:   probe driver: 8086:10ed rte_ixgbevf_pmd
EAL:   Not managed by a supported kernel driver, skipped
Zone 0: name:, phys:0x3700, len:0xb0, 
virt:0x7f61d340, socket_id:0, flags:0
Zone 1: name:, phys:0xa780, len:0x2080, 
virt:0x7f61d300, socket_id:0, flags:0
Zone 2: name:, phys:0x37b0, len:0x28a0c0, 
virt:0x7f61d3f0, socket_id:0, flags:0
Zone 3: name:, phys:0x37d8a0c0, len:0x1f400, 
virt:0x7f61d418a0c0, socket_id:0, flags:0
2015-05-05T11:32:30Z|2|ovs_numa|INFO|Discovered 2 CPU cores on NUMA node 0
2015-05-05T11:32:30Z|3|ovs_numa|INFO|Discovered 1 NUMA nodes and 2 CPU cores
2015-05-05T11:32:30Z|4|reconnect|INFO|unix:/usr/local/var/run/openvswitch/db.sock:
 connecting...
VHOST_CONFIG: char device /dev/vhost-net already exists
2015-05-05T11:32:30Z|5|dpdk|ERR|CUSE device setup failure.
2015-05-05T11:32:30Z|6|netdev|ERR|failed to initialize dpdkvhost network 
device class: Unknown error -1
2015-05-05T11:32:30Z|7|reconnect|INFO|unix:/usr/local/var/run/openvswitch/db.sock:
 connected
2015-05-05T11:32:30Z|8|ofproto_dpif|INFO|netdev at ovs-netdev: Datapath 
supports recirculation
2015-05-05T11:32:30Z|9|ofproto_dpif|INFO|netdev at ovs-netdev: MPLS label 
stack length probed as 3
2015-05-05T11:32:30Z|00010|ofproto_dpif|INFO|netdev at ovs-netdev: Datapath 
supports unique flow ids
2015-05-05T11:32:30Z|00011|bridge|INFO|bridge br0: added interface br0 on port 
65534
2015-05-05T11:32:30Z|00012|netdev_linux|WARN|br0: obtaining netdev stats via 
vport failed (No such device)
PMD: ixgbevf_dev_configure(): Configured Virtual Function port id: 0
PMD: ixgbevf_dev_configure(): VF can't disable HW CRC Strip
PMD: ixgbe_dev_tx_queue_setup(): sw_ring=0x7f61d3ef4000 hw_ring=0x7f61d41a9500 
dma_addr=0x37da9500
PMD: ixgbe_set_tx_function(): Using simple tx code path
PMD: ixgbe_set_tx_function(): Vector tx enabled.
PMD: check_rx_burst_bulk_alloc_preconditions(): Rx Burst Bulk Alloc 
Preconditions: rxq->rx_free_thresh=0, RTE_PMD_IXGBE_RX_MAX_BURST=32
PMD: ixgbe_dev_rx_queue_setup(): queue[0] doesn't meet Rx Bulk Alloc 
preconditions - canceling the feature for the whole port[0]
PMD: ixgbe_dev_rx_queue_setup(): sw_ring=0x7f61d3eefc80 sw_rsc_ring=(nil) 
hw_ring=0x7f61d41b9500 dma_addr=0x37db9500
PMD: ixgbevf_dev_rx_init(): forcing scatter mode
2015-05-05T11:32:30Z|00013|dpdk|INFO|Port 0: 06:33:ae:98:8c:8c
PMD: ixgbevf_dev_configure(): Configured Virtual Function port id: 0
PMD: ixgbevf_dev_configure(): VF can't disable HW CRC Strip
PMD: ixgbe_dev_tx_queue_setup(): sw

[dpdk-dev] [PATCH v2] kni: fix compilation issue in KNI vhost on kernel 3.19/4.0

2015-05-05 Thread Pablo de Lara
Due to commit c0371da6 in kernel 3.19, which removed msg_iov
and msg_iovlen from struct msghdr, DPDK would not build.
Also, functions memcpy_toiovecend and memcpy_fromiovecend
were removed in commits ba7438ae and 57dd8a07, being substituted by
copy_from_iter and copy_to_iter.

This patch makes use of struct iov_iter, which has references
to msg_iov and msg_iovlen, and makes use of copy_from_iter
and copy_to_iter.

Reported-by: Thomas Monjalon 
Signed-off-by: Pablo de Lara 
---
 lib/librte_eal/linuxapp/kni/compat.h|4 +++
 lib/librte_eal/linuxapp/kni/kni_vhost.c |   33 +++---
 2 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/lib/librte_eal/linuxapp/kni/compat.h 
b/lib/librte_eal/linuxapp/kni/compat.h
index 1313523..1ad22ba 100644
--- a/lib/librte_eal/linuxapp/kni/compat.h
+++ b/lib/librte_eal/linuxapp/kni/compat.h
@@ -19,3 +19,7 @@
 #define sk_sleep(s) (s)->sk_sleep

 #endif /* < 2.6.35 */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0)
+#define HAVE_IOV_ITER_MSGHDR
+#endif
diff --git a/lib/librte_eal/linuxapp/kni/kni_vhost.c 
b/lib/librte_eal/linuxapp/kni/kni_vhost.c
index 7141f83..a35fa92 100644
--- a/lib/librte_eal/linuxapp/kni/kni_vhost.c
+++ b/lib/librte_eal/linuxapp/kni/kni_vhost.c
@@ -76,7 +76,7 @@ static struct proto kni_raw_proto = {
 };

 static inline int
-kni_vhost_net_tx(struct kni_dev *kni, struct iovec *iov,
+kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m,
 unsigned offset, unsigned len)
 {
struct rte_kni_mbuf *pkt_kva = NULL;
@@ -84,7 +84,11 @@ kni_vhost_net_tx(struct kni_dev *kni, struct iovec *iov,
int ret;

KNI_DBG_TX("tx offset=%d, len=%d, iovlen=%d\n",
-  offset, len, (int)iov->iov_len);
+#ifdef HAVE_IOV_ITER_MSGHDR
+  offset, len, (int)m->msg_iter.iov->iov_len);
+#else
+  offset, len, (int)m->iov->iov_len);
+#endif

/**
 * Check if it has at least one free entry in tx_q and
@@ -108,7 +112,12 @@ kni_vhost_net_tx(struct kni_dev *kni, struct iovec *iov,
data_kva = pkt_kva->buf_addr + pkt_kva->data_off
   - kni->mbuf_va + kni->mbuf_kva;

-   memcpy_fromiovecend(data_kva, iov, offset, len);
+#ifdef HAVE_IOV_ITER_MSGHDR
+   copy_from_iter(data_kva, len, &m->msg_iter);
+#else
+   memcpy_fromiovecend(data_kva, m->iov, offset, len);
+#endif
+
if (unlikely(len < ETH_ZLEN)) {
memset(data_kva + len, 0, ETH_ZLEN - len);
len = ETH_ZLEN;
@@ -143,7 +152,7 @@ drop:
 }

 static inline int
-kni_vhost_net_rx(struct kni_dev *kni, struct iovec *iov,
+kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m,
 unsigned offset, unsigned len)
 {
uint32_t pkt_len;
@@ -177,10 +186,18 @@ kni_vhost_net_rx(struct kni_dev *kni, struct iovec *iov,
goto drop;

KNI_DBG_RX("rx offset=%d, len=%d, pkt_len=%d, iovlen=%d\n",
-  offset, len, pkt_len, (int)iov->iov_len);
+#ifdef HAVE_IOV_ITER_MSGHDR
+  offset, len, pkt_len, (int)m->msg_iter.iov->iov_len);
+#else
+  offset, len, pkt_len, (int)m->iov->iov_len);
+#endif

data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va + kni->mbuf_kva;
-   if (unlikely(memcpy_toiovecend(iov, data_kva, offset, pkt_len)))
+#ifdef HAVE_IOV_ITER_MSGHDR
+   if (unlikely(copy_to_iter(data_kva, pkt_len, &m->msg_iter)))
+#else
+   if (unlikely(memcpy_toiovecend(m->iov, data_kva, offset, pkt_len)))
+#endif
goto drop;

/* Update statistics */
@@ -362,7 +379,7 @@ kni_sock_sndmsg(struct kiocb *iocb, struct socket *sock,
if (unlikely(len < ETH_HLEN + q->vnet_hdr_sz))
return -EINVAL;

-   return kni_vhost_net_tx(q->kni, m->msg_iov, vnet_hdr_len, len);
+   return kni_vhost_net_tx(q->kni, m, vnet_hdr_len, len);
 }

 static int
@@ -391,7 +408,7 @@ kni_sock_rcvmsg(struct kiocb *iocb, struct socket *sock,
 #endif

if (unlikely(0 == (pkt_len = kni_vhost_net_rx(q->kni,
-   m->msg_iov, vnet_hdr_len, len
+   m, vnet_hdr_len, len
return 0;

 #ifdef RTE_KNI_VHOST_VNET_HDR_EN
-- 
1.7.4.1



[dpdk-dev] [PATCH] eal/bsdapp: fix compilation on FreeBSD

2015-05-05 Thread Bruce Richardson
Fixes: 6065355a "pci: make device id tables const"

Following the above commit, compilation on FreeBSD with clang was broken,
giving the error message:

.../lib/librte_eal/bsdapp/eal/eal_pci.c:438:16: fatal error: assigning to
  'struct rte_pci_id *' from 'const struct rte_pci_id *' discards qualifiers
  [-Wincompatible-pointer-types-discards-qualifiers]
for (id_table = dr->id_table ; id_table->vendor_id != 0; id_table++) {
  ^ 

This patch fixes the issue by adding "const" to the type of id_table.

Signed-off-by: Bruce Richardson 
---
 lib/librte_eal/bsdapp/eal/eal_pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c 
b/lib/librte_eal/bsdapp/eal/eal_pci.c
index 30f0232..61e8921 100644
--- a/lib/librte_eal/bsdapp/eal/eal_pci.c
+++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
@@ -432,7 +432,7 @@ error:
 int
 rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device 
*dev)
 {
-   struct rte_pci_id *id_table;
+   const struct rte_pci_id *id_table;
int ret;

for (id_table = dr->id_table ; id_table->vendor_id != 0; id_table++) {
-- 
2.1.0



[dpdk-dev] [PATCH v2] Add toeplitz hash algorithm used by RSS

2015-05-05 Thread Vladimir Medvedkin
Software implementation of the Toeplitz hash function used by RSS.
Can be used either for packet distribution on single queue NIC
or for simulating of RSS computation on specific NIC (for example
after GRE header decapsulating).

v2 changes
- Add ipv6 support
- Various style fixes

Signed-off-by: Vladimir Medvedkin 
---
 lib/librte_hash/Makefile|   1 +
 lib/librte_hash/rte_thash.h | 209 
 2 files changed, 210 insertions(+)
 create mode 100644 lib/librte_hash/rte_thash.h

diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile
index 3696cb1..981230b 100644
--- a/lib/librte_hash/Makefile
+++ b/lib/librte_hash/Makefile
@@ -49,6 +49,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_fbk_hash.c
 SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include := rte_hash.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_hash_crc.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_jhash.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_thash.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_fbk_hash.h

 # this lib needs eal
diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h
new file mode 100644
index 000..42c7bf6
--- /dev/null
+++ b/lib/librte_hash/rte_thash.h
@@ -0,0 +1,209 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_THASH_H
+#define _RTE_THASH_H
+
+/**
+ * @file
+ *
+ * toeplitz hash functions.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Software implementation of the Toeplitz hash function used by RSS.
+ * Can be used either for packet distribution on single queue NIC
+ * or for simulating of RSS computation on specific NIC (for example
+ * after GRE header decapsulating)
+ */
+
+#include 
+#include 
+#include 
+
+#ifdef __SSE3__
+static const __m128i bswap_mask = {0x0405060700010203, 0x0C0D0E0F08090A0B};
+#endif
+
+enum rte_thash_len {
+   RTE_THASH_V4_L3 = 2,/*calculate hash of ipv4 header only*/
+   RTE_THASH_V4_L4 = 3,/*calculate hash of ipv4 + transport headers*/
+   RTE_THASH_V6_L3 = 8,/*calculate hash of ipv6 header only */
+   RTE_THASH_V6_L4 = 9,/*calculate hash of ipv6 + transport headers */
+};
+
+/**
+ * IPv4 tuple
+ * addresses and ports have to be CPU byte order
+ */
+struct rte_ipv4_tuple {
+   uint32_tsrc_addr;
+   uint32_tdst_addr;
+   uint16_tdport;
+   uint16_tsport;
+};
+
+/**
+ * IPv6 tuple
+ * Addresses have to be filled by rte_thash_load_v6_addr()
+ * ports have to be CPU byte order
+ */
+struct rte_ipv6_tuple {
+   uint8_t src_addr[16];
+   uint8_t dst_addr[16];
+   uint16_tdport;
+   uint16_tsport;
+};
+
+union rte_thash_tuple {
+   struct rte_ipv4_tuple   v4;
+   struct rte_ipv6_tuple   v6;
+} __attribute__((aligned(16)));
+
+/**
+ * Prepare special converted key to use with rte_softrss_be()
+ * @param orig
+ *   pointer to original RSS key
+ * @param targ
+ *   pointer to target RSS key
+ * @param len
+ *   RSS key length
+ */
+static inline void
+rte_convert_rss_key(const uint32_t *orig, uint32_t *targ, int len)
+{
+   int i;
+
+   for (i = 0; i < (len >> 2); i++) {
+   targ[i] = rte_be_to_cpu_32(orig[i]);
+   }
+}
+
+/**
+ * Prepare and load IPv6 address
+ * @param orig
+ *   Pointer to 

[dpdk-dev] GitHub sandbox for the DPDK community

2015-05-05 Thread Neil Horman
On Mon, May 04, 2015 at 10:25:00PM -0500, Jim Thompson wrote:
> 
> > On May 4, 2015, at 10:12 PM, Wiles, Keith  wrote:
> > 
> > 
> > 
> > On 5/4/15, 10:48 AM, "Matthew Hall"  wrote:
> > 
> >> On Mon, May 04, 2015 at 12:43:48PM +, Qiu, Michael wrote:
> >>> What mail client do you use? I think  mail client supporting thread mode
> >>> is important for patch review.
> >> 
> >> Like many UNIX people, I use mutt.
> >> 
> >> My concern is that, if we're making the widespread adoption, usage, and
> >> contributions for DPDK dependent on selection or debate of the features
> >> of 
> >> various MUAs, I'm not sure that we're looking at this from the right
> >> angle.
> >> 
> >> I'm just trying to figure out how to get DPDK in the place where the most
> >> eyeballs are, rather than trying to drag the eyeballs to the place where
> >> DPDK 
> >> is.
> > 
> > +1, I agree with this statement completely and I feel discussions about an
> > MUA is non-productive and out of scope.
> 
> +1.  I've avoided the whole discussion, because ... ok, 'non-productive and out 
> of scope' is a polite way of saying it.
> 
> jim
> 
> 

Very well, since you seem to want to avoid talking about ways to get what you
want in a workflow, let's go back to where the conversation started:

http://dpdk.org/ml/archives/dev/2015-May/017225.html

We got into this debate because you wanted to move the project to github, and as
supporting reasons, listed a plethora of features that you liked about the site.
This entire subthread has been meant to illustrate how you can have the features
you want that you see as advantageous in the github environment without actually
moving to github.  We've focused on email quote collapsing because we kept
responding to one another, though I'm sure we could have the same debate on any
one of the workflow features github offers.

Can we all agree then, that for the list posted in your email above, any github
environmental feature can be recreated with proper tooling, available today,
without forcing the github environment on everybody?  Further, can we agree
that, given that those features are not unique to github, they are not
compelling reasons to move the project there?

Neil



[dpdk-dev] [PATCH v2] kni: fix compilation issue in KNI vhost on kernel 3.19/4.0

2015-05-05 Thread De Lara Guarch, Pablo


> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Pablo de Lara
> Sent: Tuesday, May 05, 2015 1:35 PM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH v2] kni: fix compilation issue in KNI vhost on
> kernel 3.19/4.0
> 
> Due to commit c0371da6 in kernel 3.19, which removed msg_iov
> and msg_iovlen from struct msghdr, DPDK would not build.
> Also, functions memcpy_toiovecend and memcpy_fromiovecend
> were removed in commits ba7438ae and 57dd8a07, being substituted by
> copy_from_iter and copy_to_iter.
> 
> This patch makes use of struct iov_iter, which has references
> to msg_iov and msg_iovlen, and makes use of copy_from_iter
> and copy_to_iter.
> 
> Reported-by: Thomas Monjalon 
> Signed-off-by: Pablo de Lara 

Self-NACK again. Missing checks and wrong variable names.


[dpdk-dev] [PATCH v3] kni: fix compilation issue in KNI vhost on kernel 3.19/4.0

2015-05-05 Thread Pablo de Lara
Due to commit c0371da6 in kernel 3.19, which removed msg_iov
and msg_iovlen from struct msghdr, DPDK would not build.
Also, functions memcpy_toiovecend and memcpy_fromiovecend
were removed in commits ba7438ae and 57dd8a07, being substituted by
copy_from_iter and copy_to_iter.

This patch makes use of struct iov_iter, which has references
to msg_iov and msg_iovlen, and makes use of copy_from_iter
and copy_to_iter.

Changes in v2:
- Replaced functions memcpy_toiovecend and memcpy_fromiovecend
  with copy_from_iter and copy_to_iter

Changes in v3:
- Fixed variable names
- Add missing checks

Reported-by: Thomas Monjalon 
Signed-off-by: Pablo de Lara 
---
 lib/librte_eal/linuxapp/kni/compat.h|4 +++
 lib/librte_eal/linuxapp/kni/kni_vhost.c |   37 --
 2 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/lib/librte_eal/linuxapp/kni/compat.h 
b/lib/librte_eal/linuxapp/kni/compat.h
index 1313523..1ad22ba 100644
--- a/lib/librte_eal/linuxapp/kni/compat.h
+++ b/lib/librte_eal/linuxapp/kni/compat.h
@@ -19,3 +19,7 @@
 #define sk_sleep(s) (s)->sk_sleep

 #endif /* < 2.6.35 */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0)
+#define HAVE_IOV_ITER_MSGHDR
+#endif
diff --git a/lib/librte_eal/linuxapp/kni/kni_vhost.c 
b/lib/librte_eal/linuxapp/kni/kni_vhost.c
index 7141f83..83d3351 100644
--- a/lib/librte_eal/linuxapp/kni/kni_vhost.c
+++ b/lib/librte_eal/linuxapp/kni/kni_vhost.c
@@ -76,7 +76,7 @@ static struct proto kni_raw_proto = {
 };

 static inline int
-kni_vhost_net_tx(struct kni_dev *kni, struct iovec *iov,
+kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m,
 unsigned offset, unsigned len)
 {
struct rte_kni_mbuf *pkt_kva = NULL;
@@ -84,7 +84,11 @@ kni_vhost_net_tx(struct kni_dev *kni, struct iovec *iov,
int ret;

KNI_DBG_TX("tx offset=%d, len=%d, iovlen=%d\n",
-  offset, len, (int)iov->iov_len);
+#ifdef HAVE_IOV_ITER_MSGHDR
+  offset, len, (int)m->msg_iter.iov->iov_len);
+#else
+  offset, len, (int)m->msg_iov->iov_len);
+#endif

/**
 * Check if it has at least one free entry in tx_q and
@@ -108,7 +112,12 @@ kni_vhost_net_tx(struct kni_dev *kni, struct iovec *iov,
data_kva = pkt_kva->buf_addr + pkt_kva->data_off
   - kni->mbuf_va + kni->mbuf_kva;

-   memcpy_fromiovecend(data_kva, iov, offset, len);
+#ifdef HAVE_IOV_ITER_MSGHDR
+   copy_from_iter(data_kva, len, &m->msg_iter);
+#else
+   memcpy_fromiovecend(data_kva, m->msg_iov, offset, len);
+#endif
+
if (unlikely(len < ETH_ZLEN)) {
memset(data_kva + len, 0, ETH_ZLEN - len);
len = ETH_ZLEN;
@@ -143,7 +152,7 @@ drop:
 }

 static inline int
-kni_vhost_net_rx(struct kni_dev *kni, struct iovec *iov,
+kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m,
 unsigned offset, unsigned len)
 {
uint32_t pkt_len;
@@ -177,10 +186,18 @@ kni_vhost_net_rx(struct kni_dev *kni, struct iovec *iov,
goto drop;

KNI_DBG_RX("rx offset=%d, len=%d, pkt_len=%d, iovlen=%d\n",
-  offset, len, pkt_len, (int)iov->iov_len);
+#ifdef HAVE_IOV_ITER_MSGHDR
+  offset, len, pkt_len, (int)m->msg_iter.iov->iov_len);
+#else
+  offset, len, pkt_len, (int)m->msg_iov->iov_len);
+#endif

data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va + kni->mbuf_kva;
-   if (unlikely(memcpy_toiovecend(iov, data_kva, offset, pkt_len)))
+#ifdef HAVE_IOV_ITER_MSGHDR
+   if (unlikely(copy_to_iter(data_kva, pkt_len, &m->msg_iter)))
+#else
+   if (unlikely(memcpy_toiovecend(m->msg_iov, data_kva, offset, pkt_len)))
+#endif
goto drop;

/* Update statistics */
@@ -348,7 +365,11 @@ kni_sock_sndmsg(struct kiocb *iocb, struct socket *sock,
return 0;

KNI_DBG_TX("kni_sndmsg len %ld, flags 0x%08x, nb_iov %d\n",
+#ifdef HAVE_IOV_ITER_MSGHDR
+  len, q->flags, (int)m->msg_iter.iov->iov_len);
+#else
   len, q->flags, (int)m->msg_iovlen);
+#endif

 #ifdef RTE_KNI_VHOST_VNET_HDR_EN
if (likely(q->flags & IFF_VNET_HDR)) {
@@ -362,7 +383,7 @@ kni_sock_sndmsg(struct kiocb *iocb, struct socket *sock,
if (unlikely(len < ETH_HLEN + q->vnet_hdr_sz))
return -EINVAL;

-   return kni_vhost_net_tx(q->kni, m->msg_iov, vnet_hdr_len, len);
+   return kni_vhost_net_tx(q->kni, m, vnet_hdr_len, len);
 }

 static int
@@ -391,7 +412,7 @@ kni_sock_rcvmsg(struct kiocb *iocb, struct socket *sock,
 #endif

if (unlikely(0 == (pkt_len = kni_vhost_net_rx(q->kni,
-   m->msg_iov, vnet_hdr_len, len
+   m, vnet_hdr_len, len
return 0;

 #ifdef RTE_KNI_VHOST_VNET_HDR_EN
-- 
1.7.4.1



[dpdk-dev] [RFC PATCH V2] librte_pmd_ring: changes to support PCI Port Hotplug

2015-05-05 Thread Bernard Iremonger
This patch depends on the Port Hotplug Framework.
It implements the rte_dev_uninit_t() function for the ring pmd.

Changes in V2:

Fix crash in the rte_pmd_ring_devuninit() function.

Signed-off-by: Bernard Iremonger 
---
 lib/librte_pmd_ring/rte_eth_ring.c |   92 +++
 1 files changed, 71 insertions(+), 21 deletions(-)

diff --git a/lib/librte_pmd_ring/rte_eth_ring.c 
b/lib/librte_pmd_ring/rte_eth_ring.c
index 6832f01..6d32e6b 100644
--- a/lib/librte_pmd_ring/rte_eth_ring.c
+++ b/lib/librte_pmd_ring/rte_eth_ring.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -252,6 +252,15 @@ static const struct eth_dev_ops ops = {
.mac_addr_add = eth_mac_addr_add,
 };

+static struct eth_driver rte_ring_pmd = {
+   .pci_drv = {
+   .name = "rte_ring_pmd",
+   .drv_flags = RTE_PCI_DRV_DETACHABLE,
+   },
+};
+
+static struct rte_pci_id id_table;
+
 int
 rte_eth_from_rings(const char *name, struct rte_ring *const rx_queues[],
const unsigned nb_rx_queues,
@@ -263,8 +272,6 @@ rte_eth_from_rings(const char *name, struct rte_ring *const 
rx_queues[],
struct rte_pci_device *pci_dev = NULL;
struct pmd_internals *internals = NULL;
struct rte_eth_dev *eth_dev = NULL;
-   struct eth_driver *eth_drv = NULL;
-   struct rte_pci_id *id_table = NULL;

unsigned i;

@@ -288,10 +295,6 @@ rte_eth_from_rings(const char *name, struct rte_ring 
*const rx_queues[],
if (pci_dev == NULL)
goto error;

-   id_table = rte_zmalloc_socket(name, sizeof(*id_table), 0, numa_node);
-   if (id_table == NULL)
-   goto error;
-
internals = rte_zmalloc_socket(name, sizeof(*internals), 0, numa_node);
if (internals == NULL)
goto error;
@@ -301,9 +304,6 @@ rte_eth_from_rings(const char *name, struct rte_ring *const 
rx_queues[],
if (eth_dev == NULL)
goto error;

-   eth_drv = rte_zmalloc_socket(name, sizeof(*eth_drv), 0, numa_node);
-   if (eth_drv == NULL)
-   goto error;

/* now put it all together
 * - store queue data in internals,
@@ -323,21 +323,22 @@ rte_eth_from_rings(const char *name, struct rte_ring 
*const rx_queues[],
internals->tx_ring_queues[i].rng = tx_queues[i];
}

-   eth_drv->pci_drv.name = ring_ethdev_driver_name;
-   eth_drv->pci_drv.id_table = id_table;
+   rte_ring_pmd.pci_drv.name = ring_ethdev_driver_name;
+   rte_ring_pmd.pci_drv.id_table = &id_table;

pci_dev->numa_node = numa_node;
-   pci_dev->driver = &eth_drv->pci_drv;
+   pci_dev->driver = &rte_ring_pmd.pci_drv;

data->dev_private = internals;
data->port_id = eth_dev->data->port_id;
+   memmove(data->name, eth_dev->data->name, sizeof(data->name));
data->nb_rx_queues = (uint16_t)nb_rx_queues;
data->nb_tx_queues = (uint16_t)nb_tx_queues;
data->dev_link = pmd_link;
data->mac_addrs = &internals->address;

eth_dev->data = data;
-   eth_dev->driver = eth_drv;
+   eth_dev->driver = &rte_ring_pmd;
eth_dev->dev_ops = &ops;
eth_dev->pci_dev = pci_dev;
TAILQ_INIT(&(eth_dev->link_intr_cbs));
@@ -531,20 +532,34 @@ rte_pmd_ring_devinit(const char *name, const char *params)

RTE_LOG(INFO, PMD, "Initializing pmd_ring for %s\n", name);

-   if (params == NULL || params[0] == '\0')
-   eth_dev_ring_create(name, rte_socket_id(), DEV_CREATE);
+   if (params == NULL || params[0] == '\0') {
+   ret = eth_dev_ring_create(name, rte_socket_id(), DEV_CREATE);
+   if (ret == -1) {
+   RTE_LOG(INFO, PMD, "Attach to pmd_ring for %s\n", name);
+   ret = eth_dev_ring_create(name, rte_socket_id(),
+   DEV_ATTACH);
+   }
+   }
else {
kvlist = rte_kvargs_parse(params, valid_arguments);

if (!kvlist) {
RTE_LOG(INFO, PMD, "Ignoring unsupported parameters 
when creating"
" rings-backed ethernet device\n");
-   eth_dev_ring_create(name, rte_socket_id(), DEV_CREATE);
-   return 0;
+   ret = eth_dev_ring_create(name, rte_socket_id(),
+   DEV_CREATE);
+   if (ret == -1) {
+   RTE_LOG(INFO, PMD, "Attach to pmd_ring for 
%s\n",
+   name);
+   ret = eth_dev_ring_create(name, rte_socket_id(),
+

[dpdk-dev] [PATCH v3 0/6] update jhash function

2015-05-05 Thread Pablo de Lara
Jenkins hash function was developed originally in 1996,
and was integrated in first versions of DPDK.
The function has been improved in 2006,
achieving up to 60% better performance, compared to the original one.

This patchset updates the current jhash in DPDK,
including two new functions that generate two hashes from a single key.

It also separates the existing hash function performance tests to
another file, to make it quicker to run.

changes in v3:

- Update rte_jhash_1word, rte_jhash_2words and rte_jhash_3words
  functions

changes in v2:

- Split the single commit into three commits: one that updates the existing
  functions, another that adds two new functions, and a third that uses one of
  those functions as a base to be called by the other ones.
- Remove some unnecessary ifdefs in the code.
- Add new macros to help with the reuse of constants
- Separate hash function performance tests to another file
  and improve cycle measurements.
- Rename existing function rte_jhash2 to rte_jhash_32b
  (something more meaningful) and mark rte_jhash2 as
  deprecated

De Lara Guarch, Pablo (1):
  hash: rename rte_jhash2 to rte_jhash_32b

Pablo de Lara (5):
  test/hash: move hash function perf tests to separate file
  test/hash: improve accuracy on cycle measurements
  hash: update jhash function with the latest available
  hash: add two new functions to jhash library
  hash: remove duplicated code

 app/test/Makefile   |1 +
 app/test/test_func_reentrancy.c |2 +-
 app/test/test_hash.c|4 +-
 app/test/test_hash_func_perf.c  |  145 +
 app/test/test_hash_perf.c   |   71 +
 lib/librte_hash/rte_jhash.h |  336 +--
 6 files changed, 400 insertions(+), 159 deletions(-)
 create mode 100644 app/test/test_hash_func_perf.c

-- 
1.7.4.1



[dpdk-dev] [PATCH v3 2/6] test/hash: improve accuracy on cycle measurements

2015-05-05 Thread Pablo de Lara
Cycles per hash calculation were measured per single operation.
It is much more accurate to run several iterations between measurements
and divide by number of iterations.

Signed-off-by: Pablo de Lara 
---
 app/test/test_hash_func_perf.c |   18 +-
 1 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/app/test/test_hash_func_perf.c b/app/test/test_hash_func_perf.c
index ba31c53..004c9be 100644
--- a/app/test/test_hash_func_perf.c
+++ b/app/test/test_hash_func_perf.c
@@ -82,21 +82,21 @@ static const char *get_hash_name(rte_hash_function f)
 static void run_hash_func_test(rte_hash_function f, uint32_t init_val,
uint32_t key_len)
 {
-   static uint8_t key[RTE_HASH_KEY_LENGTH_MAX];
-   uint64_t ticks = 0, start, end;
+   static uint8_t key[HASHTEST_ITERATIONS][RTE_HASH_KEY_LENGTH_MAX];
+   uint64_t ticks, start, end;
unsigned i, j;

for (i = 0; i < HASHTEST_ITERATIONS; i++) {
-
for (j = 0; j < key_len; j++)
-   key[j] = (uint8_t) rte_rand();
-
-   start = rte_rdtsc();
-   f(key, key_len, init_val);
-   end = rte_rdtsc();
-   ticks += end - start;
+   key[i][j] = (uint8_t) rte_rand();
}

+   start = rte_rdtsc();
+   for (i = 0; i < HASHTEST_ITERATIONS; i++)
+   f(key[i], key_len, init_val);
+   end = rte_rdtsc();
+   ticks = end - start;
+
printf("%-12s, %-18u, %-13u, %.02f\n", get_hash_name(f), (unsigned) 
key_len,
(unsigned) init_val, (double)ticks / 
HASHTEST_ITERATIONS);
 }
-- 
1.7.4.1



[dpdk-dev] [PATCH v3 5/6] hash: remove duplicated code

2015-05-05 Thread Pablo de Lara
rte_jhash is basically like _rte_jhash_2hashes but it returns only 1 hash, 
instead of 2.
In order to remove duplicated code, rte_jhash calls _rte_jhash_2hashes,
passing 0 as the second seed and returning just the first hash value.
(performance penalty is negligible)

The same is done with rte_jhash2. Also, rte_jhash2 is just a specific case
where keys are multiples of 32 bits, and where no key alignment check is 
required.
So, to avoid duplicated code, the function calls _rte_jhash_2hashes with 
check_align = 0
(to use the optimal path)

Signed-off-by: Pablo de Lara 
---
 lib/librte_hash/rte_jhash.h |  283 ++-
 1 files changed, 62 insertions(+), 221 deletions(-)

diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index 9e82d06..6f05c4c 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -108,22 +108,8 @@ extern "C" {
 #define LOWER16b_MASK rte_le_to_cpu_32(0xffff)
 #define LOWER24b_MASK rte_le_to_cpu_32(0xffffff)

-/**
- * The most generic version, hashes an arbitrary sequence
- * of bytes.  No alignment or length assumptions are made about
- * the input key.
- *
- * @param key
- *   Key to calculate hash of.
- * @param length
- *   Length of key in bytes.
- * @param initval
- *   Initialising value of hash.
- * @return
- *   Calculated hash value.
- */
-static inline uint32_t
-rte_jhash(const void *key, uint32_t length, uint32_t initval)
+static inline void
+__rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t 
*pb, unsigned check_align)
 {
uint32_t a, b, c;
union {
@@ -132,12 +118,18 @@ rte_jhash(const void *key, uint32_t length, uint32_t 
initval)
} u;

/* Set up the internal state */
-   a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + initval;
+   a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + *pc;
+   c += *pb;

u.ptr = key;

-   /* Check key alignment. For x86 architecture, first case is always 
optimal */
-   if (!strcmp(RTE_ARCH,"x86_64") || !strcmp(RTE_ARCH,"i686") || (u.i & 
0x3) == 0) {
+   /*
+* Check key alignment. For x86 architecture, first case is always 
optimal
+* If check_align is not set, first case will be used
+*/
+
+   if ((!strcmp(RTE_ARCH,"x86_64") || !strcmp(RTE_ARCH,"i686")
+   || (!check_align) || (u.i & 0x3) == 0)) {
const uint32_t *k = (const uint32_t *)key;

while (length > 12) {
@@ -178,7 +170,9 @@ rte_jhash(const void *key, uint32_t length, uint32_t 
initval)
a += k[0] & LOWER8b_MASK; break;
/* zero length strings require no mixing */
case 0:
-   return c;
+   *pc = c;
+   *pb = b;
+   return;
};
} else {
const uint8_t *k = (const uint8_t *)key;
@@ -233,63 +227,16 @@ rte_jhash(const void *key, uint32_t length, uint32_t 
initval)
a += ((uint32_t)k[0]) << RTE_JHASH_BYTE0_SHIFT;
break;
case 0:
-   return c;
+   *pc = c;
+   *pb = b;
+   return;
}
}

__rte_jhash_final(a, b, c);

-   return c;
-}
-
-/**
- * A special optimized version that handles 1 or more of uint32_ts.
- * The length parameter here is the number of uint32_ts in the key.
- *
- * @param k
- *   Key to calculate hash of.
- * @param length
- *   Length of key in units of 4 bytes.
- * @param initval
- *   Initialising value of hash.
- * @return
- *   Calculated hash value.
- */
-static inline uint32_t
-rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
-{
-   uint32_t a, b, c;
-
-   /* Set up the internal state */
-   a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length) << 2) + 
initval;
-
-   /* Handle most of the key */
-   while (length > 3) {
-   a += k[0];
-   b += k[1];
-   c += k[2];
-
-   __rte_jhash_mix(a, b, c);
-
-   k += 3;
-   length -= 3;
-   }
-
-   /* Handle the last 3 uint32_t's */
-   switch (length) {
-   case 3:
-   c += k[2];
-   case 2:
-   b += k[1];
-   case 1:
-   a += k[0];
-   __rte_jhash_final(a, b, c);
-   /* case 0: nothing left to add */
-   case 0:
-   break;
-   };
-
-   return c;
+   *pc = c;
+   *pb = b;
 }

 /**
@@ -310,127 +257,7 @@ rte_jhash2(const uint32_t *k, uint32_t length, uint32_t 
initval)
 static inline void
 rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
 {
-   uint32_t a, b, c;
-   union {
-   const void *ptr;
-   size_t i;
-   } u;
-
-   /* Set up the internal state */
-   a

[dpdk-dev] [PATCH v3 4/6] hash: add two new functions to jhash library

2015-05-05 Thread Pablo de Lara
With the jhash update, two new functions were introduced:

- rte_jhash_2hashes: Same as rte_jhash, but takes two seeds
 and returns two hashes (uint32_ts)

- rte_jhash2_2hashes: Same as rte_jhash2, but takes two seeds
 and returns two hashes (uint32_ts)

Signed-off-by: Pablo de Lara 
---
 lib/librte_hash/rte_jhash.h |  195 +++
 1 files changed, 195 insertions(+), 0 deletions(-)

diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index 0e96b7c..9e82d06 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -292,6 +292,201 @@ rte_jhash2(const uint32_t *k, uint32_t length, uint32_t 
initval)
return c;
 }

+/**
+ * Same as rte_jhash, but takes two seeds and return two uint32_ts.
+ * pc and pb must be non-null, and *pc and *pb must both be initialized
+ * with seeds. If you pass in (*pb)=0, the output (*pc) will be
+ * the same as the return value from rte_jhash.
+ *
+ * @param key
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in bytes.
+ * @param pc
+ *   IN: seed OUT: primary hash value.
+ * @param pb
+ *   IN: second seed OUT: secondary hash value.
+ */
+static inline void
+rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
+{
+   uint32_t a, b, c;
+   union {
+   const void *ptr;
+   size_t i;
+   } u;
+
+   /* Set up the internal state */
+   a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + *pc;
+   c += *pb;
+
+   u.ptr = key;
+
+   /* Check key alignment. For x86 architecture, first case is always 
optimal */
+   if (!strcmp(RTE_ARCH,"x86_64") || !strcmp(RTE_ARCH,"i686") || (u.i & 
0x3) == 0) {
+   const uint32_t *k = (const uint32_t *)key;
+
+   while (length > 12) {
+   a += k[0];
+   b += k[1];
+   c += k[2];
+
+   __rte_jhash_mix(a, b, c);
+
+   k += 3;
+   length -= 12;
+   }
+
+   switch (length) {
+   case 12:
+   c += k[2]; b += k[1]; a += k[0]; break;
+   case 11:
+   c += k[2] & LOWER24b_MASK; b += k[1]; a += k[0]; break;
+   case 10:
+   c += k[2] & LOWER16b_MASK; b += k[1]; a += k[0]; break;
+   case 9:
+   c += k[2] & LOWER8b_MASK; b += k[1]; a += k[0]; break;
+   case 8:
+   b += k[1]; a += k[0]; break;
+   case 7:
+   b += k[1] & LOWER24b_MASK; a += k[0]; break;
+   case 6:
+   b += k[1] & LOWER16b_MASK; a += k[0]; break;
+   case 5:
+   b += k[1] & LOWER8b_MASK; a += k[0]; break;
+   case 4:
+   a += k[0]; break;
+   case 3:
+   a += k[0] & LOWER24b_MASK; break;
+   case 2:
+   a += k[0] & LOWER16b_MASK; break;
+   case 1:
+   a += k[0] & LOWER8b_MASK; break;
+   /* zero length strings require no mixing */
+   case 0:
+   *pc = c;
+   *pb = b;
+   return;
+   };
+   } else {
+   const uint8_t *k = (const uint8_t *)key;
+
+   /* all but the last block: affect some 32 bits of (a, b, c) */
+   while (length > 12) {
+   a += ((uint32_t)k[0]) << RTE_JHASH_BYTE0_SHIFT;
+   a += ((uint32_t)k[1]) << RTE_JHASH_BYTE1_SHIFT;
+   a += ((uint32_t)k[2]) << RTE_JHASH_BYTE2_SHIFT;
+   a += ((uint32_t)k[3]) << RTE_JHASH_BYTE3_SHIFT;
+   b += ((uint32_t)k[4]) << RTE_JHASH_BYTE0_SHIFT;
+   b += ((uint32_t)k[5]) << RTE_JHASH_BYTE1_SHIFT;
+   b += ((uint32_t)k[6]) << RTE_JHASH_BYTE2_SHIFT;
+   b += ((uint32_t)k[7]) << RTE_JHASH_BYTE3_SHIFT;
+   c += ((uint32_t)k[8]) << RTE_JHASH_BYTE0_SHIFT;
+   c += ((uint32_t)k[9]) << RTE_JHASH_BYTE1_SHIFT;
+   c += ((uint32_t)k[10]) << RTE_JHASH_BYTE2_SHIFT;
+   c += ((uint32_t)k[11]) << RTE_JHASH_BYTE3_SHIFT;
+
+   __rte_jhash_mix(a, b, c);
+
+   k += 12;
+   length -= 12;
+   }
+
+   /* last block: affect all 32 bits of (c) */
+   /* all the case statements fall through */
+   switch (length) {
+   case 12:
+   c += ((uint32_t)k[11]) << RTE_JHASH_BYTE3_SHIFT;
+   case 11:
+   c += ((uint32_t)k[10]) << RTE_JHASH_BYTE2_SHIFT;
+   case 10:

[dpdk-dev] [PATCH v3 1/6] test/hash: move hash function perf tests to separate file

2015-05-05 Thread Pablo de Lara
This patch moves hash function performance tests to a separate file,
so users can check the performance of the existing hash functions quicker,
without having to run all the other hash operation performance tests,
which take some time.

Signed-off-by: Pablo de Lara 
---
 app/test/Makefile  |1 +
 app/test/test_hash_func_perf.c |  145 
 app/test/test_hash_perf.c  |   71 +---
 3 files changed, 147 insertions(+), 70 deletions(-)
 create mode 100644 app/test/test_hash_func_perf.c

diff --git a/app/test/Makefile b/app/test/Makefile
index 4aca77c..77a9c42 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -83,6 +83,7 @@ SRCS-y += test_memcpy_perf.c

 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash.c
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_perf.c
+SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_func_perf.c

 SRCS-$(CONFIG_RTE_LIBRTE_LPM) += test_lpm.c
 SRCS-$(CONFIG_RTE_LIBRTE_LPM) += test_lpm6.c
diff --git a/app/test/test_hash_func_perf.c b/app/test/test_hash_func_perf.c
new file mode 100644
index 000..ba31c53
--- /dev/null
+++ b/app/test/test_hash_func_perf.c
@@ -0,0 +1,145 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include "test.h"
+
+#include 
+#include 
+#include 
+
+/***
+ * Hash function performance test configuration section. Each performance test
+ * will be performed HASHTEST_ITERATIONS times.
+ *
+ * The three arrays below control what tests are performed. Every combination
+ * from the array entries is tested.
+ */
+#define HASHTEST_ITERATIONS 100
+
+static rte_hash_function hashtest_funcs[] = {rte_jhash, rte_hash_crc};
+static uint32_t hashtest_initvals[] = {0};
+static uint32_t hashtest_key_lens[] = {2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 
31, 32, 33, 63, 64};
+/**/
+
+/*
+ * To help print out name of hash functions.
+ */
+static const char *get_hash_name(rte_hash_function f)
+{
+   if (f == rte_jhash)
+   return "jhash";
+
+   if (f == rte_hash_crc)
+   return "rte_hash_crc";
+
+   return "UnknownHash";
+}
+
+/*
+ * Test a hash function.
+ */
+static void run_hash_func_test(rte_hash_function f, uint32_t init_val,
+   uint32_t key_len)
+{
+   static uint8_t key[RTE_HASH_KEY_LENGTH_MAX];
+   uint64_t ticks = 0, start, end;
+   unsigned i, j;
+
+   for (i = 0; i < HASHTEST_ITERATIONS; i++) {
+
+   for (j = 0; j < key_len; j++)
+   key[j] = (uint8_t) rte_rand();
+
+   start = rte_rdtsc();
+   f(key, key_len, init_val);
+   end = rte_rdtsc();
+   ticks += end - start;
+   }
+
+   printf("%-12s, %-18u, %-13u, %.02f\n", get_hash_name(f), (unsigned) 
key_len,
+   (unsigned) init_val, (double)ticks / 
HASHTEST_ITERATIONS);
+}
+
+/*
+ * Test all hash functions.
+ */
+static void run_hash_func_tests(void)
+{
+   unsigned i, j, k;
+
+   printf(" *** Hash function performance test results ***\n");
+   printf(" Number of iterations for each test

[dpdk-dev] [PATCH v3 3/6] hash: update jhash function with the latest available

2015-05-05 Thread Pablo de Lara
Jenkins hash function was developed originally in 1996,
and was integrated in first versions of DPDK.
The function has been improved in 2006,
achieving up to 60% better performance, compared to the original one.

This patch integrates that code into the rte_jhash library.

Signed-off-by: Pablo de Lara 
---
 lib/librte_hash/rte_jhash.h |  261 +++
 1 files changed, 188 insertions(+), 73 deletions(-)

diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index a4bf5a1..0e96b7c 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -45,38 +45,68 @@ extern "C" {
 #endif

 #include 
+#include 
+#include 

 /* jhash.h: Jenkins hash support.
  *
- * Copyright (C) 1996 Bob Jenkins (bob_jenkins at burtleburtle.net)
+ * Copyright (C) 2006 Bob Jenkins (bob_jenkins at burtleburtle.net)
  *
  * http://burtleburtle.net/bob/hash/
  *
  * These are the credits from Bob's sources:
  *
- * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
- * hash(), hash2(), hash3, and mix() are externally useful functions.
- * Routines to test the hash are included if SELF_TEST is defined.
- * You can use this free for any purpose.  It has no warranty.
+ * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+ *
+ * These are functions for producing 32-bit hashes for hash table lookup.
+ * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+ * are externally useful functions.  Routines to test the hash are included
+ * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
+ * the public domain.  It has no warranty.
  *
  * $FreeBSD$
  */

+#define rot(x, k) (((x) << (k)) | ((x) >> (32-(k))))
+
 /** @internal Internal function. NOTE: Arguments are modified. */
 #define __rte_jhash_mix(a, b, c) do { \
-   a -= b; a -= c; a ^= (c>>13); \
-   b -= c; b -= a; b ^= (a<<8); \
-   c -= a; c -= b; c ^= (b>>13); \
-   a -= b; a -= c; a ^= (c>>12); \
-   b -= c; b -= a; b ^= (a<<16); \
-   c -= a; c -= b; c ^= (b>>5); \
-   a -= b; a -= c; a ^= (c>>3); \
-   b -= c; b -= a; b ^= (a<<10); \
-   c -= a; c -= b; c ^= (b>>15); \
+   a -= c; a ^= rot(c, 4); c += b; \
+   b -= a; b ^= rot(a, 6); a += c; \
+   c -= b; c ^= rot(b, 8); b += a; \
+   a -= c; a ^= rot(c, 16); c += b; \
+   b -= a; b ^= rot(a, 19); a += c; \
+   c -= b; c ^= rot(b, 4); b += a; \
+} while (0)
+
+#define __rte_jhash_final(a, b, c) do { \
+   c ^= b; c -= rot(b, 14); \
+   a ^= c; a -= rot(c, 11); \
+   b ^= a; b -= rot(a, 25); \
+   c ^= b; c -= rot(b, 16); \
+   a ^= c; a -= rot(c, 4);  \
+   b ^= a; b -= rot(a, 14); \
+   c ^= b; c -= rot(b, 24); \
 } while (0)

 /** The golden ratio: an arbitrary value. */
-#define RTE_JHASH_GOLDEN_RATIO  0x9e3779b9
+#define RTE_JHASH_GOLDEN_RATIO  0xdeadbeef
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+#define RTE_JHASH_BYTE0_SHIFT 0
+#define RTE_JHASH_BYTE1_SHIFT 8
+#define RTE_JHASH_BYTE2_SHIFT 16
+#define RTE_JHASH_BYTE3_SHIFT 24
+#else
+#define RTE_JHASH_BYTE0_SHIFT 24
+#define RTE_JHASH_BYTE1_SHIFT 16
+#define RTE_JHASH_BYTE2_SHIFT 8
+#define RTE_JHASH_BYTE3_SHIFT 0
+#endif
+
+#define LOWER8b_MASK rte_le_to_cpu_32(0xff)
+#define LOWER16b_MASK rte_le_to_cpu_32(0xffff)
+#define LOWER24b_MASK rte_le_to_cpu_32(0xffffff)

 /**
  * The most generic version, hashes an arbitrary sequence
@@ -95,42 +125,119 @@ extern "C" {
 static inline uint32_t
 rte_jhash(const void *key, uint32_t length, uint32_t initval)
 {
-   uint32_t a, b, c, len;
-   const uint8_t *k = (const uint8_t *)key;
-   const uint32_t *k32 = (const uint32_t *)key;
+   uint32_t a, b, c;
+   union {
+   const void *ptr;
+   size_t i;
+   } u;

-   len = length;
-   a = b = RTE_JHASH_GOLDEN_RATIO;
-   c = initval;
+   /* Set up the internal state */
+   a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + initval;

-   while (len >= 12) {
-   a += k32[0];
-   b += k32[1];
-   c += k32[2];
+   u.ptr = key;

-   __rte_jhash_mix(a,b,c);
+   /* Check key alignment. For x86 architecture, first case is always 
optimal */
+   if (!strcmp(RTE_ARCH,"x86_64") || !strcmp(RTE_ARCH,"i686") || (u.i & 
0x3) == 0) {
+   const uint32_t *k = (const uint32_t *)key;

-   k += (3 * sizeof(uint32_t)), k32 += 3;
-   len -= (3 * sizeof(uint32_t));
-   }
+   while (length > 12) {
+   a += k[0];
+   b += k[1];
+   c += k[2];

-   c += length;
-   switch (len) {

[dpdk-dev] [PATCH v3 6/6] hash: rename rte_jhash2 to rte_jhash_32b

2015-05-05 Thread Pablo de Lara
From: De Lara Guarch, Pablo 

Changed name to something more meaningful,
and mark rte_jhash2 as deprecated.

Signed-off-by: Pablo de Lara 
---
 app/test/test_func_reentrancy.c |2 +-
 app/test/test_hash.c|4 ++--
 lib/librte_hash/rte_jhash.h |   17 +++--
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/app/test/test_func_reentrancy.c b/app/test/test_func_reentrancy.c
index dc070af..85504c0 100644
--- a/app/test/test_func_reentrancy.c
+++ b/app/test/test_func_reentrancy.c
@@ -228,7 +228,7 @@ hash_create_free(__attribute__((unused)) void *arg)
.entries = 16,
.bucket_entries = 4,
.key_len = 4,
-   .hash_func = (rte_hash_function)rte_jhash2,
+   .hash_func = (rte_hash_function)rte_jhash_32b,
.hash_func_init_val = 0,
.socket_id = 0,
};
diff --git a/app/test/test_hash.c b/app/test/test_hash.c
index 1da27c5..4ecb11b 100644
--- a/app/test/test_hash.c
+++ b/app/test/test_hash.c
@@ -1177,7 +1177,7 @@ test_hash_add_delete_jhash2(void)

hash_params_ex.name = "hash_test_jhash2";
hash_params_ex.key_len = 4;
-   hash_params_ex.hash_func = (rte_hash_function)rte_jhash2;
+   hash_params_ex.hash_func = (rte_hash_function)rte_jhash_32b;

handle = rte_hash_create(&hash_params_ex);
if (handle == NULL) {
@@ -1216,7 +1216,7 @@ test_hash_add_delete_2_jhash2(void)

hash_params_ex.name = "hash_test_2_jhash2";
hash_params_ex.key_len = 8;
-   hash_params_ex.hash_func = (rte_hash_function)rte_jhash2;
+   hash_params_ex.hash_func = (rte_hash_function)rte_jhash_32b;

handle = rte_hash_create(&hash_params_ex);
if (handle == NULL)
diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index 6f05c4c..3a6e3f2 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -46,6 +46,8 @@ extern "C" {

 #include 
 #include 
+
+#include 
 #include 

 /* jhash.h: Jenkins hash support.
@@ -276,7 +278,7 @@ rte_jhash_2hashes(const void *key, uint32_t length, 
uint32_t *pc, uint32_t *pb)
  *   IN: second seed OUT: secondary hash value.
  */
 static inline void
-rte_jhash2_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t 
*pb)
+rte_jhash_32b_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, 
uint32_t *pb)
 {
__rte_jhash_2hashes((const void *) k, (length << 2), pc, pb, 0);
 }
@@ -319,11 +321,22 @@ rte_jhash(const void *key, uint32_t length, uint32_t 
initval)
  *   Calculated hash value.
  */
 static inline uint32_t
+rte_jhash_32b(const uint32_t *k, uint32_t length, uint32_t initval)
+{
+   uint32_t initval2 = 0;
+
+   rte_jhash_32b_2hashes(k, length, &initval, &initval2);
+
+   return initval;
+}
+
+static inline uint32_t
 rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
 {
uint32_t initval2 = 0;

-   rte_jhash2_2hashes(k, length, &initval, &initval2);
+   RTE_LOG(WARNING, HASH, "rte_jhash2 is deprecated\n");
+   rte_jhash_32b_2hashes(k, length, &initval, &initval2);

return initval;
 }
-- 
1.7.4.1



[dpdk-dev] Performance regression in DPDK 1.8/2.0

2015-05-05 Thread De Lara Guarch, Pablo
Hi Paul,

> -Original Message-
> From: Paul Emmerich [mailto:emmericp at net.in.tum.de]
> Sent: Tuesday, April 28, 2015 12:48 PM
> To: De Lara Guarch, Pablo
> Cc: Pavel Odintsov; dev at dpdk.org
> Subject: Re: [dpdk-dev] Performance regression in DPDK 1.8/2.0
> 
> Hi,
> 
> 
> De Lara Guarch, Pablo :
> > Could you tell me which changes you made here? I see you are using
> simple tx code path on 1.8.0,
> > but with the default values, you should be using vector tx,
> > unless you have changed anything in the tx configuration.
> 
> sorry, I might have written that down wrong or read the output wrong.
> I did not modify the l2fwd example.
> 
> 
> > So, just for clarification,
> > for l2fwd you used E3-1230 v2 (Ivy Bridge), at 1.6 GHz or 3.3 GHz?
> 
> At 1.6 GHz as it is simply too fast at 3.3 GHz ;)
> 
> 
> I'll probably write a minimal example that shows my
> problem with tx only sometime next week.
> I just used the l2fwd example to illustrate my point
> with a 'builtin' example.

Thanks for the clarification. I tested it on Ivy Bridge as well, and I could 
not reproduce the issue.
Make sure that you use vector rx/tx anyway, to get best performance 
(you should be seeing better performance, since l2fwd in 1.8/2.0 uses both 
vector rx/tx).

Thanks,
Pablo

> 
> Paul


[dpdk-dev] [PATCH v2 0/3] port: added ethdev_writer_nodrop and ring_writer_nodrop ports

2015-05-05 Thread Dumitrescu, Cristian


> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Michal Jastrzebski
> Sent: Thursday, April 30, 2015 12:58 PM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH v2 0/3] port: added ethdev_writer_nodrop and
> ring_writer_nodrop ports
> 
> From: Maciej Gajdzica 
> 
> When a nodrop writer port fails to send data, it retries until it reaches the
> maximum number of retries. Also added a new tx_bulk implementation for the ring
> writer port.
> 
> Maciej Gajdzica (3):
>   port: added WRITER_APPROACH == 1 implementation to ring port
>   port: added ethdev_writer_nodrop port
>   port: added ring_writer_nodrop port
> 
>  lib/librte_port/rte_port_ethdev.c |  230
> ++
>  lib/librte_port/rte_port_ethdev.h |   19 +++
>  lib/librte_port/rte_port_ring.c   |  285
> +
>  lib/librte_port/rte_port_ring.h   |   16 +++
>  4 files changed, 550 insertions(+)
> 
> --
> 1.7.9.5

Acked by: Cristian Dumitrescu 



[dpdk-dev] [PATCH v2 0/3] port: added frag_ipv6 and ras_ipv6 ports

2015-05-05 Thread Dumitrescu, Cristian


> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Michal Jastrzebski
> Sent: Thursday, April 30, 2015 1:03 PM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH v2 0/3] port: added frag_ipv6 and ras_ipv6 ports
> 
> From: Maciej Gajdzica 
> 
> Added ipv6 versions of ip fragmentation and ip reassembly ports.
> 
> Maciej Gajdzica (3):
>   port: removed IPV4_MTU_DEFAULT define
>   port: added ipv6 fragmentation port
>   port: added ipv6 reassembly port
> 
>  lib/librte_port/rte_port_frag.c |   67 --
>  lib/librte_port/rte_port_frag.h |9 ++-
>  lib/librte_port/rte_port_ras.c  |  142 
> ---
>  lib/librte_port/rte_port_ras.h  |9 ++-
>  4 files changed, 167 insertions(+), 60 deletions(-)
> 
> --
> 1.7.9.5

Acked by: Cristian Dumitrescu  


[dpdk-dev] [PATCH v2 00/13] port: added port statistics

2015-05-05 Thread Dumitrescu, Cristian


> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Michal Jastrzebski
> Sent: Thursday, April 30, 2015 1:07 PM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH v2 00/13] port: added port statistics
> 
> From: Maciej Gajdzica 
> 
> Added statistics for every type of port. By default all port statistics
> are disabled, user must activate them in config file.
> 
> Maciej Gajdzica (13):
>   port: added structures for port stats
>   port: added port_ethdev_reader stats
>   port: added port_ethdev_writer stats
>   port: added port_ethdev_writer_nodrop stats
>   port: added port_frag stats
>   port: added port_ras stats
>   port: added port_ring_reader stats
>   port: added port_ring_writer stats
>   port: added port_ring_writer_nodrop stats
>   port: added port_sched_reader stats
>   port: added port_sched_writer stats
>   port: added port_source stats
>   port: added port_sink stats
> 
>  config/common_bsdapp   |   12 
>  config/common_linuxapp |   12 
>  lib/librte_port/rte_port.h |   60 ++--
>  lib/librte_port/rte_port_ethdev.c  |  113
> +-
>  lib/librte_port/rte_port_frag.c|   36 ++
>  lib/librte_port/rte_port_ras.c |   38 ++
>  lib/librte_port/rte_port_ring.c|  118
> +++-
>  lib/librte_port/rte_port_sched.c   |   96 --
>  lib/librte_port/rte_port_source_sink.c |   98
> --
>  9 files changed, 566 insertions(+), 17 deletions(-)
> 
> --
> 1.7.9.5

Acked by: Cristian Dumitrescu  


[dpdk-dev] [PATCH v2 00/10] table: added table statistics

2015-05-05 Thread Dumitrescu, Cristian


> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Michal Jastrzebski
> Sent: Thursday, April 30, 2015 1:14 PM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH v2 00/10] table: added table statistics
> 
> From: Maciej Gajdzica 
> 
> Added statistics for every type of table. By default all table statistics
> are disabled, user must activate them in config file.
> 
> Maciej Gajdzica (10):
>   table: added structure for storing table stats
>   table: added acl table stats
>   table: added array table stats
>   table: added hash_ext table stats
>   table: added hash_key16 table stats
>   table: added hash_key32 table stats
>   table: added hash_key8 table stats
>   table: added hash_lru table stats
>   table: added lpm_ipv6 table stats
>   table: added lpm table stats
> 
>  config/common_bsdapp|9 ++
>  config/common_linuxapp  |9 ++
>  lib/librte_table/rte_table.h|   25 +++
>  lib/librte_table/rte_table_acl.c|   35 +
>  lib/librte_table/rte_table_array.c  |   34 +++-
>  lib/librte_table/rte_table_hash_ext.c   |   44
> ++
>  lib/librte_table/rte_table_hash_key16.c |   41
> 
>  lib/librte_table/rte_table_hash_key32.c |   41
> 
>  lib/librte_table/rte_table_hash_key8.c  |   52
> +++
>  lib/librte_table/rte_table_hash_lru.c   |   44
> ++
>  lib/librte_table/rte_table_lpm.c|   34 
>  lib/librte_table/rte_table_lpm_ipv6.c   |   34 
>  12 files changed, 401 insertions(+), 1 deletion(-)
> 
> --
> 1.7.9.5

Acked by: Cristian Dumitrescu 



[dpdk-dev] [PATCH v2] pipeline: add statistics for librte_pipeline ports and tables

2015-05-05 Thread Dumitrescu, Cristian


> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Michal Jastrzebski
> Sent: Thursday, April 30, 2015 1:16 PM
> To: dev at dpdk.org
> Cc: Wodkowski, PawelX
> Subject: [dpdk-dev] [PATCH v2] pipeline: add statistics for librte_pipeline
> ports and tables
> 
> From: Pawel Wodkowski 
> 
> This patch adds statistics collection for librte_pipeline.
> Those statistics are disabled by default during build time.
> 
> Signed-off-by: Pawel Wodkowski 
> ---
>  config/common_bsdapp   |1 +
>  config/common_linuxapp |1 +
>  lib/librte_pipeline/rte_pipeline.c |  185
> +---
>  lib/librte_pipeline/rte_pipeline.h |   99 +++
>  4 files changed, 275 insertions(+), 11 deletions(-)
> 
> diff --git a/config/common_bsdapp b/config/common_bsdapp
> index 1d0f5b2..e4f0bf5 100644
> --- a/config/common_bsdapp
> +++ b/config/common_bsdapp
> @@ -414,6 +414,7 @@ CONFIG_RTE_TABLE_LPM_STATS_COLLECT=n
>  # Compile librte_pipeline
>  #
>  CONFIG_RTE_LIBRTE_PIPELINE=y
> +CONFIG_RTE_PIPELINE_STATS_COLLECT=n
> 
>  #
>  # Compile librte_kni
> diff --git a/config/common_linuxapp b/config/common_linuxapp
> index 8b01ca9..05553d9 100644
> --- a/config/common_linuxapp
> +++ b/config/common_linuxapp
> @@ -421,6 +421,7 @@ CONFIG_RTE_TABLE_LPM_STATS_COLLECT=n
>  # Compile librte_pipeline
>  #
>  CONFIG_RTE_LIBRTE_PIPELINE=y
> +CONFIG_RTE_PIPELINE_STATS_COLLECT=n
> 
>  #
>  # Compile librte_kni
> diff --git a/lib/librte_pipeline/rte_pipeline.c
> b/lib/librte_pipeline/rte_pipeline.c
> index 36d92c9..69bf003 100644
> --- a/lib/librte_pipeline/rte_pipeline.c
> +++ b/lib/librte_pipeline/rte_pipeline.c
> @@ -48,6 +48,17 @@
> 
>  #define RTE_TABLE_INVALID UINT32_MAX
> 
> +#ifdef RTE_PIPELINE_STATS_COLLECT
> +#define RTE_PIPELINE_STATS_ADD(counter, val) \
> + ({ (counter) += (val); })
> +
> +#define RTE_PIPELINE_STATS_ADD_M(counter, mask) \
> + ({ (counter) += __builtin_popcountll(mask); })
> +#else
> +#define RTE_PIPELINE_STATS_ADD(counter, val)
> +#define RTE_PIPELINE_STATS_ADD_M(counter, mask)
> +#endif
> +
>  struct rte_port_in {
>   /* Input parameters */
>   struct rte_port_in_ops ops;
> @@ -63,6 +74,8 @@ struct rte_port_in {
> 
>   /* List of enabled ports */
>   struct rte_port_in *next;
> +
> + uint64_t n_pkts_dropped_by_ah;
>  };
> 
>  struct rte_port_out {
> @@ -74,6 +87,8 @@ struct rte_port_out {
> 
>   /* Handle to low-level port */
>   void *h_port;
> +
> + uint64_t n_pkts_dropped_by_ah;
>  };
> 
>  struct rte_table {
> @@ -90,6 +105,12 @@ struct rte_table {
> 
>   /* Handle to the low-level table object */
>   void *h_table;
> +
> + /* Stats for this table. */
> + uint64_t n_pkts_dropped_by_lkp_hit_ah;
> + uint64_t n_pkts_dropped_by_lkp_miss_ah;
> + uint64_t n_pkts_dropped_lkp_hit;
> + uint64_t n_pkts_dropped_lkp_miss;
>  };
> 
>  #define RTE_PIPELINE_MAX_NAME_SZ   124
> @@ -1040,6 +1061,8 @@ rte_pipeline_action_handler_port_bulk(struct
> rte_pipeline *p,
> 
>   port_out->f_action_bulk(p->pkts, &pkts_mask, port_out-
> >arg_ah);
>   p->action_mask0[RTE_PIPELINE_ACTION_DROP] |=
> pkts_mask ^  mask;
> + RTE_PIPELINE_STATS_ADD_M(port_out-
> >n_pkts_dropped_by_ah,
> + pkts_mask ^  mask);
>   }
> 
>   /* Output port TX */
> @@ -1071,6 +1094,9 @@ rte_pipeline_action_handler_port(struct
> rte_pipeline *p, uint64_t pkts_mask)
>   p-
> >action_mask0[RTE_PIPELINE_ACTION_DROP] |=
>   (pkt_mask ^ 1LLU) << i;
> 
> + RTE_PIPELINE_STATS_ADD(port_out-
> >n_pkts_dropped_by_ah,
> + pkt_mask ^ 1LLU);
> +
>   /* Output port TX */
>   if (pkt_mask != 0)
>   port_out->ops.f_tx(port_out-
> >h_port,
> @@ -1104,6 +1130,9 @@ rte_pipeline_action_handler_port(struct
> rte_pipeline *p, uint64_t pkts_mask)
>   p-
> >action_mask0[RTE_PIPELINE_ACTION_DROP] |=
>   (pkt_mask ^ 1LLU) << i;
> 
> + RTE_PIPELINE_STATS_ADD(port_out-
> >n_pkts_dropped_by_ah,
> + pkt_mask ^ 1LLU);
> +
>   /* Output port TX */
>   if (pkt_mask != 0)
>   port_out->ops.f_tx(port_out-
> >h_port,
> @@ -1140,6 +1169,9 @@ rte_pipeline_action_handler_port_meta(struct
> rte_pipeline *p,
>   p-
> >action_mask0[RTE_PIPELINE_ACTION_DROP] |=
>   (pkt_mask ^ 1LLU) << i;
> 
> + RTE_PIPELINE_STATS_ADD(port_out-
> >n_pkts_dropped_by_ah,
> +   

[dpdk-dev] [PATCH] librte_eal:Using compiler memory barrier for IA processor's rte_wmb/rte_rmb.

2015-05-05 Thread WangDong
The current implementation of rte_wmb/rte_rmb for x86 uses a processor memory
barrier. That is unnecessary on an IA processor, where a compiler memory barrier
is enough. But if DPDK is running on an AMD processor, maybe we should use a
processor memory barrier.
I added a macro to distinguish them: if we compile DPDK for an IA processor,
defining the macro (RTE_ARCH_X86_IA) can improve performance by using a compiler
memory barrier. Alternatively, we could add RTE_ARCH_X86_AMD to select the
processor memory barrier; in that case, if the macro is not defined, memory
ordering will not be guaranteed.
Which macro is better?
If this patch is applied, the PMD's old implementation of a compiler memory
barrier (some volatile variables) can be fixed with rte_rmb() and rte_wmb() for
any architecture.

---
 lib/librte_eal/common/include/arch/x86/rte_atomic.h | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic.h 
b/lib/librte_eal/common/include/arch/x86/rte_atomic.h
index e93e8ee..52b1e81 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_atomic.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic.h
@@ -49,10 +49,20 @@ extern "C" {

 #definerte_mb() _mm_mfence()

+#ifdef RTE_ARCH_X86_IA
+
+#define rte_wmb() rte_compiler_barrier()
+
+#define rte_rmb() rte_compiler_barrier()
+
+#else
+
 #definerte_wmb() _mm_sfence()

 #definerte_rmb() _mm_lfence()

+#endif
+
 /*- 16 bit atomic operations 
-*/

 #ifndef RTE_FORCE_INTRINSICS
-- 
1.9.1



[dpdk-dev] [PATCH RFC 1/6] mbuf: update mbuf structure for QinQ support

2015-05-05 Thread Chilikin, Andrey
Hi Helin,

I would agree with Konstantin about new naming for VLAN tags. I think we can 
leave the existing name for vlan_tci and just name the new VLAN tag differently. I
was thinking along the lines of "vlan_tci_outer" or "stag_tci". So vlan_tci will
store single VLAN in case if only one L2 tag is present or will store inner 
VLAN in case of two tags. "vlan_tci_outer" will store outer VLAN when two L2 
tags are present. "stag_tci" name also looks like a good candidate as in most 
cases if two tags are presented then outer VLAN is addressed as S-Tag even if 
it is simple tag stacking.

Regards,
Andrey

> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Ananyev,
> Konstantin
> Sent: Tuesday, May 5, 2015 12:05 PM
> To: Zhang, Helin; dev at dpdk.org
> Subject: Re: [dpdk-dev] [PATCH RFC 1/6] mbuf: update mbuf structure for
> QinQ support
> 
> Hi Helin,
> 
> > -Original Message-
> > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Helin Zhang
> > Sent: Tuesday, May 05, 2015 3:32 AM
> > To: dev at dpdk.org
> > Subject: [dpdk-dev] [PATCH RFC 1/6] mbuf: update mbuf structure for
> > QinQ support
> >
> > To support QinQ, 'vlan_tci' should be replaced by 'vlan_tci0' and
> > 'vlan_tci1'. Also new offload flags of 'PKT_RX_QINQ_PKT' and
> > 'PKT_TX_QINQ_PKT' should be added.
> >
> > Signed-off-by: Helin Zhang 
> > ---
> >  app/test-pmd/flowgen.c|  2 +-
> >  app/test-pmd/macfwd.c |  2 +-
> >  app/test-pmd/macswap.c|  2 +-
> >  app/test-pmd/rxonly.c |  2 +-
> >  app/test-pmd/txonly.c |  2 +-
> >  app/test/packet_burst_generator.c |  4 ++--
> >  lib/librte_ether/rte_ether.h  |  4 ++--
> >  lib/librte_mbuf/rte_mbuf.h| 22 +++---
> >  lib/librte_pmd_e1000/em_rxtx.c|  8 
> >  lib/librte_pmd_e1000/igb_rxtx.c   |  8 
> >  lib/librte_pmd_enic/enic_ethdev.c |  2 +-
> >  lib/librte_pmd_enic/enic_main.c   |  2 +-
> >  lib/librte_pmd_fm10k/fm10k_rxtx.c |  2 +-
> >  lib/librte_pmd_i40e/i40e_rxtx.c   |  8 
> >  lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 11 +--
> >  lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c |  6 +++---
> >  16 files changed, 51 insertions(+), 36 deletions(-)
> >
> > diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c index
> > 72016c9..f24b00c 100644
> > --- a/app/test-pmd/flowgen.c
> > +++ b/app/test-pmd/flowgen.c
> > @@ -207,7 +207,7 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
> > pkt->nb_segs= 1;
> > pkt->pkt_len= pkt_size;
> > pkt->ol_flags   = ol_flags;
> > -   pkt->vlan_tci   = vlan_tci;
> > +   pkt->vlan_tci0  = vlan_tci;
> > pkt->l2_len = sizeof(struct ether_hdr);
> > pkt->l3_len = sizeof(struct ipv4_hdr);
> > pkts_burst[nb_pkt]  = pkt;
> > diff --git a/app/test-pmd/macfwd.c b/app/test-pmd/macfwd.c index
> > 035e5eb..590b613 100644
> > --- a/app/test-pmd/macfwd.c
> > +++ b/app/test-pmd/macfwd.c
> > @@ -120,7 +120,7 @@ pkt_burst_mac_forward(struct fwd_stream *fs)
> > mb->ol_flags = ol_flags;
> > mb->l2_len = sizeof(struct ether_hdr);
> > mb->l3_len = sizeof(struct ipv4_hdr);
> > -   mb->vlan_tci = txp->tx_vlan_id;
> > +   mb->vlan_tci0 = txp->tx_vlan_id;
> > }
> > nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst,
> nb_rx);
> > fs->tx_packets += nb_tx;
> > diff --git a/app/test-pmd/macswap.c b/app/test-pmd/macswap.c index
> > 6729849..c355399 100644
> > --- a/app/test-pmd/macswap.c
> > +++ b/app/test-pmd/macswap.c
> > @@ -122,7 +122,7 @@ pkt_burst_mac_swap(struct fwd_stream *fs)
> > mb->ol_flags = ol_flags;
> > mb->l2_len = sizeof(struct ether_hdr);
> > mb->l3_len = sizeof(struct ipv4_hdr);
> > -   mb->vlan_tci = txp->tx_vlan_id;
> > +   mb->vlan_tci0 = txp->tx_vlan_id;
> > }
> > nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst,
> nb_rx);
> > fs->tx_packets += nb_tx;
> > diff --git a/app/test-pmd/rxonly.c b/app/test-pmd/rxonly.c index
> > ac56090..aa2cf7f 100644
> > --- a/app/test-pmd/rxonly.c
> > +++ b/app/test-pmd/rxonly.c
> > @@ -159,7 +159,7 @@ pkt_burst_receive(struct fwd_stream *fs)
> >mb->hash.fdir.hash, mb->hash.fdir.id);
> > }
> > if (ol_flags & PKT_RX_VLAN_PKT)
> > -   printf(" - VLAN tci=0x%x", mb->vlan_tci);
> > +   printf(" - VLAN tci=0x%x", mb->vlan_tci0);
> > if (is_encapsulation) {
> > struct ipv4_hdr *ipv4_hdr;
> > struct ipv6_hdr *ipv6_hdr;
> > diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c index
> > ca32c85..4a2827f 100644
> > --- a/app/test-pmd/txonly.c
> > +++ b/app/test-pmd/txonly.c
> > @@ -266,7 +266,

[dpdk-dev] [PATCH 0/6] rte_sched: patches against 2.o

2015-05-05 Thread Dumitrescu, Cristian


> -Original Message-
> From: Stephen Hemminger [mailto:stephen at networkplumber.org]
> Sent: Wednesday, April 29, 2015 6:05 PM
> To: Dumitrescu, Cristian
> Cc: dev at dpdk.org; Stephen Hemminger
> Subject: [PATCH 0/6] rte_sched: patches against 2.o
> 
> This is a subset of earlier patches for QoS (rte_sched) subsystem.
> Only changes were to fix whitespace and update against DPDK 2.0
> 
> Stephen Hemminger (6):
>   rte_sched: make RED optional at runtime
>   rte_sched: expand scheduler hierarchy for more VLAN's
>   rte_sched: keep track of RED drops
>   rte_sched: allow reading without clearing
>   rte_sched: don't put tabs in log messages
>   rte_sched: use correct log level
> 
>  app/test/test_sched.c|   4 +-
>  examples/qos_sched/stats.c   |  16 --
>  lib/librte_mbuf/rte_mbuf.h   |   5 +-
>  lib/librte_sched/rte_sched.c | 113 --
> -
>  lib/librte_sched/rte_sched.h |  62 +++-
>  5 files changed, 130 insertions(+), 70 deletions(-)
> 
> --
> 2.1.4

Acked by: Cristian Dumitrescu 



[dpdk-dev] [PATCH] ixgbe:Add write memory barrier for recv pkts.

2015-05-05 Thread Dong Wang
Hi, Konstantin,

>
>
>> -Original Message-
>> From: outlook_739db8e1c4bc6fae at outlook.com 
>> [mailto:outlook_739db8e1c4bc6fae at outlook.com] On Behalf Of Wang Dong
>> Sent: Thursday, April 16, 2015 12:36 PM
>> To: Ananyev, Konstantin; dev at dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH] ixgbe:Add write memory barrier for recv pkts.
>>
>>>
>>>
 -Original Message-
 From: outlook_739db8e1c4bc6fae at outlook.com 
 [mailto:outlook_739db8e1c4bc6fae at outlook.com] On Behalf Of Dong.Wang
 Sent: Wednesday, April 15, 2015 2:46 PM
 To: Ananyev, Konstantin; dev at dpdk.org
 Subject: Re: [dpdk-dev] [PATCH] ixgbe:Add write memory barrier for recv 
 pkts.



> Hi,
>
>> -Original Message-
>> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of WangDong
>> Sent: Saturday, April 11, 2015 4:34 PM
>> To: dev at dpdk.org
>> Subject: [dpdk-dev] [PATCH] ixgbe:Add write memory barrier for recv pkts.
>>
>> Like transmit packets, before update receive descriptor's tail pointer, 
>> rte_wmb() should be added after writing recv descriptor.
>>
>> Signed-off-by: Dong Wang 
>> ---
>> lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 5 +
>> 1 file changed, 5 insertions(+)
>>
>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c 
>> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>> index 9da2c7e..d504688 100644
>> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>> @@ -1338,6 +1338,9 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf 
>> **rx_pkts,
>>   */
>>  rx_pkts[nb_rx++] = rxm;
>>  }
>> +
>> +rte_wmb();
>> +
>
> Why do you think it is necessary?
> I can't see any good reason to put wmb() here.
> I would understand if, at least you'll try to insert it just before 
> updating RDT:
> rx_id = (uint16_t) ((rx_id == 0) ?
> (rxq->nb_rx_desc - 1) : (rx_id - 
> 1));
> + rte_wmb();
> IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
>
> That is not needed IA with current implementation, but would make sense 
> for machines with relaxed memory ordering.
> Though right now DPDK IXGBE PMD is supported only on IA,  anyway.
> Same for ixgbe_recv_scattered_pkts().
>
> Konstantin

 Yes, the current implementation works well with IA, and the transmit packets
 function's rte_wmb() is also unnecessary.

 But there are two reasons for adding rte_wmb() in recv pkts function:
 1) The memory barrier in recv pkts function and xmit pkts function are
 inconsistent, rte_wmb() should be added to recv pkts function or be
 removed from xmit pkts function.
 2) DPDK will support PowerPC processor (Other developers are working on
 it), I check the memory ordering of PowerPC, there was no mention of
 store-store instruction's principle in MPC8544 Reference Manual, only
 said it is weak memory ordering.

 So, I think it is necessary to add rte_wmb() to recv pkts function.

 Dong
>>>
>>> What I was trying to say:
>>>
>>> 1. I think you put barrier in a wrong place.
>>> Even for machines with weak memory ordering, we need a barrier only when we 
>>> are going to update RDT, i.e:
>>> if (nb_hold > rxq->rx_free_thresh) { ... ; barrier; 
>>> IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, ...); }
>> Yes, I put it in a wrong place, it will reduce performance. It's better
>> to place it in that you suggested.
>>>
>>> 2. Even with putting wmb() here, you wouldn't fix  ixgbe_recv_pkts() to 
>>> work on machines with weak memory ordering.
>>> I think that to make it work properly, you'll need an rmb() between 
>>> reading DD bit and rest of RXD:
>>>
>>> rxdp = &rx_ring[rx_id];
>>>staterr = rxdp->wb.upper.status_error;
>>> + rte_rmb();
>>>if (! (staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
>>>   break;
>>>rxd = *rxdp;
>> Yes, it seems wmb is not enough for weak memory ordering processor. Both
>> rmb and wmb are needed.
>>>
>>> 3. As Stephen pointed in his mail, we shouldn't penalise IA implementation 
>>> with unnecessary barriers
>>> As was discussed at that thread:  
>>> http://dpdk.org/ml/archives/dev/2015-March/015202.html
>>> probably the best is to introduce a new macros: rte_smp_*mb (or something) 
>>> that would be architecture dependent:
>>> compiler_barrier on IA, proper HW barrier on machines with weak memory 
>>> ordering and update the code to use it.
I was trying to add a new macro, but I found that a new memory barrier macro
isn't needed; maybe a macro that can distinguish the memory barriers
(rte_wmb/rte_rmb) of IA and AMD would be useful. Other architectures would
still use rte_wmb() and rte_rmb().

I sent a patch about it; please take a look.

Dong

>>>
>>> So, if you like to fix that issue, please do that in  a

[dpdk-dev] [PATCH v2] Add toeplitz hash algorithm used by RSS

2015-05-05 Thread Chilikin, Andrey
Hi Vladimir,

Why limit Toeplitz hash calculation to predefined tuples and length? Should it 
be more general, something like
rte_softrss_be(void *input, uint32_t input_len, const uint8_t *rss_key) to 
enable hash calculation for an input of any size? It would be useful for 
distributing packets using some non-standard tuples, like hashing on QinQ or 
adding IP protocol to hash calculation to separate UDP and TCP flows or even 
some other fields from a packet, for example, tunnel ID from VXLAN headers. By 
the way, i40e already supports RSS for SCTP in addition to TCP and UDP and 
includes Verification Tag as well as SCTP source and destination ports for RSS 
hash.

Regards,
Andrey

> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Vladimir
> Medvedkin
> Sent: Tuesday, May 5, 2015 2:20 PM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH v2] Add toeplitz hash algorithm used by RSS
> 
> Software implementation of the Toeplitz hash function used by RSS.
> Can be used either for packet distribution on single queue NIC or for
> simulating of RSS computation on specific NIC (for example after GRE header
> decapsulating).
> 
> v2 changes
> - Add ipv6 support
> - Various style fixes
> 
> Signed-off-by: Vladimir Medvedkin 
> ---
>  lib/librte_hash/Makefile|   1 +
>  lib/librte_hash/rte_thash.h | 209
> 
>  2 files changed, 210 insertions(+)
>  create mode 100644 lib/librte_hash/rte_thash.h
> 
> diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile index
> 3696cb1..981230b 100644
> --- a/lib/librte_hash/Makefile
> +++ b/lib/librte_hash/Makefile
> @@ -49,6 +49,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_fbk_hash.c
> SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include := rte_hash.h  SYMLINK-
> $(CONFIG_RTE_LIBRTE_HASH)-include += rte_hash_crc.h  SYMLINK-
> $(CONFIG_RTE_LIBRTE_HASH)-include += rte_jhash.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_thash.h
>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_fbk_hash.h
> 
>  # this lib needs eal
> diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h new 
> file
> mode 100644 index 000..42c7bf6
> --- /dev/null
> +++ b/lib/librte_hash/rte_thash.h
> @@ -0,0 +1,209 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + *   notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + *   notice, this list of conditions and the following disclaimer in
> + *   the documentation and/or other materials provided with the
> + *   distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + *   contributors may be used to endorse or promote products derived
> + *   from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
> OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> + */
> +
> +#ifndef _RTE_THASH_H
> +#define _RTE_THASH_H
> +
> +/**
> + * @file
> + *
> + * toeplitz hash functions.
> + */
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +/**
> + * Software implementation of the Toeplitz hash function used by RSS.
> + * Can be used either for packet distribution on single queue NIC
> + * or for simulating of RSS computation on specific NIC (for example
> + * after GRE header decapsulating)
> + */
> +
> +#include 
> +#include 
> +#include 
> +
> +#ifdef __SSE3__
> +static const __m128i bswap_mask = {0x0405060700010203,
> +0x0C0D0E0F08090A0B}; #endif
> +
> +enum rte_thash_len {
> + RTE_THASH_V4_L3 = 2,/*calculate hash of ipv4 header
> only*/
> + RTE_THASH_V4_L4 = 3,/*calculate hash of ipv4 + transport
> headers*/
> + RTE_THASH_V6_L3 = 8,/*calculate hash of ipv6 header only
> */
> + RTE_THASH_V6_L4 = 9,/*calculate hash of ipv6 + transport
> headers */

[dpdk-dev] GitHub sandbox for the DPDK community

2015-05-05 Thread Wiles, Keith


Sent from my iPhone

> On May 5, 2015, at 6:56 AM, Neil Horman  wrote:
> 
>> On Mon, May 04, 2015 at 10:25:00PM -0500, Jim Thompson wrote:
>> 
>>> On May 4, 2015, at 10:12 PM, Wiles, Keith  wrote:
>>> 
>>> 
>>> 
 On 5/4/15, 10:48 AM, "Matthew Hall"  wrote:
 
> On Mon, May 04, 2015 at 12:43:48PM +, Qiu, Michael wrote:
> What mail client do you use? I think  mail client supporting thread mode
> is important for patch review.
 
 Like many UNIX people, I use mutt.
 
 My concern is that, if we're making the widespread adoption, usage, and
 contributions for DPDK dependent on selection or debate of the features
 of 
 various MUAs, I'm not sure that we're looking at this from the right
 angle.
 
 I'm just trying to figure out how to get DPDK in the place where the most
 eyeballs are, rather than trying to drag the eyeballs to the place where
 DPDK 
 is.
>>> 
>>> +1, I agree with this statement completely and I feel discussions about an
>>> MUA is non-productive and out of scope.
>> 
>> +1.  I've avoided the whole discussion, because ... ok, "non-productive and 
>> out of scope" is a polite way of saying it.
>> 
>> jim
> 
> Very well, since you seem to want to avoid talking about ways to get what you
> want in a workflow, lets go back to where the conversation started:
> 
> http://dpdk.org/ml/archives/dev/2015-May/017225.html
> 
> We got into this debate because you wanted to move the project to github, and 
> as
> supporting reasons, listed a plethora of features that you liked about the 
> site.
> This entire subthread has been meant to illustrate how you can have the 
> features
> you want that you see as advantageous in the github environment without 
> actually
> moving to github.  We've focused on email quote collapsing because we kept
> responding to one another, though I'm sure we could have the same debate on 
> any
> one of the workflow features github offers.
> 
> Can we all agree then, that for the list posted in your email above, any 
> github
> environmental feature can be recreated with proper tooling, available today,
> without forcing the github environment on everybody?  Further, can we agree
> that, given that those features are not unique to github, they are not
> compelling reasons to move the project there?

Neil (I had to type this on my phone so please forgive any typos or other 
statements that may sound odd. I am not trying to be rude in any way)

I feel you are taking everything out of context here. The email client being 
able to collapse threads is not the point here and I have tried to redirect you 
politely to the points about moving DPDK to github. 

As I and others have pointed out, GitHub offers a huge number of eyes for the DPDK 
community. GitHub offers a different set of processes and tools, which we do 
not have to create. Moving to GitHub is a change for the community and I feel a 
good change for the better. 

For your statements above I say NO we do not agree as much as your arguments 
around a single feature of an email client is not a compelling reason to accept 
your statements. 

Github gives us the DPDK community a better and more widely accepted place to 
allow DPDK to grow and become the open source project we all want IMO. 

I want to be polite here and we are not going to agree with keeping DPDK as it 
is today. We need to grow and change is the only way, I believe moving to 
GitHub gives the best support and eyeballs on DPDK to grow. 

The tools supported on GitHub are different and yes you may need to change. The 
day to day development will remain the same and as we know that is the bulk of 
the work. The pushing of patches will change, which should be easier for more 
people to understand plus use. 

We could spend a lot of time and money to update the current system, but why 
when we could start the move to GitHub today and use those tools for free. 

I do not want this to become a flame war or something like it. I want us to try 
and figure out how we can improve the DPDK community. I can see keeping DPDK 
the way it is today, but this will stagnate DPDK IMHO and no one wants this to 
happen. 

I do not want to split the DPDK community or try alienating any one. 

Please take a breath and relax as we all want the best for DPDK. 

Regards,
++Keith

> 
> Neil
> 


[dpdk-dev] GitHub sandbox for the DPDK community

2015-05-05 Thread John W. Linville
On Tue, May 05, 2015 at 04:43:08PM +, Wiles, Keith wrote:

> Please take a breath and relax as we all want the best for DPDK. 

I cannot believe how rude, asinine, and condescending your attitude
in this thread has been.  If this is the future of the DPDK community,
I'm surprised that anyone would want to be part of it.  Neil disagreed
with some of your assertions, and now you are trying to make him
seem like some sort of foolish twit that can't see beyond his own
preferred environment-- pot, kettle, black.

As for the 'millions of eyeballs' at GitHub...just how many of those
Go, Ruby, Python, and Swift developers are going to be contributing to
DPDK and all those future NFV projects?  And how many significant,
existing DPDK contributors (like Neil) are you isolating in the
process?  Do you even care?

Old-school IBM had the right motto -- THINK.

John
-- 
John W. LinvilleSomeday the world will need a hero, and you
linville at tuxdriver.com   might be all we have.  Be ready.


[dpdk-dev] GitHub sandbox for the DPDK community

2015-05-05 Thread Wiles, Keith

Sent from my iPhone

> On May 5, 2015, at 10:58 AM, John W. Linville  
> wrote:
> 
>> On Tue, May 05, 2015 at 04:43:08PM +, Wiles, Keith wrote:
>> 
>> Please take a breath and relax as we all want the best for DPDK

Hi John

I am sorry you see it this way; as I stated in the email parts you snipped off, 
I was not trying to do exactly what you accuse me of doing!

Thank you for comments which are non-productive in the bigger scope of this 
topic. 

Not to sound condescending again, can you keep the rude comments and attitude 
out of the discussion. 

Thank you
Keith

> I cannot believe how rude, asinine, and condescending your attitude
> in this thread has been.  If this is the future of the DPDK community,
> I'm surprised that anyone would want to be part of it.  Neil disagreed
> with some of your assertions, and now you are trying to make him
> seem like some sort of foolish twit that can't see beyond his own
> preferred environment-- pot, kettle, black.
> 
> As for the 'millions of eyeballs' at GitHub...just how many of those
> Go, Ruby, Python, and Swift developers are going to be contributing to
> DPDK and all those future NFV projects?  And how many significant,
> existing DPDK contributors (like Neil) are you isolating in the
> process?  Do you even care?
> 
> Old-school IBM had the right motto -- THINK.
> 
> John
> -- 
> John W. LinvilleSomeday the world will need a hero, and you
> linville at tuxdriver.commight be all we have.  Be ready.


[dpdk-dev] [PATCH v7 06/10] eal/linux: add interrupt vectors handling on VFIO

2015-05-05 Thread Stephen Hemminger
On Tue,  5 May 2015 13:39:42 +0800
Cunming Liang  wrote:

> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c 
> b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> index aea1fb1..387f54c 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> @@ -308,6 +308,18 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, 
> int vfio_dev_fd)
>   case VFIO_PCI_MSIX_IRQ_INDEX:
>   internal_config.vfio_intr_mode = RTE_INTR_MODE_MSIX;
>   dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX;
> + for (i = 0; i < RTE_MAX_RXTX_INTR_VEC_ID; i++) {
> + fd = eventfd(0, 0);
> + if (fd < 0) {
> +

You should pass EFD_NONBLOCK and EFD_CLOEXEC as flags to any eventfd's created
internally.


[dpdk-dev] GitHub sandbox for the DPDK community

2015-05-05 Thread John W. Linville
On Tue, May 05, 2015 at 06:30:48PM +, Wiles, Keith wrote:
> 
> Sent from my iPhone
> 
> > On May 5, 2015, at 10:58 AM, John W. Linville  
> > wrote:
> > 
> >> On Tue, May 05, 2015 at 04:43:08PM +, Wiles, Keith wrote:
> >> 
> >> Please take a breath and relax as we all want the best for DPDK
> 
> Hi John
> 
> I am sorry you see it this way; as I stated in the email parts you snipped 
> off, I was not trying to do exactly what you accuse me of doing!
> 
> Thank you for comments which are non-productive in the bigger scope of this 
> topic. 
> 
> Not to sound condescending again, can you keep the rude comments and attitude 
> out of the discussion. 

Wow...really?  Someone calls you on your crap and all you have is the
"I know you are but what am I" defense?  Sad.

Perhaps you should take the advice that you so kindly offered to Neil...

John

P.S.  If it's alright with you, I'm trimming the rest of my message
that you quoted for no obvious reason.  In the old days, we called
that "Netiquette".  It's what we used to solve most of the problems
that you've been claiming can only be solved by GitHub.
-- 
John W. LinvilleSomeday the world will need a hero, and you
linville at tuxdriver.com   might be all we have.  Be ready.


[dpdk-dev] GitHub sandbox for the DPDK community

2015-05-05 Thread Neil Horman
On Tue, May 05, 2015 at 04:43:08PM +, Wiles, Keith wrote:
> 
> 
> Sent from my iPhone
> 
> > On May 5, 2015, at 6:56 AM, Neil Horman  wrote:
> > 
> >> On Mon, May 04, 2015 at 10:25:00PM -0500, Jim Thompson wrote:
> >> 
> >>> On May 4, 2015, at 10:12 PM, Wiles, Keith  
> >>> wrote:
> >>> 
> >>> 
> >>> 
>  On 5/4/15, 10:48 AM, "Matthew Hall"  wrote:
>  
> > On Mon, May 04, 2015 at 12:43:48PM +, Qiu, Michael wrote:
> > What mail client do you use? I think  mail client supporting thread mode
> > is important for patch review.
>  
>  Like many UNIX people, I use mutt.
>  
>  My concern is that, if we're making the widespread adoption, usage, and
>  contributions for DPDK dependent on selection or debate of the features
>  of 
>  various MUAs, I'm not sure that we're looking at this from the right
>  angle.
>  
>  I'm just trying to figure out how to get DPDK in the place where the most
>  eyeballs are, rather than trying to drag the eyeballs to the place where
>  DPDK 
>  is.
> >>> 
> >>> +1, I agree with this statement completely and I feel discussions about an
> >>> MUA is non-productive and out of scope.
> >> 
> >> +1.  I've avoided the whole discussion, because ... ok, "non-productive and 
> >> out of scope" is a polite way of saying it.
> >> 
> >> jim
> > 
> > Very well, since you seem to want to avoid talking about ways to get what 
> > you
> > want in a workflow, lets go back to where the conversation started:
> > 
> > http://dpdk.org/ml/archives/dev/2015-May/017225.html
> > 
> > We got into this debate because you wanted to move the project to github, 
> > and as
> > supporting reasons, listed a plethora of features that you liked about the 
> > site.
> > This entire subthread has been meant to illustrate how you can have the 
> > features
> > you want that you see as advantageous in the github environment without 
> > actually
> > moving to github.  We've focused on email quote collapsing because we kept
> > responding to one another, though I'm sure we could have the same debate on 
> > any
> > one of the workflow features github offers.
> > 
> > Can we all agree then, that for the list posted in your email above, any 
> > github
> > environmental feature can be recreated with proper tooling, available today,
> > without forcing the github environment on everybody?  Further, can we agree
> > that, given that those features are not unique to github, they are not
> > compelling reasons to move the project there?
> 
> Neil (I had to type this on my phone so please forgive any typos or other 
> statements that may sound odd. I am not trying to be rude in anyway)
> 
> I feel you are taking everything out of context here. The email client being 
> able collapse threads is not the point here and I have tried to redirect you 
> politely to the points moving DPDK to github. 
> 
I'm sorry, I disagree.  This is the context in which we began this debate:
http://dpdk.org/ml/archives/dev/2015-May/017229.html

Matthew stated (and you supported) the notion that collapsing quotes in email
was an advantageous feature to have when reviewing patches.  While that may be
true for you (I certainly don't deny it), Everything I have said so far has been
an effort to illustrate that that feature (and more generally the workflow tools
that github provides) are reproducible using existing infrastructure and tools
(i.e. that the github environment is not a reason in and of itself to move to
github, as it is not unique to that environment).  I have pointed this out
several times:

http://dpdk.org/ml/archives/dev/2015-May/017233.html
http://dpdk.org/ml/archives/dev/2015-May/017247.html

It's you and Matthew that seem to be fixed on asserting that I'm somehow
focused on only choosing a mail client 

http://dpdk.org/ml/archives/dev/2015-May/017294.html

And I don't appreciate it.  You and Matthew made statements regarding this as a
feature that you found desirable (among other features).  I'm fine with you
doing so, and I believe that they are worthwhile points of debate.  What I am
unwilling to accept however, is that any assertion to the contrary is, to use
your words "not the point".  If you want to make a statement about the
superiority of an environment, please do so, but understand that there may be
those who don't agree.  If you don't want to have the argument, retract the
statement.

However, as I stated above more than once now, if we can agree that github's
environment is not unique to github and so not a supporting reason to move the
project there, we can be done with this subthread in its entirety.

Note that I am not saying here that the tools and workflow that github provides
are expressly bad, only that they are not unique to github, and so other reasons
should be considered for the movement.

> As I and others have pointed out GitHub offers a huge number eyes for DPDK 
> community. GitHub offers a different set of processes and tools, which we d

[dpdk-dev] [PATCH v7 03/10] eal/linux: add API to set rx interrupt event monitor

2015-05-05 Thread Stephen Hemminger
On Tue,  5 May 2015 13:39:39 +0800
Cunming Liang  wrote:

>  static void
> +eal_intr_proc_rxtx_intr(int fd, struct rte_intr_handle *intr_handle)
> +{

Should be const; intr_handle is not modified.


[dpdk-dev] [PATCH v7 08/10] ixgbe: enable rx queue interrupts for both PF and VF

2015-05-05 Thread Stephen Hemminger
On Tue,  5 May 2015 13:39:44 +0800
Cunming Liang  wrote:

>  
> + /* set max interrupt vfio request */
> + if (pci_dev->intr_handle.vec_en) {
> + pci_dev->intr_handle.max_intr = hw->mac.max_rx_queues +
> + IXGBEVF_MAX_OTHER_INTR;
> + pci_dev->intr_handle.intr_vec =
> + rte_zmalloc("intr_vec",
> + hw->mac.max_rx_queues * sizeof(int), 0);
> + 

Since MSI-X vectors are limited on many hardware platforms, this whole API
should be changed so that max_intr is based on number of rx_queues actually
used by the application.  That means the setup needs to move from init to 
configure.


[dpdk-dev] GitHub sandbox for the DPDK community

2015-05-05 Thread Wiles, Keith
Neil and John and anyone else, if I have been rude or ugly in anyway that was 
not my intent. Please accept my apologies for being rude or condescending.

Sent from my iPhone

>> On May 5, 2015, at 12:07 PM, Neil Horman  wrote:
>> 
>> On Tue, May 05, 2015 at 04:43:08PM +, Wiles, Keith wrote:
>> 
>> 
>> Sent from my iPhone
>> 
> On May 5, 2015, at 6:56 AM, Neil Horman  wrote:
 
> On Mon, May 04, 2015 at 10:25:00PM -0500, Jim Thompson wrote:
> 
> On May 4, 2015, at 10:12 PM, Wiles, Keith  
> wrote:
> 
> 
> 
>>> On 5/4/15, 10:48 AM, "Matthew Hall"  wrote:
>>> 
>>> On Mon, May 04, 2015 at 12:43:48PM +, Qiu, Michael wrote:
>>> What mail client do you use? I think  mail client supporting thread mode
>>> is important for patch review.
>> 
>> Like many UNIX people, I use mutt.
>> 
>> My concern is that, if we're making the widespread adoption, usage, and
>> contributions for DPDK dependent on selection or debate of the features
>> of 
>> various MUAs, I'm not sure that we're looking at this from the right
>> angle.
>> 
>> I'm just trying to figure out how to get DPDK in the place where the most
>> eyeballs are, rather than trying to drag the eyeballs to the place where
>> DPDK 
>> is.
> 
> +1, I agree with this statement completely and I feel discussions about an
> MUA is non-productive and out of scope.
 
 +1.  I've avoided the whole discussion, because ... ok, "non-productive and 
 out of scope" is a polite way of saying it.
 
 jim
>>> 
>>> Very well, since you seem to want to avoid talking about ways to get what 
>>> you
>>> want in a workflow, lets go back to where the conversation started:
>>> 
>>> http://dpdk.org/ml/archives/dev/2015-May/017225.html
>>> 
>>> We got into this debate because you wanted to move the project to github, 
>>> and as
>>> supporting reasons, listed a plethora of features that you liked about the 
>>> site.
>>> This entire subthread has been meant to illustrate how you can have the 
>>> features
>>> you want that you see as advantageous in the github environment without 
>>> actually
>>> moving to github.  We've focused on email quote collapsing because we kept
>>> responding to one another, though I'm sure we could have the same debate on 
>>> any
>>> one of the workflow features github offers.
>>> 
>>> Can we all agree then, that for the list posted in your email above, any 
>>> github
>>> environmental feature can be recreated with proper tooling, available today,
>>> without forcing the github environment on everybody?  Further, can we agree
>>> that, given that those features are not unique to github, they are not
>>> compelling reasons to move the project there?
>> 
>> Neil (I had to type this on my phone so please forgive any typos or other 
>> statements that may sound odd. I am not trying to be rude in anyway)
>> 
>> I feel you are taking everything out of context here. The email client being 
>> able collapse threads is not the point here and I have tried to redirect you 
>> politely to the points moving DPDK to github.
> I'm sorry, I disagree.  This is the context in which we began this debate:
> http://dpdk.org/ml/archives/dev/2015-May/017229.html
> 
> Matthew stated (and you supported) the notion that collapsing quotes in email
> was an advantageous feature to have when reviewing patches.  While that may be
> true for you (I certainly don't deny it), Everything I have said so far has 
> been
> an effort to illustrate that that feature (and more generally the workflow 
> tools
> that github provides) are reproducible using existing infrastructure and tools
> (i.e. that the github environment is not a reason in and of itself to move to
> github, as it is not unique to that environment).  I have pointed this out
> several times:
> 
> http://dpdk.org/ml/archives/dev/2015-May/017233.html
> http://dpdk.org/ml/archives/dev/2015-May/017247.html
> 
> It's you and Matthew that seem to be fixed on asserting that I'm somehow
> focused on only choosing a mail client

I am sorry if I seem to be doing what you suggest. I agree an email client 
feature is not a valid reason to move. To me it was a minor point and me moving 
to another email client with that feature was not a reasonable solution for me. 
I was trying to move to other topics as I felt we both made our statements. I 
guess I responded wrong. 

> http://dpdk.org/ml/archives/dev/2015-May/017294.html
> 
> And I don't appreciate it.  You and Matthew made statements regarding this as 
> a
> feature that you found desirable (among other features).  I'm fine with you
> doing so, and I believe that they are worthwhile points of debate.  What I am
> unwilling to accept however, is that any assertion to the contrary is, to use
> your words "not the point".  If you want to make a statement about the
> superiority of an environment, please do so, but understand that there may be
> those who don'

[dpdk-dev] [PATCH] Implement memcmp using AVX/SSE instructio

2015-05-05 Thread Ravi Kerur
On Thu, Apr 23, 2015 at 3:26 PM, Ravi Kerur  wrote:

>
>
> On Thu, Apr 23, 2015 at 7:00 AM, Bruce Richardson <
> bruce.richardson at intel.com> wrote:
>
>> On Thu, Apr 23, 2015 at 06:53:44AM -0700, Ravi Kerur wrote:
>> > On Thu, Apr 23, 2015 at 2:23 AM, Ananyev, Konstantin <
>> > konstantin.ananyev at intel.com> wrote:
>> >
>> > >
>> > >
>> > > > -Original Message-
>> > > > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Bruce
>> Richardson
>> > > > Sent: Thursday, April 23, 2015 9:12 AM
>> > > > To: Wodkowski, PawelX
>> > > > Cc: dev at dpdk.org
>> > > > Subject: Re: [dpdk-dev] [PATCH] Implement memcmp using AVX/SSE
>> instructio
>> > > >
>> > > > On Thu, Apr 23, 2015 at 09:24:52AM +0200, Pawel Wodkowski wrote:
>> > > > > On 2015-04-22 17:33, Ravi Kerur wrote:
>> > > > > >+/**
>> > > > > >+ * Compare bytes between two locations. The locations must not
>> > > overlap.
>> > > > > >+ *
>> > > > > >+ * @note This is implemented as a macro, so it's address should
>> not
>> > > be taken
>> > > > > >+ * and care is needed as parameter expressions may be evaluated
>> > > multiple times.
>> > > > > >+ *
>> > > > > >+ * @param src_1
>> > > > > >+ *   Pointer to the first source of the data.
>> > > > > >+ * @param src_2
>> > > > > >+ *   Pointer to the second source of the data.
>> > > > > >+ * @param n
>> > > > > >+ *   Number of bytes to compare.
>> > > > > >+ * @return
>> > > > > >+ *   true if equal otherwise false.
>> > > > > >+ */
>> > > > > >+static inline bool
>> > > > > >+rte_memcmp(const void *src_1, const void *src,
>> > > > > >+  size_t n) __attribute__((always_inline));
>> > > > > You are exposing this as public API, so I think you should follow
>> > > > > description below or not call this _memcmp_
>> > > > >
>> > > > > int memcmp(const void *s1, const void *s2, size_t n);
>> > > > >
>> > > > > The memcmp() function returns an integer less than, equal  to,  or
>> > > greater
>> > > > > than
>> > > > >zero  if  the  first  n  bytes  of s1 is found,
>> respectively,
>> > > to be
>> > > > > less than, to
>> > > > >match, or be greater than the first n bytes of s2.
>> > > > >
>> > > >
>> > > > +1 to this point.
>> > > >
>> > > > Also, if I read your quoted performance numbers in your earlier mail
>> > > correctly,
>> > > > we are only looking at a 1-4% performance increase. Is the
>> additional
>> > > code to
>> > > > maintain worth the benefit?
>> > >
>> > > Yep, same thought here, is it really worth it?
>> > > Konstantin
>> > >
>> > > >
>> > > > /Bruce
>> > > >
>> > > > > --
>> > > > > Pawel
>> > >
>> >
>> > I think I haven't exploited every thing x86 has to offer to improve
>> > performance. I am looking for inputs. Until we have exhausted all
>> avenues I
>> > don't want to drop it. One thing I have noticed is that bigger key size
>> > gets better performance numbers. I plan to re-run perf tests with 64 and
>> > 128 bytes key size and will report back. Any other avenues to try out
>> > please let me know I will give it a shot.
>> >
>> > Thanks,
>> > Ravi
>>
>> Hi Ravi,
>>
>> are 128 byte comparisons realistic? An IPv6 5-tuple with double vlan tags
>> is still
>> only 41 bytes, or 48 with some padding added?
>> While for a memcpy function, you can see cases where you are going to
>> copy a whole
>> packet, meaning that sizes of 128B+ (up to multiple k) are realistic,
>> it's harder
>> to see that for a compare function.
>>
>> In any case, we await the results of your further optimization work to
>> see how
>> that goes.
>>
>>
>
Actually I was looking at the wrong numbers. I wrote a couple of sample programs
and found that memory comparison with AVX/SSE takes roughly 1/3rd of the cpu
ticks taken by regular memcmp.

For 16bytes,

regular memcmp
Time: 276 ticks (3623188 memcmp/tick)
Time: 276 ticks (3623188 memcmp/tick)

memcmp with AVX/SSE
Time: 86 ticks (11627906 memcmp/tick)
Time: 87 ticks (11494252 memcmp/tick)

For 32bytes,

regular memcmp
Time: 301 ticks (3322259 memcmp/tick)
Time: 302 ticks (3311258 memcmp/tick)

memcmp with AVX/SSE
Time: 87 ticks (11494252 memcmp/tick)
Time: 88 ticks (11363636 memcmp/tick)

For 64bytes,

regular memcmp
Time: 376 ticks (2855696 memcmp/tick) 0
Time: 377 ticks (2848121 memcmp/tick) 0

memcmp with AVX/SSE
Time: 110 ticks (9761289 memcmp/tick) 0
Time: 110 ticks (9761289 memcmp/tick) 0

With some modifications to original patch, and looking through
test_hash_perf which has statistics for every test (Add on empty, Add
update, Lookup) it performs, in almost all categories (16, 32, 48 and 64
bytes) AVX/SSE beats regular memcmp. Please note that the time measured in
test_hash_perf is for hash functions (jhash and hash_crc) and memcmp is
just a small part of the hash functionality.

I will send modified patch later on.

Thanks,
Ravi




> Hi Bruce,
>
> Couple of things I am planning to try
>
> 1. Use _xor_ and _testz_ instructions for comparison instead of _cmpeq_
> and _mask_.
> 2. I am using unaligned loads, not sure about the penalty

[dpdk-dev] [PATCH RFC 1/6] mbuf: update mbuf structure for QinQ support

2015-05-05 Thread Ananyev, Konstantin


> -Original Message-
> From: Chilikin, Andrey
> Sent: Tuesday, May 05, 2015 4:43 PM
> To: Ananyev, Konstantin; Zhang, Helin; dev at dpdk.org
> Subject: RE: [dpdk-dev] [PATCH RFC 1/6] mbuf: update mbuf structure for QinQ 
> support
> 
> Hi Helin,
> 
> I would agree with Konstantin about new naming for VLAN tags. I think we can 
> leave existing name for vlan_tci and just name new
> VLAN tag differently. I was thinking in the line of "vlan_tci_outer" or 
> "stag_tci". So vlan_tci will store single VLAN in case if only one L2
> tag is present or will store inner VLAN in case of two tags. "vlan_tci_outer" 
> will store outer VLAN when two L2 tags are present.
> "stag_tci" name also looks like a good candidate as in most cases if two tags 
> are presented then outer VLAN is addressed as S-Tag
> even if it is simple tag stacking.

Yep, I suppose "vlan_tci_outer" or "stag_tci" is a better name than what I 
suggested.
Konstantin

> 
> Regards,
> Andrey
> 
> > -Original Message-
> > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Ananyev,
> > Konstantin
> > Sent: Tuesday, May 5, 2015 12:05 PM
> > To: Zhang, Helin; dev at dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH RFC 1/6] mbuf: update mbuf structure for
> > QinQ support
> >
> > Hi Helin,
> >
> > > -Original Message-
> > > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Helin Zhang
> > > Sent: Tuesday, May 05, 2015 3:32 AM
> > > To: dev at dpdk.org
> > > Subject: [dpdk-dev] [PATCH RFC 1/6] mbuf: update mbuf structure for
> > > QinQ support
> > >
> > > To support QinQ, 'vlan_tci' should be replaced by 'vlan_tci0' and
> > > 'vlan_tci1'. Also new offload flags of 'PKT_RX_QINQ_PKT' and
> > > 'PKT_TX_QINQ_PKT' should be added.
> > >
> > > Signed-off-by: Helin Zhang 
> > > ---
> > >  app/test-pmd/flowgen.c|  2 +-
> > >  app/test-pmd/macfwd.c |  2 +-
> > >  app/test-pmd/macswap.c|  2 +-
> > >  app/test-pmd/rxonly.c |  2 +-
> > >  app/test-pmd/txonly.c |  2 +-
> > >  app/test/packet_burst_generator.c |  4 ++--
> > >  lib/librte_ether/rte_ether.h  |  4 ++--
> > >  lib/librte_mbuf/rte_mbuf.h| 22 +++---
> > >  lib/librte_pmd_e1000/em_rxtx.c|  8 
> > >  lib/librte_pmd_e1000/igb_rxtx.c   |  8 
> > >  lib/librte_pmd_enic/enic_ethdev.c |  2 +-
> > >  lib/librte_pmd_enic/enic_main.c   |  2 +-
> > >  lib/librte_pmd_fm10k/fm10k_rxtx.c |  2 +-
> > >  lib/librte_pmd_i40e/i40e_rxtx.c   |  8 
> > >  lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 11 +--
> > >  lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c |  6 +++---
> > >  16 files changed, 51 insertions(+), 36 deletions(-)
> > >
> > > diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c index
> > > 72016c9..f24b00c 100644
> > > --- a/app/test-pmd/flowgen.c
> > > +++ b/app/test-pmd/flowgen.c
> > > @@ -207,7 +207,7 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
> > >   pkt->nb_segs= 1;
> > >   pkt->pkt_len= pkt_size;
> > >   pkt->ol_flags   = ol_flags;
> > > - pkt->vlan_tci   = vlan_tci;
> > > + pkt->vlan_tci0  = vlan_tci;
> > >   pkt->l2_len = sizeof(struct ether_hdr);
> > >   pkt->l3_len = sizeof(struct ipv4_hdr);
> > >   pkts_burst[nb_pkt]  = pkt;
> > > diff --git a/app/test-pmd/macfwd.c b/app/test-pmd/macfwd.c index
> > > 035e5eb..590b613 100644
> > > --- a/app/test-pmd/macfwd.c
> > > +++ b/app/test-pmd/macfwd.c
> > > @@ -120,7 +120,7 @@ pkt_burst_mac_forward(struct fwd_stream *fs)
> > >   mb->ol_flags = ol_flags;
> > >   mb->l2_len = sizeof(struct ether_hdr);
> > >   mb->l3_len = sizeof(struct ipv4_hdr);
> > > - mb->vlan_tci = txp->tx_vlan_id;
> > > + mb->vlan_tci0 = txp->tx_vlan_id;
> > >   }
> > >   nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst,
> > nb_rx);
> > >   fs->tx_packets += nb_tx;
> > > diff --git a/app/test-pmd/macswap.c b/app/test-pmd/macswap.c index
> > > 6729849..c355399 100644
> > > --- a/app/test-pmd/macswap.c
> > > +++ b/app/test-pmd/macswap.c
> > > @@ -122,7 +122,7 @@ pkt_burst_mac_swap(struct fwd_stream *fs)
> > >   mb->ol_flags = ol_flags;
> > >   mb->l2_len = sizeof(struct ether_hdr);
> > >   mb->l3_len = sizeof(struct ipv4_hdr);
> > > - mb->vlan_tci = txp->tx_vlan_id;
> > > + mb->vlan_tci0 = txp->tx_vlan_id;
> > >   }
> > >   nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst,
> > nb_rx);
> > >   fs->tx_packets += nb_tx;
> > > diff --git a/app/test-pmd/rxonly.c b/app/test-pmd/rxonly.c index
> > > ac56090..aa2cf7f 100644
> > > --- a/app/test-pmd/rxonly.c
> > > +++ b/app/test-pmd/rxonly.c
> > > @@ -159,7 +159,7 @@ pkt_burst_receive(struct fwd_stream *fs)
> > >  mb->hash.fdir.hash, mb->hash.fdir.id);
> > >   }
> > >  

[dpdk-dev] [PATCH] librte_eal:Using compiler memory barrier for IA processor's rte_wmb/rte_rmb.

2015-05-05 Thread Ananyev, Konstantin
Hi Dong,

> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of WangDong
> Sent: Tuesday, May 05, 2015 4:38 PM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH] librte_eal:Using compiler memory barrier for IA 
> processor's rte_wmb/rte_rmb.
> 
> The current implementation of rte_wmb/rte_rmb for x86 is using processor 
> memory barrier. It's unnecessary for IA processor, compiler
> memory barrier is enough. 

I wouldn't say they are 'unnecessary'.
There are situations, even on IA, when you need _fence_ instructions.
So, please leave rte_*mb() macros unmodified.
I still think that we need to create a new set of architecture dependent 
macros, as what discussed before.
Probably by analogy with linux kernel rte_smp_*mb() is a good name for them.  
Though if you have some better name in mind, I am open to suggestions here.

> But if dpdk is running on an AMD processor, maybe we should use processor memory 
> barrier.

As far as I remember, amd has the same memory ordering model.
So, I don't think we need  #ifdef RTE_ARCH_X86_IA here.

Konstantin

> I add a macro to distinguish them, if we compile DPDK for IA processor, add 
> the macro (RTE_ARCH_X86_IA) can improve performance
> with compiler memory barrier. Or we can add RTE_ARCH_X86_AMD for using 
> processor memory barrier, in this case, if didn't add the
> macro, the memory ordering will not be guaranteed. Which macro is better?
> If this patch applied, the PMD's old implementation of compiler memory 
> barrier (some volatile variable) can be fixed with rte_rmb()
> and rte_wmb() for any architecture.
> 
> ---
>  lib/librte_eal/common/include/arch/x86/rte_atomic.h | 10 ++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic.h 
> b/lib/librte_eal/common/include/arch/x86/rte_atomic.h
> index e93e8ee..52b1e81 100644
> --- a/lib/librte_eal/common/include/arch/x86/rte_atomic.h
> +++ b/lib/librte_eal/common/include/arch/x86/rte_atomic.h
> @@ -49,10 +49,20 @@ extern "C" {
> 
>  #define  rte_mb() _mm_mfence()
> 
> +#ifdef RTE_ARCH_X86_IA
> +
> +#define rte_wmb() rte_compiler_barrier()
> +
> +#define rte_rmb() rte_compiler_barrier()
> +
> +#else
> +
>  #define  rte_wmb() _mm_sfence()
> 
>  #define  rte_rmb() _mm_lfence()
> 
> +#endif
> +
>  /*- 16 bit atomic operations 
> -*/
> 
>  #ifndef RTE_FORCE_INTRINSICS
> --
> 1.9.1



[dpdk-dev] [PATCH v7 09/10] igb: enable rx queue interrupts for PF

2015-05-05 Thread Stephen Hemminger
On Tue,  5 May 2015 13:39:45 +0800
Cunming Liang  wrote:

> The patch does below for igb PF:
> - Setup NIC to generate MSI-X interrupts
> - Set the IVAR register to map interrupt causes to vectors
> - Implement interrupt enable/disable functions
> 
> Signed-off-by: Danny Zhou 
> Signed-off-by: Cunming Liang 

What about E1000?

This is only usable if it works on all devices.