[dpdk-dev] [RFC PATCH 0/3] extend vmdq_dcb sample for X710 supporting

2016-01-11 Thread Jingjing Wu
Currently, the example vmdq_dcb only works on Intel® 82599 NICs.
This patch set extended this sample to make it work both on
Intel® 82599 and X710/XL710 NICs. This patch set also enabled
DCB VMDQ mode in i40e driver and added unsupported mode checking
in ixgbe driver.

Jingjing Wu (3):
  i40e: enable DCB in VMDQ vsis
  ixgbe: add more multi queue mode checking
  examples/vmdq_dcb: extend sample for X710 supporting

 doc/guides/sample_app_ug/vmdq_dcb_forwarding.rst | 169 ++
 drivers/net/i40e/i40e_ethdev.c   | 153 +++--
 drivers/net/i40e/i40e_ethdev.h   |  28 +-
 drivers/net/ixgbe/ixgbe_ethdev.c |   5 +
 examples/vmdq_dcb/main.c | 388 ++-
 5 files changed, 582 insertions(+), 161 deletions(-)

-- 
2.4.0



[dpdk-dev] [RFC PATCH 1/3] i40e: enable DCB in VMDQ vsis

2016-01-11 Thread Jingjing Wu
Currently, DCB is only enabled on PF; queue mapping and BW
configuration are only done on the PF vsi. This patch enabled DCB
for VMDQ vsis by the following steps:
 1. Take BW and ETS configuration on VEB.
 2. Take BW and ETS configuration on VMDQ vsis.
 3. Update TC and queues mapping on VMDQ vsis.
To enable DCB on VMDQ, the number of TCs should not be larger than
the number of queues in VMDQ pools, and the number of queues per
VMDQ pool is specified by CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM
in config/common_* file.

Signed-off-by: Jingjing Wu 
---
 drivers/net/i40e/i40e_ethdev.c | 153 -
 drivers/net/i40e/i40e_ethdev.h |  28 
 2 files changed, 149 insertions(+), 32 deletions(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index bf6220d..fbafcc6 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -8087,6 +8087,8 @@ i40e_vsi_update_queue_mapping(struct i40e_vsi *vsi,
int i, total_tc = 0;
uint16_t qpnum_per_tc, bsf, qp_idx;
struct rte_eth_dev_data *dev_data = I40E_VSI_TO_DEV_DATA(vsi);
+   struct i40e_pf *pf = I40E_VSI_TO_PF(vsi);
+   uint16_t used_queues;

ret = validate_tcmap_parameter(vsi, enabled_tcmap);
if (ret != I40E_SUCCESS)
@@ -8100,7 +8102,18 @@ i40e_vsi_update_queue_mapping(struct i40e_vsi *vsi,
total_tc = 1;
vsi->enabled_tc = enabled_tcmap;

-   qpnum_per_tc = dev_data->nb_rx_queues / total_tc;
+   /* different VSI has different queues assigned */
+   if (vsi->type == I40E_VSI_MAIN)
+   used_queues = dev_data->nb_rx_queues -
+   pf->nb_cfg_vmdq_vsi * RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM;
+   else if (vsi->type == I40E_VSI_VMDQ2)
+   used_queues = RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM;
+   else {
+   PMD_INIT_LOG(ERR, "unsupported VSI type.");
+   return I40E_ERR_NO_AVAILABLE_VSI;
+   }
+
+   qpnum_per_tc = used_queues / total_tc;
/* Number of queues per enabled TC */
if (qpnum_per_tc == 0) {
PMD_INIT_LOG(ERR, " number of queues is less that tcs.");
@@ -8145,6 +8158,93 @@ i40e_vsi_update_queue_mapping(struct i40e_vsi *vsi,
 }

 /*
+ * i40e_config_switch_comp_tc - Configure VEB tc setting for given TC map
+ * @veb: VEB to be configured
+ * @tc_map: enabled TC bitmap
+ *
+ * Returns 0 on success, negative value on failure
+ */
+static enum i40e_status_code
+i40e_config_switch_comp_tc(struct i40e_veb *veb, uint8_t tc_map)
+{
+   struct i40e_aqc_configure_switching_comp_bw_config_data veb_bw;
+   struct i40e_aqc_query_switching_comp_bw_config_resp bw_query;
+   struct i40e_aqc_query_switching_comp_ets_config_resp ets_query;
+   struct i40e_hw *hw = I40E_VSI_TO_HW(veb->associate_vsi);
+   enum i40e_status_code ret = I40E_SUCCESS;
+   int i;
+   uint32_t bw_max;
+
+   /* Check if enabled_tc is same as existing or new TCs */
+   if (veb->enabled_tc == tc_map)
+   return ret;
+
+   /* configure tc bandwidth */
+   memset(&veb_bw, 0, sizeof(veb_bw));
+   veb_bw.tc_valid_bits = tc_map;
+   /* Enable ETS TCs with equal BW Share for now across all VSIs */
+   for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+   if (tc_map & BIT_ULL(i))
+   veb_bw.tc_bw_share_credits[i] = 1;
+   }
+   ret = i40e_aq_config_switch_comp_bw_config(hw, veb->seid,
+  &veb_bw, NULL);
+   if (ret) {
+   PMD_INIT_LOG(ERR, "AQ command Config switch_comp BW allocation"
+ " per TC failed = %d",
+ hw->aq.asq_last_status);
+   return ret;
+   }
+
+   memset(&ets_query, 0, sizeof(ets_query));
+   ret = i40e_aq_query_switch_comp_ets_config(hw, veb->seid,
+  &ets_query, NULL);
+   if (ret != I40E_SUCCESS) {
+   PMD_DRV_LOG(ERR, "Failed to get switch_comp ETS"
+" configuration %u", hw->aq.asq_last_status);
+   return ret;
+   }
+   memset(&bw_query, 0, sizeof(bw_query));
+   ret = i40e_aq_query_switch_comp_bw_config(hw, veb->seid,
+ &bw_query, NULL);
+   if (ret != I40E_SUCCESS) {
+   PMD_DRV_LOG(ERR, "Failed to get switch_comp bandwidth"
+" configuration %u", hw->aq.asq_last_status);
+   return ret;
+   }
+
+   /* store and print out BW info */
+   veb->bw_info.bw_limit = rte_le_to_cpu_16(ets_query.port_bw_limit);
+   veb->bw_info.bw_max = ets_query.tc_bw_max;
+   PMD_DRV_LOG(DEBUG, "switch_comp bw limit:%u", veb->bw_info.bw_limit);
+   PMD_DRV_LOG(DEBUG, "switch_comp max_bw:%u", veb->bw_info.bw_max);
+   bw_max = rte_le_to_cpu_16(bw_query.tc_bw_max[0

[dpdk-dev] [RFC PATCH 2/3] ixgbe: add more multi queue mode checking

2016-01-11 Thread Jingjing Wu
The multi queue mode ETH_MQ_RX_VMDQ_DCB_RSS is not supported in
the ixgbe driver. This patch adds the check for it.

Signed-off-by: Jingjing Wu 
---
 drivers/net/ixgbe/ixgbe_ethdev.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 4c4c6df..24cd30b 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -1853,6 +1853,11 @@ ixgbe_check_mq_mode(struct rte_eth_dev *dev)
return -EINVAL;
}
} else {
+   if (dev_conf->rxmode.mq_mode == ETH_MQ_RX_VMDQ_DCB_RSS) {
+   PMD_INIT_LOG(ERR, "VMDQ+DCB+RSS mq_mode is"
+ " not supported.");
+   return -EINVAL;
+   }
/* check configuration for vmdb+dcb mode */
if (dev_conf->rxmode.mq_mode == ETH_MQ_RX_VMDQ_DCB) {
const struct rte_eth_vmdq_dcb_conf *conf;
-- 
2.4.0



[dpdk-dev] [RFC PATCH 3/3] examples/vmdq_dcb: extend sample for X710 supporting

2016-01-11 Thread Jingjing Wu
Currently, the example vmdq_dcb only works on Intel® 82599 NICs.
This patch extended this sample to make it work both on Intel® 82599
and X710/XL710 NICs by following changes:
  1. add VMDQ base queue checking to avoid forwarding on PF queues.
  2. assign each VMDQ pools with MAC address.
  3. add more arguments (nb-tcs, enable-rss) to change the default
 setting
  4. extend the max number of queues from 128 to 1024.
This patch also reworked the user guide for the vmdq_dcb sample.

Signed-off-by: Jingjing Wu 
---
 doc/guides/sample_app_ug/vmdq_dcb_forwarding.rst | 169 ++
 examples/vmdq_dcb/main.c | 388 ++-
 2 files changed, 428 insertions(+), 129 deletions(-)

diff --git a/doc/guides/sample_app_ug/vmdq_dcb_forwarding.rst 
b/doc/guides/sample_app_ug/vmdq_dcb_forwarding.rst
index 9140a22..fe717fa 100644
--- a/doc/guides/sample_app_ug/vmdq_dcb_forwarding.rst
+++ b/doc/guides/sample_app_ug/vmdq_dcb_forwarding.rst
@@ -32,8 +32,8 @@ VMDQ and DCB Forwarding Sample Application
 ==

 The VMDQ and DCB Forwarding sample application is a simple example of packet 
processing using the DPDK.
-The application performs L2 forwarding using VMDQ and DCB to divide the 
incoming traffic into 128 queues.
-The traffic splitting is performed in hardware by the VMDQ and DCB features of 
the Intel? 82599 10 Gigabit Ethernet Controller.
+The application performs L2 forwarding using VMDQ and DCB to divide the 
incoming traffic into queues.
+The traffic splitting is performed in hardware by the VMDQ and DCB features of 
the Intel? 82599 and X710/XL710  Ethernet Controller.

 Overview
 
@@ -41,28 +41,27 @@ Overview
 This sample application can be used as a starting point for developing a new 
application that is based on the DPDK and
 uses VMDQ and DCB for traffic partitioning.

-The VMDQ and DCB filters work on VLAN traffic to divide the traffic into 128 
input queues on the basis of the VLAN ID field and
-VLAN user priority field.
-VMDQ filters split the traffic into 16 or 32 groups based on the VLAN ID.
-Then, DCB places each packet into one of either 4 or 8 queues within that 
group, based upon the VLAN user priority field.
-
-In either case, 16 groups of 8 queues, or 32 groups of 4 queues, the traffic 
can be split into 128 hardware queues on the NIC,
-each of which can be polled individually by a DPDK application.
+The VMDQ and DCB filters work on MAC and VLAN traffic to divide the traffic 
into input queues on the basis of the Destination MAC
+address, VLAN ID and VLAN user priority fields.
+VMDQ filters split the traffic into 16 or 32 groups based on the Destination 
MAC and VLAN ID.
+Then, DCB places each packet into one of queues within that group, based upon 
the VLAN user priority field.

 All traffic is read from a single incoming port (port 0) and output on port 1, 
without any processing being performed.
-The traffic is split into 128 queues on input, where each thread of the 
application reads from multiple queues.
-For example, when run with 8 threads, that is, with the -c FF option, each 
thread receives and forwards packets from 16 queues.
+Take Intel? 82599 NIC for example, the traffic is split into 128 queues on 
input, where each thread of the application reads from
+multiple queues. When run with 8 threads, that is, with the -c FF option, each 
thread receives and forwards packets from 16 queues.

-As supplied, the sample application configures the VMDQ feature to have 16 
pools with 8 queues each as indicated in :numref:`figure_vmdq_dcb_example`.
-The Intel? 82599 10 Gigabit Ethernet Controller NIC also supports the 
splitting of traffic into 32 pools of 4 queues each and
-this can be used by changing the NUM_POOLS parameter in the supplied code.
-The NUM_POOLS parameter can be passed on the command line, after the EAL 
parameters:
+As supplied, the sample application configures the VMDQ feature to have 32 
pools with 4 queues each as indicated in :numref:`figure_vmdq_dcb_example`.
+The Intel? 82599 10 Gigabit Ethernet Controller NIC also supports the 
splitting of traffic into 16 pools of 8 queues. While the
+Intel? X710 or XL710 Ethernet Controller NICs support any specified VMDQ pools 
of 4 or 8 queues each. For simplicity, only 16
+or 32 pools is supported in this sample. And queues numbers for each VMDQ pool 
can be changed by setting CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM
+in config/common_* file.
+The nb-pools, nb-tcs and enable-rss parameters can be passed on the command 
line, after the EAL parameters:

 .. code-block:: console

-./build/vmdq_dcb [EAL options] -- -p PORTMASK --nb-pools NP
+./build/vmdq_dcb [EAL options] -- -p PORTMASK --nb-pools NP --nb-tcs TC 
--enable-rss

-where, NP can be 16 or 32.
+where, NP can be 16 or 32, TC can be 4 or 8, rss is disabled by default.

 .. _figure_vmdq_dcb_example:

@@ -72,9 +71,7 @@ where, NP can be 16 or 32.


 In Linux* user space, the application 

[dpdk-dev] [PATCH 08/12] pmd/mlx4: add dev_ptype_info_get implementation

2016-01-11 Thread Tan, Jianfeng
> OK, that makes sense. Please check my above comments about coding
> style/indents (I know I'm annoying).

Thank you, Mazarguil. I'll fix it when sending out v2 patch.

Jianfeng




[dpdk-dev] [PATCH v1 0/2] Virtio-net PMD Extension to work on host

2016-01-11 Thread Tan, Jianfeng
Hi Tetsuya,

> With current your implementation, when 'virtual' virtio-net PMD is used,
> 'phys_addr' will be virtual address in EAL layer.
>
> struct rte_memseg {
>  phys_addr_t phys_addr;  /**< Start physical address. */
>  union {
>  void *addr; /**< Start virtual address. */
>  uint64_t addr_64;   /**< Makes sure addr is always 64
> bits */
>  };
>  ...
> };

It's not true. It does not effect EAL layer at all. Just fill virtual 
address in virtio PMD when:
1). set_base_addr;
2). preparing RX's descriptors;
3). transmitting packets, CVA is filled in TX's descriptors;
4). in TX and CQ's header, CVA is used.

>
> How about choosing it in virtio-net PMD?

My current implementation works as you say.

> (In the case of 'virtual', just use 'addr' instead of using 'phys_addr'.)
> For example, port0 may use physical address, but port1 may use virtual
> address.
>
> With this, of course, we don't have an issue with 'physical' virtio-net PMD.
> Also, with 'virtual' virtio-net PMD, we can use virtual address and fd
> that represents the big virtual address space.
> (TODO: Need to change rte_memseg and EAL to keep fd and offset?)

I suppose you mean that when initializing memory, just maintain one fd 
in the end, and
mmap all memsegs inside it. This sounds like a good idea to solve the 
limitation of
VHOST_MEMORY_MAX_NREGIONS.

Besides, Sergio and I are discussing about using VA instead of PA in 
VFIO to avoid the
requirement of physical-config for physical devices.


Thanks,
Jianfeng



> Then, you don't worry about VHOST_MEMORY_MAX_NREGIONS, because we have
> only one fd.
>
>> b. containers without root privilege
>> No need to worry about this problem, because it lacks of privilege to
>> construct physical-contiguous memory.
>>
> Yes, we cannot run 'physical' PMDs in this type of container.
> Anyway, I will check it more, if we really need it.
>
> Thanks,
> Tetsuya



[dpdk-dev] [PATCH 0/4] Support VxLAN & NVGRE checksum off-load on X550

2016-01-11 Thread Wenzhuo Lu
This patch set adds VxLAN & NVGRE checksum off-load support.
Both RX and TX checksum off-load can be used for VxLAN & NVGRE.
The VxLAN port can also be set; that is implemented in this patch
set as well.

Wenzhuo Lu (4):
  ixgbe: support UDP tunnel add/del
  ixgbe: support VxLAN &  NVGRE RX checksum off-load
  ixgbe: support VxLAN &  NVGRE TX checksum off-load
  doc: update release note for VxLAN & NVGRE checksum off-load support

 doc/guides/rel_notes/release_2_3.rst |  9 
 drivers/net/ixgbe/ixgbe_ethdev.c | 93 
 drivers/net/ixgbe/ixgbe_rxtx.c   | 63 +++-
 drivers/net/ixgbe/ixgbe_rxtx.h   |  6 ++-
 lib/librte_mbuf/rte_mbuf.c   |  1 +
 lib/librte_mbuf/rte_mbuf.h   |  3 ++
 6 files changed, 163 insertions(+), 12 deletions(-)

-- 
1.9.3



[dpdk-dev] [PATCH 2/4] ixgbe: support VxLAN & NVGRE RX checksum off-load

2016-01-11 Thread Wenzhuo Lu
X550 will do VxLAN & NVGRE RX checksum off-load automatically.
This patch exposes the result of the checksum off-load.

Signed-off-by: Wenzhuo Lu 
---
 drivers/net/ixgbe/ixgbe_rxtx.c | 11 ++-
 lib/librte_mbuf/rte_mbuf.c |  1 +
 lib/librte_mbuf/rte_mbuf.h |  1 +
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 52a263c..512ac3a 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -1003,6 +1003,8 @@ rx_desc_status_to_pkt_flags(uint32_t rx_status)
 static inline uint64_t
 rx_desc_error_to_pkt_flags(uint32_t rx_status)
 {
+   uint64_t pkt_flags;
+
/*
 * Bit 31: IPE, IPv4 checksum error
 * Bit 30: L4I, L4I integrity error
@@ -1011,8 +1013,15 @@ rx_desc_error_to_pkt_flags(uint32_t rx_status)
0,  PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
};
-   return error_to_pkt_flags_map[(rx_status >>
+   pkt_flags = error_to_pkt_flags_map[(rx_status >>
IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
+
+   if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
+   (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
+   pkt_flags |= PKT_RX_OUTER_IP_CKSUM_BAD;
+   }
+
+   return pkt_flags;
 }

 /*
diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index c18b438..5d4af39 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -260,6 +260,7 @@ const char *rte_get_rx_ol_flag_name(uint64_t mask)
/* case PKT_RX_MAC_ERR: return "PKT_RX_MAC_ERR"; */
case PKT_RX_IEEE1588_PTP: return "PKT_RX_IEEE1588_PTP";
case PKT_RX_IEEE1588_TMST: return "PKT_RX_IEEE1588_TMST";
+   case PKT_RX_OUTER_IP_CKSUM_BAD: return "PKT_RX_OUTER_IP_CKSUM_BAD";
default: return NULL;
}
 }
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index f234ac9..5ad5e59 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -98,6 +98,7 @@ extern "C" {
 #define PKT_RX_FDIR_ID   (1ULL << 13) /**< FD id reported if FDIR match. */
 #define PKT_RX_FDIR_FLX  (1ULL << 14) /**< Flexible bytes reported if FDIR 
match. */
 #define PKT_RX_QINQ_PKT  (1ULL << 15)  /**< RX packet with double VLAN 
stripped. */
+#define PKT_RX_OUTER_IP_CKSUM_BAD (1ULL << 16)  /**< Outer IP cksum of RX pkt. 
is not OK. */
 /* add new RX flags here */

 /* add new TX flags here */
-- 
1.9.3



[dpdk-dev] [PATCH 3/4] ixgbe: support VxLAN & NVGRE TX checksum off-load

2016-01-11 Thread Wenzhuo Lu
The patch add VxLAN & NVGRE TX checksum off-load. When the flag of
outer IP header checksum offload is set, we'll set the context
descriptor to enable this checksum off-load.

Signed-off-by: Wenzhuo Lu 
---
 drivers/net/ixgbe/ixgbe_rxtx.c | 52 ++
 drivers/net/ixgbe/ixgbe_rxtx.h |  6 -
 lib/librte_mbuf/rte_mbuf.h |  2 ++
 3 files changed, 49 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 512ac3a..fea2495 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -85,7 +85,8 @@
PKT_TX_VLAN_PKT |\
PKT_TX_IP_CKSUM |\
PKT_TX_L4_MASK | \
-   PKT_TX_TCP_SEG)
+   PKT_TX_TCP_SEG | \
+   PKT_TX_OUTER_IP_CKSUM)

 static inline struct rte_mbuf *
 rte_rxmbuf_alloc(struct rte_mempool *mp)
@@ -364,9 +365,11 @@ ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
uint32_t ctx_idx;
uint32_t vlan_macip_lens;
union ixgbe_tx_offload tx_offload_mask;
+   uint32_t seqnum_seed = 0;

ctx_idx = txq->ctx_curr;
-   tx_offload_mask.data = 0;
+   tx_offload_mask.data[0] = 0;
+   tx_offload_mask.data[1] = 0;
type_tucmd_mlhl = 0;

/* Specify which HW CTX to upload. */
@@ -430,9 +433,20 @@ ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
}
}

+   if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
+   tx_offload_mask.outer_l3_len |= ~0;
+   tx_offload_mask.outer_l2_len |= ~0;
+   seqnum_seed |= tx_offload.outer_l3_len
+  << IXGBE_ADVTXD_OUTER_IPLEN;
+   seqnum_seed |= tx_offload.outer_l2_len
+  << IXGBE_ADVTXD_TUNNEL_LEN;
+   }
+
txq->ctx_cache[ctx_idx].flags = ol_flags;
-   txq->ctx_cache[ctx_idx].tx_offload.data  =
-   tx_offload_mask.data & tx_offload.data;
+   txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
+   tx_offload_mask.data[0] & tx_offload.data[0];
+   txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
+   tx_offload_mask.data[1] & tx_offload.data[1];
txq->ctx_cache[ctx_idx].tx_offload_mask= tx_offload_mask;

ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
@@ -441,7 +455,7 @@ ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << 
IXGBE_ADVTXD_VLAN_SHIFT);
ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
-   ctx_txd->seqnum_seed = 0;
+   ctx_txd->seqnum_seed = seqnum_seed;
 }

 /*
@@ -454,16 +468,24 @@ what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t 
flags,
 {
/* If match with the current used context */
if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
-   (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
-   (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & 
tx_offload.data {
+   (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
+   (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
+& tx_offload.data[0])) &&
+   (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
+   (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
+& tx_offload.data[1] {
return txq->ctx_curr;
}

/* What if match with the next context  */
txq->ctx_curr ^= 1;
if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
-   (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
-   (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & 
tx_offload.data {
+   (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
+   (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
+& tx_offload.data[0])) &&
+   (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
+   (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
+& tx_offload.data[1] {
return txq->ctx_curr;
}

@@ -492,6 +514,12 @@ tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
if (ol_flags & PKT_TX_TCP_SEG)
cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
+   if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
+   cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
+   if (ol_flags & PKT_TX_VXLAN_PKT)
+   cmdtype &= ~(1 << IXGBE_ADVTXD_TUNNEL_TYPE_NVGRE);
+   else
+   cmdtype |= (1 << IXGBE_ADVTXD_TUNNEL_TYPE_NVGRE);
return cmdtype;
 }

@@ -588,8 +616,10 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint64_t tx_ol_req;
uint32_t ctx = 0;

[dpdk-dev] [PATCH 4/4] doc: update release note for VxLAN & NVGRE checksum off-load support

2016-01-11 Thread Wenzhuo Lu
Signed-off-by: Wenzhuo Lu 
---
 doc/guides/rel_notes/release_2_3.rst | 9 +
 1 file changed, 9 insertions(+)

diff --git a/doc/guides/rel_notes/release_2_3.rst 
b/doc/guides/rel_notes/release_2_3.rst
index 99de186..8a8f878 100644
--- a/doc/guides/rel_notes/release_2_3.rst
+++ b/doc/guides/rel_notes/release_2_3.rst
@@ -4,6 +4,15 @@ DPDK Release 2.3
 New Features
 

+* **Support VxLAN & NVGRE checksum off-load on X550**
+
+  * VxLAN & NVGRE RX/TX checksum off-load is supported on X550.
+Provide RX/TX checksum off-load on both inner and outer IP
+header and TCP header.
+  * Support VxLAN port configuration. Although the default VxLAN
+port number is 4789, it can be changed. We should make it
+configable to meet the change.
+

 Resolved Issues
 ---
-- 
1.9.3



[dpdk-dev] [PATCH 1/4] ixgbe: support UDP tunnel add/del

2016-01-11 Thread Wenzhuo Lu
Add UDP tunnel add/del support on ixgbe. Now it only support
VxLAN port configuration.
Although the VxLAN port has a default value 4789, it can be
changed. We support VxLAN port configuration to meet the
change.
Note, the default value of VxLAN port in ixgbe NICs is 0. So
please set it when using VxLAN off-load.

Signed-off-by: Wenzhuo Lu 
---
 drivers/net/ixgbe/ixgbe_ethdev.c | 93 
 1 file changed, 93 insertions(+)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 4c4c6df..381cbad 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -337,6 +337,10 @@ static int ixgbe_timesync_read_time(struct rte_eth_dev 
*dev,
   struct timespec *timestamp);
 static int ixgbe_timesync_write_time(struct rte_eth_dev *dev,
   const struct timespec *timestamp);
+static int ixgbe_dev_udp_tunnel_add(struct rte_eth_dev *dev,
+   struct rte_eth_udp_tunnel *udp_tunnel);
+static int ixgbe_dev_udp_tunnel_del(struct rte_eth_dev *dev,
+   struct rte_eth_udp_tunnel *udp_tunnel);

 /*
  * Define VF Stats MACRO for Non "cleared on read" register
@@ -495,6 +499,8 @@ static const struct eth_dev_ops ixgbe_eth_dev_ops = {
.timesync_adjust_time = ixgbe_timesync_adjust_time,
.timesync_read_time   = ixgbe_timesync_read_time,
.timesync_write_time  = ixgbe_timesync_write_time,
+   .udp_tunnel_add   = ixgbe_dev_udp_tunnel_add,
+   .udp_tunnel_del   = ixgbe_dev_udp_tunnel_del,
 };

 /*
@@ -6191,6 +6197,93 @@ ixgbe_dev_get_dcb_info(struct rte_eth_dev *dev,
return 0;
 }

+#define DEFAULT_VXLAN_PORT 4789
+
+/* on x550, there's only one register for VxLAN UDP port.
+ * So, we cannot add or del the port. We only update it.
+ */
+static int
+ixgbe_update_vxlan_port(struct ixgbe_hw *hw,
+   uint16_t port)
+{
+   IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, port);
+   IXGBE_WRITE_FLUSH(hw);
+
+   return 0;
+}
+
+/* Add UDP tunneling port */
+static int
+ixgbe_dev_udp_tunnel_add(struct rte_eth_dev *dev,
+struct rte_eth_udp_tunnel *udp_tunnel)
+{
+   int ret = 0;
+   struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+   if (hw->mac.type != ixgbe_mac_X550 &&
+   hw->mac.type != ixgbe_mac_X550EM_x) {
+   return -ENOTSUP;
+   }
+
+   if (udp_tunnel == NULL)
+   return -EINVAL;
+
+   switch (udp_tunnel->prot_type) {
+   case RTE_TUNNEL_TYPE_VXLAN:
+   /* cannot add a port, update the port value */
+   ret = ixgbe_update_vxlan_port(hw, udp_tunnel->udp_port);
+   break;
+
+   case RTE_TUNNEL_TYPE_GENEVE:
+   case RTE_TUNNEL_TYPE_TEREDO:
+   PMD_DRV_LOG(ERR, "Tunnel type is not supported now.");
+   ret = -1;
+   break;
+
+   default:
+   PMD_DRV_LOG(ERR, "Invalid tunnel type");
+   ret = -1;
+   break;
+   }
+
+   return ret;
+}
+
+/* Remove UDP tunneling port */
+static int
+ixgbe_dev_udp_tunnel_del(struct rte_eth_dev *dev,
+struct rte_eth_udp_tunnel *udp_tunnel)
+{
+   int ret = 0;
+   struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+   if (hw->mac.type != ixgbe_mac_X550 &&
+   hw->mac.type != ixgbe_mac_X550EM_x) {
+   return -ENOTSUP;
+   }
+
+   if (udp_tunnel == NULL)
+   return -EINVAL;
+
+   switch (udp_tunnel->prot_type) {
+   case RTE_TUNNEL_TYPE_VXLAN:
+   /* cannot del the port, reset it to default */
+   ret = ixgbe_update_vxlan_port(hw, DEFAULT_VXLAN_PORT);
+   break;
+   case RTE_TUNNEL_TYPE_GENEVE:
+   case RTE_TUNNEL_TYPE_TEREDO:
+   PMD_DRV_LOG(ERR, "Tunnel type is not supported now.");
+   ret = -1;
+   break;
+   default:
+   PMD_DRV_LOG(ERR, "Invalid tunnel type");
+   ret = -1;
+   break;
+   }
+
+   return ret;
+}
+
 static struct rte_driver rte_ixgbe_driver = {
.type = PMD_PDEV,
.init = rte_ixgbe_pmd_init,
-- 
1.9.3



[dpdk-dev] [PATCH 1/2] fm10k: Add Atwood Channel Support

2016-01-11 Thread Michael Qiu
Atwood Channel is an Intel 25G NIC, and this patch adds support
for it in DPDK.

Signed-off-by: Michael Qiu
---
 drivers/net/fm10k/base/fm10k_osdep.h| 4 
 lib/librte_eal/common/include/rte_pci_dev_ids.h | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/drivers/net/fm10k/base/fm10k_osdep.h 
b/drivers/net/fm10k/base/fm10k_osdep.h
index 6852ef0..9cb46ff 100644
--- a/drivers/net/fm10k/base/fm10k_osdep.h
+++ b/drivers/net/fm10k/base/fm10k_osdep.h
@@ -48,6 +48,10 @@ POSSIBILITY OF SUCH DAMAGE.
 #define BOULDER_RAPIDS_HW
 #endif

+#ifndef ATWOOD_CHANNEL_HW
+#define ATWOOD_CHANNEL_HW
+#endif
+
 #define STATIC  static
 #define DEBUGFUNC(F)DEBUGOUT(F "\n");
 #define DEBUGOUT(S, args...)PMD_DRV_LOG_RAW(DEBUG, S, ##args)
diff --git a/lib/librte_eal/common/include/rte_pci_dev_ids.h 
b/lib/librte_eal/common/include/rte_pci_dev_ids.h
index e31b934..cb0d177 100644
--- a/lib/librte_eal/common/include/rte_pci_dev_ids.h
+++ b/lib/librte_eal/common/include/rte_pci_dev_ids.h
@@ -530,9 +530,11 @@ RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, 
I40E_DEV_ID_10G_BASE_T_X722)

 #define FM10K_DEV_ID_PF   0x15A4
 #define FM10K_DEV_ID_SDI_FM10420_QDA2 0x15D0
+#define FM10K_DEV_ID_SDI_FM10420_DA2  0x15D5

 RTE_PCI_DEV_ID_DECL_FM10K(PCI_VENDOR_ID_INTEL, FM10K_DEV_ID_PF)
 RTE_PCI_DEV_ID_DECL_FM10K(PCI_VENDOR_ID_INTEL, FM10K_DEV_ID_SDI_FM10420_QDA2)
+RTE_PCI_DEV_ID_DECL_FM10K(PCI_VENDOR_ID_INTEL, FM10K_DEV_ID_SDI_FM10420_DA2)

 /** Virtual IGB devices from e1000_hw.h **/

-- 
1.9.3



[dpdk-dev] [PATCH 2/2] fm10k: update doc for Atwood Channel

2016-01-11 Thread Michael Qiu
Atwood Channel is 20GbE NIC and belongs to Intel FM10K family,
update the doc for it.

Signed-off-by: Michael Qiu 
---
 doc/guides/rel_notes/release_2_3.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/guides/rel_notes/release_2_3.rst 
b/doc/guides/rel_notes/release_2_3.rst
index 99de186..7dd9c0f 100644
--- a/doc/guides/rel_notes/release_2_3.rst
+++ b/doc/guides/rel_notes/release_2_3.rst
@@ -3,7 +3,9 @@ DPDK Release 2.3

 New Features
 
+* **New NIC Atwood Channel support.**

+  Added support for the Atwood Channel variant of Intel's fm10k NIC family.

 Resolved Issues
 ---
-- 
1.9.3



[dpdk-dev] [PATCH 01/12] ethdev: add API to query what/if packet type is set

2016-01-11 Thread Tan, Jianfeng
Hi,

According to the proposal, I'm going to fix the definition of this API 
as below:
/**
  * Retrieve the contextual information of an Ethernet device.
  *
  * @param port_id
  *   The port identifier of the Ethernet device.
  * @param ptype_mask
  *   A hint of what kinds of packet types the caller is interested in
  * @param ptypes
  *   An array of packet types to be filled with
  * @param num
  * Size of ptypes[]
  * @return
  *   - (>=0) if successful. Indicate number of valid values in ptypes 
array.
  *   - (-ENOTSUP) if the device does not support packet type retrieval.
  *   - (-ENODEV) if *port_id* invalid.
  */
extern int rte_eth_dev_get_ptype_info(uint8_t port_id,
 uint32_t ptype_mask, uint32_t ptypes[], 
uint32_t num);

Unresolved issues:
1) When num is exceeded, should we just stop there and return num, or 
return -ENOMEM?
The first way has a bug: a packet type the app is actually asking for may 
not be filled into ptypes[] because num was exceeded, yet the app believes 
this API returned successfully.

2) Are the RTE_PTYPE_*_MAX_NUM macros necessary? Without them, we could 
calculate num as 2^(number of 1 bits in RTE_PTYPE_*_MASK).

Thanks,
Jianfeng


[dpdk-dev] [PATCH 1/4] ixgbe: support UDP tunnel add/del

2016-01-11 Thread Vincent JARDIN
see inline

Le 11 janv. 2016 08:08, "Wenzhuo Lu"  a ?crit :
>
> Add UDP tunnel add/del support on ixgbe. Now it only support
> VxLAN port configuration.
> Although the VxLAN port has a default value 4789, it can be
> changed. We support VxLAN port configuration to meet the
> change.
> Note, the default value of VxLAN port in ixgbe NICs is 0. So
> please set it when using VxLAN off-load.
>
> Signed-off-by: Wenzhuo Lu 
> ---
>  drivers/net/ixgbe/ixgbe_ethdev.c | 93

>  1 file changed, 93 insertions(+)
>
> diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c
b/drivers/net/ixgbe/ixgbe_ethdev.c
> index 4c4c6df..381cbad 100644
> --- a/drivers/net/ixgbe/ixgbe_ethdev.c
> +++ b/drivers/net/ixgbe/ixgbe_ethdev.c
> @@ -337,6 +337,10 @@ static int ixgbe_timesync_read_time(struct
rte_eth_dev *dev,
>struct timespec *timestamp);
>  static int ixgbe_timesync_write_time(struct rte_eth_dev *dev,
>const struct timespec *timestamp);
> +static int ixgbe_dev_udp_tunnel_add(struct rte_eth_dev *dev,
> +   struct rte_eth_udp_tunnel
*udp_tunnel);
> +static int ixgbe_dev_udp_tunnel_del(struct rte_eth_dev *dev,
> +   struct rte_eth_udp_tunnel
*udp_tunnel);
>
>  /*
>   * Define VF Stats MACRO for Non "cleared on read" register
> @@ -495,6 +499,8 @@ static const struct eth_dev_ops ixgbe_eth_dev_ops = {
> .timesync_adjust_time = ixgbe_timesync_adjust_time,
> .timesync_read_time   = ixgbe_timesync_read_time,
> .timesync_write_time  = ixgbe_timesync_write_time,
> +   .udp_tunnel_add   = ixgbe_dev_udp_tunnel_add,
> +   .udp_tunnel_del   = ixgbe_dev_udp_tunnel_del,
>  };
>

Your patch is not adding HW tunnel support but port management.

>  /*
> @@ -6191,6 +6197,93 @@ ixgbe_dev_get_dcb_info(struct rte_eth_dev *dev,
> return 0;
>  }
>
> +#define DEFAULT_VXLAN_PORT 4789
> +
> +/* on x550, there's only one register for VxLAN UDP port.
> + * So, we cannot add or del the port. We only update it.
> + */
> +static int
> +ixgbe_update_vxlan_port(struct ixgbe_hw *hw,
> +   uint16_t port)
> +{
> +   IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, port);
> +   IXGBE_WRITE_FLUSH(hw);
> +
> +   return 0;
> +}
> +
> +/* Add UDP tunneling port */
> +static int
> +ixgbe_dev_udp_tunnel_add(struct rte_eth_dev *dev,
> +struct rte_eth_udp_tunnel *udp_tunnel)
> +{
> +   int ret = 0;
> +   struct ixgbe_hw *hw =
IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> +
> +   if (hw->mac.type != ixgbe_mac_X550 &&
> +   hw->mac.type != ixgbe_mac_X550EM_x) {
> +   return -ENOTSUP;
> +   }
> +
> +   if (udp_tunnel == NULL)
> +   return -EINVAL;
> +
> +   switch (udp_tunnel->prot_type) {
> +   case RTE_TUNNEL_TYPE_VXLAN:
> +   /* cannot add a port, update the port value */
> +   ret = ixgbe_update_vxlan_port(hw, udp_tunnel->udp_port);
> +   break;
> +
> +   case RTE_TUNNEL_TYPE_GENEVE:
> +   case RTE_TUNNEL_TYPE_TEREDO:
> +   PMD_DRV_LOG(ERR, "Tunnel type is not supported now.");
> +   ret = -1;
> +   break;
> +
> +   default:
> +   PMD_DRV_LOG(ERR, "Invalid tunnel type");
> +   ret = -1;
> +   break;
> +   }
> +
> +   return ret;
> +}

Is tunnel_add a proper naming? We need to keep flexibility for NICs that
will support full HW tunneling support.

Here it is about setting registers.

> +
> +/* Remove UDP tunneling port */
> +static int
> +ixgbe_dev_udp_tunnel_del(struct rte_eth_dev *dev,
> +struct rte_eth_udp_tunnel *udp_tunnel)
> +{
> +   int ret = 0;
> +   struct ixgbe_hw *hw =
IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> +
> +   if (hw->mac.type != ixgbe_mac_X550 &&
> +   hw->mac.type != ixgbe_mac_X550EM_x) {
> +   return -ENOTSUP;
> +   }
> +
> +   if (udp_tunnel == NULL)
> +   return -EINVAL;
> +
> +   switch (udp_tunnel->prot_type) {
> +   case RTE_TUNNEL_TYPE_VXLAN:
> +   /* cannot del the port, reset it to default */
> +   ret = ixgbe_update_vxlan_port(hw, DEFAULT_VXLAN_PORT);
> +   break;
> +   case RTE_TUNNEL_TYPE_GENEVE:
> +   case RTE_TUNNEL_TYPE_TEREDO:
> +   PMD_DRV_LOG(ERR, "Tunnel type is not supported now.");
> +   ret = -1;
> +   break;
> +   default:
> +   PMD_DRV_LOG(ERR, "Invalid tunnel type");
> +   ret = -1;
> +   break;
> +   }
> +
> +   return ret;
> +}
> +
>  static struct rte_driver rte_ixgbe_driver = {
> .type = PMD_PDEV,
> .init = rte_ixgbe_pmd_init,
> --
> 1.9.3
>

I think the semantics of this series should be revisited. It is about adding
the support of inner/outer checksum.

[dpdk-dev] [PATCH 1/4] ixgbe: support UDP tunnel add/del

2016-01-11 Thread Lu, Wenzhuo
Hi Vincent,

From: Vincent JARDIN [mailto:vincent.jar...@6wind.com]
Sent: Monday, January 11, 2016 3:41 PM
To: Lu, Wenzhuo 
Cc: dev at dpdk.org
Subject: Re: [dpdk-dev] [PATCH 1/4] ixgbe: support UDP tunnel add/del


see inline

Le 11 janv. 2016 08:08, "Wenzhuo Lu" mailto:wenzhuo.lu 
at intel.com>> a ?crit :
>
> Add UDP tunnel add/del support on ixgbe. Now it only support
> VxLAN port configuration.
> Although the VxLAN port has a default value 4789, it can be
> changed. We support VxLAN port configuration to meet the
> change.
> Note, the default value of VxLAN port in ixgbe NICs is 0. So
> please set it when using VxLAN off-load.
>
> Signed-off-by: Wenzhuo Lu mailto:wenzhuo.lu at 
> intel.com>>
> ---
>  drivers/net/ixgbe/ixgbe_ethdev.c | 93 
> 
>  1 file changed, 93 insertions(+)
>
> diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c 
> b/drivers/net/ixgbe/ixgbe_ethdev.c
> index 4c4c6df..381cbad 100644
> --- a/drivers/net/ixgbe/ixgbe_ethdev.c
> +++ b/drivers/net/ixgbe/ixgbe_ethdev.c
> @@ -337,6 +337,10 @@ static int ixgbe_timesync_read_time(struct rte_eth_dev 
> *dev,
>struct timespec *timestamp);
>  static int ixgbe_timesync_write_time(struct rte_eth_dev *dev,
>const struct timespec *timestamp);
> +static int ixgbe_dev_udp_tunnel_add(struct rte_eth_dev *dev,
> +   struct rte_eth_udp_tunnel *udp_tunnel);
> +static int ixgbe_dev_udp_tunnel_del(struct rte_eth_dev *dev,
> +   struct rte_eth_udp_tunnel *udp_tunnel);
>
>  /*
>   * Define VF Stats MACRO for Non "cleared on read" register
> @@ -495,6 +499,8 @@ static const struct eth_dev_ops ixgbe_eth_dev_ops = {
> .timesync_adjust_time = ixgbe_timesync_adjust_time,
> .timesync_read_time   = ixgbe_timesync_read_time,
> .timesync_write_time  = ixgbe_timesync_write_time,
> +   .udp_tunnel_add   = ixgbe_dev_udp_tunnel_add,
> +   .udp_tunnel_del   = ixgbe_dev_udp_tunnel_del,
>  };
>

Your patch is not adding HW tunnel support but port management.

>  /*
> @@ -6191,6 +6197,93 @@ ixgbe_dev_get_dcb_info(struct rte_eth_dev *dev,
> return 0;
>  }
>
> +#define DEFAULT_VXLAN_PORT 4789
> +
> +/* on x550, there's only one register for VxLAN UDP port.
> + * So, we cannot add or del the port. We only update it.
> + */
> +static int
> +ixgbe_update_vxlan_port(struct ixgbe_hw *hw,
> +   uint16_t port)
> +{
> +   IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, port);
> +   IXGBE_WRITE_FLUSH(hw);
> +
> +   return 0;
> +}
> +
> +/* Add UDP tunneling port */
> +static int
> +ixgbe_dev_udp_tunnel_add(struct rte_eth_dev *dev,
> +struct rte_eth_udp_tunnel *udp_tunnel)
> +{
> +   int ret = 0;
> +   struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> +
> +   if (hw->mac.type != ixgbe_mac_X550 &&
> +   hw->mac.type != ixgbe_mac_X550EM_x) {
> +   return -ENOTSUP;
> +   }
> +
> +   if (udp_tunnel == NULL)
> +   return -EINVAL;
> +
> +   switch (udp_tunnel->prot_type) {
> +   case RTE_TUNNEL_TYPE_VXLAN:
> +   /* cannot add a port, update the port value */
> +   ret = ixgbe_update_vxlan_port(hw, udp_tunnel->udp_port);
> +   break;
> +
> +   case RTE_TUNNEL_TYPE_GENEVE:
> +   case RTE_TUNNEL_TYPE_TEREDO:
> +   PMD_DRV_LOG(ERR, "Tunnel type is not supported now.");
> +   ret = -1;
> +   break;
> +
> +   default:
> +   PMD_DRV_LOG(ERR, "Invalid tunnel type");
> +   ret = -1;
> +   break;
> +   }
> +
> +   return ret;
> +}

Is tunnel_add a proper naming? We need to keep flexibility for NICs that will 
support full HW tunneling support.

Here it is about setting registers.

> +
> +/* Remove UDP tunneling port */
> +static int
> +ixgbe_dev_udp_tunnel_del(struct rte_eth_dev *dev,
> +struct rte_eth_udp_tunnel *udp_tunnel)
> +{
> +   int ret = 0;
> +   struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> +
> +   if (hw->mac.type != ixgbe_mac_X550 &&
> +   hw->mac.type != ixgbe_mac_X550EM_x) {
> +   return -ENOTSUP;
> +   }
> +
> +   if (udp_tunnel == NULL)
> +   return -EINVAL;
> +
> +   switch (udp_tunnel->prot_type) {
> +   case RTE_TUNNEL_TYPE_VXLAN:
> +   /* cannot del the port, reset it to default */
> +   ret = ixgbe_update_vxlan_port(hw, DEFAULT_VXLAN_PORT);
> +   break;
> +   case RTE_TUNNEL_TYPE_GENEVE:
> +   case RTE_TUNNEL_TYPE_TEREDO:
> +   PMD_DRV_LOG(ERR, "Tunnel type is not supported now.");
> +   ret = -1;
> +   break;
> +   default:
> +   PMD_DRV_LOG(ERR, "Invalid tunnel type");
> +   ret = -1;
> +   

[dpdk-dev] [PATCH 1/4] ixgbe: support UDP tunnel add/del

2016-01-11 Thread Vincent JARDIN
> [Wenzhuo] The udp_tunnel_add and udp_tunnel_del have already existed. I
just use them. Honestly I agree with you they are not accurate name. Better
change them to udp_tunnel_port_add and udp_tunnel_port_del. But it should
be a ABI change if I?m not wrong. I think we can announce it this release
and change them in the next release. Would you agree?  Thanks.

Yes you are right.


[dpdk-dev] [PATCH 01/12] ethdev: add API to query what/if packet type is set

2016-01-11 Thread Ananyev, Konstantin
Hi Jianfeng,

> -Original Message-
> From: Tan, Jianfeng
> Sent: Monday, January 11, 2016 7:39 AM
> To: Ananyev, Konstantin; N?lio Laranjeiro; dev at dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 01/12] ethdev: add API to query what/if packet 
> type is set
> 
> Hi,
> 
> According to the proposal, I'm going to fix the definition of this API
> as below:
> /**
>   * Retrieve the contextual information of an Ethernet device.
>   *
>   * @param port_id
>   *   The port identifier of the Ethernet device.
>   * @param ptype_mask
>   *   A hint of what kind of packet type which the caller is interested in
>   * @param ptypes
>   *   An array of packet types to be filled with
>   * @param num
>   * Size of ptypes[]
>   * @return
>   *   - (>=0) if successful. Indicate number of valid values in ptypes
> array.
>   *   - (-ENOTSUP) if hardware-assisted VLAN stripping not configured.
>   *   - (-ENODEV) if *port_id* invalid.
>   */
> extern int rte_eth_dev_get_ptype_info(uint8_t port_id,
>  uint32_t ptype_mask, uint32_t ptypes[],
> uint32_t num);
> 
> Unresolved issues:
> 1) When num is exceeded, we just stop there and return num, or return
> -ENOMEM?

I think when num is exceeded it should return the number of entries needed
to hold all requested packet types.
Same as snprintf() does when it has to truncate the output buffer.

> The first way has a bug when: what app is exactly asking for is not
> filled in ptypes[] because of
> exceeding num, but app believes this API returns with success.

It is the caller's responsibility to check the return value and handle it properly.
When the return value exceeds num, the caller can resize ptypes[] and call 
get_ptype_info() again.

> 
> 2) if RTE_PTYPE_*_MAX_NUM macros necessary? Without them, we could calculate
> num through 2^(number of bit 1 in RTE_PTPE_*_MASK).

I don't think caller has to guess somehow what number of entries in ptypes[] he 
need .
He can retrieve that information from get_ptype_info() itself.
Something like that for example:

num = rte_eth_dev_get_ptype_info(port, UINT32_MAX, NULL, 0);
if (num < 0) return num;
ptypes = alloca(num * sizeof(ptypes[0]);
ret = rte_eth_dev_get_ptype_info(port, UINT32_MAX, ptypes, num);
if (ret != num) return -1;


Konstantin

> 
> Thanks,
> Jianfeng


[dpdk-dev] [PATCH 3/4] virtio/vdev: add ways to interact with vhost

2016-01-11 Thread Pavel Fedin
 Hello!

 Please, see inline

> -Original Message-
> From: Jianfeng Tan [mailto:jianfeng.tan at intel.com]
> Sent: Sunday, January 10, 2016 2:43 PM
> To: dev at dpdk.org
> Cc: rich.lane at bigswitch.com; yuanhan.liu at linux.intel.com; mst at 
> redhat.com;
> nakajima.yoshihiro at lab.ntt.co.jp; huawei.xie at intel.com; mukawa at 
> igel.co.jp;
> p.fedin at samsung.com; michael.qiu at intel.com; ann.zhuangyanying at 
> huawei.com; Jianfeng Tan
> Subject: [PATCH 3/4] virtio/vdev: add ways to interact with vhost
> 
> Depends on the type of vhost file: vhost-user is used if the given
> path points to a unix socket; vhost-net is used if the given path
> points to a char device.
> 
> NOTE: we now keep CONFIG_RTE_VIRTIO_VDEV undefined by default, need
> to be uncommented when in use.
> 
> Signed-off-by: Huawei Xie 
> Signed-off-by: Jianfeng Tan 
> ---
>  config/common_linuxapp |   5 +
>  drivers/net/virtio/Makefile|   4 +
>  drivers/net/virtio/vhost.c | 734 
> +
>  drivers/net/virtio/vhost.h | 192 ++
>  drivers/net/virtio/virtio_ethdev.h |   5 +-
>  drivers/net/virtio/virtio_pci.h|  52 ++-
>  6 files changed, 990 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/net/virtio/vhost.c
>  create mode 100644 drivers/net/virtio/vhost.h
> 
> diff --git a/config/common_linuxapp b/config/common_linuxapp
> index 74bc515..f76e162 100644
> --- a/config/common_linuxapp
> +++ b/config/common_linuxapp
> @@ -534,3 +534,8 @@ CONFIG_RTE_APP_TEST=y
>  CONFIG_RTE_TEST_PMD=y
>  CONFIG_RTE_TEST_PMD_RECORD_CORE_CYCLES=n
>  CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
> +
> +#
> +# Enable virtio support for container
> +#
> +CONFIG_RTE_VIRTIO_VDEV=y
> diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
> index 43835ba..0877023 100644
> --- a/drivers/net/virtio/Makefile
> +++ b/drivers/net/virtio/Makefile
> @@ -52,6 +52,10 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx.c
>  SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_ethdev.c
>  SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c
> 
> +ifeq ($(CONFIG_RTE_VIRTIO_VDEV),y)
> + SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += vhost.c
> +endif
> +
>  # this lib depends upon:
>  DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
>  DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf
> diff --git a/drivers/net/virtio/vhost.c b/drivers/net/virtio/vhost.c
> new file mode 100644
> index 000..e423e02
> --- /dev/null
> +++ b/drivers/net/virtio/vhost.c
> @@ -0,0 +1,734 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + *   notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + *   notice, this list of conditions and the following disclaimer in
> + *   the documentation and/or other materials provided with the
> + *   distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + *   contributors may be used to endorse or promote products derived
> + *   from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +#include 
> +#include 
> +
> +#include "virtio_pci.h"
> +#include "virtio_logs.h"
> +#include "virtio_ethdev.h"
> +#include "virtqueue.h"
> +#include "vhost.h"
> +
> +static int
> +vhost_user_write(int fd, void *buf, int len, int *fds, int fd_num)
> +{
> + struct msghdr msgh;
> + struct iovec iov;
> + int r;
> +
> + size_t fd_size = fd_num * sizeof(int);
> + cha

[dpdk-dev] [PATCH] pmd/virtio: fix cannot start virtio dev after stop

2016-01-11 Thread Pavel Fedin
 Hello!

 I tried to apply your patch to master and got compile errors. See inline.

> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Jianfeng Tan
> Sent: Tuesday, January 05, 2016 4:08 AM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH] pmd/virtio: fix cannot start virtio dev after stop
> 
> Fix the issue that virtio device cannot be started after stopped.
> 
> The field, hw->started, should be changed by virtio_dev_start/stop instead
> of virtio_dev_close.
> 
> Signed-off-by: Jianfeng Tan 
> ---
>  drivers/net/virtio/virtio_ethdev.c | 13 -
>  1 file changed, 8 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_ethdev.c 
> b/drivers/net/virtio/virtio_ethdev.c
> index d928339..07fe271 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -490,11 +490,13 @@ virtio_dev_close(struct rte_eth_dev *dev)
> 
>   PMD_INIT_LOG(DEBUG, "virtio_dev_close");
> 
> + if (hw->started == 1)
> + virtio_dev_stop(eth_dev);
> +

 'dev', but not 'eth_dev' here.

>   /* reset the NIC */
>   if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
>   vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR);
>   vtpci_reset(hw);
> - hw->started = 0;
>   virtio_dev_free_mbufs(dev);
>   virtio_free_queues(dev);
>  }
> @@ -1408,10 +1410,9 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
>   if (rte_eal_process_type() == RTE_PROC_SECONDARY)
>   return -EPERM;
> 
> - if (hw->started == 1) {
> - virtio_dev_stop(eth_dev);
> - virtio_dev_close(eth_dev);
> - }
> + /* Close it anyway since there's no way to know if closed */
> + virtio_dev_close(eth_dev);
> +
>   pci_dev = eth_dev->pci_dev;
> 
>   eth_dev->dev_ops = NULL;
> @@ -1615,6 +1616,8 @@ virtio_dev_stop(struct rte_eth_dev *dev)
> 
>   PMD_INIT_LOG(DEBUG, "stop");
> 
> + hw->started = 0;
> +

 'hw' is not declared in this function, you have to add it.

>   if (dev->data->dev_conf.intr_conf.lsc)
>   rte_intr_disable(&dev->pci_dev->intr_handle);
> 
> --
> 2.1.4

Kind regards,
Pavel Fedin
Expert Engineer
Samsung Electronics Research center Russia




[dpdk-dev] [PATCH 2/4] mem: add API to obstain memory-backed file info

2016-01-11 Thread Pavel Fedin
 Hello!

> -Original Message-
> From: Jianfeng Tan [mailto:jianfeng.tan at intel.com]
> Sent: Sunday, January 10, 2016 2:43 PM
> To: dev at dpdk.org
> Cc: rich.lane at bigswitch.com; yuanhan.liu at linux.intel.com; mst at 
> redhat.com;
> nakajima.yoshihiro at lab.ntt.co.jp; huawei.xie at intel.com; mukawa at 
> igel.co.jp;
> p.fedin at samsung.com; michael.qiu at intel.com; ann.zhuangyanying at 
> huawei.com; Jianfeng Tan
> Subject: [PATCH 2/4] mem: add API to obstain memory-backed file info

 "obtain" - typo in subject

> 
> A new API named rte_eal_get_backfile_info() and a new data
> struct back_file is added to obstain information of memory-
> backed file info.
> 
> Signed-off-by: Huawei Xie 
> Signed-off-by: Jianfeng Tan 
> ---
>  lib/librte_eal/common/include/rte_memory.h | 16 +
>  lib/librte_eal/linuxapp/eal/eal_memory.c   | 37 
> ++
>  2 files changed, 53 insertions(+)
> 
> diff --git a/lib/librte_eal/common/include/rte_memory.h
> b/lib/librte_eal/common/include/rte_memory.h
> index 9c9e40f..75ef8db 100644
> --- a/lib/librte_eal/common/include/rte_memory.h
> +++ b/lib/librte_eal/common/include/rte_memory.h
> @@ -109,6 +109,22 @@ struct rte_memseg {
>  } __rte_packed;
> 
>  /**
> + * This struct is used to store information about memory-backed file that
> + * we mapped in memory initialization.
> + */
> +struct back_file {
> + void *addr; /**< virtual addr */
> + size_t size;/**< the page size */
> + char filepath[PATH_MAX]; /**< path to backing file on filesystem */
> +};
> +
> +/**
> +  * Get the hugepage file information. Caller to free.
> +  * Return number of hugepage files used.
> +  */
> +int rte_eal_get_backfile_info(struct back_file **);
> +
> +/**
>   * Lock page in physical memory and prevent from swapping.
>   *
>   * @param virt
> diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c
> b/lib/librte_eal/linuxapp/eal/eal_memory.c
> index 2bb1163..6ca1404 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_memory.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
> @@ -758,6 +758,9 @@ sort_by_physaddr(struct hugepage_file *hugepg_tbl, struct 
> hugepage_info
> *hpi)
>   return 0;
>  }
> 
> +static struct hugepage_file *hugepage_files;
> +static int num_hugepage_files;
> +
>  /*
>   * Uses mmap to create a shared memory area for storage of data
>   * Used in this file to store the hugepage file map on disk
> @@ -776,9 +779,29 @@ create_shared_memory(const char *filename, const size_t 
> mem_size)
>   retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 
> 0);
>   close(fd);
> 
> + hugepage_files = retval;
> + num_hugepage_files = mem_size / (sizeof(struct hugepage_file));
> +
>   return retval;
>  }
> 
> +int
> +rte_eal_get_backfile_info(struct back_file **p)
> +{
> + struct back_file *backfiles;
> + int i, num_backfiles = num_hugepage_files;
> +
> + backfiles = malloc(sizeof(struct back_file) * num_backfiles);
> + for (i = 0; i < num_backfiles; ++i) {
> + backfiles[i].addr = hugepage_files[i].final_va;
> + backfiles[i].size = hugepage_files[i].size;
> + strcpy(backfiles[i].filepath, hugepage_files[i].filepath);
> + }
> +
> + *p = backfiles;
> + return num_backfiles;
> +}
> +
>  /*
>   * this copies *active* hugepages from one hugepage table to another.
>   * destination is typically the shared memory.
> @@ -1157,6 +1180,20 @@ rte_eal_hugepage_init(void)
>   mcfg->memseg[0].len = internal_config.memory;
>   mcfg->memseg[0].socket_id = socket_id;
> 
> + hugepage = create_shared_memory(eal_hugepage_info_path(),
> + sizeof(struct hugepage_file));
> + hugepage->orig_va = addr;
> + hugepage->final_va = addr;
> + hugepage->physaddr = rte_mem_virt2phy(addr);
> + hugepage->size = pagesize;
> + hugepage->socket_id = socket_id;
> + hugepage->file_id = 0;
> + hugepage->memseg_id = 0;
> +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
> + hugepage->repeated = internal_config.memory / pagesize;
> +#endif
> + strncpy(hugepage->filepath, filepath, MAX_HUGEPAGE_PATH);
> +
>   close(fd);
> 
>   return 0;
> --
> 2.1.4

Kind regards,
Pavel Fedin
Expert Engineer
Samsung Electronics Research center Russia



[dpdk-dev] [PATCH v2] pmd/virtio: fix cannot start virtio dev after stop

2016-01-11 Thread Jianfeng Tan
v2 changes:
- Address compiling error.
- Add Reported-by.

Fix the issue that virtio device cannot be started after stopped.

The field, hw->started, should be changed by virtio_dev_start/stop instead
of virtio_dev_close.

Reported-by: Pavel Fedin 
Signed-off-by: Jianfeng Tan 
Acked-by: Yuanhan Liu 

---
 drivers/net/virtio/virtio_ethdev.c | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index d928339..5bdd305 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -490,11 +490,13 @@ virtio_dev_close(struct rte_eth_dev *dev)

PMD_INIT_LOG(DEBUG, "virtio_dev_close");

+   if (hw->started == 1)
+   virtio_dev_stop(dev);
+
/* reset the NIC */
if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR);
vtpci_reset(hw);
-   hw->started = 0;
virtio_dev_free_mbufs(dev);
virtio_free_queues(dev);
 }
@@ -1408,10 +1410,9 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
if (rte_eal_process_type() == RTE_PROC_SECONDARY)
return -EPERM;

-   if (hw->started == 1) {
-   virtio_dev_stop(eth_dev);
-   virtio_dev_close(eth_dev);
-   }
+   /* Close it anyway since there's no way to know if closed */
+   virtio_dev_close(eth_dev);
+
pci_dev = eth_dev->pci_dev;

eth_dev->dev_ops = NULL;
@@ -1612,9 +1613,12 @@ static void
 virtio_dev_stop(struct rte_eth_dev *dev)
 {
struct rte_eth_link link;
+   struct virtio_hw *hw = dev->data->dev_private;

PMD_INIT_LOG(DEBUG, "stop");

+   hw->started = 0;
+
if (dev->data->dev_conf.intr_conf.lsc)
rte_intr_disable(&dev->pci_dev->intr_handle);

-- 
2.1.4



[dpdk-dev] [RFC v2 0/2] ethdev: Enhancements to flow director filter

2016-01-11 Thread Rahul Lakkireddy
Hi All,

On Wednesday, December 12/23/15, 2015 at 18:11:19 +0530, Rahul Lakkireddy wrote:
> This RFC series of patches attempt to extend the flow director filter to
> add support for Chelsio T5 hardware filtering capabilities.
> 
> Chelsio T5 supports carrying out filtering in hardware which supports 3
> actions to carry out on a packet which hit a filter viz.
> 
> 1. Action Pass - Packets hitting a filter rule can be directed to a
>particular RXQ.
> 
> 2. Action Drop - Packets hitting a filter rule are dropped in h/w.
> 
> 3. Action Switch - Packets hitting a filter rule can be switched in h/w
>from one port to another, without involvement of host.  Also, the
>action Switch also supports rewrite of src-mac/dst-mac headers as
>well as rewrite of vlan headers.  It also supports rewrite of IP
>headers and thereby, supports NAT (Network Address Translation)
>in h/w.
> 
> Also, each filter rule can optionally support specifying a mask value
> i.e. it's possible to create a filter rule for an entire subnet of IP
> addresses or a range of tcp/udp ports, etc.
> 
> Patch 1 does the following:
> - Adds an additional flow rte_eth_pkt_filter_flow which encapsulates
>   ingress ports, l2 payload, vlan and ntuples.
> - Adds an additional mask for the flow to allow range of values to be
>   matched.
> - Adds an ability to set both filters with masks (Maskfull) and
>   without masks (Maskless).  Also allow prioritizing one of these
>   filter types over the other when a packet matches several types.
> - Adds a new behavior 'switch'.
> - Adds behavior arguments that can be passed when a particular behavior
>   is taken.  For ex: in case of action 'switch', pass additional 4-tuple
>   to allow rewriting src/dst ip and port addresses to support NAT'ing.
> 
> Patch 2 shows testpmd command line example to support packet filter
> flow.
> 
> The patch series has been compile tested on all x86 gcc targets and the
> current fdir filter supported drivers seem to return appropriate error
> codes when this new flow type and the new action are not supported and
> hence are not affected.
> 
> Posting this series mainly for discussion on API change. Once this is
> agreeable then, I will post the cxgbe PMD changes to use the new API.
> 
> ---
> v2:
> 1. Added ttl to rte_eth_ipv4_flow and tc, flow_label, next_header,
>and hop_limit to rte_eth_ipv6_flow.
> 
> 2. Added new field type to rte_eth_pkt_filter_flow to differentiate
>between maskfull and maskless filter types.
> 
> 3. Added new field prio to rte_eth_pkt_filter_flow to allow setting
>priority over maskfull or maskless when packet matches multiple
>filter types.
> 
> 4. Added new behavior sub op RTE_FDIR_BEHAVIOR_SUB_OP_SWAP to allow
>swapping fields in matched flows. For ex, useful when swapping mac
>addresses in hardware before switching.
> 
> 5. Updated the testpmd example to reflect the above new changes.
> 
> 6. Dropped Patch 3 since the ABI announcement has already been merged.
> 
> Rahul Lakkireddy (2):
>   ethdev: add packet filter flow and new behavior switch to fdir
>   testpmd: add an example to show packet filter flow
> 
>  app/test-pmd/cmdline.c  | 528 
> +++-
>  lib/librte_ether/rte_eth_ctrl.h | 127 +-
>  2 files changed, 646 insertions(+), 9 deletions(-)
> 
> -- 
> 2.5.3
> 

Any comments on this RFC series?  If the overall approach is fine then,
I'll re-submit it as a PATCH series along with the CXGBE PMD driver
changes.

Thanks,
Rahul


[dpdk-dev] [PATCH 3/4] virtio/vdev: add ways to interact with vhost

2016-01-11 Thread Pavel Fedin
 Hello! There's one more problem (see inline).

> -Original Message-
> From: Jianfeng Tan [mailto:jianfeng.tan at intel.com]
> Sent: Sunday, January 10, 2016 2:43 PM
> To: dev at dpdk.org
> Cc: rich.lane at bigswitch.com; yuanhan.liu at linux.intel.com; mst at 
> redhat.com;
> nakajima.yoshihiro at lab.ntt.co.jp; huawei.xie at intel.com; mukawa at 
> igel.co.jp;
> p.fedin at samsung.com; michael.qiu at intel.com; ann.zhuangyanying at 
> huawei.com; Jianfeng Tan
> Subject: [PATCH 3/4] virtio/vdev: add ways to interact with vhost
> 
> Depends on the type of vhost file: vhost-user is used if the given
> path points to a unix socket; vhost-net is used if the given path
> points to a char device.
> 
> NOTE: we now keep CONFIG_RTE_VIRTIO_VDEV undefined by default, need
> to be uncommented when in use.
> 
> Signed-off-by: Huawei Xie 
> Signed-off-by: Jianfeng Tan 
> ---
>  config/common_linuxapp |   5 +
>  drivers/net/virtio/Makefile|   4 +
>  drivers/net/virtio/vhost.c | 734 
> +
>  drivers/net/virtio/vhost.h | 192 ++
>  drivers/net/virtio/virtio_ethdev.h |   5 +-
>  drivers/net/virtio/virtio_pci.h|  52 ++-
>  6 files changed, 990 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/net/virtio/vhost.c
>  create mode 100644 drivers/net/virtio/vhost.h
> 
> diff --git a/config/common_linuxapp b/config/common_linuxapp
> index 74bc515..f76e162 100644
> --- a/config/common_linuxapp
> +++ b/config/common_linuxapp
> @@ -534,3 +534,8 @@ CONFIG_RTE_APP_TEST=y
>  CONFIG_RTE_TEST_PMD=y
>  CONFIG_RTE_TEST_PMD_RECORD_CORE_CYCLES=n
>  CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
> +
> +#
> +# Enable virtio support for container
> +#
> +CONFIG_RTE_VIRTIO_VDEV=y
> diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
> index 43835ba..0877023 100644
> --- a/drivers/net/virtio/Makefile
> +++ b/drivers/net/virtio/Makefile
> @@ -52,6 +52,10 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx.c
>  SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_ethdev.c
>  SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c
> 
> +ifeq ($(CONFIG_RTE_VIRTIO_VDEV),y)
> + SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += vhost.c
> +endif
> +
>  # this lib depends upon:
>  DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
>  DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf
> diff --git a/drivers/net/virtio/vhost.c b/drivers/net/virtio/vhost.c
> new file mode 100644
> index 000..e423e02
> --- /dev/null
> +++ b/drivers/net/virtio/vhost.c
> @@ -0,0 +1,734 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + *   notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + *   notice, this list of conditions and the following disclaimer in
> + *   the documentation and/or other materials provided with the
> + *   distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + *   contributors may be used to endorse or promote products derived
> + *   from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +#include 
> +#include 
> +
> +#include "virtio_pci.h"
> +#include "virtio_logs.h"
> +#include "virtio_ethdev.h"
> +#include "virtqueue.h"
> +#include "vhost.h"
> +
> +static int
> +vhost_user_write(int fd, void *buf, int len, int *fds, int fd_num)
> +{
> + struct msghdr msgh;
> + struct iovec iov;
> + int r;
> +
> + size_t fd_size = fd_num * sizeof

[dpdk-dev] [PATCH 0/4] virtio support for container

2016-01-11 Thread Pavel Fedin
 Hello!


> This patchset is to provide high performance networking interface (virtio)
> for container-based DPDK applications. The way of starting DPDK apps in
> containers with ownership of NIC devices exclusively is beyond the scope.
> The basic idea here is to present a new virtual device (named eth_cvio),
> which can be discovered and initialized in container-based DPDK apps using
> rte_eal_init(). To minimize the change, we reuse already-existing virtio
> frontend driver code (driver/net/virtio/).

 With the aforementioned fixes i tried to run it inside libvirt-lxc. I got the 
following:
a) With hugepages - "abort with 256 hugepage files exceed the maximum of 8 for 
vhost-user" - i set -m 512
b) With --single-file - ovs runs, but doesn't get any packets at all. When i 
try to ping the container from within host side, it
counts drops on vhost-user port.

Kind regards,
Pavel Fedin
Expert Engineer
Samsung Electronics Research center Russia




[dpdk-dev] [PATCH v2] pmd/virtio: fix cannot start virtio dev after stop

2016-01-11 Thread Pavel Fedin
 Tested-by: Pavel Fedin 

Kind regards,
Pavel Fedin
Expert Engineer
Samsung Electronics Research center Russia


> -Original Message-
> From: Jianfeng Tan [mailto:jianfeng.tan at intel.com]
> Sent: Monday, January 11, 2016 9:16 AM
> To: dev at dpdk.org
> Cc: p.fedin at samsung.com; yuanhan.liu at linux.intel.com; Jianfeng Tan
> Subject: [PATCH v2] pmd/virtio: fix cannot start virtio dev after stop
> 
> v2 changes:
> - Address compiling error.
> - Add Reported-by.
> 
> Fix the issue that virtio device cannot be started after stopped.
> 
> The field, hw->started, should be changed by virtio_dev_start/stop instead
> of virtio_dev_close.
> 
> Reported-by: Pavel Fedin 
> Signed-off-by: Jianfeng Tan 
> Acked-by: Yuanhan Liu 
> 
> ---
>  drivers/net/virtio/virtio_ethdev.c | 14 +-
>  1 file changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_ethdev.c 
> b/drivers/net/virtio/virtio_ethdev.c
> index d928339..5bdd305 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -490,11 +490,13 @@ virtio_dev_close(struct rte_eth_dev *dev)
> 
>   PMD_INIT_LOG(DEBUG, "virtio_dev_close");
> 
> + if (hw->started == 1)
> + virtio_dev_stop(dev);
> +
>   /* reset the NIC */
>   if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
>   vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR);
>   vtpci_reset(hw);
> - hw->started = 0;
>   virtio_dev_free_mbufs(dev);
>   virtio_free_queues(dev);
>  }
> @@ -1408,10 +1410,9 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
>   if (rte_eal_process_type() == RTE_PROC_SECONDARY)
>   return -EPERM;
> 
> - if (hw->started == 1) {
> - virtio_dev_stop(eth_dev);
> - virtio_dev_close(eth_dev);
> - }
> + /* Close it anyway since there's no way to know if closed */
> + virtio_dev_close(eth_dev);
> +
>   pci_dev = eth_dev->pci_dev;
> 
>   eth_dev->dev_ops = NULL;
> @@ -1612,9 +1613,12 @@ static void
>  virtio_dev_stop(struct rte_eth_dev *dev)
>  {
>   struct rte_eth_link link;
> + struct virtio_hw *hw = dev->data->dev_private;
> 
>   PMD_INIT_LOG(DEBUG, "stop");
> 
> + hw->started = 0;
> +
>   if (dev->data->dev_conf.intr_conf.lsc)
>   rte_intr_disable(&dev->pci_dev->intr_handle);
> 
> --
> 2.1.4




[dpdk-dev] [PATCH 0/4] virtio support for container

2016-01-11 Thread Tan, Jianfeng


Hi Fedin,
>   With the aforementioned fixes i tried to run it inside libvirt-lxc. I got 
> the following:
> a) With hugepages - "abort with 256 hugepage files exceed the maximum of 8 
> for vhost-user" - i set -m 512
This is currently a known issue, we have discussed in another thread 
with Tetsuya.
> b) With --single-file - ovs runs, but doesn't get any packets at all. When i 
> try to ping the container from within host side, it
> counts drops on vhost-user port.
Can you check the OVS in host side, if it prints out message of "virtio 
is now ready for processing"?

Thanks,
Jianfeng

>
> Kind regards,
> Pavel Fedin
> Expert Engineer
> Samsung Electronics Research center Russia
>
>



[dpdk-dev] [PATCH 00/14] Step towards PCI independency

2016-01-11 Thread Jan Viktorin
Hello David,

did you find time to see the patchset? I am working on a PMD on top of
these so I'd be glad to base on the code close to the (potentially)
upstreamed one.

Regards
Jan

On Tue, 5 Jan 2016 10:45:11 +0100
David Marchand  wrote:

> Hello Jan,
> 
> I agree that ethdev / cryptodev should really have no idea of what device
> type is underneath.
> I was thinking of a slightly difference approach, but yours looks fine.
> 
> I must find time to have a closer look at your patchset, asap.
> 
> 


[dpdk-dev] [PATCH] doc: add a further ACL example

2016-01-11 Thread Antonio Fischetti
Add a further ACL example where the elements of the search key
are not entirely fitting into the 4 consecutive bytes of all
input fields.

Signed-off-by: Antonio Fischetti 
---
 .../prog_guide/packet_classif_access_ctrl.rst  | 68 ++
 1 file changed, 68 insertions(+)
 mode change 100644 => 100755 
doc/guides/prog_guide/packet_classif_access_ctrl.rst

diff --git a/doc/guides/prog_guide/packet_classif_access_ctrl.rst 
b/doc/guides/prog_guide/packet_classif_access_ctrl.rst
old mode 100644
new mode 100755
index a9a5815..5fd3d34
--- a/doc/guides/prog_guide/packet_classif_access_ctrl.rst
+++ b/doc/guides/prog_guide/packet_classif_access_ctrl.rst
@@ -246,6 +246,74 @@ A typical example of such an IPv6 2-tuple rule is a 
follows:
 Any IPv6 packets with protocol ID 6 (TCP), and source address inside the range
 [2001:db8:1234::::: - 
2001:db8:1234:::::] matches the above rule.

+In the following example the last element of the search key is 8-bit long.
+So it is a case where the 4 consecutive bytes of an input field are not fully 
occupied.
+The structure for the classification is:
+
+.. code-block:: c
+
+struct acl_key {
+uint8_t ip_proto;
+uint32_t ip_src;
+uint32_t ip_dst;
+uint8_t tos;  /*< This is partially using a 32-bit input element */
+};
+
+The following array of field definitions can be used:
+
+.. code-block:: c
+
+struct rte_acl_field_def ipv4_defs[4] = {
+/* first input field - always one byte long. */
+{
+.type = RTE_ACL_FIELD_TYPE_BITMASK,
+.size = sizeof (uint8_t),
+.field_index = 0,
+.input_index = 0,
+.offset = offsetof (struct acl_key, ip_proto),
+},
+
+/* next input field (IPv4 source address) - 4 consecutive bytes. */
+{
+.type = RTE_ACL_FIELD_TYPE_MASK,
+.size = sizeof (uint32_t),
+.field_index = 1,
+.input_index = 1,
+   .offset = offsetof (struct acl_key, ip_src),
+},
+
+/* next input field (IPv4 destination address) - 4 consecutive bytes. 
*/
+{
+.type = RTE_ACL_FIELD_TYPE_MASK,
+.size = sizeof (uint32_t),
+.field_index = 2,
+.input_index = 2,
+   .offset = offsetof (struct acl_key, ip_dst),
+},
+
+/*
+ * Next element of search key (Type of Service) is indeed 1 byte long.
+ * Anyway we need to allocate all the 4 consecutive bytes for it.
+ */
+{
+.type = RTE_ACL_FIELD_TYPE_BITMASK,
+.size = sizeof (uint32_t), /* All the 4 consecutive bytes are 
allocated */
+.field_index = 3,
+.input_index = 3,
+.offset = offsetof (struct acl_key, tos),
+},
+};
+
+A typical example of such an IPv4 4-tuple rule is as follows:
+
+::
+
+source addr/mask  destination addr/mask  tos/mask protocol/mask
+192.168.1.0/24192.168.2.31/321/0xff   6/0xff
+
+Any IPv4 packets with protocol ID 6 (TCP), source address 192.168.1.[0-255], 
destination address 192.168.2.31,
+ToS 1 matches the above rule.
+
 When creating a set of rules, for each rule, additional information must be 
supplied also:

 *   **priority**: A weight to measure the priority of the rules (higher is 
better).
-- 
1.9.3



[dpdk-dev] VF in the bnx2x driver broken on 57810

2016-01-11 Thread Chas Williams
Interestingly enough, VF works for a 57800 card but doesn't seem to work
for a 57810 card. To the best of my knowledge, these cards are
practically the same, i.e. E3's with rev B0. Are there any bnx2x hw
guys that could tell me why these cards might be behaving differently?

The 57810 brokenness is isolated to the receive path. It seems to
transmit fine. Of course, the linux driver works just fine so it isn't
something specific with the hypervisor configuration.


[dpdk-dev] [PATCH 2/4] mem: add API to obstain memory-backed file info

2016-01-11 Thread Rich Lane
On Sun, Jan 10, 2016 at 3:43 AM, Jianfeng Tan 
wrote:

> @@ -1157,6 +1180,20 @@ rte_eal_hugepage_init(void)
> mcfg->memseg[0].len = internal_config.memory;
> mcfg->memseg[0].socket_id = socket_id;
>
> +   hugepage = create_shared_memory(eal_hugepage_info_path(),
> +   sizeof(struct hugepage_file));
> +   hugepage->orig_va = addr;
> +   hugepage->final_va = addr;
> +   hugepage->physaddr = rte_mem_virt2phy(addr);
> +   hugepage->size = pagesize;
>

Should this be "hugepage->size = internal_config.memory"? Otherwise the
vhost-user memtable entry has a size of only 2MB.