[dpdk-dev] [PATCH v2 2/2] net/ixgbe: add mac type check for all filters

2017-03-08 Thread Wei Zhao
All kinds of filters need a hardware MAC type check
to make sure the hardware supports that type of filter.
If not, it may cause serious issues.

Fixes: 11777435c727 ("net/ixgbe: parse flow director filter")
Fixes: 672be56d76a2 ("net/ixgbe: parse n-tuple filter")
Fixes: eb3539fc8550 ("net/ixgbe: parse ethertype filter")
Fixes: 429f6ebb42cc ("net/ixgbe: parse TCP SYN filter")

v2:
 delete useless function declaration
---
 drivers/net/ixgbe/ixgbe_flow.c | 76 ++
 1 file changed, 40 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_flow.c b/drivers/net/ixgbe/ixgbe_flow.c
index 48a06a6..e2ba9c2 100644
--- a/drivers/net/ixgbe/ixgbe_flow.c
+++ b/drivers/net/ixgbe/ixgbe_flow.c
@@ -420,13 +420,17 @@ cons_parse_ntuple_filter(const struct rte_flow_attr *attr,
 
 /* a specific function for ixgbe because the flags is specific */
 static int
-ixgbe_parse_ntuple_filter(const struct rte_flow_attr *attr,
+ixgbe_parse_ntuple_filter(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
  const struct rte_flow_item pattern[],
  const struct rte_flow_action actions[],
  struct rte_eth_ntuple_filter *filter,
  struct rte_flow_error *error)
 {
int ret;
+   struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+   MAC_TYPE_FILTER_SUP_EXT(hw->mac.type);
 
ret = cons_parse_ntuple_filter(attr, pattern, actions, filter, error);
 
@@ -668,13 +672,17 @@ cons_parse_ethertype_filter(const struct rte_flow_attr 
*attr,
 }
 
 static int
-ixgbe_parse_ethertype_filter(const struct rte_flow_attr *attr,
+ixgbe_parse_ethertype_filter(struct rte_eth_dev *dev,
+const struct rte_flow_attr *attr,
 const struct rte_flow_item pattern[],
 const struct rte_flow_action actions[],
 struct rte_eth_ethertype_filter *filter,
 struct rte_flow_error *error)
 {
int ret;
+   struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+   MAC_TYPE_FILTER_SUP(hw->mac.type);
 
ret = cons_parse_ethertype_filter(attr, pattern,
actions, filter, error);
@@ -963,13 +971,17 @@ cons_parse_syn_filter(const struct rte_flow_attr *attr,
 }
 
 static int
-ixgbe_parse_syn_filter(const struct rte_flow_attr *attr,
+ixgbe_parse_syn_filter(struct rte_eth_dev *dev,
+const struct rte_flow_attr *attr,
 const struct rte_flow_item pattern[],
 const struct rte_flow_action actions[],
 struct rte_eth_syn_filter *filter,
 struct rte_flow_error *error)
 {
int ret;
+   struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+   MAC_TYPE_FILTER_SUP(hw->mac.type);
 
ret = cons_parse_syn_filter(attr, pattern,
actions, filter, error);
@@ -1152,7 +1164,7 @@ cons_parse_l2_tn_filter(const struct rte_flow_attr *attr,
 }
 
 static int
-ixgbe_validate_l2_tn_filter(struct rte_eth_dev *dev,
+ixgbe_parse_l2_tn_filter(struct rte_eth_dev *dev,
const struct rte_flow_attr *attr,
const struct rte_flow_item pattern[],
const struct rte_flow_action actions[],
@@ -2306,46 +2318,37 @@ ixgbe_parse_fdir_filter_tunnel(const struct 
rte_flow_attr *attr,
 }
 
 static int
-ixgbe_parse_fdir_filter(const struct rte_flow_attr *attr,
+ixgbe_parse_fdir_filter(struct rte_eth_dev *dev,
+   const struct rte_flow_attr *attr,
const struct rte_flow_item pattern[],
const struct rte_flow_action actions[],
struct ixgbe_fdir_rule *rule,
struct rte_flow_error *error)
 {
int ret;
+   struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+   enum rte_fdir_mode fdir_mode = dev->data->dev_conf.fdir_conf.mode;
+
+   if (hw->mac.type != ixgbe_mac_82599EB &&
+   hw->mac.type != ixgbe_mac_X540 &&
+   hw->mac.type != ixgbe_mac_X550 &&
+   hw->mac.type != ixgbe_mac_X550EM_x &&
+   hw->mac.type != ixgbe_mac_X550EM_a)
+   return -ENOTSUP;
 
ret = ixgbe_parse_fdir_filter_normal(attr, pattern,
actions, rule, error);
 
if (!ret)
-   return 0;
+   goto step_next;
 
ret = ixgbe_parse_fdir_filter_tunnel(attr, pattern,
actions, rule, error);
 
-   return ret;
-}
-
-static int
-ixgbe_validate_fdir_filter(struct rte_eth_dev *dev,
-   const struct rte_flow_attr *attr,
-

[dpdk-dev] [PATCH v2 1/2] net/ixgbe: move ixgbe 2 mac type check macro

2017-03-08 Thread Wei Zhao
Move the two ixgbe MAC type check macros to ixgbe_ethdev.h so
that they can be used by the filter parser functions in
ixgbe_flow.c.

Fixes: 6c52c126f27a ("ixgbe: move to drivers/net/")

Signed-off-by: Wei Zhao 
Signed-off-by: Wenzhuo Lu 
---
 drivers/net/ixgbe/ixgbe_ethdev.c | 12 
 drivers/net/ixgbe/ixgbe_ethdev.h | 12 
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 2e497a8..e57a427 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -5951,13 +5951,6 @@ ixgbevf_set_default_mac_addr(struct rte_eth_dev *dev, 
struct ether_addr *addr)
hw->mac.ops.set_rar(hw, 0, (void *)addr, 0, 0);
 }
 
-#define MAC_TYPE_FILTER_SUP(type)do {\
-   if ((type) != ixgbe_mac_82599EB && (type) != ixgbe_mac_X540 &&\
-   (type) != ixgbe_mac_X550 && (type) != ixgbe_mac_X550EM_x &&\
-   (type) != ixgbe_mac_X550EM_a)\
-   return -ENOTSUP;\
-} while (0)
-
 int
 ixgbe_syn_filter_set(struct rte_eth_dev *dev,
struct rte_eth_syn_filter *filter,
@@ -6225,11 +6218,6 @@ ixgbevf_dev_set_mtu(struct rte_eth_dev *dev, uint16_t 
mtu)
return 0;
 }
 
-#define MAC_TYPE_FILTER_SUP_EXT(type)do {\
-   if ((type) != ixgbe_mac_82599EB && (type) != ixgbe_mac_X540)\
-   return -ENOTSUP;\
-} while (0)
-
 static inline struct ixgbe_5tuple_filter *
 ixgbe_5tuple_filter_lookup(struct ixgbe_5tuple_filter_list *filter_list,
struct ixgbe_5tuple_filter_info *key)
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index c13b10e..058ad87 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -139,6 +139,18 @@
 #define IXGBE_MAX_FDIR_FILTER_NUM   (1024 * 32)
 #define IXGBE_MAX_L2_TN_FILTER_NUM  128
 
+#define MAC_TYPE_FILTER_SUP_EXT(type)do {\
+   if ((type) != ixgbe_mac_82599EB && (type) != ixgbe_mac_X540)\
+   return -ENOTSUP;\
+} while (0)
+
+#define MAC_TYPE_FILTER_SUP(type)do {\
+   if ((type) != ixgbe_mac_82599EB && (type) != ixgbe_mac_X540 &&\
+   (type) != ixgbe_mac_X550 && (type) != ixgbe_mac_X550EM_x &&\
+   (type) != ixgbe_mac_X550EM_a)\
+   return -ENOTSUP;\
+} while (0)
+
 /*
  * Information about the fdir mode.
  */
-- 
2.9.3



[dpdk-dev] [PATCH v2 2/2] net/ixgbe: add mac type check for all filters

2017-03-08 Thread Wei Zhao
All kinds of filters need a hardware MAC type check
to make sure the hardware supports that type of filter.
If not, it may cause serious issues.

v2:
 delete useless function declaration

Fixes: 11777435c727 ("net/ixgbe: parse flow director filter")
Fixes: 672be56d76a2 ("net/ixgbe: parse n-tuple filter")
Fixes: eb3539fc8550 ("net/ixgbe: parse ethertype filter")
Fixes: 429f6ebb42cc ("net/ixgbe: parse TCP SYN filter")

Signed-off-by: Wei Zhao 
Signed-off-by: Wenzhuo Lu 

---
 drivers/net/ixgbe/ixgbe_flow.c | 76 ++
 1 file changed, 40 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_flow.c b/drivers/net/ixgbe/ixgbe_flow.c
index 48a06a6..e2ba9c2 100644
--- a/drivers/net/ixgbe/ixgbe_flow.c
+++ b/drivers/net/ixgbe/ixgbe_flow.c
@@ -420,13 +420,17 @@ cons_parse_ntuple_filter(const struct rte_flow_attr *attr,
 
 /* a specific function for ixgbe because the flags is specific */
 static int
-ixgbe_parse_ntuple_filter(const struct rte_flow_attr *attr,
+ixgbe_parse_ntuple_filter(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
  const struct rte_flow_item pattern[],
  const struct rte_flow_action actions[],
  struct rte_eth_ntuple_filter *filter,
  struct rte_flow_error *error)
 {
int ret;
+   struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+   MAC_TYPE_FILTER_SUP_EXT(hw->mac.type);
 
ret = cons_parse_ntuple_filter(attr, pattern, actions, filter, error);
 
@@ -668,13 +672,17 @@ cons_parse_ethertype_filter(const struct rte_flow_attr 
*attr,
 }
 
 static int
-ixgbe_parse_ethertype_filter(const struct rte_flow_attr *attr,
+ixgbe_parse_ethertype_filter(struct rte_eth_dev *dev,
+const struct rte_flow_attr *attr,
 const struct rte_flow_item pattern[],
 const struct rte_flow_action actions[],
 struct rte_eth_ethertype_filter *filter,
 struct rte_flow_error *error)
 {
int ret;
+   struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+   MAC_TYPE_FILTER_SUP(hw->mac.type);
 
ret = cons_parse_ethertype_filter(attr, pattern,
actions, filter, error);
@@ -963,13 +971,17 @@ cons_parse_syn_filter(const struct rte_flow_attr *attr,
 }
 
 static int
-ixgbe_parse_syn_filter(const struct rte_flow_attr *attr,
+ixgbe_parse_syn_filter(struct rte_eth_dev *dev,
+const struct rte_flow_attr *attr,
 const struct rte_flow_item pattern[],
 const struct rte_flow_action actions[],
 struct rte_eth_syn_filter *filter,
 struct rte_flow_error *error)
 {
int ret;
+   struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+   MAC_TYPE_FILTER_SUP(hw->mac.type);
 
ret = cons_parse_syn_filter(attr, pattern,
actions, filter, error);
@@ -1152,7 +1164,7 @@ cons_parse_l2_tn_filter(const struct rte_flow_attr *attr,
 }
 
 static int
-ixgbe_validate_l2_tn_filter(struct rte_eth_dev *dev,
+ixgbe_parse_l2_tn_filter(struct rte_eth_dev *dev,
const struct rte_flow_attr *attr,
const struct rte_flow_item pattern[],
const struct rte_flow_action actions[],
@@ -2306,46 +2318,37 @@ ixgbe_parse_fdir_filter_tunnel(const struct 
rte_flow_attr *attr,
 }
 
 static int
-ixgbe_parse_fdir_filter(const struct rte_flow_attr *attr,
+ixgbe_parse_fdir_filter(struct rte_eth_dev *dev,
+   const struct rte_flow_attr *attr,
const struct rte_flow_item pattern[],
const struct rte_flow_action actions[],
struct ixgbe_fdir_rule *rule,
struct rte_flow_error *error)
 {
int ret;
+   struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+   enum rte_fdir_mode fdir_mode = dev->data->dev_conf.fdir_conf.mode;
+
+   if (hw->mac.type != ixgbe_mac_82599EB &&
+   hw->mac.type != ixgbe_mac_X540 &&
+   hw->mac.type != ixgbe_mac_X550 &&
+   hw->mac.type != ixgbe_mac_X550EM_x &&
+   hw->mac.type != ixgbe_mac_X550EM_a)
+   return -ENOTSUP;
 
ret = ixgbe_parse_fdir_filter_normal(attr, pattern,
actions, rule, error);
 
if (!ret)
-   return 0;
+   goto step_next;
 
ret = ixgbe_parse_fdir_filter_tunnel(attr, pattern,
actions, rule, error);
 
-   return ret;
-}
-
-static int
-ixgbe_validate_fdir_filter(struct rte_eth_dev *dev,
-

[dpdk-dev] [PATCH v2 1/2] net/ixgbe: move ixgbe 2 mac type check macro

2017-03-08 Thread Wei Zhao
Move the two ixgbe MAC type check macros to ixgbe_ethdev.h so
that they can be used by the filter parser functions in
ixgbe_flow.c.

Fixes: 6c52c126f27a ("ixgbe: move to drivers/net/")

Signed-off-by: Wei Zhao 
Signed-off-by: Wenzhuo Lu 
---
 drivers/net/ixgbe/ixgbe_ethdev.c | 12 
 drivers/net/ixgbe/ixgbe_ethdev.h | 12 
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 2e497a8..e57a427 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -5951,13 +5951,6 @@ ixgbevf_set_default_mac_addr(struct rte_eth_dev *dev, 
struct ether_addr *addr)
hw->mac.ops.set_rar(hw, 0, (void *)addr, 0, 0);
 }
 
-#define MAC_TYPE_FILTER_SUP(type)do {\
-   if ((type) != ixgbe_mac_82599EB && (type) != ixgbe_mac_X540 &&\
-   (type) != ixgbe_mac_X550 && (type) != ixgbe_mac_X550EM_x &&\
-   (type) != ixgbe_mac_X550EM_a)\
-   return -ENOTSUP;\
-} while (0)
-
 int
 ixgbe_syn_filter_set(struct rte_eth_dev *dev,
struct rte_eth_syn_filter *filter,
@@ -6225,11 +6218,6 @@ ixgbevf_dev_set_mtu(struct rte_eth_dev *dev, uint16_t 
mtu)
return 0;
 }
 
-#define MAC_TYPE_FILTER_SUP_EXT(type)do {\
-   if ((type) != ixgbe_mac_82599EB && (type) != ixgbe_mac_X540)\
-   return -ENOTSUP;\
-} while (0)
-
 static inline struct ixgbe_5tuple_filter *
 ixgbe_5tuple_filter_lookup(struct ixgbe_5tuple_filter_list *filter_list,
struct ixgbe_5tuple_filter_info *key)
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index c13b10e..058ad87 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -139,6 +139,18 @@
 #define IXGBE_MAX_FDIR_FILTER_NUM   (1024 * 32)
 #define IXGBE_MAX_L2_TN_FILTER_NUM  128
 
+#define MAC_TYPE_FILTER_SUP_EXT(type)do {\
+   if ((type) != ixgbe_mac_82599EB && (type) != ixgbe_mac_X540)\
+   return -ENOTSUP;\
+} while (0)
+
+#define MAC_TYPE_FILTER_SUP(type)do {\
+   if ((type) != ixgbe_mac_82599EB && (type) != ixgbe_mac_X540 &&\
+   (type) != ixgbe_mac_X550 && (type) != ixgbe_mac_X550EM_x &&\
+   (type) != ixgbe_mac_X550EM_a)\
+   return -ENOTSUP;\
+} while (0)
+
 /*
  * Information about the fdir mode.
  */
-- 
2.9.3



[dpdk-dev] checksum and vlan insertion seem not working on X520

2017-03-08 Thread Chillance Zen
Hi ,every one

While experimenting with NIC offloading, I could not get an X520 NIC to insert
a VLAN tag or compute a checksum.
Here are my rx/tx capabilities:
rx offload capability:9f
tx offload capability:203f

with code like this:
while(1){
nr_mbufs=rte_eth_rx_burst(0,queue_id,mbufs,32);
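for(idx=0;idx<nr_mbufs;idx++){
#if 0
buffer=rte_pktmbuf_mtod(mbufs[idx],char*);
ip4=(struct ipv4_hdr*)(buffer+14);
ip4->hdr_checksum=0;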
mbufs[idx]->l2_len=14;
mbufs[idx]->l3_len=20;
mbufs[idx]->ol_flags=PKT_TX_IP_CKSUM|PKT_TX_IPV4;
#else
mbufs[idx]->vlan_tci=0xef00;
mbufs[idx]->ol_flags=PKT_TX_VLAN_PKT;
#endif
rte_eth_tx_burst(0,queue_id,&mbufs[idx],1);
}
}

Does anybody know what's wrong with my code?
Thanks & regards
Linc


Re: [dpdk-dev] [PATCH 2/2] net/ixgbe: add mac type check for all filters

2017-03-08 Thread Zhao1, Wei
Hi, Ferruh

> -Original Message-
> From: Zhao1, Wei
> Sent: Monday, March 6, 2017 3:44 PM
> To: Yigit, Ferruh ; dev@dpdk.org
> Cc: Lu, Wenzhuo 
> Subject: RE: [dpdk-dev] [PATCH 2/2] net/ixgbe: add mac type check for all
> filters
> 
> Hi, Ferruh
> 
> > -Original Message-
> > From: Yigit, Ferruh
> > Sent: Saturday, February 18, 2017 12:01 AM
> > To: Zhao1, Wei ; dev@dpdk.org
> > Cc: Lu, Wenzhuo 
> > Subject: Re: [dpdk-dev] [PATCH 2/2] net/ixgbe: add mac type check for
> > all filters
> >
> > On 2/13/2017 7:35 AM, Wei Zhao wrote:
> > > All kinds of filter need to hardware mac type check to make sure the
> > > hardware support that type of fliter.
> > > If not, it may cause serious issue.
> > >
> > > Fixes: 11777435c727 ("net/ixgbe: parse flow director filter")
> > > Fixes: 672be56d76a2 ("net/ixgbe: parse n-tuple filter")
> > > Fixes: eb3539fc8550 ("net/ixgbe: parse ethertype filter")
> > > Fixes: 429f6ebb42cc ("net/ixgbe: parse TCP SYN filter")
> > >
> > > Signed-off-by: Wei Zhao 
> > > Signed-off-by: Wenzhuo Lu 
> > > ---
> > >  drivers/net/ixgbe/ixgbe_flow.c | 129
> > > +
> > >  1 file changed, 65 insertions(+), 64 deletions(-)
> > >
> > > diff --git a/drivers/net/ixgbe/ixgbe_flow.c
> > > b/drivers/net/ixgbe/ixgbe_flow.c index 5a634d3..f414fa8 100644
> > > --- a/drivers/net/ixgbe/ixgbe_flow.c
> > > +++ b/drivers/net/ixgbe/ixgbe_flow.c
> > > @@ -84,11 +84,12 @@ cons_parse_ntuple_filter(const struct
> > > rte_flow_attr
> > *attr,
> > >   struct rte_eth_ntuple_filter *filter,
> > >   struct rte_flow_error *error);  static
> int
> > > -ixgbe_parse_ntuple_filter(const struct rte_flow_attr *attr,
> > > - const struct rte_flow_item pattern[],
> > > - const struct rte_flow_action actions[],
> > > - struct rte_eth_ntuple_filter *filter,
> > > - struct rte_flow_error *error);
> > > +ixgbe_parse_ntuple_filter(struct rte_eth_dev *dev,
> > > +   const struct rte_flow_attr *attr,
> > > +   const struct rte_flow_item pattern[],
> > > +   const struct rte_flow_action actions[],
> > > +   struct rte_eth_ntuple_filter *filter,
> > > +   struct rte_flow_error *error);
> >
> > Hi Wei,
> >
> > You don't need these function declarations at all. What do you think
> > removing these first, in a separate patch, and won't need to update
> > them here?
> >
> > Also it is possible to remove all function declarations if you move
> > "ixgbe_flow_ops" at the end of the file, that would be something I
> > prefer, but it is your call.
> >
> > Thanks,
> > Ferruh
> 
> Sorry for the late reply; I only found your mail just now.
> I think your suggestion is very good from a code style point of view, but this patch is a fix
> patch set for a P2 issue.
> So it does not focus on code style. Could the code style be changed in a separate
> patch if needed, rather than mixing the two together?
> 

I have submitted one patch for the removal and a new v2 fix patch set, as you suggested.

Thank you.



Re: [dpdk-dev] [PATCHv8 19/46] pool/dpaa2: add DPAA2 hardware offloaded mempool

2017-03-08 Thread Olivier MATZ
Hi Hemant,

On Fri, 3 Mar 2017 18:16:36 +0530, Hemant Agrawal
 wrote:
> Adding NXP DPAA2 architecture specific mempool support.
> 
> This patch also registers a dpaa2 type MEMPOOL OPS
> 
> Signed-off-by: Hemant Agrawal 
> ---
>  MAINTAINERS   |   1 +
>  config/common_base|   5 +
>  config/defconfig_arm64-dpaa2-linuxapp-gcc |   8 +
>  drivers/Makefile  |   1 +
>  drivers/pool/Makefile |  40 +++
>  drivers/pool/dpaa2/Makefile   |  72 ++
>  drivers/pool/dpaa2/dpaa2_hw_mempool.c | 339
> ++
> drivers/pool/dpaa2/dpaa2_hw_mempool.h |  95 
> drivers/pool/dpaa2/rte_pool_dpaa2_version.map |   8 +

I think the current mempool handlers should be moved first in a
separate patch.

I'd prefer drivers/mempool instead of drivers/pool (more precise and
more consistent with librte_mempool).


>
> [...]
>
> +
> +struct dpaa2_bp_info rte_dpaa2_bpid_info[MAX_BPID];
> +static struct dpaa2_bp_list *h_bp_list;
> +
> +static int
> +hw_mbuf_create_pool(struct rte_mempool *mp)

Would it work for something else than mbufs?
The initial approach of the mempool is to work for any kind of object. The
specialization for mbufs is done by the mbuf layer.


> +{
> + struct dpaa2_bp_list *bp_list;
> + struct dpaa2_dpbp_dev *avail_dpbp;
> + struct dpbp_attr dpbp_attr;
> + uint32_t bpid;
> + int ret;
> +
> + avail_dpbp = dpaa2_alloc_dpbp_dev();
> +
> + if (!avail_dpbp) {
> + PMD_DRV_LOG(ERR, "DPAA2 resources not available");
> + return -1;
> + }

The other pool handlers return a -errno instead of -1. I think it
should be the same here.

The same comment applies to other locations/functions.

> [...]
> +
> + /* Set parameters of buffer pool list */
> + bp_list->buf_pool.num_bufs = mp->size;
> + bp_list->buf_pool.size = mp->elt_size
> + - sizeof(struct rte_mbuf) - rte_pktmbuf_priv_size(mp);
> + bp_list->buf_pool.bpid = dpbp_attr.bpid;
> + bp_list->buf_pool.h_bpool_mem = NULL;
> + bp_list->buf_pool.mp = mp;
> + bp_list->buf_pool.dpbp_node = avail_dpbp;
> + bp_list->next = h_bp_list;
> +
> + bpid = dpbp_attr.bpid;
> +
> +
> + rte_dpaa2_bpid_info[bpid].meta_data_size = sizeof(struct rte_mbuf)
> + + rte_pktmbuf_priv_size(mp);

Are the 2 empty lines garbage?


> + rte_dpaa2_bpid_info[bpid].bp_list = bp_list;
> + rte_dpaa2_bpid_info[bpid].bpid = bpid;
> +
> + mp->pool_data = (void *)&rte_dpaa2_bpid_info[bpid];
> +
> + PMD_INIT_LOG(DEBUG, "BP List created for bpid =%d", dpbp_attr.bpid); +
> + h_bp_list = bp_list;
> + /* Identification for our offloaded pool_data structure
> +  */
> + mp->flags |= MEMPOOL_F_HW_PKT_POOL;

I think this flag should be declared in rte_mempool.h,
not in drivers/bus/fslmc/portal/dpaa2_hw_pvt.h.

It should also be documented, what does this flag mean?

> [...]
>
> +static
> +void rte_dpaa2_mbuf_release(struct rte_mempool *pool __rte_unused,
> + void * const *obj_table,
> + uint32_t bpid,
> + uint32_t meta_data_size,
> + int count)


Is there a reason why some functions are prefixed with rte_dpaa2_ and
others with hw_mbuf_?


> +{
> + struct qbman_release_desc releasedesc;
> + struct qbman_swp *swp;
> + int ret;
> + int i, n;
> + uint64_t bufs[DPAA2_MBUF_MAX_ACQ_REL];
> +
> + if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
> + ret = dpaa2_affine_qbman_swp();
> + if (ret != 0) {
> + RTE_LOG(ERR, PMD, "Failed to allocate IO portal");
> + return;
> + }
> + }
> + swp = DPAA2_PER_LCORE_PORTAL;
> +
> + /* Create a release descriptor required for releasing
> +  * buffers into QBMAN
> +  */
> + qbman_release_desc_clear(&releasedesc);
> + qbman_release_desc_set_bpid(&releasedesc, bpid);
> +
> + n = count % DPAA2_MBUF_MAX_ACQ_REL;
> +
> + /* convert mbuf to buffers  for the remainder*/

bad spaces

> + for (i = 0; i < n ; i++)
> + bufs[i] = (uint64_t)obj_table[i] + meta_data_size;
> +
> + /* feed them to bman*/

missing space at the end

> + do {
> + ret = qbman_swp_release(swp, &releasedesc, bufs, n);
> + } while (ret == -EBUSY);
> +
> + /* if there are more buffers to free */
> + while (n < count) {
> + /* convert mbuf to buffers */
> + for (i = 0; i < DPAA2_MBUF_MAX_ACQ_REL; i++)
> + bufs[i] = (uint64_t)obj_table[n + i] + meta_data_size;
> +
> + do {
> + ret = qbman_swp_release(swp, &releasedesc, bufs,
> + DPAA2_MBUF_MAX_ACQ_REL);
> + } while (ret == -EBUSY);

The while is not properly indented

> [.

Re: [dpdk-dev] checksum and vlan insertion seem not working on X520

2017-03-08 Thread Ananyev, Konstantin
Hi Linc,

I wonder which TX function you are using.
Make sure that you are not using simple TX function that doesn’t support any 
offloads or multiseg packets.
You need something like that at setup phase:

rte_eth_dev_info_get(port_id, &dev_info);
dev_info.default_txconf.txq_flags = 0;
...
rte_eth_tx_queue_setup(…, &dev_info.default_txconf);

Konstantin

> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Chillance Zen
> Sent: Wednesday, March 8, 2017 8:50 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] checksum and vlan insertion seem not working on X520
> 
> Hi ,every one
> 
> when I was conducting nic offloading exp,I can not make a x520 nic insert
> vlan ,nor checksum,
> here is my rx/tx capability:
> rx offload capability:9f
> tx offload capability:203f
> 
> with code like this:
> while(1){
> nr_mbufs=rte_eth_rx_burst(0,queue_id,mbufs,32);
> for(idx=0;idx<nr_mbufs;idx++){
> #if 0
> buffer=rte_pktmbuf_mtod(mbufs[idx],char*);
> ip4=(struct ipv4_hdr*)(buffer+14);
> ip4->hdr_checksum=0;
> mbufs[idx]->l2_len=14;
> mbufs[idx]->l3_len=20;
> mbufs[idx]->ol_flags=PKT_TX_IP_CKSUM|PKT_TX_IPV4;
> #else
> mbufs[idx]->vlan_tci=0xef00;
> mbufs[idx]->ol_flags=PKT_TX_VLAN_PKT;
> #endif
> rte_eth_tx_burst(0,queue_id,&mbufs[idx],1);
> }
> }
> 
> does anybody know what's wrong with my code?
> Thanks & regards
> Linc


Re: [dpdk-dev] Issues with ixgbe and rte_flow

2017-03-08 Thread Le Scouarnec Nicolas
My response is inline below, along with a further comment on the code excerpt.


From: Lu, Wenzhuo 
Sent: Wednesday, March 8, 2017 4:16 AM
To: Le Scouarnec Nicolas; dev@dpdk.org; Adrien Mazarguil 
(adrien.mazarg...@6wind.com)
Cc: Yigit, Ferruh
Subject: RE: Issues with ixgbe and rte_flow
    
>> I have been using the new API rte_flow to program filtering on an X540 
>> (ixgbe)
>> NIC. My goal is to send packets from different VLANs to different queues
>> (filtering which should be supported by flow director as far as I 
>> understand). I
>> enclosed the setup code at the bottom of this email.
>> For reference, here is the setup code I use
>>
>>   vlan_spec.tci = vlan_be;
>>   vlan_spec.tpid = 0;
>>
>>   vlan_mask.tci = rte_cpu_to_be_16(0x0fff);
>>   vlan_mask.tpid =  0;

>In my opinion, this setting is not right. As we know, the VLAN tag is inserted 
>between the MAC source address and the Ether type.
>So if we have a MAC+VLAN+IPv4 packet, the vlan_spec.tpid should be 0x8100, the 
>eth_spec.type should be 0x0800.
>+ Adrien, the author. He can correct me if I'm wrong.

Ok, I apologize, you're right. Being more used to the software side than to the 
hardware side, I misunderstood struct rte_flow_item_vlan and thought it was the 
"equivalent" of struct vlan_hdr, in which case the vlan_hdr contains the type 
of the encapsulated frame.

(  /**
 * Ethernet VLAN Header.
 * Contains the 16-bit VLAN Tag Control Identifier and the Ethernet type
 * of the encapsulated frame.
 */
struct vlan_hdr {
uint16_t vlan_tci; /**< Priority (3) + CFI (1) + Identifier Code (12) */
uint16_t eth_proto;/**< Ethernet type of encapsulated frame. */
} __attribute__((__packed__));)


Best regards,
Nicolas Le Scouarnec

[dpdk-dev] [PATCH 2/9] mbuf: make raw free function public

2017-03-08 Thread Olivier Matz
Rename __rte_mbuf_raw_free() as rte_mbuf_raw_free() and make
it public. The old function is kept for compat but is marked as
deprecated.

The next commit changes the behavior of rte_mbuf_raw_free() to
make it more consistent with rte_mbuf_raw_alloc().

Signed-off-by: Olivier Matz 
---
 drivers/net/ena/ena_ethdev.c |  2 +-
 drivers/net/mlx5/mlx5_rxtx.c |  6 +++---
 drivers/net/mpipe/mpipe_tilegx.c |  2 +-
 lib/librte_mbuf/rte_mbuf.h   | 22 --
 4 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index b5e6db6..5dd44d7 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -680,7 +680,7 @@ static void ena_rx_queue_release_bufs(struct ena_ring *ring)
ring->rx_buffer_info[ring->next_to_clean & ring_mask];
 
if (m)
-   __rte_mbuf_raw_free(m);
+   rte_mbuf_raw_free(m);
 
ring->next_to_clean++;
}
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 88b0354..41a5bb2 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1399,7 +1399,7 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
assert(pkt != (*rxq->elts)[idx]);
rep = NEXT(pkt);
rte_mbuf_refcnt_set(pkt, 0);
-   __rte_mbuf_raw_free(pkt);
+   rte_mbuf_raw_free(pkt);
pkt = rep;
}
break;
@@ -1410,13 +1410,13 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
   &rss_hash_res);
if (!len) {
rte_mbuf_refcnt_set(rep, 0);
-   __rte_mbuf_raw_free(rep);
+   rte_mbuf_raw_free(rep);
break;
}
if (unlikely(len == -1)) {
/* RX error, packet is likely too large. */
rte_mbuf_refcnt_set(rep, 0);
-   __rte_mbuf_raw_free(rep);
+   rte_mbuf_raw_free(rep);
++rxq->stats.idropped;
goto skip;
}
diff --git a/drivers/net/mpipe/mpipe_tilegx.c b/drivers/net/mpipe/mpipe_tilegx.c
index 60d5f81..536b8ea 100644
--- a/drivers/net/mpipe/mpipe_tilegx.c
+++ b/drivers/net/mpipe/mpipe_tilegx.c
@@ -558,7 +558,7 @@ mpipe_recv_flush_stack(struct mpipe_dev_priv *priv)
mbuf->data_len= 0;
mbuf->pkt_len = 0;
 
-   __rte_mbuf_raw_free(mbuf);
+   rte_mbuf_raw_free(mbuf);
}
 }
 
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index b61c430..575dc9d 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -790,20 +790,30 @@ static inline struct rte_mbuf *rte_mbuf_raw_alloc(struct 
rte_mempool *mp)
 }
 
 /**
- * @internal Put mbuf back into its original mempool.
- * The use of that function is reserved for RTE internal needs.
- * Please use rte_pktmbuf_free().
+ * Put mbuf back into its original mempool.
+ *
+ * The caller must ensure that the mbuf is direct and that the
+ * reference counter is 0.
  *
  * @param m
  *   The mbuf to be freed.
  */
 static inline void __attribute__((always_inline))
-__rte_mbuf_raw_free(struct rte_mbuf *m)
+rte_mbuf_raw_free(struct rte_mbuf *m)
 {
+   RTE_ASSERT(RTE_MBUF_DIRECT(m));
RTE_ASSERT(rte_mbuf_refcnt_read(m) == 0);
rte_mempool_put(m->pool, m);
 }
 
+/* compat with older versions */
+__rte_deprecated
+static inline void __attribute__((always_inline))
+__rte_mbuf_raw_free(struct rte_mbuf *m)
+{
+   rte_mbuf_raw_free(m);
+}
+
 /* Operations on ctrl mbuf */
 
 /**
@@ -1210,7 +1220,7 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
m->ol_flags = 0;
 
if (rte_mbuf_refcnt_update(md, -1) == 0)
-   __rte_mbuf_raw_free(md);
+   rte_mbuf_raw_free(md);
 }
 
 /**
@@ -1265,7 +1275,7 @@ rte_pktmbuf_free_seg(struct rte_mbuf *m)
m = rte_pktmbuf_prefree_seg(m);
if (likely(m != NULL)) {
m->next = NULL;
-   __rte_mbuf_raw_free(m);
+   rte_mbuf_raw_free(m);
}
 }
 
-- 
2.8.1



[dpdk-dev] [PATCH 1/9] mbuf: make segment prefree function public

2017-03-08 Thread Olivier Matz
Document the function and make it public, since it is used at several
places in the drivers. The old one is marked as deprecated.

Signed-off-by: Olivier Matz 
---
 drivers/net/enic/enic_rxtx.c  |  2 +-
 drivers/net/fm10k/fm10k_rxtx.c|  6 +++---
 drivers/net/fm10k/fm10k_rxtx_vec.c|  6 +++---
 drivers/net/i40e/i40e_rxtx_vec_common.h   |  6 +++---
 drivers/net/ixgbe/ixgbe_rxtx.c|  2 +-
 drivers/net/ixgbe/ixgbe_rxtx_vec_common.h |  6 +++---
 drivers/net/virtio/virtio_rxtx_simple.h   |  6 +++---
 lib/librte_mbuf/rte_mbuf.h| 30 +++---
 8 files changed, 44 insertions(+), 20 deletions(-)

diff --git a/drivers/net/enic/enic_rxtx.c b/drivers/net/enic/enic_rxtx.c
index 343dabc..1ee5cbb 100644
--- a/drivers/net/enic/enic_rxtx.c
+++ b/drivers/net/enic/enic_rxtx.c
@@ -473,7 +473,7 @@ static inline void enic_free_wq_bufs(struct vnic_wq *wq, 
u16 completed_index)
pool = ((struct rte_mbuf *)buf->mb)->pool;
for (i = 0; i < nb_to_free; i++) {
buf = &wq->bufs[tail_idx];
-   m = __rte_pktmbuf_prefree_seg((struct rte_mbuf *)(buf->mb));
+   m = rte_pktmbuf_prefree_seg((struct rte_mbuf *)(buf->mb));
buf->mb = NULL;
 
if (unlikely(m == NULL)) {
diff --git a/drivers/net/fm10k/fm10k_rxtx.c b/drivers/net/fm10k/fm10k_rxtx.c
index 144e5e6..c9bb04a 100644
--- a/drivers/net/fm10k/fm10k_rxtx.c
+++ b/drivers/net/fm10k/fm10k_rxtx.c
@@ -434,12 +434,12 @@ static inline void tx_free_bulk_mbuf(struct rte_mbuf 
**txep, int num)
if (unlikely(num == 0))
return;
 
-   m = __rte_pktmbuf_prefree_seg(txep[0]);
+   m = rte_pktmbuf_prefree_seg(txep[0]);
if (likely(m != NULL)) {
free[0] = m;
nb_free = 1;
for (i = 1; i < num; i++) {
-   m = __rte_pktmbuf_prefree_seg(txep[i]);
+   m = rte_pktmbuf_prefree_seg(txep[i]);
if (likely(m != NULL)) {
if (likely(m->pool == free[0]->pool))
free[nb_free++] = m;
@@ -455,7 +455,7 @@ static inline void tx_free_bulk_mbuf(struct rte_mbuf 
**txep, int num)
rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
} else {
for (i = 1; i < num; i++) {
-   m = __rte_pktmbuf_prefree_seg(txep[i]);
+   m = rte_pktmbuf_prefree_seg(txep[i]);
if (m != NULL)
rte_mempool_put(m->pool, m);
txep[i] = NULL;
diff --git a/drivers/net/fm10k/fm10k_rxtx_vec.c 
b/drivers/net/fm10k/fm10k_rxtx_vec.c
index 27f3e43..825e3c1 100644
--- a/drivers/net/fm10k/fm10k_rxtx_vec.c
+++ b/drivers/net/fm10k/fm10k_rxtx_vec.c
@@ -754,12 +754,12 @@ fm10k_tx_free_bufs(struct fm10k_tx_queue *txq)
 * next_dd - (rs_thresh-1)
 */
txep = &txq->sw_ring[txq->next_dd - (n - 1)];
-   m = __rte_pktmbuf_prefree_seg(txep[0]);
+   m = rte_pktmbuf_prefree_seg(txep[0]);
if (likely(m != NULL)) {
free[0] = m;
nb_free = 1;
for (i = 1; i < n; i++) {
-   m = __rte_pktmbuf_prefree_seg(txep[i]);
+   m = rte_pktmbuf_prefree_seg(txep[i]);
if (likely(m != NULL)) {
if (likely(m->pool == free[0]->pool))
free[nb_free++] = m;
@@ -774,7 +774,7 @@ fm10k_tx_free_bufs(struct fm10k_tx_queue *txq)
rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
} else {
for (i = 1; i < n; i++) {
-   m = __rte_pktmbuf_prefree_seg(txep[i]);
+   m = rte_pktmbuf_prefree_seg(txep[i]);
if (m != NULL)
rte_mempool_put(m->pool, m);
}
diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h 
b/drivers/net/i40e/i40e_rxtx_vec_common.h
index 3745558..76031fe 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
@@ -123,12 +123,12 @@ i40e_tx_free_bufs(struct i40e_tx_queue *txq)
  * tx_next_dd - (tx_rs_thresh-1)
  */
txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
-   m = __rte_pktmbuf_prefree_seg(txep[0].mbuf);
+   m = rte_pktmbuf_prefree_seg(txep[0].mbuf);
if (likely(m != NULL)) {
free[0] = m;
nb_free = 1;
for (i = 1; i < n; i++) {
-   m = __rte_pktmbuf_prefree_seg(txep[i].mbuf);
+   m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
if (likely(m != NULL)) {
if (likely(m->pool == free[0]->pool)) {
free[nb_free++] = m;
@@ -144,7 +144,7 @@ i40e_t

[dpdk-dev] [PATCH 3/9] mbuf: set mbuf fields while in pool

2017-03-08 Thread Olivier Matz
Set the value of m->refcnt to 1, m->nb_segs to 1 and m->next
to NULL when the mbuf is stored inside the mempool (unused).
This is done in rte_pktmbuf_prefree_seg(), before freeing or
recycling a mbuf.

Before this patch, the value of m->refcnt was expected to be 0
while in pool.

The objectives are:

- to spare drivers from setting m->next to NULL in the early Rx path,
  since this field is in the second 64B of the mbuf and accessing it
  could trigger a cache miss

- to rationalize the behavior of raw_alloc/raw_free: each is now the
  exact inverse of the other, and refcnt is never changed in these
  functions.

Signed-off-by: Olivier Matz 
---
 drivers/net/mlx5/mlx5_rxtx.c |  5 ++---
 drivers/net/mpipe/mpipe_tilegx.c |  1 +
 lib/librte_mbuf/rte_mbuf.c   |  2 ++
 lib/librte_mbuf/rte_mbuf.h   | 42 +---
 4 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 41a5bb2..fc59544 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1398,7 +1398,8 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
while (pkt != seg) {
assert(pkt != (*rxq->elts)[idx]);
rep = NEXT(pkt);
-   rte_mbuf_refcnt_set(pkt, 0);
+   NEXT(pkt) = NULL;
+   NB_SEGS(pkt) = 1;
rte_mbuf_raw_free(pkt);
pkt = rep;
}
@@ -1409,13 +1410,11 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt,
   &rss_hash_res);
if (!len) {
-   rte_mbuf_refcnt_set(rep, 0);
rte_mbuf_raw_free(rep);
break;
}
if (unlikely(len == -1)) {
/* RX error, packet is likely too large. */
-   rte_mbuf_refcnt_set(rep, 0);
rte_mbuf_raw_free(rep);
++rxq->stats.idropped;
goto skip;
diff --git a/drivers/net/mpipe/mpipe_tilegx.c b/drivers/net/mpipe/mpipe_tilegx.c
index 536b8ea..0135e2f 100644
--- a/drivers/net/mpipe/mpipe_tilegx.c
+++ b/drivers/net/mpipe/mpipe_tilegx.c
@@ -557,6 +557,7 @@ mpipe_recv_flush_stack(struct mpipe_dev_priv *priv)
mbuf->packet_type = 0;
mbuf->data_len= 0;
mbuf->pkt_len = 0;
+   mbuf->next= NULL;
 
rte_mbuf_raw_free(mbuf);
}
diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index 72ad91e..0acc810 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -145,6 +145,8 @@ rte_pktmbuf_init(struct rte_mempool *mp,
m->pool = mp;
m->nb_segs = 1;
m->port = 0xff;
+   rte_mbuf_refcnt_set(m, 1);
+   m->next = NULL;
 }
 
 /* helper to create a mbuf pool */
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 575dc9d..b4fe786 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -768,6 +768,11 @@ rte_mbuf_sanity_check(const struct rte_mbuf *m, int 
is_header);
  * initializing all the required fields. See rte_pktmbuf_reset().
  * For standard needs, prefer rte_pktmbuf_alloc().
  *
+ * The caller can expect that the following fields of the mbuf structure
+ * are initialized: buf_addr, buf_physaddr, buf_len, refcnt=1, nb_segs=1,
+ * next=NULL, pool, priv_size. The other fields must be initialized
+ * by the caller.
+ *
  * @param mp
  *   The mempool from which mbuf is allocated.
  * @return
@@ -782,8 +787,9 @@ static inline struct rte_mbuf *rte_mbuf_raw_alloc(struct 
rte_mempool *mp)
if (rte_mempool_get(mp, &mb) < 0)
return NULL;
m = (struct rte_mbuf *)mb;
-   RTE_ASSERT(rte_mbuf_refcnt_read(m) == 0);
-   rte_mbuf_refcnt_set(m, 1);
+   RTE_ASSERT(rte_mbuf_refcnt_read(m) == 1);
+   RTE_ASSERT(m->next == NULL);
+   RTE_ASSERT(m->nb_segs == 1);
__rte_mbuf_sanity_check(m, 0);
 
return m;
@@ -792,8 +798,13 @@ static inline struct rte_mbuf *rte_mbuf_raw_alloc(struct 
rte_mempool *mp)
 /**
  * Put mbuf back into its original mempool.
  *
- * The caller must ensure that the mbuf is direct and that the
- * reference counter is 0.
+ * The caller must ensure that the mbuf is direct and properly
+ * reinitialized (refcnt=1, next=NULL, nb_segs=1), as done by
+ * rte_pktmbuf_prefree_seg().
+ *
+ * This function should be used with care, when optimization is
+ * required. For standard needs, prefer rte_pktmbuf_free() or
+ * rte_pktmbuf_free_seg().
  *
  * @param m

[dpdk-dev] [PATCH 0/9] mbuf: structure reorganization

2017-03-08 Thread Olivier Matz
Based on discussions done in [1] and in this thread, this patchset reorganizes
the mbuf.

The main changes are:
- reorder structure to increase vector performance on some non-IA
  platforms.
- add a 64-bit timestamp field in the 1st cache line. This timestamp
  is not normalized, i.e. no unit or time reference is enforced. A
  library may be added to do this job in the future.
- m->next, m->nb_segs, and m->refcnt are always initialized for mbufs
  in the pool, avoiding the need to set m->next (located in the
  2nd cache line) in the Rx path for mono-segment packets.
- change port and nb_segs to 16 bits
- move seqn to the 2nd cache line

Things discussed but not done in the patchset:
- move refcnt and nb_segs to the 2nd cache line: many drivers set
  them in the Rx path, so it could introduce a performance regression, or
  it would require changing all the drivers, which is not an easy task.
- remove the m->port field: too much impact on many examples and libraries,
  and some people highlighted they are using it.
- move m->next to the 1st cache line: there is not enough room, and having
  it set to NULL for unused mbufs should remove the need for it.
- merge seqn and timestamp together in a union: we could imagine use cases
  where both are activated. There is no flag indicating the presence of seqn,
  so it looks preferable to keep them separated for now.

I made some basic performance tests (ixgbe) and see no regression.
Other tests from NIC vendors are welcome.

Once this patchset is pushed, the Rx path of drivers could be optimized a bit,
by removing writes to m->next, m->nb_segs and m->refcnt. Patch 4/9 gives an
idea of what could be done.

[1] http://dpdk.org/ml/archives/dev/2016-October/049338.html

rfc->v1:
- fix reset of mbuf fields in case of indirect mbuf in rte_pktmbuf_prefree_seg()
- do not enforce a unit or time reference for m->timestamp
- reorganize fields to make vlan and outer vlan consecutive
- enhance documentation of m->refcnt and m->port to explain why they are 16 bits

Jerin Jacob (1):
  mbuf: make rearm data address naturally aligned

Olivier Matz (8):
  mbuf: make segment prefree function public
  mbuf: make raw free function public
  mbuf: set mbuf fields while in pool
  drivers/net: don't touch mbuf next or nb segs on Rx
  mbuf: use 2 bytes for port and nb segments
  mbuf: move sequence number in second cache line
  mbuf: add a timestamp field
  mbuf: reorder VLAN tci and buffer len fields

 app/test-pmd/csumonly.c|   4 +-
 drivers/net/ena/ena_ethdev.c   |   2 +-
 drivers/net/enic/enic_rxtx.c   |   2 +-
 drivers/net/fm10k/fm10k_rxtx.c |   6 +-
 drivers/net/fm10k/fm10k_rxtx_vec.c |   9 +-
 drivers/net/i40e/i40e_rxtx_vec_common.h|   6 +-
 drivers/net/i40e/i40e_rxtx_vec_sse.c   |  11 +-
 drivers/net/ixgbe/ixgbe_rxtx.c |  10 +-
 drivers/net/ixgbe/ixgbe_rxtx_vec_common.h  |   6 +-
 drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c|   9 --
 drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c |   9 --
 drivers/net/mlx5/mlx5_rxtx.c   |  11 +-
 drivers/net/mpipe/mpipe_tilegx.c   |   3 +-
 drivers/net/null/rte_eth_null.c|   2 -
 drivers/net/virtio/virtio_rxtx.c   |   4 -
 drivers/net/virtio/virtio_rxtx_simple.h|   6 +-
 .../linuxapp/eal/include/exec-env/rte_kni_common.h |   5 +-
 lib/librte_mbuf/rte_mbuf.c |   4 +
 lib/librte_mbuf/rte_mbuf.h | 123 -
 19 files changed, 130 insertions(+), 102 deletions(-)

-- 
2.8.1



[dpdk-dev] [PATCH 4/9] drivers/net: don't touch mbuf next or nb segs on Rx

2017-03-08 Thread Olivier Matz
Now that the m->next pointer and m->nb_segs are expected to be set (to
NULL and 1 respectively) after a mempool_get(), we can avoid writing them
in the Rx functions of drivers.

Only some drivers are patched; this is not an exhaustive patch. It shows
how the same change can be made in other drivers.

Signed-off-by: Olivier Matz 
---
 drivers/net/i40e/i40e_rxtx_vec_sse.c| 6 --
 drivers/net/ixgbe/ixgbe_rxtx.c  | 8 
 drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c | 6 --
 drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c  | 6 --
 drivers/net/null/rte_eth_null.c | 2 --
 drivers/net/virtio/virtio_rxtx.c| 4 
 6 files changed, 32 deletions(-)

diff --git a/drivers/net/i40e/i40e_rxtx_vec_sse.c 
b/drivers/net/i40e/i40e_rxtx_vec_sse.c
index b95cc8e..2f861fd 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_sse.c
@@ -424,12 +424,6 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct 
rte_mbuf **rx_pkts,
/* store the resulting 32-bit value */
*(int *)split_packet = _mm_cvtsi128_si32(eop_bits);
split_packet += RTE_I40E_DESCS_PER_LOOP;
-
-   /* zero-out next pointers */
-   rx_pkts[pos]->next = NULL;
-   rx_pkts[pos + 1]->next = NULL;
-   rx_pkts[pos + 2]->next = NULL;
-   rx_pkts[pos + 3]->next = NULL;
}
 
/* C.3 calc available number of desc */
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index b056107..813c494 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -1556,8 +1556,6 @@ ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool 
reset_mbuf)
/* populate the static rte mbuf fields */
mb = rxep[i].mbuf;
if (reset_mbuf) {
-   mb->next = NULL;
-   mb->nb_segs = 1;
mb->port = rxq->port_id;
}
 
@@ -2165,12 +2163,6 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf 
**rx_pkts, uint16_t nb_pkts,
goto next_desc;
}
 
-   /*
-* This is the last buffer of the received packet - return
-* the current cluster to the user.
-*/
-   rxm->next = NULL;
-
/* Initialize the first mbuf of the returned packet */
ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
 
diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c 
b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
index e2715cb..2c04161 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
@@ -330,12 +330,6 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct 
rte_mbuf **rx_pkts,
*(int *)split_packet = ~stat & IXGBE_VPMD_DESC_EOP_MASK;
 
split_packet += RTE_IXGBE_DESCS_PER_LOOP;
-
-   /* zero-out next pointers */
-   rx_pkts[pos]->next = NULL;
-   rx_pkts[pos + 1]->next = NULL;
-   rx_pkts[pos + 2]->next = NULL;
-   rx_pkts[pos + 3]->next = NULL;
}
 
rte_prefetch_non_temporal(rxdp + RTE_IXGBE_DESCS_PER_LOOP);
diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c 
b/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
index abbf284..65c5da3 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
@@ -425,12 +425,6 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct 
rte_mbuf **rx_pkts,
/* store the resulting 32-bit value */
*(int *)split_packet = _mm_cvtsi128_si32(eop_bits);
split_packet += RTE_IXGBE_DESCS_PER_LOOP;
-
-   /* zero-out next pointers */
-   rx_pkts[pos]->next = NULL;
-   rx_pkts[pos + 1]->next = NULL;
-   rx_pkts[pos + 2]->next = NULL;
-   rx_pkts[pos + 3]->next = NULL;
}
 
/* C.3 calc available number of desc */
diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
index 57203e2..7e14da0 100644
--- a/drivers/net/null/rte_eth_null.c
+++ b/drivers/net/null/rte_eth_null.c
@@ -112,8 +112,6 @@ eth_null_rx(void *q, struct rte_mbuf **bufs, uint16_t 
nb_bufs)
break;
bufs[i]->data_len = (uint16_t)packet_size;
bufs[i]->pkt_len = packet_size;
-   bufs[i]->nb_segs = 1;
-   bufs[i]->next = NULL;
bufs[i]->port = h->internals->port_id;
}
 
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index cab6e8f..b3e6d80 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/

[dpdk-dev] [PATCH 8/9] mbuf: add a timestamp field

2017-03-08 Thread Olivier Matz
The field itself is not fully described yet, but this commit reserves
the room in the mbuf.

Signed-off-by: Olivier Matz 
---
 lib/librte_mbuf/rte_mbuf.c |  2 ++
 lib/librte_mbuf/rte_mbuf.h | 12 
 2 files changed, 14 insertions(+)

diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index 0acc810..f679bce 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -322,6 +322,7 @@ const char *rte_get_rx_ol_flag_name(uint64_t mask)
case PKT_RX_IEEE1588_TMST: return "PKT_RX_IEEE1588_TMST";
case PKT_RX_QINQ_STRIPPED: return "PKT_RX_QINQ_STRIPPED";
case PKT_RX_LRO: return "PKT_RX_LRO";
+   case PKT_RX_TIMESTAMP: return "PKT_RX_TIMESTAMP";
default: return NULL;
}
 }
@@ -356,6 +357,7 @@ rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t 
buflen)
{ PKT_RX_IEEE1588_TMST, PKT_RX_IEEE1588_TMST, NULL },
{ PKT_RX_QINQ_STRIPPED, PKT_RX_QINQ_STRIPPED, NULL },
{ PKT_RX_LRO, PKT_RX_LRO, NULL },
+   { PKT_RX_TIMESTAMP, PKT_RX_TIMESTAMP, NULL },
};
const char *name;
unsigned int i;
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index c75a62a..fd97bd3 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -177,6 +177,11 @@ extern "C" {
  */
 #define PKT_RX_LRO   (1ULL << 16)
 
+/**
+ * Indicate that the timestamp field in the mbuf is valid.
+ */
+#define PKT_RX_TIMESTAMP (1ULL << 17)
+
 /* add new RX flags here */
 
 /* add new TX flags here */
@@ -474,6 +479,12 @@ struct rte_mbuf {
uint16_t vlan_tci_outer;
 
uint16_t buf_len; /**< Length of segment buffer. */
+
+   /** Valid if PKT_RX_TIMESTAMP is set. The unit and time reference
+* are not normalized but are always the same for a given port.
+*/
+   uint64_t timestamp;
+
/* second cache line - fields only used in slow path or on TX */
MARKER cacheline1 __rte_cache_min_aligned;
 
@@ -1201,6 +1212,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf 
*mi, struct rte_mbuf *m)
mi->nb_segs = 1;
mi->ol_flags = m->ol_flags | IND_ATTACHED_MBUF;
mi->packet_type = m->packet_type;
+   mi->timestamp = m->timestamp;
 
__rte_mbuf_sanity_check(mi, 1);
__rte_mbuf_sanity_check(m, 0);
-- 
2.8.1



[dpdk-dev] [PATCH 6/9] mbuf: use 2 bytes for port and nb segments

2017-03-08 Thread Olivier Matz
Change the size of m->port and m->nb_segs to 16 bits. It is now possible
to reference a port identifier larger than 255 and have an mbuf chain
longer than 255 segments.

Signed-off-by: Olivier Matz 
---
 app/test-pmd/csumonly.c  |  4 ++--
 .../linuxapp/eal/include/exec-env/rte_kni_common.h   |  4 ++--
 lib/librte_mbuf/rte_mbuf.h   | 12 +++-
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index 88cc842..5eaff9b 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -583,7 +583,7 @@ pkt_copy_split(const struct rte_mbuf *pkt)
rc = mbuf_copy_split(pkt, md, seglen, nb_seg);
if (rc < 0)
RTE_LOG(ERR, USER1,
-   "mbuf_copy_split for %p(len=%u, nb_seg=%hhu) "
+   "mbuf_copy_split for %p(len=%u, nb_seg=%u) "
"into %u segments failed with error code: %d\n",
pkt, pkt->pkt_len, pkt->nb_segs, nb_seg, rc);
 
@@ -801,7 +801,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
char buf[256];
 
printf("-\n");
-   printf("port=%u, mbuf=%p, pkt_len=%u, nb_segs=%hhu:\n",
+   printf("port=%u, mbuf=%p, pkt_len=%u, nb_segs=%u:\n",
fs->rx_port, m, m->pkt_len, m->nb_segs);
/* dump rx parsed packet info */
rte_get_rx_ol_flag_list(rx_ol_flags, buf, sizeof(buf));
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h 
b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
index f24f79f..2ac879f 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
@@ -118,8 +118,8 @@ struct rte_kni_mbuf {
uint64_t buf_physaddr;
uint16_t data_off;  /**< Start address of data in segment buffer. */
char pad1[2];
-   uint8_t nb_segs;/**< Number of segments. */
-   char pad4[3];
+   uint16_t nb_segs;   /**< Number of segments. */
+   char pad4[2];
uint64_t ol_flags;  /**< Offload features. */
char pad2[4];
uint32_t pkt_len;   /**< Total pkt len: sum of all segment 
data_len. */
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 4dc9a20..45cd6b9 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -393,12 +393,13 @@ struct rte_mbuf {
void *buf_addr;   /**< Virtual address of segment buffer. */
phys_addr_t buf_physaddr; /**< Physical address of segment buffer. */
 
-   /* next 6 bytes are initialised on RX descriptor rearm */
+   /* next 8 bytes are initialised on RX descriptor rearm */
MARKER64 rearm_data;
uint16_t data_off;
 
/**
-* 16-bit Reference counter.
+* Reference counter. Its size should at least equal to the size
+* of port field (16 bits), to support zero-copy broadcast.
 * It should only be accessed using the following functions:
 * rte_mbuf_refcnt_update(), rte_mbuf_refcnt_read(), and
 * rte_mbuf_refcnt_set(). The functionality of these functions (atomic,
@@ -410,9 +411,10 @@ struct rte_mbuf {
rte_atomic16_t refcnt_atomic; /**< Atomically accessed refcnt */
uint16_t refcnt;  /**< Non-atomically accessed 
refcnt */
};
-   uint8_t nb_segs;  /**< Number of segments. */
-   uint8_t port; /**< Input port. */
-   uint16_t pad; /**< 2B pad for naturally aligned ol_flags */
+   uint16_t nb_segs; /**< Number of segments. */
+
+   /** Input port (16 bits to support more than 256 virtual ports). */
+   uint16_t port;
 
uint64_t ol_flags;/**< Offload features. */
 
-- 
2.8.1



[dpdk-dev] [PATCH 9/9] mbuf: reorder VLAN tci and buffer len fields

2017-03-08 Thread Olivier Matz
Move the vlan_tci field near vlan_tci_outer and buf_len near data_len
for more consistency. It opens the door to getting/setting both VLAN TCIs
at the same time.

Suggested-by: Andrey Chilikin 
Signed-off-by: Olivier Matz 
---
 lib/librte_mbuf/rte_mbuf.h | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index fd97bd3..ada98d5 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -449,8 +449,7 @@ struct rte_mbuf {
 
uint32_t pkt_len; /**< Total pkt len: sum of all segments. */
uint16_t data_len;/**< Amount of data in segment buffer. */
-   /** VLAN TCI (CPU order), valid if PKT_RX_VLAN_STRIPPED is set. */
-   uint16_t vlan_tci;
+   uint16_t buf_len; /**< Size of segment buffer. */
 
union {
uint32_t rss; /**< RSS hash result if RSS enabled */
@@ -475,11 +474,11 @@ struct rte_mbuf {
uint32_t usr; /**< User defined tags. See 
rte_distributor_process() */
} hash;   /**< hash information */
 
+   /** VLAN TCI (CPU order), valid if PKT_RX_VLAN_STRIPPED is set. */
+   uint16_t vlan_tci;
/** Outer VLAN TCI (CPU order), valid if PKT_RX_QINQ_STRIPPED is set. */
uint16_t vlan_tci_outer;
 
-   uint16_t buf_len; /**< Length of segment buffer. */
-
/** Valid if PKT_RX_TIMESTAMP is set. The unit and time reference
 * are not normalized but are always the same for a given port.
 */
-- 
2.8.1



[dpdk-dev] [PATCH 7/9] mbuf: move sequence number in second cache line

2017-03-08 Thread Olivier Matz
Move this field to the second cache line, since no driver uses it
in the Rx path. The freed space will be used by a timestamp in the next
commit.

Signed-off-by: Olivier Matz 
---
 lib/librte_mbuf/rte_mbuf.h | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 45cd6b9..c75a62a 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -470,8 +470,6 @@ struct rte_mbuf {
uint32_t usr; /**< User defined tags. See 
rte_distributor_process() */
} hash;   /**< hash information */
 
-   uint32_t seqn; /**< Sequence number. See also rte_reorder_insert() */
-
/** Outer VLAN TCI (CPU order), valid if PKT_RX_QINQ_STRIPPED is set. */
uint16_t vlan_tci_outer;
 
@@ -516,6 +514,10 @@ struct rte_mbuf {
 
/** Timesync flags for use with IEEE1588. */
uint16_t timesync;
+
+   /** Sequence number. See also rte_reorder_insert(). */
+   uint32_t seqn;
+
 } __rte_cache_aligned;
 
 /**
-- 
2.8.1



[dpdk-dev] [PATCH 5/9] mbuf: make rearm data address naturally aligned

2017-03-08 Thread Olivier Matz
From: Jerin Jacob 

To avoid multiple stores on fast path, Ethernet drivers
aggregate the writes to data_off, refcnt, nb_segs and port
into a single uint64_t value and write it in one shot
through a uint64_t* at the &mbuf->rearm_data address.

Some of the non-IA platforms have store operation overhead
if the store address is not naturally aligned. This patch
fixes the performance issue on those targets.

Signed-off-by: Jerin Jacob 
Signed-off-by: Olivier Matz 
---
 drivers/net/fm10k/fm10k_rxtx_vec.c| 3 ---
 drivers/net/i40e/i40e_rxtx_vec_sse.c  | 5 +
 drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c   | 3 ---
 drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c| 3 ---
 lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h | 3 +--
 lib/librte_mbuf/rte_mbuf.h| 6 +++---
 6 files changed, 5 insertions(+), 18 deletions(-)

diff --git a/drivers/net/fm10k/fm10k_rxtx_vec.c 
b/drivers/net/fm10k/fm10k_rxtx_vec.c
index 825e3c1..61a65e9 100644
--- a/drivers/net/fm10k/fm10k_rxtx_vec.c
+++ b/drivers/net/fm10k/fm10k_rxtx_vec.c
@@ -324,9 +324,6 @@ fm10k_rxq_rearm(struct fm10k_rx_queue *rxq)
 
/* Flush mbuf with pkt template.
 * Data to be rearmed is 6 bytes long.
-* Though, RX will overwrite ol_flags that are coming next
-* anyway. So overwrite whole 8 bytes with one load:
-* 6 bytes of rearm_data plus first 2 bytes of ol_flags.
 */
p0 = (uintptr_t)&mb0->rearm_data;
*(uint64_t *)p0 = rxq->mbuf_initializer;
diff --git a/drivers/net/i40e/i40e_rxtx_vec_sse.c 
b/drivers/net/i40e/i40e_rxtx_vec_sse.c
index 2f861fd..e17235a 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_sse.c
@@ -87,11 +87,8 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
mb0 = rxep[0].mbuf;
mb1 = rxep[1].mbuf;
 
-/* Flush mbuf with pkt template.
+   /* Flush mbuf with pkt template.
 * Data to be rearmed is 6 bytes long.
-* Though, RX will overwrite ol_flags that are coming next
-* anyway. So overwrite whole 8 bytes with one load:
-* 6 bytes of rearm_data plus first 2 bytes of ol_flags.
 */
p0 = (uintptr_t)&mb0->rearm_data;
*(uint64_t *)p0 = rxq->mbuf_initializer;
diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c 
b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
index 2c04161..bc8924f 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
@@ -85,9 +85,6 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
/*
 * Flush mbuf with pkt template.
 * Data to be rearmed is 6 bytes long.
-* Though, RX will overwrite ol_flags that are coming next
-* anyway. So overwrite whole 8 bytes with one load:
-* 6 bytes of rearm_data plus first 2 bytes of ol_flags.
 */
vst1_u8((uint8_t *)&mb0->rearm_data, p);
paddr = mb0->buf_physaddr + RTE_PKTMBUF_HEADROOM;
diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c 
b/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
index 65c5da3..62afe31 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
@@ -90,9 +90,6 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
/*
 * Flush mbuf with pkt template.
 * Data to be rearmed is 6 bytes long.
-* Though, RX will overwrite ol_flags that are coming next
-* anyway. So overwrite whole 8 bytes with one load:
-* 6 bytes of rearm_data plus first 2 bytes of ol_flags.
 */
p0 = (uintptr_t)&mb0->rearm_data;
*(uint64_t *)p0 = rxq->mbuf_initializer;
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h 
b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
index 09713b0..f24f79f 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
@@ -116,11 +116,10 @@ struct rte_kni_fifo {
 struct rte_kni_mbuf {
void *buf_addr __attribute__((__aligned__(RTE_CACHE_LINE_SIZE)));
uint64_t buf_physaddr;
-   char pad0[2];
uint16_t data_off;  /**< Start address of data in segment buffer. */
char pad1[2];
uint8_t nb_segs;/**< Number of segments. */
-   char pad4[1];
+   char pad4[3];
uint64_t ol_flags;  /**< Offload features. */
char pad2[4];
uint32_t pkt_len;   /**< Total pkt len: sum of all segment 
data_len. */
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index b4fe786..4dc9a20 100644
--- a/lib/librte_mbuf/rte_mbuf.h

Re: [dpdk-dev] [PATCH v3 2/2] ethdev: add hierarchical scheduler API

2017-03-08 Thread O'Driscoll, Tim
> From: Dumitrescu, Cristian
> 
> > -Original Message-
> > From: Thomas Monjalon [mailto:thomas.monja...@6wind.com]
> > Sent: Monday, March 6, 2017 8:07 PM
> > To: Dumitrescu, Cristian 
> > Cc: dev@dpdk.org; jerin.ja...@caviumnetworks.com;
> > balasubramanian.manoha...@cavium.com; hemant.agra...@nxp.com;
> > shreyansh.j...@nxp.com; Wiles, Keith ;
> Richardson,
> > Bruce 
> > Subject: Re: [PATCH v3 2/2] ethdev: add hierarchical scheduler API
> >
> > 2017-03-06 16:59, Dumitrescu, Cristian:
> > > From: Thomas Monjalon [mailto:thomas.monja...@6wind.com]
> > > > 2017-03-04 01:10, Cristian Dumitrescu:
> > > > > This patch introduces the generic ethdev API for the traffic
> manager
> > > > > capability, which includes: hierarchical scheduling, traffic
> shaping,
> > > > > congestion management, packet marking.
> > > >
> > > > We already have some API for QoS. Why integrating them in ethdev?
> > > > ethdev is an interface for networking drivers.
> > > > I think the QoS has nothing to do with drivers.
> > > > If there are some operations to offload in drivers, please
> identify them
> > > > and let's add the operations to ethdev.
> > > >
> > >
> > > The reason to add to ethdev is because QoS traffic
> > management/hierarchical scheduling is just another TX offload for
> Ethernet
> > devices. This TX offload is present in NICs, NPUs and SoCs from
> Broadcom,
> > Cavium, Intel, Mellanox, Netronome, NXP, others.
> > >
> > > The API we currently have in DPDK (librte_sched) is great, but it
> refers to
> > an implementation for a fixed set of features for a BRAS-like
> hierarchy. The
> > current abstraction layer proposal is intended to support pretty much
> any
> > hierarchy and traffic management features such as hierarchical
> scheduling,
> > traffic shaping, congestion management, marking under the same API. It
> > targets pretty much any implementation, either HW, SW or hybrid; it
> does
> > support the existing librte_sched library feature set, but it is not
> limited to it.
> >
> > OK I better understand now.
> > You should add this level of explanation in your patch.
> >
> > However I am reluctant to add an API if there is no user.
> > I think we should wait to have at least one existing driver
> implementing
> > this API before integrating it.
> > It was the approach of eventdev which has a dedicated next- tree.
> 
> The next-tree solution could work, but IMO is not the best for this
> case, as this is purely driver development. This is just a TX offload
> feature that is well understood, as opposed to a new library with a huge
> design effort required like eventdev.
> 
> I think we are reasonably close to get agreement on the API from Cavium,
> Intel and NXP. When this is done, how about including it in DPDK with
> the experimental tag attached to it until several drivers implement it?
> 
> From Intel side, there are solid plans to implement it for ixgbe and
> i40e drivers in next DPDK releases, I am CC-ing Tim to confirm this.

That's correct. We plan to add support for this in the ixgbe and i40e drivers 
in 17.08.

> On
> Cavium and NXP side, Jerin and Hemant can comment on the plans to
> implement this API.



Re: [dpdk-dev] [PATCH v2 2/5] ring: add a function to return the ring size

2017-03-08 Thread Olivier MATZ
On Thu, 23 Feb 2017 16:42:00 +, Bruce Richardson
 wrote:
> Applications and other libraries should not be reading inside the
> rte_ring structure directly to get the ring size. Instead add a fn
> to allow it to be queried.
> 
> Signed-off-by: Bruce Richardson 

Acked-by: Olivier Matz 


[dpdk-dev] [PATCH v3 0/4] net/tap: remote netdevice traffic capture

2017-03-08 Thread Pascal Mazon
This patchset adds the special "remote" feature to the tap PMD, that
actually enables capturing traffic from another netdevice. This is
especially useful to get packets into the DPDK app, when the remote
netdevice has no DPDK support.

The "remote" feature requires flow API support as flow rules will be
configured on the remote netdevice for redirection, using the same
mechanism.

This series applies on top of:

  [PATCH 0/4] net/tap: support flow API

v2 changes:
  - rebase on top of updated "net/tap: support flow API"
  - fix implicit flow flush when closing the netdevices

v3 changes:
  - memset(0) for remote_iface in rte_pmd_tap_probe()
  - use snprintf instead of strncpy to correctly handle terminating \0

Pascal Mazon (4):
  net/tap: add remote netdevice traffic capture
  net/tap: reflect tap flags on the remote
  net/tap: use the remote MAC address if available
  net/tap: set MTU on the remote

 doc/guides/nics/tap.rst   |  17 ++
 drivers/net/tap/rte_eth_tap.c | 215 +++---
 drivers/net/tap/tap.h |   4 +
 drivers/net/tap/tap_flow.c| 418 --
 drivers/net/tap/tap_flow.h|  24 +++
 5 files changed, 637 insertions(+), 41 deletions(-)

-- 
2.8.0.rc0



[dpdk-dev] [PATCH v3 1/4] net/tap: add remote netdevice traffic capture

2017-03-08 Thread Pascal Mazon
By default, a tap netdevice is of no use when not fed by a separate
process. The ability to automatically feed it from another netdevice
allows applications to capture any kind of traffic normally destined to
the kernel stack.

This patch implements this ability through a new optional "remote"
parameter.

Packets matching filtering rules created with the flow API are matched
on the remote device and redirected to the tap PMD, where the relevant
action will be performed.

Signed-off-by: Pascal Mazon 
Acked-by: Olga Shern 
---
 doc/guides/nics/tap.rst   |  17 ++
 drivers/net/tap/rte_eth_tap.c |  80 +++-
 drivers/net/tap/tap.h |   2 +
 drivers/net/tap/tap_flow.c| 418 --
 drivers/net/tap/tap_flow.h|  24 +++
 5 files changed, 527 insertions(+), 14 deletions(-)

diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
index cdb528b5eae4..676a569b00ca 100644
--- a/doc/guides/nics/tap.rst
+++ b/doc/guides/nics/tap.rst
@@ -58,6 +58,23 @@ needed, but the interface does not enforce that speed, for 
example::
 
--vdev=net_tap0,iface=foo0,speed=25000
 
+It is possible to specify a remote netdevice to capture packets from by adding
+``remote=foo1``, for example::
+
+   --vdev=net_tap,iface=tap0,remote=foo1
+
+If a ``remote`` is set, then all packets with the tap PMD's local MAC coming
+in on the remote netdevice will be redirected to the tap.
+If the tap is in promiscuous mode, then all packets will be redirected.
+In allmulti mode, all multicast packets will be redirected.
+It is possible to add explicit rte_flow rules on the tap PMD to capture 
specific
+traffic. For instance, in testpmd, the following rte_flow rule would capture
+packets with the given MAC address from the remote, and send it to the tap RX
+QUEUE 3::
+
+   testpmd> flow create 0 ingress pattern eth src is 02:03:04:05:06:07 / \
+end actions queue index 3 / end
+
 After the DPDK application is started you can send and receive packets on the
 interface using the standard rx_burst/tx_burst APIs in DPDK. From the host
 point of view you can use any host tool like tcpdump, Wireshark, ping, Pktgen
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 5727f6228b17..f3d9d8fe96aa 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -63,6 +63,7 @@
 
 #define ETH_TAP_IFACE_ARG   "iface"
 #define ETH_TAP_SPEED_ARG   "speed"
+#define ETH_TAP_REMOTE_ARG  "remote"
 
 #ifdef IFF_MULTI_QUEUE
 #define RTE_PMD_TAP_MAX_QUEUES 16
@@ -77,6 +78,7 @@ static struct rte_vdev_driver pmd_tap_drv;
 static const char *valid_arguments[] = {
ETH_TAP_IFACE_ARG,
ETH_TAP_SPEED_ARG,
+   ETH_TAP_REMOTE_ARG,
NULL
 };
 
@@ -435,6 +437,7 @@ tap_dev_close(struct rte_eth_dev *dev __rte_unused)
struct pmd_internals *internals = dev->data->dev_private;
 
tap_link_set_down(dev);
+   tap_flow_implicit_flush(dev, NULL);
 
for (i = 0; i < internals->nb_queues; i++) {
if (internals->rxq[i].fd != -1)
@@ -480,6 +483,8 @@ tap_promisc_enable(struct rte_eth_dev *dev)
 
dev->data->promiscuous = 1;
tap_link_set_flags(pmd, IFF_PROMISC, 1);
+   if (pmd->remote_if_index)
+   tap_flow_implicit_create(pmd, TAP_REMOTE_PROMISC);
 }
 
 static void
@@ -489,6 +494,8 @@ tap_promisc_disable(struct rte_eth_dev *dev)
 
dev->data->promiscuous = 0;
tap_link_set_flags(pmd, IFF_PROMISC, 0);
+   if (pmd->remote_if_index)
+   tap_flow_implicit_destroy(dev, TAP_REMOTE_PROMISC);
 }
 
 static void
@@ -498,6 +505,8 @@ tap_allmulti_enable(struct rte_eth_dev *dev)
 
dev->data->all_multicast = 1;
tap_link_set_flags(pmd, IFF_ALLMULTI, 1);
+   if (pmd->remote_if_index)
+   tap_flow_implicit_create(pmd, TAP_REMOTE_ALLMULTI);
 }
 
 static void
@@ -507,6 +516,8 @@ tap_allmulti_disable(struct rte_eth_dev *dev)
 
dev->data->all_multicast = 0;
tap_link_set_flags(pmd, IFF_ALLMULTI, 0);
+   if (pmd->remote_if_index)
+   tap_flow_implicit_destroy(dev, TAP_REMOTE_ALLMULTI);
 }
 
 static void
@@ -632,9 +643,42 @@ tap_setup_queue(struct rte_eth_dev *dev,
pmd->name);
return fd;
}
+   if (pmd->remote_if_index) {
+   /*
+* Flush usually returns negative value because it tries
+* to delete every QDISC (and on a running device, one
+* QDISC at least is needed). Ignore negative return
+* value.
+*/
+   qdisc_flush(pmd->nlsk_fd, pmd->remote_if_index);
+   if (qdisc_create_ingress(pmd->nlsk_fd,
+pmd->remote_if_index) < 0)
+   goto remote_fail;
+   LIST_I

[dpdk-dev] [PATCH v3 4/4] net/tap: set MTU on the remote

2017-03-08 Thread Pascal Mazon
Signed-off-by: Pascal Mazon 
---
 drivers/net/tap/rte_eth_tap.c | 29 +++--
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 05ed0a131d63..9edf6b355a47 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -839,9 +839,8 @@ tap_set_mc_addr_list(struct rte_eth_dev *dev __rte_unused,
 }
 
 static int
-tap_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
+tap_netdev_mtu_set(const char *iface, uint16_t mtu)
 {
-   struct pmd_internals *pmd = dev->data->dev_private;
struct ifreq ifr;
int err, s;
 
@@ -849,15 +848,15 @@ tap_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
if (s < 0) {
RTE_LOG(ERR, PMD,
"Unable to get a socket for %s to set flags: %s\n",
-   pmd->name, strerror(errno));
+   iface, strerror(errno));
return -1;
}
memset(&ifr, 0, sizeof(ifr));
-   snprintf(ifr.ifr_name, IFNAMSIZ, "%s", pmd->name);
+   snprintf(ifr.ifr_name, IFNAMSIZ, "%s", iface);
err = ioctl(s, SIOCGIFMTU, &ifr);
if (err < 0) {
RTE_LOG(WARNING, PMD, "Unable to get %s device MTU: %s\n",
-   pmd->name, strerror(errno));
+   iface, strerror(errno));
close(s);
return -1;
}
@@ -865,11 +864,29 @@ tap_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
err = ioctl(s, SIOCSIFMTU, &ifr);
if (err < 0) {
RTE_LOG(WARNING, PMD, "Unable to set %s mtu %d: %s\n",
-   pmd->name, mtu, strerror(errno));
+   iface, mtu, strerror(errno));
close(s);
return -1;
}
close(s);
+   return 0;
+}
+
+static int
+tap_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
+{
+   struct pmd_internals *pmd = dev->data->dev_private;
+   int err;
+
+   /* First try to set mtu on the remote */
+   if (pmd->remote_if_index) {
+   err = tap_netdev_mtu_set(pmd->remote_iface, mtu);
+   if (err < 0)
+   return err;
+   }
+   err = tap_netdev_mtu_set(pmd->name, mtu);
+   if (err < 0)
+   return err;
dev->data->mtu = mtu;
return 0;
 }
-- 
2.8.0.rc0



[dpdk-dev] [PATCH v3 3/4] net/tap: use the remote MAC address if available

2017-03-08 Thread Pascal Mazon
The remote on a tap is most likely used with netdevices that are not
under DPDK control. Outgoing traffic is supposed to use the source MAC
address of the remote netdevice.

This commit synchronizes the MAC address of the local tap netdevice with
the remote one.

Of course, it is still possible to change the tap MAC address, using
standard DPDK APIs. It sets that MAC address on the tap PMD and redirects
any packets matching that destination MAC to the tap PMD. It also tries
setting the MAC address directly on the remote, if supported, through
ioctl() calls.

Signed-off-by: Pascal Mazon 
Acked-by: Olga Shern 
---
 drivers/net/tap/rte_eth_tap.c | 77 ---
 1 file changed, 65 insertions(+), 12 deletions(-)

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index b0f5ffbb8ace..05ed0a131d63 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -529,6 +529,65 @@ tap_allmulti_disable(struct rte_eth_dev *dev)
}
 }
 
+static int
+tap_netdev_set_mac(const char *iface, struct ether_addr *mac_addr)
+{
+   struct ifreq ifr;
+   int err, s;
+
+   s = socket(AF_INET, SOCK_DGRAM, 0);
+   if (s < 0) {
+   RTE_LOG(ERR, PMD,
+   "Unable to get a socket to get MAC: %s\n",
+   strerror(errno));
+   return -1;
+   }
+   memset(&ifr, 0, sizeof(ifr));
+   strncpy(ifr.ifr_name, iface, IFNAMSIZ);
+   err = ioctl(s, SIOCGIFHWADDR, &ifr);
+   if (err < 0) {
+   RTE_LOG(ERR, PMD, "%s: couldn't get current MAC address (%s)\n",
+   iface, strerror(errno));
+   close(s);
+   return -1;
+   }
+   rte_memcpy(ifr.ifr_hwaddr.sa_data, mac_addr, ETHER_ADDR_LEN);
+   err = ioctl(s, SIOCSIFHWADDR, &ifr) == -1;
+   close(s);
+   if (err < 0) {
+   RTE_LOG(ERR, PMD, "%s: couldn't set current MAC address (%s)\n",
+   iface, strerror(errno));
+   return -1;
+   }
+   return 0;
+}
+
+static int
+tap_netdev_get_mac(const char *iface, struct ether_addr *mac_addr)
+{
+   struct ifreq ifr;
+   int err, s;
+
+   s = socket(AF_INET, SOCK_DGRAM, 0);
+   if (s < 0) {
+   RTE_LOG(ERR, PMD,
+   "Unable to get a socket to get MAC: %s\n",
+   strerror(errno));
+   return -1;
+   }
+   memset(&ifr, 0, sizeof(ifr));
+   strncpy(ifr.ifr_name, iface, IFNAMSIZ);
+   err = ioctl(s, SIOCGIFHWADDR, &ifr);
+   close(s);
+   if (err < 0) {
+   RTE_LOG(ERR, PMD, "%s: couldn't get MAC address (%s)\n",
+   iface, strerror(errno));
+   return -1;
+   }
+   rte_memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, ETHER_ADDR_LEN);
+   return 0;
+}
+
 static void
 tap_mac_remove(struct rte_eth_dev *dev __rte_unused,
   uint32_t index __rte_unused)
@@ -545,7 +604,6 @@ tap_mac_add(struct rte_eth_dev *dev, struct ether_addr 
*mac_addr,
 {
struct pmd_internals *internals = dev->data->dev_private;
int fd = internals->rxq[0].fd;
-   struct ifreq ifr;
 
if (index > RTE_PMD_TAP_MAX_MAC_ADDRS - 1) {
RTE_LOG(ERR, PMD,
@@ -565,19 +623,11 @@ tap_mac_add(struct rte_eth_dev *dev, struct ether_addr 
*mac_addr,
dev->data->name);
return;
}
-   memset(&ifr, 0, sizeof(struct ifreq));
-   if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
-   RTE_LOG(ERR, PMD, "%s: couldn't get current MAC address (%s)\n",
-   dev->data->name, strerror(errno));
+   if (tap_netdev_set_mac(internals->name, mac_addr) < 0)
return;
-   }
-   rte_memcpy(ifr.ifr_hwaddr.sa_data, mac_addr, ETHER_ADDR_LEN);
-   if (ioctl(fd, SIOCSIFHWADDR, &ifr) == -1) {
-   RTE_LOG(ERR, PMD, "%s: couldn't set current MAC address (%s)\n",
-   dev->data->name, strerror(errno));
-   return;
-   }
rte_memcpy(&dev->data->mac_addrs[index], mac_addr, ETHER_ADDR_LEN);
+   if (internals->remote_if_index)
+   tap_netdev_set_mac(internals->remote_iface, mac_addr);
 }
 
 static void
@@ -978,6 +1028,9 @@ eth_dev_tap_create(const char *name, char *tap_name, char 
*remote_iface)
RTE_LOG(ERR, PMD, "Could not find %s ifindex: "
"remote interface will remain unconfigured\n",
remote_iface);
+   else
+   /* Set the local mac address to the remote mac */
+   tap_netdev_get_mac(remote_iface, &pmd->eth_addr);
}
 
return 0;
-- 
2.8.0.rc0



[dpdk-dev] [PATCH v3 2/4] net/tap: reflect tap flags on the remote

2017-03-08 Thread Pascal Mazon
Synchronize PROMISC and ALLMULTI flags to the remote netdevice if
possible.

Leave the IFF_UP flag as it is, however.

Signed-off-by: Pascal Mazon 
Acked-by: Olga Shern 
---
 drivers/net/tap/rte_eth_tap.c | 37 -
 drivers/net/tap/tap.h |  2 ++
 2 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index f3d9d8fe96aa..b0f5ffbb8ace 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -254,7 +254,7 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t 
nb_pkts)
 }
 
 static int
-tap_link_set_flags(struct pmd_internals *pmd, short flags, int add)
+tap_netdev_set_flags(const char *iface, short flags, int add)
 {
struct ifreq ifr;
int err, s;
@@ -271,11 +271,11 @@ tap_link_set_flags(struct pmd_internals *pmd, short 
flags, int add)
return -1;
}
memset(&ifr, 0, sizeof(ifr));
-   snprintf(ifr.ifr_name, IFNAMSIZ, "%s", pmd->name);
+   snprintf(ifr.ifr_name, IFNAMSIZ, "%s", iface);
err = ioctl(s, SIOCGIFFLAGS, &ifr);
if (err < 0) {
RTE_LOG(WARNING, PMD, "Unable to get %s device flags: %s\n",
-   pmd->name, strerror(errno));
+   iface, strerror(errno));
close(s);
return -1;
}
@@ -293,6 +293,7 @@ tap_link_set_flags(struct pmd_internals *pmd, short flags, 
int add)
close(s);
 
return 0;
+
 }
 
 static int
@@ -301,7 +302,7 @@ tap_link_set_down(struct rte_eth_dev *dev)
struct pmd_internals *pmd = dev->data->dev_private;
 
dev->data->dev_link.link_status = ETH_LINK_DOWN;
-   return tap_link_set_flags(pmd, IFF_UP | IFF_NOARP, 0);
+   return tap_netdev_set_flags(pmd->name, IFF_UP | IFF_NOARP, 0);
 }
 
 static int
@@ -310,7 +311,7 @@ tap_link_set_up(struct rte_eth_dev *dev)
struct pmd_internals *pmd = dev->data->dev_private;
 
dev->data->dev_link.link_status = ETH_LINK_UP;
-   return tap_link_set_flags(pmd, IFF_UP | IFF_NOARP, 1);
+   return tap_netdev_set_flags(pmd->name, IFF_UP | IFF_NOARP, 1);
 }
 
 static int
@@ -482,9 +483,11 @@ tap_promisc_enable(struct rte_eth_dev *dev)
struct pmd_internals *pmd = dev->data->dev_private;
 
dev->data->promiscuous = 1;
-   tap_link_set_flags(pmd, IFF_PROMISC, 1);
-   if (pmd->remote_if_index)
+   tap_netdev_set_flags(pmd->name, IFF_PROMISC, 1);
+   if (pmd->remote_if_index) {
tap_flow_implicit_create(pmd, TAP_REMOTE_PROMISC);
+   tap_netdev_set_flags(pmd->remote_iface, IFF_PROMISC, 1);
+   }
 }
 
 static void
@@ -493,9 +496,11 @@ tap_promisc_disable(struct rte_eth_dev *dev)
struct pmd_internals *pmd = dev->data->dev_private;
 
dev->data->promiscuous = 0;
-   tap_link_set_flags(pmd, IFF_PROMISC, 0);
-   if (pmd->remote_if_index)
+   tap_netdev_set_flags(pmd->name, IFF_PROMISC, 0);
+   if (pmd->remote_if_index) {
tap_flow_implicit_destroy(dev, TAP_REMOTE_PROMISC);
+   tap_netdev_set_flags(pmd->remote_iface, IFF_PROMISC, 0);
+   }
 }
 
 static void
@@ -504,9 +509,11 @@ tap_allmulti_enable(struct rte_eth_dev *dev)
struct pmd_internals *pmd = dev->data->dev_private;
 
dev->data->all_multicast = 1;
-   tap_link_set_flags(pmd, IFF_ALLMULTI, 1);
-   if (pmd->remote_if_index)
+   tap_netdev_set_flags(pmd->name, IFF_ALLMULTI, 1);
+   if (pmd->remote_if_index) {
tap_flow_implicit_create(pmd, TAP_REMOTE_ALLMULTI);
+   tap_netdev_set_flags(pmd->remote_iface, IFF_ALLMULTI, 1);
+   }
 }
 
 static void
@@ -515,9 +522,11 @@ tap_allmulti_disable(struct rte_eth_dev *dev)
struct pmd_internals *pmd = dev->data->dev_private;
 
dev->data->all_multicast = 0;
-   tap_link_set_flags(pmd, IFF_ALLMULTI, 0);
-   if (pmd->remote_if_index)
+   tap_netdev_set_flags(pmd->name, IFF_ALLMULTI, 0);
+   if (pmd->remote_if_index) {
tap_flow_implicit_destroy(dev, TAP_REMOTE_ALLMULTI);
+   tap_netdev_set_flags(pmd->remote_iface, IFF_ALLMULTI, 0);
+   }
 }
 
 static void
@@ -962,6 +971,8 @@ eth_dev_tap_create(const char *name, char *tap_name, char 
*remote_iface)
 */
pmd->nlsk_fd = nl_init();
if (strlen(remote_iface)) {
+   snprintf(pmd->remote_iface, RTE_ETH_NAME_MAX_LEN,
+"%s", remote_iface);
pmd->remote_if_index = if_nametoindex(remote_iface);
if (!pmd->remote_if_index)
RTE_LOG(ERR, PMD, "Could not find %s ifindex: "
diff --git a/drivers/net/tap/tap.h b/drivers/net/tap/tap.h
index 4c4de939f1cc..9811d0b0f085 100644
--- a/drivers/net/tap/tap.h
+++ b/drivers/net/tap/tap.h
@@ -63,9 +63,11 @@ struct tx_queue {
 };
 
 struct pmd_internals {
+   char remote_iface[RTE_ETH_NAME_MAX_LEN];

Re: [dpdk-dev] [PATCH v2 07/14] ring: make bulk and burst fn return vals consistent

2017-03-08 Thread Olivier MATZ
On Tue,  7 Mar 2017 11:32:10 +, Bruce Richardson 
 wrote:
> The bulk fns for rings returns 0 for all elements enqueued and negative
> for no space. Change that to make them consistent with the burst functions
> in returning the number of elements enqueued/dequeued, i.e. 0 or N.
> This change also allows the return value from enq/deq to be used directly
> without a branch for error checking.
> 
> Signed-off-by: Bruce Richardson 

[...]

> @@ -716,7 +695,7 @@ rte_ring_enqueue_bulk(struct rte_ring *r, void * const 
> *obj_table,
>  static inline int __attribute__((always_inline))
>  rte_ring_mp_enqueue(struct rte_ring *r, void *obj)
>  {
> - return rte_ring_mp_enqueue_bulk(r, &obj, 1);
> + return rte_ring_mp_enqueue_bulk(r, &obj, 1) ? 0 : -ENOBUFS;
>  }
>  
>  /**

I'm wondering if these functions (enqueue/dequeue of one element) should
be modified to return 0 (fail) or 1 (success) too, for consistency with
the bulk functions.

Any opinion?



Olivier



[dpdk-dev] [PATCH] eventdev: improve API docs for timeout ticks

2017-03-08 Thread Harry van Haaren
Improve the documentation of the return values of the
rte_event_dequeue_timeout_ticks() function, adding a
-ENOTSUP value for eventdevs that do not support waiting.

Signed-off-by: Harry van Haaren 
Acked-by: Jerin Jacob 

---

Discussion and previous Acked from:
http://dpdk.org/ml/archives/dev/2017-March/059419.html
---
 lib/librte_eventdev/rte_eventdev.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eventdev/rte_eventdev.h 
b/lib/librte_eventdev/rte_eventdev.h
index 7073987..bb216b4 100644
--- a/lib/librte_eventdev/rte_eventdev.h
+++ b/lib/librte_eventdev/rte_eventdev.h
@@ -1158,7 +1158,9 @@ rte_event_enqueue_burst(uint8_t dev_id, uint8_t port_id,
  *
  * @return
  *  - 0 on success.
- *  - <0 on failure.
+ *  - -ENOTSUP if the device doesn't support timeouts
+ *  - -EINVAL if *dev_id* is invalid or *timeout_ticks* is NULL
+ *  - other values < 0 on failure.
  *
  * @see rte_event_dequeue_burst(), RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT
  * @see rte_event_dev_configure()
-- 
2.7.4



Re: [dpdk-dev] [PATCH] net/ixgbe: delete useless function declaration

2017-03-08 Thread Ferruh Yigit
On 3/8/2017 6:58 AM, Wei Zhao wrote:
> delete useless function declarations in file ixgbe_flow.c and
> adjust function definition position to avoid compile error.
> 
> Signed-off-by: Wei Zhao 

Applied to dpdk-next-net/master, thanks.


Re: [dpdk-dev] [PATCH v2 1/2] net/ixgbe: move ixgbe 2 mac type check macro

2017-03-08 Thread Ferruh Yigit
On 3/8/2017 7:58 AM, Wei Zhao wrote:
> move ixgbe 2 mac type check macro to ixgbe_ethdev.h in
> order to be used by filter parser functions in file
> ixgbe_flow.c.
> 
> Fixes: 6c52c126f27a ("ixgbe: move to drivers/net/")
> 
> Signed-off-by: Wei Zhao 
> Signed-off-by: Wenzhuo Lu 

Series applied to dpdk-next-net/master, thanks.



Re: [dpdk-dev] [PATCH v1 12/14] ring: separate out head index manipulation for enq/deq

2017-03-08 Thread Olivier MATZ
On Thu, 23 Feb 2017 17:24:05 +, Bruce Richardson 
 wrote:
> We can write a single common function for head manipulation for enq
> and a common one for deq, allowing us to have a single worker function
> for enq and deq, rather than two of each. Update all other inline
> functions to use the new functions.
> 
> Signed-off-by: Bruce Richardson 
> ---
>  lib/librte_ring/rte_ring.c |   4 +-
>  lib/librte_ring/rte_ring.h | 328 
> -
>  2 files changed, 149 insertions(+), 183 deletions(-)
> 

[...]

> +static inline __attribute__((always_inline)) unsigned int
> +__rte_ring_do_enqueue(struct rte_ring *r, void * const *obj_table,
> +  unsigned int n, enum rte_ring_queue_behavior behavior,
> +  int is_sp, unsigned int *free_space)
>  {
> - uint32_t prod_head, cons_tail;
> - uint32_t prod_next, free_entries;
> - uint32_t mask = r->mask;
> -
> - prod_head = r->prod.head;
> - cons_tail = r->cons.tail;
> - /* The subtraction is done between two unsigned 32bits value
> -  * (the result is always modulo 32 bits even if we have
> -  * prod_head > cons_tail). So 'free_entries' is always between 0
> -  * and size(ring)-1. */
> - free_entries = mask + cons_tail - prod_head;
> -
> - /* check that we have enough room in ring */
> - if (unlikely(n > free_entries))
> - n = (behavior == RTE_RING_QUEUE_FIXED) ? 0 : free_entries;
> + uint32_t prod_head, prod_next;
> + uint32_t free_entries;
>  
> + n = __rte_ring_move_prod_head(r, is_sp, n, behavior,
> + &prod_head, &prod_next, &free_entries);
>   if (n == 0)
>   goto end;
>  
> -
> - prod_next = prod_head + n;
> - r->prod.head = prod_next;
> -
> - /* write entries in ring */
>   ENQUEUE_PTRS();
>   rte_smp_wmb();
>  
> + /*
> +  * If there are other enqueues in progress that preceded us,
> +  * we need to wait for them to complete
> +  */
> + while (unlikely(r->prod.tail != prod_head))
> + rte_pause();
> +

I'd say this part should not be done in case is_sp == 1.
Since it is sometimes a constant arg in an inline func, it may be better
to add the if (is_sp == 0).

[...]

> +static inline __attribute__((always_inline)) unsigned int
> +__rte_ring_do_dequeue(struct rte_ring *r, void **obj_table,
>unsigned int n, enum rte_ring_queue_behavior behavior,
> -  unsigned int *available)
> +  int is_mp, unsigned int *available)
>  {
> - uint32_t cons_head, prod_tail;
> - uint32_t cons_next, entries;
> - uint32_t mask = r->mask;
> -
> - cons_head = r->cons.head;
> - prod_tail = r->prod.tail;
> - /* The subtraction is done between two unsigned 32bits value
> -  * (the result is always modulo 32 bits even if we have
> -  * cons_head > prod_tail). So 'entries' is always between 0
> -  * and size(ring)-1. */
> - entries = prod_tail - cons_head;
> -
> - if (n > entries)
> - n = (behavior == RTE_RING_QUEUE_FIXED) ? 0 : entries;
> -
> - if (unlikely(entries == 0))
> - goto end;
> + uint32_t cons_head, cons_next;
> + uint32_t entries;
>  
> - cons_next = cons_head + n;
> - r->cons.head = cons_next;
> + n = __rte_ring_move_cons_head(r, is_mp, n, behavior,
> + &cons_head, &cons_next, &entries);
> + if (n == 0)
> + goto end;
>  
> - /* copy in table */
>   DEQUEUE_PTRS();
>   rte_smp_rmb();
>  
> + /*
> +  * If there are other dequeues in progress that preceded us,
> +  * we need to wait for them to complete
> +  */
> + while (unlikely(r->cons.tail != cons_head))
> + rte_pause();
> +
>   r->cons.tail = cons_next;

Same here.


Re: [dpdk-dev] [PATCH 1/3] app/testpmd: add port reset command into testpmd

2017-03-08 Thread Ferruh Yigit
On 3/3/2017 4:56 AM, Wei Zhao wrote:
> Add vf port reset command into testpmd project, it is the interface for
> user to reset vf port.

I think it is better to change the order of this patch, first implement
new API in ethdev, later this patch implement new API in testpmd.

> 
> Signed-off-by: Wei Zhao 
> Signed-off-by: Wenzhuo Lu 
> ---
>  app/test-pmd/cmdline.c | 17 ++---
>  app/test-pmd/testpmd.c | 67 
> ++
>  app/test-pmd/testpmd.h |  1 +
>  3 files changed, 81 insertions(+), 4 deletions(-)
> 
> diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
> index 43fc636..59db672 100644
> --- a/app/test-pmd/cmdline.c
> +++ b/app/test-pmd/cmdline.c
> @@ -596,6 +596,9 @@ static void cmd_help_long_parsed(void *parsed_result,
>   "port close (port_id|all)\n"
>   "Close all ports or port_id.\n\n"
>  
> + "port reset (port_id|all)\n"
> + "Reset all ports or port_id.\n\n"

It is not clear what reset does to the port. This is only for VF right?
Adding reset here hides that it is for VF.

<...>

> @@ -601,6 +602,7 @@ init_config(void)
>   if (init_fwd_streams() < 0)
>   rte_exit(EXIT_FAILURE, "FAIL from init_fwd_streams()\n");
>  
> +

This may be unintentional.

<...>

> @@ -1350,6 +1363,10 @@ start_port(portid_t pid)
>   return -1;
>   }
>   }
> +
> + /* register reset interrupt callback */
> + rte_eth_dev_callback_register(pi, RTE_ETH_EVENT_INTR_RESET,
> + reset_event_callback, NULL);

So each port started will register a callback to handle reset events,

1- isn't this overkill for the use cases that do not need this reset?
2- should there be an unregister event?
3- This issue can be fixed in testpmd, but for user application, is this
the suggested way?

>   if (port->need_reconfig_queues > 0) {
>   port->need_reconfig_queues = 0;
>   /* setup tx queues */
> @@ -1559,6 +1576,56 @@ close_port(portid_t pid)
>  }
>  
>  void
> +reset_port(portid_t pid)
> +{
> + portid_t pi;
> + struct rte_port *port;
> +
> + if (port_id_is_invalid(pid, ENABLED_WARN))
> + return;
> +
> + printf("Closing ports...\n");
> +
> + FOREACH_PORT(pi, ports) {

Since we already know the port_id (pid), why iterating through all ports?

> + if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
> + continue;
> +
> + if (port_is_forwarding(pi) != 0 && test_done == 0) {
> + printf("Please remove port %d from forwarding "
> + "configuration.\n", pi);
> + continue;
> + }
> +
> + if (port_is_bonding_slave(pi)) {
> + printf("Please remove port %d from "
> + "bonded device.\n", pi);
> + continue;
> + }
> +
> + if (!reset_ports[pi]) {
> + printf("vf must get reset port %d info from "
> + "pf before reset.\n", pi);
> + continue;
> + }

Can there be a timing issue here? Is it possible that reset occurred
already and we are in the middle of the callback function when this
check done?

<...>


Re: [dpdk-dev] [PATCH v2 1/2] librte_net: add crc init and compute APIs

2017-03-08 Thread De Lara Guarch, Pablo


> -Original Message-
> From: Thomas Monjalon [mailto:thomas.monja...@6wind.com]
> Sent: Monday, March 06, 2017 3:28 PM
> To: Singh, Jasvinder
> Cc: dev@dpdk.org; Doherty, Declan; De Lara Guarch, Pablo
> Subject: Re: [dpdk-dev] [PATCH v2 1/2] librte_net: add crc init and compute
> APIs
> 
> 2017-03-02 13:03, Singh, Jasvinder:
> > Hi Thomas,
> >
> > > -Original Message-
> > > From: Thomas Monjalon [mailto:thomas.monja...@6wind.com]
> > > Sent: Wednesday, March 1, 2017 6:46 PM
> > > To: Singh, Jasvinder 
> > > Cc: dev@dpdk.org; Doherty, Declan 
> > > Subject: Re: [dpdk-dev] [PATCH v2 1/2] librte_net: add crc init and
> compute
> > > APIs
> > >
> > > 2017-02-28 12:08, Jasvinder Singh:
> > > >  lib/librte_net/rte_net_crc.c   | 664
> > > +
> > > >  lib/librte_net/rte_net_crc.h   | 101 ++
> > >
> > > I think it should be in librte_hash.
> > >
> > > Please check lib/librte_hash/rte_hash_crc.h
> >
> > Is it good to include payload crc calculation in hash library as I see all 
> > hash
> related functionality there?
> 
> I think yes. Pablo?

I think this doesn't belong in the hash library. These new functions calculate 
CRC, but not as a hash function.
Yes, CRC can be used as hash function (in fact, it is used as such in the hash 
library,
the CRC32C version, and I assume that's why it is in there), but its use is 
much broader
(its main purpose is not to be a hash function, but for data error detection, 
for any data).

Therefore, I would suggest either creating a separate library for this, if we 
want to use this as a broader use,
or leave it in net library, if we want to focus on calculating CRC for Ethernet 
frames.

Regarding to the CRC that we have in the hash library, if we go for a separate 
library,
we could move that function there, but then it would have to follow the 
function prototype of a hash function,
defined in the hash library. 

Thanks,
Pablo


Re: [dpdk-dev] [RFC 0/8] mbuf: structure reorganization

2017-03-08 Thread Ananyev, Konstantin

Hi Olivier,

> 
> Hi Konstantin,
> 
> On Tue, 28 Feb 2017 22:53:55 +, "Ananyev, Konstantin" 
>  wrote:
> > > > Another thing that doesn't look very convenient to me here -
> > > > We can have 2 different values of timestamp (both normalized and not)
> > > > and there is no clear way for the application to know which one is in
> > > > use right now. So each app writer would have to come-up with his own
> > > > solution.
> > >
> > > It depends:
> > > - the solution you describe is to have the application storing the
> > >   normalized value in its private metadata.
> > > - another solution would be to store the normalized value in
> > >   m->timestamp. In this case, we would need a flag to tell if the
> > >   timestamp value is normalized.
> >
> > My first thought also was about second flag to specify was timestamp
> > already normalized or not.
> > Though I still in doubt - is it all really worth it: extra ol_flag, new 
> > function in eth_dev API.
> > My feeling that we trying to overcomplicate things.
> 
> I don't see what is so complicated. The idea is just to let the
> application do the normalization if it is required.

I meant that 2 ol_flags and a special function just to properly handle one of the
mbuf fields seem like too much.
Though on second thought, maybe 2 ol_flags is not a bad idea -
it gives the PMD writer the freedom to choose whether to provide a normalized
or raw value on return from rx_burst().

> 
> If the time is normalized in nanosecond in the PMD, we would still
> need to normalized the time reference (the 0). And for that we'd need
> a call to a synchronization code as well.
> 
> 
> 
> > > The problem pointed out by Jan is that doing the timestamp
> > > normalization may take some CPU cycles, even if a small part of packets
> > > requires it.
> >
> > I understand that point, but from what I've seen with real example:
> > http://dpdk.org/ml/archives/dev/2016-October/048810.html
> > the amount of calculations at RX is pretty small.
> > I don't think it would affect performance in a noticeable way
> > (though I don't have any numbers here to prove it).
> 
> I think we can consider by default that adding code in the data path
> impacts performance.
> 
> 
> > From other side, if user doesn't want a timestamp he can always disable
> > that feature and save cycles, right?
> >
> > BTW, you and Jan both mention that not every packet would need a timestamp.
> > Instead we need sort of a timestamp for the group of packets?
> 
> I think that for many applications the timestamp should be as precise
> as possible for each packet.
> 
> 
> > Is that really the only foreseen usage model?
> 
> No, but it could be one.
> 
> 
> > If so, then why not to have a special function that would extract 'latest' 
> > timestamp
> > from the dev?
> > Or even have tx_burst_extra() that would return a latest timestamp (extra 
> > parameter or so).
> > Then there is no need to put timestamp into mbuf at all.
> 
> Doing that will give a poor precision for the timestamp.
> 
> 
> > > > > Applications that
> > > > > are doing this are responsible of what they change.
> > > > >
> > > > >
> > > > > > 3. In theory with eth_dev_detach() - mbuf->port value might be
> > > > > > not valid at the point when application would decide to do
> > > > > > normalization.
> > > > > >
> > > > > > So to me all that approach with delayed normalization seems
> > > > > > unnecessary overcomplicated. Original one suggested by Olivier,
> > > > > > when normalization is done in PMD at RX look much cleaner and
> > > > > > more manageable.
> > > > >
> > > > > Detaching a device requires a synchronization between control and
> > > > > data plane, and not only for this use case.
> > > >
> > > > Of course it does.
> > > > But right now it is possible to do:
> > > >
> > > > eth_rx_burst(port=0, ..., &mbuf, 1);
> > > > eth_dev_detach(port=0, ...);
> > > > ...
> > > > /*process previously received mbuf */
> > > >
> > > > With what you are proposing it would be not always possible any more.
> > >
> > > With your example, it does not work even without the timestamp feature,
> > > since the mbuf input port would reference an invalid port.
> > > This port  is usually used in the application to do a lookup for an port 
> > > structure,
> > > so it is expected that the entry is valid. It would be even worse if you
> > > do a detach + attach.
> >
> > I am not talking about the mbuf->port value usage.
> > Right now user can access/interpret  all metadata fields set by PMD RX 
> > routines
> > (vlan, rss hash, ol_flags, ptype, etc.) without need to accessing the 
> > device data or
> > calling device functions.
> > With that change it wouldn't be the case anymore.
> 
> That's the same for some other functions. If in my application I want
> to call eth_rx_queue_count(m->port), I will have the same problem.

Yes, but here you are trying to get extra information about device/queue based
on port value stored inside mbuf.
I am talking about information that already stored inside pa

Re: [dpdk-dev] [PATCH 3/3] net/i40e: implement device reset on port

2017-03-08 Thread Ferruh Yigit
On 3/3/2017 4:56 AM, Wei Zhao wrote:
> Implement the device reset function on vf port.
> This restart function will detach device then
> attach device, reconfigure dev, re-setup the Rx/Tx queues.
> 
> Signed-off-by: Wei Zhao 
> Signed-off-by: Wenzhuo Lu 

<...>

> +static int i40evf_dev_uninit(struct rte_eth_dev *eth_dev);
> +static int i40evf_dev_init(struct rte_eth_dev *eth_dev);
> +static void i40evf_dev_close(struct rte_eth_dev *dev);
> +static int i40evf_dev_start(struct rte_eth_dev *dev);
> +static int i40evf_dev_configure(struct rte_eth_dev *dev);
> +static int i40evf_handle_vf_reset(struct rte_eth_dev *dev);

Some of them already seem to be declared, please avoid unnecessary or
duplicate declarations.

> +
>  
>  /* Default hash key buffer for RSS */
>  static uint32_t rss_key_default[I40E_VFQF_HKEY_MAX_INDEX + 1];
> @@ -230,6 +237,7 @@ static const struct eth_dev_ops i40evf_eth_dev_ops = {
>   .rss_hash_conf_get= i40evf_dev_rss_hash_conf_get,
>   .mtu_set  = i40evf_dev_mtu_set,
>   .mac_addr_set = i40evf_set_default_mac_addr,
> + .dev_reset= i40evf_handle_vf_reset,
>  };
>  
>  /*
> @@ -885,10 +893,13 @@ i40evf_add_mac_addr(struct rte_eth_dev *dev,
>   args.out_buffer = vf->aq_resp;
>   args.out_size = I40E_AQ_BUF_SZ;
>   err = i40evf_execute_vf_cmd(dev, &args);
> - if (err)
> + if (err) {
>   PMD_DRV_LOG(ERR, "fail to execute command "
>   "OP_ADD_ETHER_ADDRESS");
> -
> + goto DONE;

Please prefer lowercase labels,
also this is error exit, I would prefer other name than "done"

> + }
> + vf->vsi.mac_num++;
> +DONE:
>   return;
>  }
>  

<...>

> +static int
> +i40evf_handle_vf_reset(struct rte_eth_dev *dev)
> +{
> + struct i40e_adapter *adapter =
> + I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
> +
> + if (!dev->data->dev_started)
> + return 0;
> +
> + adapter->reset_number = 1;
> + i40e_vf_reset_dev(dev);

What happens if user called this function for PF ?

> + adapter->reset_number = 0;
> +
> + return 0;
> +}
> +

<...>


Re: [dpdk-dev] checksum and vlan insertion seem not working on X520

2017-03-08 Thread Chillance Zen
Hi Konstantin, thank you so much, I really
appreciate your suggestion...

It works well now, I love it.

Linc

On 8 March 2017 at 17:20, Ananyev, Konstantin 
wrote:

> Hi Linc,
>
> Wonder what TX function are you using?
> Make sure that you are not using simple TX function that doesn’t support
> any offloads or multiseg packets.
> You need something like that at setup phase:
>
> rte_eth_dev_info_get(port_id, &dev_info);
> dev_info.default_txconf.txq_flags = 0;
> ...
> rte_eth_tx_queue_setup(…, &dev_info.default_txconf);
>
> Konstantin
>
> > -Original Message-
> > From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Chillance Zen
> > Sent: Wednesday, March 8, 2017 8:50 AM
> > To: dev@dpdk.org
> > Subject: [dpdk-dev] checksum and vlan insertion seem not working on X520
> >
> > Hi everyone,
> >
> > When I was conducting a NIC offloading experiment, I could not make an X520 NIC
> > insert a VLAN tag or a checksum;
> > here is my rx/tx capability:
> > rx offload capability:9f
> > tx offload capability:203f
> >
> > with code like this:
> > while(1){
> > nr_mbufs=rte_eth_rx_burst(0,queue_id,mbufs,32);
> > for(idx=0;idx<nr_mbufs;idx++){
> > #if 0
> > buffer=rte_pktmbuf_mtod(mbufs[idx],char*);
> > ip4=(struct ipv4_hdr*)(buffer+14);
> > ip4->hdr_checksum=0;
> > mbufs[idx]->l2_len=14;
> > mbufs[idx]->l3_len=20;
> > mbufs[idx]->ol_flags=PKT_TX_IP_CKSUM|PKT_TX_IPV4;
> > #else
> > mbufs[idx]->vlan_tci=0xef00;
> > mbufs[idx]->ol_flags=PKT_TX_VLAN_PKT;
> > #endif
> > rte_eth_tx_burst(0,queue_id,&mbufs[idx],1);
> > }
> > }
> >
> > does anybody know what's wrong with my code?
> > Thanks & regards
> > Linc
>



-- 
Linc @bjtu


Re: [dpdk-dev] i40e queues per VF

2017-03-08 Thread Thomas Monjalon
ping - still waiting for answers to below questions, please

2017-02-16 15:55, Thomas Monjalon:
> 2017-02-16 13:58, Wu, Jingjing:
> > From: Thomas Monjalon
> > > 
> > > Hi,
> > > 
> > > When reading the documentation, it is not easy to understand the 
> > > capability of
> > > i40evf for the number of queues.
> > > 
> > > First, please could you explain why we need a build-time config option?
> > > In the doc, there is neither justification nor tuning guidelines:
> > > 
> > > http://dpdk.org/doc/guides/nics/i40e.html#config-file-options
> > > "
> > > CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_PF (default 64) Number of
> > > queues reserved for PF.
> > > CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VF (default 4) Number of
> > > queues reserved for each SR-IOV VF.
> > > "
> > 
> > > This number is used at initialization time to allocate queue number
> > for PF/VF for HW's queue pool. Will add more description in i40e.rst.
> 
> The description "Number of queues reserved for each SR-IOV VF" seems
> partially wrong. Please explain it is a queue pair.
> 
> > > I feel these are hard limits and should be some constants in the code, 
> > > not some
> > > build configuration options.
> > > 
> > > The other doc to look at is:
> > > http://dpdk.org/doc/guides/nics/intel_vf.html#intel-fortville-10-40-gigabit-
> > > ethernet-controller-vf-infrastructure
> > > "
> > > Each VF can have a maximum of 16 queue pairs.
> > > "
> > > 
> > > Do we agree that a queue pair is 1 Rx queue / 1 Tx queue?
> > > Note: the concept of queue pairs in Intel VF should be explained 
> > > somewhere.
> > > 
> > Yes.
> > > Below, a different limitation is given:
> > > "
> > > The available queue number(at most 4) per VF depends on the total number 
> > > of
> > > pool, which is determined by the max number of VF at PF initialization 
> > > stage and
> > > the number of queue specified in config "
> > >
> > I think there may be some inconsistent description in  doc intel_vf.rst due 
> > to
> > Multiple kinds of NICs. We should correct them.
> > Thanks for pointing that.
> > 
> > > So what is the real maximum of queue pairs? 4 or 16?
> > > The datasheet talks about 16 queues. Is it 8 pairs?
> > 
> > > That is 16 queue pairs. 16 RX queues and 16 Tx queues.
> > > 
> > > Is there something to configure the number of queues when creating VF 
> > > with the
> > > kernel driver?
> > 
> > In kernel driver, it seems at most only 4 queues are supported. That's
> > Why we add  build-time config option to make more queues are possible.
> 
> If we can create 16 queue pairs, why restrict the default configuration to 4?
> Why is it a build-time config option?




Re: [dpdk-dev] [PATCH 0/2] Fix virtio-user multi-process crash.

2017-03-08 Thread Thomas Monjalon
2017-03-02 11:00, Ami Sabo:
> The patchset fixes secondary process crash issue when it tries
> to access virtio-user pmd (e.g. via rte_eth_rx_burst).
> 
> The crash happens because in virtio_user probing,
> eth_dev_attach_secondary is not being called, as it does from
> rte_eth_dev_pci_probe. Therefore, the device is not properly
> initialized.
> 
> The patchset contains 2 patches:
> 1. Export rte_eth_dev_attach_secondary, so non-pci drivers will be
> allowed to call it.
> 2. Fix the actual bug by calling the function during virtio_user probe.

I do not understand why nobody complains for other virtual devices.
We should have the same issue with pcap, tap, ring, af_packet, etc.
Probably that other drivers are broken in secondary processes.
Or should we make a fix to handle every secondary vdev in
rte_eth_dev_allocate() ?


Re: [dpdk-dev] [PATCH v2 0/5] net/i40e: support pipeline personalization profile

2017-03-08 Thread Ferruh Yigit
On 3/3/2017 7:39 AM, Beilei Xing wrote:
> Add APIs and driver to support load/get
> i40e PPP (Pipeline Personalization Profile)

Can you please describe what "Pipeline Personalisation Profile" is?
If possible please provide some links to documents. And please feel free
to update NIC document about these details.

What are the use cases, what are the benefits of this feature?

And can you please update release notes to announce about added feature?

> since PPP will be supported from FVL6 NVM.
> 
> v2 change:
>  Correct patch num.
> 
> Beilei Xing (5):
>   net/i40e: support pipeline personalization profile
>   net/i40e: add ppp processing
>   app/testpmd: add command for writing personalization profile
>   net/i40e: add get all loaded profiles
>   app/testpmd: add command for getting loaded profiles

<...>



Re: [dpdk-dev] [PATCH v1 12/14] ring: separate out head index manipulation for enq/deq

2017-03-08 Thread Bruce Richardson
On Wed, Mar 08, 2017 at 11:49:06AM +0100, Olivier MATZ wrote:
> On Thu, 23 Feb 2017 17:24:05 +, Bruce Richardson 
>  wrote:
> > We can write a single common function for head manipulation for enq
> > and a common one for deq, allowing us to have a single worker function
> > for enq and deq, rather than two of each. Update all other inline
> > functions to use the new functions.
> > 
> > Signed-off-by: Bruce Richardson 
> > ---
> >  lib/librte_ring/rte_ring.c |   4 +-
> >  lib/librte_ring/rte_ring.h | 328 
> > -
> >  2 files changed, 149 insertions(+), 183 deletions(-)
> > 
> 
> [...]
> 
> > +static inline __attribute__((always_inline)) unsigned int
> > +__rte_ring_do_enqueue(struct rte_ring *r, void * const *obj_table,
> > +unsigned int n, enum rte_ring_queue_behavior behavior,
> > +int is_sp, unsigned int *free_space)
> >  {
> > -   uint32_t prod_head, cons_tail;
> > -   uint32_t prod_next, free_entries;
> > -   uint32_t mask = r->mask;
> > -
> > -   prod_head = r->prod.head;
> > -   cons_tail = r->cons.tail;
> > -   /* The subtraction is done between two unsigned 32bits value
> > -* (the result is always modulo 32 bits even if we have
> > -* prod_head > cons_tail). So 'free_entries' is always between 0
> > -* and size(ring)-1. */
> > -   free_entries = mask + cons_tail - prod_head;
> > -
> > -   /* check that we have enough room in ring */
> > -   if (unlikely(n > free_entries))
> > -   n = (behavior == RTE_RING_QUEUE_FIXED) ? 0 : free_entries;
> > +   uint32_t prod_head, prod_next;
> > +   uint32_t free_entries;
> >  
> > +   n = __rte_ring_move_prod_head(r, is_sp, n, behavior,
> > +   &prod_head, &prod_next, &free_entries);
> > if (n == 0)
> > goto end;
> >  
> > -
> > -   prod_next = prod_head + n;
> > -   r->prod.head = prod_next;
> > -
> > -   /* write entries in ring */
> > ENQUEUE_PTRS();
> > rte_smp_wmb();
> >  
> > +   /*
> > +* If there are other enqueues in progress that preceded us,
> > +* we need to wait for them to complete
> > +*/
> > +   while (unlikely(r->prod.tail != prod_head))
> > +   rte_pause();
> > +
> 
> I'd say this part should not be done in case is_sp == 1.
> Since it is sometimes a constant arg in an inline func, it may be better
> to add the if (is_sp == 0).
> 
> [...]
> 

Yes, it's an unnecessary check. However, having it in place for the sp
case made no performance difference in my test, so I decided to keep
the code shorter by avoiding an additional branch. If there is a
performance hit I'll remove it, but I would rather not add more branches
to the code in the absence of a real impact to not having them.

Regards,
/Bruce



Re: [dpdk-dev] [PATCH v2 1/5] net/i40e: support pipeline personalization profile

2017-03-08 Thread Ferruh Yigit
On 3/3/2017 7:39 AM, Beilei Xing wrote:
> Add admin queue functions for Pipeline Personalization
> Profile AQ commands defined in DCR 287:

You can drop DCR reference here.

>  - Write Recipe Command buffer (Opcode: 0x0270)
>  - Get Applied Profiles list (Opcode: 0x0271)
> This patch will be moved to base driver in future.
> 
> Signed-off-by: Beilei Xing 

<...>

> +
> +/**
> + * i40e_aq_write_ppp - Write pipeline personalization profile (ppp)
> + * @hw: pointer to the hw struct
> + * @buff: command buffer (size in bytes = buff_size)
> + * @buff_size: buffer size in bytes
> + * @track_id: package tracking id
> + * @error_offset: returns error offset
> + * @error_info: returns error information
> + * @cmd_details: pointer to command details structure or NULL
> + **/
> +enum
> +i40e_status_code i40e_aq_write_ppp(struct i40e_hw *hw, void *buff,
> +uint16_t buff_size, uint32_t track_id,
> +uint32_t *error_offset, uint32_t *error_info,
> +struct i40e_asq_cmd_details *cmd_details)

Is there a reason to not make these functions "static" ?

What do you think about making function calls more consistent? Below is
"i40e_aq_get_ppp_list", so this can be "i40e_aq_ppp_write".

And since this is a NIC driver, ppp can be confused with "Point-to-Point
Protocol"; is there any possible abbreviation or capitalization to
prevent confusion?

<...>

> +/**
> + * i40e_find_segment_in_package
> + * @segment_type: the segment type to search for (i.e., SEGMENT_TYPE_I40E)
> + * @pkg_hdr: pointer to the package header to be searched
> + *
> + * This function searches a package file for a particular segment type. On
> + * success it returns a pointer to the segment header, otherwise it will
> + * return NULL.
> + */
> +struct i40e_generic_seg_header *
> +i40e_find_segment_in_package(uint32_t segment_type,
> +  struct i40e_package_header *pkg_hdr)

This function name is also generic, what segment, what package. if this
is related to the ppp, please use relevant function name.

<...>

> +
> +/**
> + * i40e_write_profile
> + * @hw: pointer to the hardware structure
> + * @profile: pointer to the profile segment of the package to be downloaded
> + * @track_id: package tracking id
> + *
> + * Handles the download of a complete package.
> + */
> +enum i40e_status_code
> +i40e_write_profile(struct i40e_hw *hw, struct i40e_profile_segment *profile,
> +uint32_t track_id)

What about having "ppp" in API, like i40e_ppp_write()?

<...>

> +struct i40e_profile_section_header {
> + uint16_t tbl_size;
> + uint16_t data_end;
> + struct {
> +#define SECTION_TYPE_INFO0x010
> +#define SECTION_TYPE_MMIO0x800
> +#define SECTION_TYPE_AQ  0x801

unaligned

> +#define SECTION_TYPE_NOTE0x8000
> +#define SECTION_TYPE_NAME0x8001
> + uint32_t type;
> + uint32_t offset;
> + uint32_t size;
> + } section;
> +};
> +

<...>

> @@ -805,6 +915,23 @@ int i40e_dev_tunnel_filter_set(struct i40e_pf *pf,
>  struct rte_eth_tunnel_filter_conf *tunnel_filter,
>  uint8_t add);
>  int i40e_fdir_flush(struct rte_eth_dev *dev);
> +enum i40e_status_code i40e_aq_write_ppp(struct i40e_hw *hw, void *buff,
> + uint16_t buff_size, uint32_t track_id,
> + uint32_t *error_offset, uint32_t *error_info,
> + struct i40e_asq_cmd_details *cmd_details);
> +enum i40e_status_code i40e_aq_get_ppp_list(struct i40e_hw *hw, void *buff,
> +uint16_t buff_size, uint8_t flags,
> +struct i40e_asq_cmd_details *cmd_details);
> +struct i40e_generic_seg_header *
> +i40e_find_segment_in_package(uint32_t segment_type,
> +  struct i40e_package_header *pkg_header);
> +enum i40e_status_code
> +i40e_write_profile(struct i40e_hw *hw, struct i40e_profile_segment *i40e_seg,
> +uint32_t track_id);
> +enum i40e_status_code
> +i40e_add_pinfo_to_list(struct i40e_hw *hw,
> +struct i40e_profile_segment *profile,
> +uint8_t *profile_info_sec, uint32_t track_id);

These declarations can go away if you make functions static.

>  
>  #define I40E_DEV_TO_PCI(eth_dev) \
>   RTE_DEV_TO_PCI((eth_dev)->device)
> 



Re: [dpdk-dev] [PATCH v2 2/5] net/i40e: add ppp processing

2017-03-08 Thread Ferruh Yigit
On 3/3/2017 7:39 AM, Beilei Xing wrote:
> Add loading profile function.
> 
> Signed-off-by: Beilei Xing 
> ---
>  drivers/net/i40e/i40e_ethdev.c  | 59 
> +
>  drivers/net/i40e/rte_pmd_i40e.h |  7 +

Also you need to update *version.map file for new API.

<...>

> +
> +int
> +i40e_process_package(uint8_t port, uint8_t *buff)

Function name is so generic, please pick another one that associated
with ppp.

Also please use defined name_space for public API: rte_pmd_i40e_<...>

> +{
> + struct rte_eth_dev *dev = &rte_eth_devices[port];

Now this is public API to user applications, no more driver function, so
requires more attention. Input values needs to be verified before using
them.

Also you need to check if the provided port_id is an i40e port id.

<...>

>  
> +/**
> + * i40e_process_package - Load package
> + * @port: port id
> + * @buff: buffer of package
> + **/

Please provide proper doxygen tags, these are causing error.

> +int i40e_process_package(uint8_t port, uint8_t *buff);
> +
>  #endif /* _PMD_I40E_H_ */
> 



Re: [dpdk-dev] [PATCH v2 07/14] ring: make bulk and burst fn return vals consistent

2017-03-08 Thread Bruce Richardson
On Wed, Mar 08, 2017 at 11:22:40AM +0100, Olivier MATZ wrote:
> On Tue,  7 Mar 2017 11:32:10 +, Bruce Richardson 
>  wrote:
> > The bulk fns for rings returns 0 for all elements enqueued and negative
> > for no space. Change that to make them consistent with the burst functions
> > in returning the number of elements enqueued/dequeued, i.e. 0 or N.
> > This change also allows the return value from enq/deq to be used directly
> > without a branch for error checking.
> > 
> > Signed-off-by: Bruce Richardson 
> 
> [...]
> 
> > @@ -716,7 +695,7 @@ rte_ring_enqueue_bulk(struct rte_ring *r, void * const 
> > *obj_table,
> >  static inline int __attribute__((always_inline))
> >  rte_ring_mp_enqueue(struct rte_ring *r, void *obj)
> >  {
> > -   return rte_ring_mp_enqueue_bulk(r, &obj, 1);
> > +   return rte_ring_mp_enqueue_bulk(r, &obj, 1) ? 0 : -ENOBUFS;
> >  }
> >  
> >  /**
> 
> I'm wondering if these functions (enqueue/dequeue of one element) should
> be modified to return 0 (fail) or 1 (success) too, for consistency with
> the bulk functions.
> 
> Any opinion?
> 
I thought about that, but I would view it as risky, unless we want to go
changing the parameters to the function also, as the compiler won't flag
a change in return value like that.

/Bruce


Re: [dpdk-dev] [PATCH v2 3/5] app/testpmd: add command for writing personalization profile

2017-03-08 Thread Ferruh Yigit
On 3/3/2017 7:39 AM, Beilei Xing wrote:
> This patch is to add testpmd CLI for writing personalization
> profile.
> 
> Signed-off-by: Beilei Xing 

<...>

> +static void
> +cmd_write_ppp_parsed(
> + void *parsed_result,
> + __attribute__((unused)) struct cmdline *cl,
> + __attribute__((unused)) void *data)
> +{
> + struct cmd_write_ppp_result *res = parsed_result;
> + uint8_t *buff;
> + int ret = -ENOTSUP;
> +
> + if (res->port_id > nb_ports) {
> + printf("Invalid port, range is [0, %d]\n", nb_ports - 1);
> + return;
> + }
> +
> + if (!all_ports_stopped()) {
> + printf("Please stop all ports first\n");
> + return;
> + }
> +
> + buff = open_package_file(res->filename);

Can you please update function name to indicate package is ppp package

> + if (!buff)
> + return;
> +
> + ret = i40e_process_package(res->port_id, buff);

What if i40e support is not compiled into the library?

> + if (ret < 0)
> + printf("Failed to write profile.\n");
> +
> + close_package_file(buff);
> +}
> +

<...>



Re: [dpdk-dev] [PATCH] eventdev: remove default queue overriding

2017-03-08 Thread Van Haaren, Harry
> -Original Message-
> From: Jerin Jacob [mailto:jerin.ja...@caviumnetworks.com]
> Sent: Monday, March 6, 2017 1:45 PM
> To: Van Haaren, Harry 
> Cc: dev@dpdk.org
> Subject: Re: [PATCH] eventdev: remove default queue overriding
> 
> On Wed, Mar 01, 2017 at 12:49:02PM +, Harry van Haaren wrote:
> > PMDs that only do a specific type of scheduling cannot provide
> > CFG_ALL_TYPES, so the Eventdev infrastructure should not demand
> > that every PMD supports CFG_ALL_TYPES.
> 
> Sure. Then I think then we can enumerate CFG_ALL_TYPES as capability.
> Meaning, New flag in event_dev_cap to denote PMD can support all the sched 
> type
> per queue. My reasoning for the capability flag is because, The application 
> flow
> will be depended on the logic of creating the queue with different flags.
> Thoughts ?

Yes, makes sense.

 
> I thought, In SW implementation, We can create 3 virtual queues per
> queue. Based on en-queue's sched_type, implementation can choose the
> correct underneath virtual queue. I guess, it has performance issues,
> if so, Maybe capability is the way forward.

You're right that it is possible, you're also right that the performance
hit in switching the queue to enqueue to in software is prohibitive of
supporting it.

+1 for a capability approach. I'll respin the patch, adding a capability,
and we can discuss more / merge then.

Thanks for the input!


> Setting to CFG_ALL_TYPES will be useful for flow based event pipeling
> as I mentioned earlier in other email.
> 
> > By not overriding the default configuration of the queue as
> > suggested by the PMD, the eventdev_common unit tests can pass
> > on all PMDs, regardless of their capabilities.
> 
> Make sense. We can remove the default as CFG_ALL_TYPES.
> 
> >
> > RTE_EVENT_QUEUE_CFG_DEFAULT is no longer used by the eventdev layer
> > it can be removed now. Applications should use CFG_ALL_TYPES
> > if they require enqueue of all types a queue.
> >
> > Signed-off-by: Harry van Haaren 
> > ---
> >  lib/librte_eventdev/rte_eventdev.c | 1 -
> >  lib/librte_eventdev/rte_eventdev.h | 6 --
> >  2 files changed, 7 deletions(-)
> >
> > diff --git a/lib/librte_eventdev/rte_eventdev.c 
> > b/lib/librte_eventdev/rte_eventdev.c
> > index 68bfc3b..c32a776 100644
> > --- a/lib/librte_eventdev/rte_eventdev.c
> > +++ b/lib/librte_eventdev/rte_eventdev.c
> > @@ -593,7 +593,6 @@ rte_event_queue_setup(uint8_t dev_id, uint8_t queue_id,
> > RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_def_conf,
> > -ENOTSUP);
> > (*dev->dev_ops->queue_def_conf)(dev, queue_id, &def_conf);
> > -   def_conf.event_queue_cfg = RTE_EVENT_QUEUE_CFG_DEFAULT;
> > queue_conf = &def_conf;
> > }
> >
> > diff --git a/lib/librte_eventdev/rte_eventdev.h 
> > b/lib/librte_eventdev/rte_eventdev.h
> > index 7073987..d836f61 100644
> > --- a/lib/librte_eventdev/rte_eventdev.h
> > +++ b/lib/librte_eventdev/rte_eventdev.h
> > @@ -471,12 +471,6 @@ rte_event_dev_configure(uint8_t dev_id,
> >  /* Event queue specific APIs */
> >
> >  /* Event queue configuration bitmap flags */
> > -#define RTE_EVENT_QUEUE_CFG_DEFAULT(0)
> > -/**< Default value of *event_queue_cfg* when rte_event_queue_setup() 
> > invoked
> > - * with queue_conf == NULL
> > - *
> > - * @see rte_event_queue_setup()
> > - */
> >  #define RTE_EVENT_QUEUE_CFG_TYPE_MASK  (3ULL << 0)
> >  /**< Mask for event queue schedule type configuration request */
> >  #define RTE_EVENT_QUEUE_CFG_ALL_TYPES  (0ULL << 0)
> > --
> > 2.7.4
> >


Re: [dpdk-dev] [PATCHv8 00/46] NXP DPAA2 PMD

2017-03-08 Thread Shreyansh Jain
Hello Ferruh,

> -Original Message-
> From: Ferruh Yigit [mailto:ferruh.yi...@intel.com]
> Sent: Tuesday, March 07, 2017 10:31 PM
> To: Hemant Agrawal ; dev@dpdk.org
> Cc: thomas.monja...@6wind.com; bruce.richard...@intel.com; Shreyansh Jain
> ; john.mcnam...@intel.com;
> jerin.ja...@caviumnetworks.com; Jan Blunck 
> Subject: Re: [PATCHv8 00/46] NXP DPAA2 PMD
> 

[...]

> >   net/dpaa2: enable frame queue based dequeuing
> >
> > Shreyansh Jain (1):
> >   mk: handle intra drivers dependencies for shared build
> 
> Hi Hemant,
> 
> Were you able to find a chance to check Jan Blunck's eth_driver [1]
> patchset. I remember in previous versions of this patchset there was a
> eth_driver update too.

Thanks for highlighting.
Yes, I had a look at that patchset a couple of days back.
In case of DPAA2 PMD, we are not actually using eth_driver. There is a dummy 
reference that we are creating which can easily be removed. We were doing only 
to be 'similar' to other existing PMDs.

In fact, I will go ahead and remove this use of eth_driver instance in v9 
(considering there are some comments from Olivier which too need changes).

> 
> Perhaps dpaa2 can benefit from those updates?

So, that patchset is positive for us, but doesn't really impact the existing 
patches (functionally). 

> 
> Thanks,
> ferruh
> 
> [1]
> http://dpdk.org/ml/archives/dev/2017-March/059376.html



Re: [dpdk-dev] [PATCH v3] eventdev: amend timeout criteria comment for burst dequeue

2017-03-08 Thread Jerin Jacob
On Wed, Mar 08, 2017 at 07:29:03AM +, Nipun Gupta wrote:
> Thanks Jerin. Patchwork still shows this patch in new state.
> Does this matter or shall I move it to accepted state?

I have moved to accepted state.
Thanks.

> 
> Regards,
> Nipun
> 
> > -Original Message-
> > From: Jerin Jacob [mailto:jerin.ja...@caviumnetworks.com]
> > Sent: Monday, March 06, 2017 18:52
> > To: Nipun Gupta 
> > Cc: dev@dpdk.org; Hemant Agrawal ;
> > bruce.richard...@intel.com; gage.e...@intel.com;
> > harry.van.haa...@intel.com
> > Subject: Re: [PATCH v3] eventdev: amend timeout criteria comment for burst
> > dequeue
> > 
> > On Fri, Feb 10, 2017 at 09:56:50PM +0530, Nipun Gupta wrote:
> > > Signed-off-by: Nipun Gupta 
> > > Acked-by: Harry van Haaren 
> > 
> > Applied to dpdk-next-eventdev/master. Thanks.
> > 
> > > ---
> > > Changes for v2:
> > >  - Fix errors reported by check-git-log.sh
> > > Changes for v3:
> > >  - Corrected comment's language
> > >
> > >  lib/librte_eventdev/rte_eventdev.h | 2 +-
> > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > >
> > > diff --git a/lib/librte_eventdev/rte_eventdev.h
> > b/lib/librte_eventdev/rte_eventdev.h
> > > index c2f9310..29f0f46 100644
> > > --- a/lib/librte_eventdev/rte_eventdev.h
> > > +++ b/lib/librte_eventdev/rte_eventdev.h
> > > @@ -1216,7 +1216,7 @@ struct rte_eventdev {
> > >   *   - 0 no-wait, returns immediately if there is no event.
> > >   *   - >0 wait for the event, if the device is configured with
> > >   *   RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT then this function will
> > wait until
> > > - *   the event available or *timeout_ticks* time.
> > > + *   at least one event is available or *timeout_ticks* time.
> > >   *   if the device is not configured with
> > RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT
> > >   *   then this function will wait until the event available or
> > >   *   *dequeue_timeout_ns* ns which was previously supplied to
> > > --
> > > 1.9.1
> > >


Re: [dpdk-dev] [PATCHv8 19/46] pool/dpaa2: add DPAA2 hardware offloaded mempool

2017-03-08 Thread Hemant Agrawal

Hi Olivier,
Thanks for your detailed review.  Please see inline...

On 3/8/2017 2:35 PM, Olivier MATZ wrote:

Hi Hemant,

On Fri, 3 Mar 2017 18:16:36 +0530, Hemant Agrawal
 wrote:

Adding NXP DPAA2 architecture specific mempool support.

This patch also registers a dpaa2 type MEMPOOL OPS

Signed-off-by: Hemant Agrawal 
---
 MAINTAINERS   |   1 +
 config/common_base|   5 +
 config/defconfig_arm64-dpaa2-linuxapp-gcc |   8 +
 drivers/Makefile  |   1 +
 drivers/pool/Makefile |  40 +++
 drivers/pool/dpaa2/Makefile   |  72 ++
 drivers/pool/dpaa2/dpaa2_hw_mempool.c | 339
++
drivers/pool/dpaa2/dpaa2_hw_mempool.h |  95 
drivers/pool/dpaa2/rte_pool_dpaa2_version.map |   8 +


I think the current mempool handlers should be moved first in a
separate patch.



Are you seeing any benefit by making it a separate patch series?

it will be difficult and tricky for us. The dpaa2_pool has a dependency 
on mc bus patches. dpaa2_pmd has dependency on dpaa2_pool and mc buses.


This will mean that we have to split it into 3 patch series and it will 
become cumbersome to deal with 3 series.




I'd prefer drivers/mempool instead of drivers/pool (more precise and
more consistent with librte_mempool).



We will take care of it in next revision.





[...]

+
+struct dpaa2_bp_info rte_dpaa2_bpid_info[MAX_BPID];
+static struct dpaa2_bp_list *h_bp_list;
+
+static int
+hw_mbuf_create_pool(struct rte_mempool *mp)


Would it work for something else than mbufs?
The initial approach of the mempool is to work for any kind of object. The
specialization in mbuf is done by the mbuf layer.


I think, we did discuss that hw offloaded mempool are mainly for packet 
buffers/mbufs. Currently we only support mbuf type of objects.


Ideally a hw buffer pool can work for any kind of mempool. However, it is 
not the best way to use hw buffer pools. The latency to allocate buffers 
is higher than in software.  The main advantage SoCs get by using a hw pool 
is that they work seamlessly with the MAC layer.






+{
+   struct dpaa2_bp_list *bp_list;
+   struct dpaa2_dpbp_dev *avail_dpbp;
+   struct dpbp_attr dpbp_attr;
+   uint32_t bpid;
+   int ret;
+
+   avail_dpbp = dpaa2_alloc_dpbp_dev();
+
+   if (!avail_dpbp) {
+   PMD_DRV_LOG(ERR, "DPAA2 resources not available");
+   return -1;
+   }


The other pool handlers return a -errno instead of -1. I think it
should be the same here.


We will fix it.



The same comment applies to other locations/functions.


[...]
+
+   /* Set parameters of buffer pool list */
+   bp_list->buf_pool.num_bufs = mp->size;
+   bp_list->buf_pool.size = mp->elt_size
+   - sizeof(struct rte_mbuf) - rte_pktmbuf_priv_size(mp);
+   bp_list->buf_pool.bpid = dpbp_attr.bpid;
+   bp_list->buf_pool.h_bpool_mem = NULL;
+   bp_list->buf_pool.mp = mp;
+   bp_list->buf_pool.dpbp_node = avail_dpbp;
+   bp_list->next = h_bp_list;
+
+   bpid = dpbp_attr.bpid;
+
+
+   rte_dpaa2_bpid_info[bpid].meta_data_size = sizeof(struct rte_mbuf)
+   + rte_pktmbuf_priv_size(mp);


Are the 2 empty lines garbage?


we will fix it





+   rte_dpaa2_bpid_info[bpid].bp_list = bp_list;
+   rte_dpaa2_bpid_info[bpid].bpid = bpid;
+
+   mp->pool_data = (void *)&rte_dpaa2_bpid_info[bpid];
+
+   PMD_INIT_LOG(DEBUG, "BP List created for bpid =%d", dpbp_attr.bpid);
+
+   h_bp_list = bp_list;
+   /* Identification for our offloaded pool_data structure
+*/
+   mp->flags |= MEMPOOL_F_HW_PKT_POOL;


I think this flag should be declared in rte_mempool.h,
not in drivers/bus/fslmc/portal/dpaa2_hw_pvt.h.

It should also be documented, what does this flag mean?


Currently we need a way to differentiate that this is a hw allocated pkt 
buffer pool or software based buffer pool. String comparison is costly.


This flag was discussed during the hw mempool patches of david. Not 
everyone was in favor of keeping it in librte_mempool.


So, we just hid it inside our offloaded mempool.




[...]

+static
+void rte_dpaa2_mbuf_release(struct rte_mempool *pool __rte_unused,
+   void * const *obj_table,
+   uint32_t bpid,
+   uint32_t meta_data_size,
+   int count)



Is there a reason why some functions are prefixed with rte_dpaa2_ and
others with hw_mbuf_?


initial reason was to use rte_ only for exported functions. we can fix it.





+{
+   struct qbman_release_desc releasedesc;
+   struct qbman_swp *swp;
+   int ret;
+   int i, n;
+   uint64_t bufs[DPAA2_MBUF_MAX_ACQ_REL];
+
+   if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
+   ret = dpaa2_affine_qbman_swp();
+   if (ret != 0) {
+ 

[dpdk-dev] Reg DPDK & PMD

2017-03-08 Thread raman geetha gopalakrishnan
Hi All,

I have the following basic question. Hope to get an answer / link where i
can get myself clear.

1. In DPDK PMD is optimized driver for an given NIC to get maximum
performance.
That being the case why we are talking about DPDK supported NICs.

 A) My assumption is that NIC interface is standardized so that PMD
should actually work with any NIC (barring some NIC specific performance
tweaks)
  is that correct?

 B) if #A is correct , how can i make changes to PMD to support any NIC
?

if i have to put the above question in different way then it is

2. what is preventing us from having a common PMD layer for all NICs and
additional PMD specific to each NIC???

Thanks
Raman


Re: [dpdk-dev] [PATCH] mem: balanced allocation of hugepages

2017-03-08 Thread Sergio Gonzalez Monroy

Hi Ilya,

I have done similar tests and as you already pointed out, 'numactl 
--interleave' does not seem to work as expected.
I have also checked that the issue can be reproduced with quota limit on 
hugetlbfs mount point.


I would be inclined towards *adding libnuma as dependency* to DPDK to 
make memory allocation a bit more reliable.


Currently at a high level regarding hugepages per numa node:
1) Try to map all free hugepages. The total number of mapped hugepages 
depends if there were any limits, such as cgroups or quota in mount point.

2) Find out numa node of each hugepage.
3) Check if we have enough hugepages for requested memory in each numa 
socket/node.


Using libnuma we could try to allocate hugepages per numa:
1) Try to map as many hugepages from numa 0.
2) Check if we have enough hugepages for requested memory in numa 0.
3) Try to map as many hugepages from numa 1.
4) Check if we have enough hugepages for requested memory in numa 1.

This approach would improve failing scenarios caused by limits but It 
would still not fix issues regarding non-contiguous hugepages (worst 
case each hugepage is a memseg).
The non-contiguous hugepages issues are not as critical now that 
mempools can span over multiple memsegs/hugepages, but it is still a 
problem for any other library requiring big chunks of memory.


Potentially if we were to add an option such as 'iommu-only' when all 
devices are bound to vfio-pci, we could have a reliable way to allocate 
hugepages by just requesting the number of pages from each numa.


Thoughts?

Sergio

On 06/03/2017 09:34, Ilya Maximets wrote:

Hi all.

So, what about this change?

Best regards, Ilya Maximets.

On 16.02.2017 16:01, Ilya Maximets wrote:

Currently EAL allocates hugepages one by one not paying
attention from which NUMA node allocation was done.

Such behaviour leads to allocation failure if number of
available hugepages for application limited by cgroups
or hugetlbfs and memory requested not only from the first
socket.

Example:
# 90 x 1GB hugepages available in a system

cgcreate -g hugetlb:/test
# Limit to 32GB of hugepages
cgset -r hugetlb.1GB.limit_in_bytes=34359738368 test
# Request 4GB from each of 2 sockets
cgexec -g hugetlb:test testpmd --socket-mem=4096,4096 ...

EAL: SIGBUS: Cannot mmap more hugepages of size 1024 MB
EAL: 32 not 90 hugepages of size 1024 MB allocated
EAL: Not enough memory available on socket 1!
 Requested: 4096MB, available: 0MB
PANIC in rte_eal_init():
Cannot init memory

This happens because all allocated pages are
on socket 0.

Fix this issue by setting mempolicy MPOL_PREFERRED for each
hugepage to one of requested nodes in a round-robin fashion.
In this case all allocated pages will be fairly distributed
between all requested nodes.

New config option RTE_LIBRTE_EAL_NUMA_AWARE_HUGEPAGES
introduced and disabled by default because of external
dependency from libnuma.

Cc: 
Fixes: 77988fc08dc5 ("mem: fix allocating all free hugepages")

Signed-off-by: Ilya Maximets 
---
  config/common_base   |  1 +
  lib/librte_eal/Makefile  |  4 ++
  lib/librte_eal/linuxapp/eal/eal_memory.c | 66 
  mk/rte.app.mk|  3 ++
  4 files changed, 74 insertions(+)





Re: [dpdk-dev] [PATCH 1/2] Fix container_of() macro to work with const members

2017-03-08 Thread Thomas Monjalon
2017-02-27 15:14, Bruce Richardson:
> On Mon, Feb 27, 2017 at 03:28:14PM +0100, Jan Blunck wrote:
> > On Tue, Feb 14, 2017 at 3:36 PM, Jan Blunck  wrote:
> > > This fixes the usage of structure members that are declared const to get
> > > a pointer to the embedding parent structure.
> > 
> > Ping. Is anyone willing to review this?
> >
> Looks ok to me.
> 
> Acked-by: Bruce Richardson 

Applied, thanks


Re: [dpdk-dev] Reg DPDK & PMD

2017-03-08 Thread Bruce Richardson
On Wed, Mar 08, 2017 at 07:05:03PM +0530, raman geetha gopalakrishnan wrote:
> Hi All,
> 
> I have the following basic question. Hope to get an answer / link where i
> can get myself clear.
> 
> 1. In DPDK PMD is optimized driver for an given NIC to get maximum
> performance.
> That being the case why we are talking about DPDK supported NICs.
> 
>  A) My assumption is that NIC interface is standardized so that PMD
> should actually work with any NIC (barring some NIC specific performance
> tweaks)
>   is that correct?

No, that assumption is not correct, which is the reason we have so many
NIC drivers in DPDK. Each NIC uses a different method of talking to SW,
both in terms of the registers needed to be accessed to initialize the
NIC and then in terms of the metadata format used to receive or transmit
packets.

Regards,
/Bruce

> 
>  B) if #A is correct , how can i make changes to PMD to support any NIC
> ?
> 
> if i have to put the above question in different way then it is
> 
> 2. what is preventing us from having a common PMD layer for all NICs and
> additional PMD specific to each NIC???
> 
> Thanks
> Raman


Re: [dpdk-dev] Reg DPDK & PMD

2017-03-08 Thread Ferruh Yigit
On 3/8/2017 1:35 PM, raman geetha gopalakrishnan wrote:
> Hi All,
> 
> I have the following basic question. Hope to get an answer / link where i
> can get myself clear.
> 
> 1. In DPDK PMD is optimized driver for an given NIC to get maximum
> performance.
> That being the case why we are talking about DPDK supported NICs.
> 
>  A) My assumption is that NIC interface is standardized so that PMD
> should actually work with any NIC (barring some NIC specific performance
> tweaks)
>   is that correct?

PMDs (Poll Mode Drivers) are _real_ device drivers; they are not an
optimization layer on top of a standardized interface. They deal
directly with hardware, so they need to be specific to that hardware.

> 
>  B) if #A is correct , how can i make changes to PMD to support any NIC
> ?
> 
> if i have to put the above question in different way then it is
> 
> 2. what is preventing us from having a common PMD layer for all NICs and
> additional PMD specific to each NIC???
> 
> Thanks
> Raman
> 



Re: [dpdk-dev] Reg DPDK & PMD

2017-03-08 Thread Wiles, Keith

> On Mar 8, 2017, at 8:25 AM, Ferruh Yigit  wrote:
> 
> On 3/8/2017 1:35 PM, raman geetha gopalakrishnan wrote:
>> Hi All,
>> 
>> I have the following basic question. Hope to get an answer / link where i
>> can get myself clear.
>> 
>> 1. In DPDK PMD is optimized driver for an given NIC to get maximum
>> performance.
>>That being the case why we are talking about DPDK supported NICs.
>> 
>> A) My assumption is that NIC interface is standardized so that PMD
>> should actually work with any NIC (barring some NIC specific performance
>> tweaks)
>>  is that correct?
> 
> PMDs (Poll Mode Drivers) are _real_ device drivers; they are not an
> optimization layer on top of a standardized interface. They deal
> directly with hardware, so they need to be specific to that hardware.
> 
>> 
>> B) if #A is correct , how can i make changes to PMD to support any NIC
>> ?
>> 
>> if i have to put the above question in different way then it is
>> 
>> 2. what is preventing us from having a common PMD layer for all NICs and
>> additional PMD specific to each NIC???

Another minor point is that DPDK has a generic API layer for Ethernet NICs,
called ethdev, with a set of standard APIs for the application. The PMDs
attach themselves to ethdev using standardized APIs, which is about as
close as we can get to a standard interface to PMDs/hardware.

>> 
>> Thanks
>> Raman
>> 
> 

Regards,
Keith



Re: [dpdk-dev] [PATCH v2 0/5] minor fixes and cleanup

2017-03-08 Thread Thomas Monjalon
2017-02-23 16:41, Bruce Richardson:
> This patchset contains some minor fixes and improvements which
> I found in the context of preparing a new patchset for the
> rte_ring code. Having these merged separately reduces the
> number of patches in the ring set so should make everyone's
> life easier.
> 
> V2 Changes: new patch added with extra fix for quota_watermark example
> 
> Bruce Richardson (5):
>   app/pdump: fix duplicate macro definition
>   ring: add a function to return the ring size
>   crypto/null: use ring size function
>   examples/quota_watermark: correct code indentation
>   examples/quota_watermark: fix requirement for 2M pages

Applied, thanks


[dpdk-dev] [PATCH v2 02/13] ethdev: add flow API rule copy function

2017-03-08 Thread Gaetan Rivet
Take this helper from the testpmd app to offer it alongside the flow
API.

This allows PMDs and applications to save flow rules in their generic
format for later processing. This is useful when rules cannot be applied
immediately, such as when the device is not properly initialized.

Signed-off-by: Gaetan Rivet 
---
 app/test-pmd/config.c   | 263 +++-
 app/test-pmd/testpmd.h  |   5 +-
 lib/librte_ether/rte_flow.c | 283 
 lib/librte_ether/rte_flow.h |  59 +
 4 files changed, 387 insertions(+), 223 deletions(-)

diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index 54e38c4..f20e258 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -932,208 +932,6 @@ port_mtu_set(portid_t port_id, uint16_t mtu)
printf("Set MTU failed. diag=%d\n", diag);
 }
 
-/* Generic flow management functions. */
-
-/** Generate flow_item[] entry. */
-#define MK_FLOW_ITEM(t, s) \
-   [RTE_FLOW_ITEM_TYPE_ ## t] = { \
-   .name = # t, \
-   .size = s, \
-   }
-
-/** Information about known flow pattern items. */
-static const struct {
-   const char *name;
-   size_t size;
-} flow_item[] = {
-   MK_FLOW_ITEM(END, 0),
-   MK_FLOW_ITEM(VOID, 0),
-   MK_FLOW_ITEM(INVERT, 0),
-   MK_FLOW_ITEM(ANY, sizeof(struct rte_flow_item_any)),
-   MK_FLOW_ITEM(PF, 0),
-   MK_FLOW_ITEM(VF, sizeof(struct rte_flow_item_vf)),
-   MK_FLOW_ITEM(PORT, sizeof(struct rte_flow_item_port)),
-   MK_FLOW_ITEM(RAW, sizeof(struct rte_flow_item_raw)), /* +pattern[] */
-   MK_FLOW_ITEM(ETH, sizeof(struct rte_flow_item_eth)),
-   MK_FLOW_ITEM(VLAN, sizeof(struct rte_flow_item_vlan)),
-   MK_FLOW_ITEM(IPV4, sizeof(struct rte_flow_item_ipv4)),
-   MK_FLOW_ITEM(IPV6, sizeof(struct rte_flow_item_ipv6)),
-   MK_FLOW_ITEM(ICMP, sizeof(struct rte_flow_item_icmp)),
-   MK_FLOW_ITEM(UDP, sizeof(struct rte_flow_item_udp)),
-   MK_FLOW_ITEM(TCP, sizeof(struct rte_flow_item_tcp)),
-   MK_FLOW_ITEM(SCTP, sizeof(struct rte_flow_item_sctp)),
-   MK_FLOW_ITEM(VXLAN, sizeof(struct rte_flow_item_vxlan)),
-};
-
-/** Compute storage space needed by item specification. */
-static void
-flow_item_spec_size(const struct rte_flow_item *item,
-   size_t *size, size_t *pad)
-{
-   if (!item->spec)
-   goto empty;
-   switch (item->type) {
-   union {
-   const struct rte_flow_item_raw *raw;
-   } spec;
-
-   case RTE_FLOW_ITEM_TYPE_RAW:
-   spec.raw = item->spec;
-   *size = offsetof(struct rte_flow_item_raw, pattern) +
-   spec.raw->length * sizeof(*spec.raw->pattern);
-   break;
-   default:
-empty:
-   *size = 0;
-   break;
-   }
-   *pad = RTE_ALIGN_CEIL(*size, sizeof(double)) - *size;
-}
-
-/** Generate flow_action[] entry. */
-#define MK_FLOW_ACTION(t, s) \
-   [RTE_FLOW_ACTION_TYPE_ ## t] = { \
-   .name = # t, \
-   .size = s, \
-   }
-
-/** Information about known flow actions. */
-static const struct {
-   const char *name;
-   size_t size;
-} flow_action[] = {
-   MK_FLOW_ACTION(END, 0),
-   MK_FLOW_ACTION(VOID, 0),
-   MK_FLOW_ACTION(PASSTHRU, 0),
-   MK_FLOW_ACTION(MARK, sizeof(struct rte_flow_action_mark)),
-   MK_FLOW_ACTION(FLAG, 0),
-   MK_FLOW_ACTION(QUEUE, sizeof(struct rte_flow_action_queue)),
-   MK_FLOW_ACTION(DROP, 0),
-   MK_FLOW_ACTION(COUNT, 0),
-   MK_FLOW_ACTION(DUP, sizeof(struct rte_flow_action_dup)),
-   MK_FLOW_ACTION(RSS, sizeof(struct rte_flow_action_rss)), /* +queue[] */
-   MK_FLOW_ACTION(PF, 0),
-   MK_FLOW_ACTION(VF, sizeof(struct rte_flow_action_vf)),
-};
-
-/** Compute storage space needed by action configuration. */
-static void
-flow_action_conf_size(const struct rte_flow_action *action,
- size_t *size, size_t *pad)
-{
-   if (!action->conf)
-   goto empty;
-   switch (action->type) {
-   union {
-   const struct rte_flow_action_rss *rss;
-   } conf;
-
-   case RTE_FLOW_ACTION_TYPE_RSS:
-   conf.rss = action->conf;
-   *size = offsetof(struct rte_flow_action_rss, queue) +
-   conf.rss->num * sizeof(*conf.rss->queue);
-   break;
-   default:
-empty:
-   *size = 0;
-   break;
-   }
-   *pad = RTE_ALIGN_CEIL(*size, sizeof(double)) - *size;
-}
-
-/** Generate a port_flow entry from attributes/pattern/actions. */
-static struct port_flow *
-port_flow_new(const struct rte_flow_attr *attr,
- const struct rte_flow_item *pattern,
- const struct rte_flow_action *actions)
-{
-   const struct rte_flow_item *item;
-   const struct rte_flow_action *action;
-   struct 

[dpdk-dev] [PATCH v2 04/13] pci: expose device detach routine

2017-03-08 Thread Gaetan Rivet
Make the pci_detach_all_drivers public. No further changes.

Signed-off-by: Gaetan Rivet 
---
 lib/librte_eal/bsdapp/eal/rte_eal_version.map   |  5 +
 lib/librte_eal/common/eal_common_pci.c  |  6 +++---
 lib/librte_eal/common/include/rte_pci.h | 15 +++
 lib/librte_eal/linuxapp/eal/rte_eal_version.map |  5 +
 4 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map 
b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
index 2cf1ac8..3a87756 100644
--- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
@@ -185,3 +185,8 @@ DPDK_17.02 {
rte_bus_unregister;
 
 } DPDK_16.11;
+
+DPDK_17.05 {
+   rte_eal_pci_detach_all_drivers;
+
+} DPDK_17.02;
diff --git a/lib/librte_eal/common/eal_common_pci.c 
b/lib/librte_eal/common/eal_common_pci.c
index 72547bd..d38335a 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -313,8 +313,8 @@ pci_probe_all_drivers(struct rte_pci_device *dev)
  * registered driver for the given device. Return -1 if initialization
  * failed, return 1 if no driver is found for this device.
  */
-static int
-pci_detach_all_drivers(struct rte_pci_device *dev)
+int
+rte_eal_pci_detach_all_drivers(struct rte_pci_device *dev)
 {
struct rte_pci_driver *dr = NULL;
int rc = 0;
@@ -388,7 +388,7 @@ rte_eal_pci_detach(const struct rte_pci_addr *addr)
if (rte_eal_compare_pci_addr(&dev->addr, addr))
continue;
 
-   ret = pci_detach_all_drivers(dev);
+   ret = rte_eal_pci_detach_all_drivers(dev);
if (ret < 0)
goto err_return;
 
diff --git a/lib/librte_eal/common/include/rte_pci.h 
b/lib/librte_eal/common/include/rte_pci.h
index 4a67883..598a1ef 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -459,6 +459,21 @@ void pci_unmap_resource(void *requested_addr, size_t size);
 int rte_eal_pci_probe_one(const struct rte_pci_addr *addr);
 
 /**
+ * Remove any PCI drivers tied to the device.
+ *
+ * If vendor/device ID match, call the remove() function of all
+ * registered driver for the given device.
+ *
+ * @param dev
+ * The PCI device to remove
+ * @return
+ *   - 0 on success.
+ *   - Negative on error.
+ *   - Positive if no driver exists for that device.
+ */
+int rte_eal_pci_detach_all_drivers(struct rte_pci_device *dev);
+
+/**
  * Close the single PCI device.
  *
  * Scan the content of the PCI bus, and find the pci device specified by pci
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map 
b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index 3c68ff5..192c0d5 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -189,3 +189,8 @@ DPDK_17.02 {
rte_bus_unregister;
 
 } DPDK_16.11;
+
+DPDK_17.05 {
+   rte_eal_pci_detach_all_drivers;
+
+} DPDK_17.02;
-- 
2.1.4



[dpdk-dev] [PATCH v2 05/13] pci: expose parse and probe routines

2017-03-08 Thread Gaetan Rivet
Make pci_probe_all_drivers public, no further changes to it.
Introduce a public function for pci_scan_one. This function scans one
device, but does not allocate that device or insert it within the device
list.

Signed-off-by: Gaetan Rivet 
---
 lib/librte_eal/bsdapp/eal/rte_eal_version.map   |  1 +
 lib/librte_eal/common/eal_common_pci.c  | 10 +++
 lib/librte_eal/common/include/rte_pci.h | 25 
 lib/librte_eal/linuxapp/eal/eal_pci.c   | 39 +++--
 lib/librte_eal/linuxapp/eal/rte_eal_version.map |  2 ++
 5 files changed, 57 insertions(+), 20 deletions(-)

diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map 
b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
index 3a87756..ba8f8e4 100644
--- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
@@ -188,5 +188,6 @@ DPDK_17.02 {
 
 DPDK_17.05 {
rte_eal_pci_detach_all_drivers;
+   rte_eal_pci_probe_all_drivers;
 
 } DPDK_17.02;
diff --git a/lib/librte_eal/common/eal_common_pci.c 
b/lib/librte_eal/common/eal_common_pci.c
index d38335a..15a0c48 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -282,8 +282,8 @@ rte_eal_pci_detach_dev(struct rte_pci_driver *dr,
  * registered driver for the given device. Return -1 if initialization
  * failed, return 1 if no driver is found for this device.
  */
-static int
-pci_probe_all_drivers(struct rte_pci_device *dev)
+int
+rte_eal_pci_probe_all_drivers(struct rte_pci_device *dev)
 {
struct rte_pci_driver *dr = NULL;
int rc = 0;
@@ -358,7 +358,7 @@ rte_eal_pci_probe_one(const struct rte_pci_addr *addr)
if (rte_eal_compare_pci_addr(&dev->addr, addr))
continue;
 
-   ret = pci_probe_all_drivers(dev);
+   ret = rte_eal_pci_probe_all_drivers(dev);
if (ret)
goto err_return;
return 0;
@@ -430,10 +430,10 @@ rte_eal_pci_probe(void)
 
/* probe all or only whitelisted devices */
if (probe_all)
-   ret = pci_probe_all_drivers(dev);
+   ret = rte_eal_pci_probe_all_drivers(dev);
else if (devargs != NULL &&
devargs->type == RTE_DEVTYPE_WHITELISTED_PCI)
-   ret = pci_probe_all_drivers(dev);
+   ret = rte_eal_pci_probe_all_drivers(dev);
if (ret < 0)
rte_exit(EXIT_FAILURE, "Requested device " PCI_PRI_FMT
 " cannot be used\n", dev->addr.domain, 
dev->addr.bus,
diff --git a/lib/librte_eal/common/include/rte_pci.h 
b/lib/librte_eal/common/include/rte_pci.h
index 598a1ef..4291da8 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -373,6 +373,16 @@ rte_eal_compare_pci_addr(const struct rte_pci_addr *addr,
 int rte_eal_pci_scan(void);
 
 /**
+ * Parse the content of one PCI entry.
+ *
+ * @return
+ *  0 on success, negative on error
+ */
+int rte_eal_pci_parse_sysfs_entry(struct rte_pci_device *dev,
+const char *dirname,
+const struct rte_pci_addr *addr);
+
+/**
  * Probe the PCI bus for registered drivers.
  *
  * Scan the content of the PCI bus, and call the probe() function for
@@ -459,6 +469,21 @@ void pci_unmap_resource(void *requested_addr, size_t size);
 int rte_eal_pci_probe_one(const struct rte_pci_addr *addr);
 
 /**
+ * Probe all pci drivers against the device.
+ *
+ * If vendor/device ID match, call the probe() function of all
+ * registered driver for the given device.
+ *
+ * @param dev
+ * The PCI device to probe
+ * @return
+ *   - 0 on success.
+ *   - Negative on error.
+ *   - Positive if no driver exists for that device.
+ */
+int rte_eal_pci_probe_all_drivers(struct rte_pci_device *dev);
+
+/**
  * Remove any PCI drivers tied to the device.
  *
  * If vendor/device ID match, call the remove() function of all
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c 
b/lib/librte_eal/linuxapp/eal/eal_pci.c
index e2fc219..51c6b84 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -226,27 +226,22 @@ pci_parse_sysfs_resource(const char *filename, struct 
rte_pci_device *dev)
return -1;
 }
 
-/* Scan one pci sysfs entry, and fill the devices list from it. */
-static int
-pci_scan_one(const char *dirname, const struct rte_pci_addr *addr)
+/* Parse a pci sysfs entry */
+int
+rte_eal_pci_parse_sysfs_entry(struct rte_pci_device *dev, const char *dirname,
+   const struct rte_pci_addr *addr)
 {
char filename[PATH_MAX];
unsigned long tmp;
-   struct rte_pci_device *dev;
char driver[PATH_MAX];
int ret;
 
-   dev = malloc(sizeof(*dev));
-   if (dev == NULL)
-   return -1;
-

[dpdk-dev] [PATCH v2 06/13] net/failsafe: add fail-safe PMD

2017-03-08 Thread Gaetan Rivet
Introduce the fail-safe poll mode driver initialization and enable its
build infrastructure.

This PMD allows applications to benefit from true hot-plugging
support without having to implement it.

It intercepts and manages Ethernet device removal events issued by
slave PMDs and re-initializes them transparently when brought back.
It also allows defining a contingency to the removal of a device, by
designating a fail-over device that will take on transmitting operations
if the preferred device is removed.

Applications only see a fail-safe instance, without caring for
underlying activity ensuring their continued operations.

Signed-off-by: Gaetan Rivet 
Acked-by: Olga Shern 
---
 MAINTAINERS |   5 +
 config/common_base  |   5 +
 doc/guides/nics/fail_safe.rst   | 133 +++
 doc/guides/nics/features/failsafe.ini   |  24 ++
 doc/guides/nics/index.rst   |   1 +
 drivers/net/Makefile|   1 +
 drivers/net/failsafe/Makefile   |  72 
 drivers/net/failsafe/failsafe.c | 229 +++
 drivers/net/failsafe/failsafe_args.c| 347 
 drivers/net/failsafe/failsafe_eal.c | 318 +++
 drivers/net/failsafe/failsafe_ops.c | 677 
 drivers/net/failsafe/failsafe_private.h | 232 +++
 drivers/net/failsafe/failsafe_rxtx.c| 107 +
 mk/rte.app.mk   |   1 +
 14 files changed, 2152 insertions(+)
 create mode 100644 doc/guides/nics/fail_safe.rst
 create mode 100644 doc/guides/nics/features/failsafe.ini
 create mode 100644 drivers/net/failsafe/Makefile
 create mode 100644 drivers/net/failsafe/failsafe.c
 create mode 100644 drivers/net/failsafe/failsafe_args.c
 create mode 100644 drivers/net/failsafe/failsafe_eal.c
 create mode 100644 drivers/net/failsafe/failsafe_ops.c
 create mode 100644 drivers/net/failsafe/failsafe_private.h
 create mode 100644 drivers/net/failsafe/failsafe_rxtx.c

diff --git a/MAINTAINERS b/MAINTAINERS
index cc3bf98..ab9ed0c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -315,6 +315,11 @@ M: Matej Vido 
 F: drivers/net/szedata2/
 F: doc/guides/nics/szedata2.rst
 
+Fail-safe PMD
+M: Gaetan Rivet 
+F: drivers/net/failsafe/
+F: doc/guides/nics/fail_safe.rst
+
 Intel e1000
 M: Wenzhuo Lu 
 F: drivers/net/e1000/
diff --git a/config/common_base b/config/common_base
index 71a4fcb..ae64a5b 100644
--- a/config/common_base
+++ b/config/common_base
@@ -364,6 +364,11 @@ CONFIG_RTE_LIBRTE_PMD_XENVIRT=n
 CONFIG_RTE_LIBRTE_PMD_NULL=y
 
 #
+# Compile fail-safe PMD
+#
+CONFIG_RTE_LIBRTE_PMD_FAILSAFE=y
+
+#
 # Do prefetch of packet data within PMD driver receive function
 #
 CONFIG_RTE_PMD_PACKET_PREFETCH=y
diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst
new file mode 100644
index 000..056f85f
--- /dev/null
+++ b/doc/guides/nics/fail_safe.rst
@@ -0,0 +1,133 @@
+..  BSD LICENSE
+Copyright 2017 6WIND S.A.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+* Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+* Neither the name of 6WIND S.A. nor the names of its
+contributors may be used to endorse or promote products derived
+from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Fail-safe poll mode driver library
+==
+
+The Fail-safe poll mode driver library (**librte_pmd_failsafe**) is a virtual
+device that allows using any device supporting hotplug (sudden device removal
+and plugging on its bus), without modifying other components relying on such
+device (application, other PMDs).
+
+Additionally to the Seamless Hotplug feature, the Fail-safe PMD offers the
+ability to redirect operations to secondary devices when the

[dpdk-dev] [PATCH v2 08/13] net/failsafe: add flexible device definition

2017-03-08 Thread Gaetan Rivet
Add the "exec" device type.
The parameters given to this type of device will be executed in a shell.
The output of this command is then used as a definition for a device.

That command can be re-interpreted if the related device is not
plugged-in. It allows for a device definition to react to system
changes (e.g. changing PCI bus for a given device).

Signed-off-by: Gaetan Rivet 
Acked-by: Olga Shern 
---
 doc/guides/nics/fail_safe.rst   |  16 +
 drivers/net/failsafe/failsafe_args.c| 102 
 drivers/net/failsafe/failsafe_ether.c   |   7 +++
 drivers/net/failsafe/failsafe_private.h |   4 ++
 4 files changed, 129 insertions(+)

diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst
index 74bd807..bb8a221 100644
--- a/doc/guides/nics/fail_safe.rst
+++ b/doc/guides/nics/fail_safe.rst
@@ -91,6 +91,15 @@ Fail-safe command line parameters
   additional sub-device parameters if need be. They will be passed on to the
   sub-device.
 
+- **exec()** parameter
+
+  This parameter allows the user to provide a command to the fail-safe PMD to
+  execute and define a sub-device. It is done within a regular shell context.
+  The first line of its output is read by the fail-safe PMD and otherwise
+  interpreted as if passed by the regular **dev** parameter. Any other line is
+  discarded. If the command fails or outputs an incorrect string, the sub-device
+  is not initialized.
+
 - **mac** parameter [MAC address]
 
   This parameter allows the user to set a default MAC address to the fail-safe
@@ -126,6 +135,13 @@ This section shows some example of using **testpmd** with 
a fail-safe PMD.
  
--vdev='net_failsafe0,mac=de:ad:be:ef:01:02,dev(84:00.0),dev(net_ring0,nodeaction=r1:0:CREATE)'
 -- \
  -i
 
+#. Start testpmd using a flexible device definition
+
+   .. code-block:: console
+
+  $RTE_TARGET/build/app/testpmd -c 0xff -n 4 --no-pci \
+ --vdev='net_failsafe0,exec(echo 84:00.0)' -- -i
+
 Using the Fail-safe PMD from an application
 ---
 
diff --git a/drivers/net/failsafe/failsafe_args.c 
b/drivers/net/failsafe/failsafe_args.c
index 773b322..839831f 100644
--- a/drivers/net/failsafe/failsafe_args.c
+++ b/drivers/net/failsafe/failsafe_args.c
@@ -112,6 +112,80 @@ parse_device(struct sub_device *sdev, char *args)
return 0;
 }
 
+static void
+sanitize_cmdline(char *args)
+{
+   size_t len;
+
+   len = strnlen(args, DEVARGS_MAXLEN);
+   args[len - 1] = '\0';
+}
+
+static int
+execute_cmd(struct sub_device *sdev, char *cmdline)
+{
+   FILE *fp;
+   /* store possible newline as well */
+   char output[DEVARGS_MAXLEN + 1];
+   size_t len;
+   int old_err;
+   int ret;
+
+   DEBUG("'%s'", cmdline);
+   if (cmdline == NULL &&
+   sdev->cmdline == NULL) {
+   /* debug: should never happen to a user */
+   DEBUG("Invalid command line");
+   return -EINVAL;
+   }
+   if (sdev->cmdline == NULL) {
+   char *new_str;
+
+   len = strlen(cmdline) + 1;
+   new_str = rte_realloc(sdev->cmdline, len,
+   RTE_CACHE_LINE_SIZE);
+   if (new_str == NULL) {
+   ERROR("Command line allocation failed");
+   return -ENOMEM;
+   }
+   sdev->cmdline = new_str;
+   snprintf(sdev->cmdline, len, "%s", cmdline);
+   } else {
+   if (strcmp(sdev->cmdline, cmdline))
+   DEBUG("cmd mismatch: '%s' != '%s'",
+   sdev->cmdline, cmdline);
+   cmdline = sdev->cmdline;
+   }
+   old_err = errno;
+   fp = popen(cmdline, "r");
+   if (fp == NULL) {
+   ret = errno;
+   ERROR("popen: %s", strerror(errno));
+   errno = old_err;
+   return ret;
+   }
+   /* We only read one line */
+   if (fgets(output, sizeof(output) - 1, fp) == NULL) {
+   DEBUG("Could not read command output");
+   return -ENODEV;
+   }
+   sanitize_cmdline(output);
+   ret = parse_device(sdev, output);
+   if (ret) {
+   ERROR("Parsing device '%s' failed", output);
+   goto ret_pclose;
+   }
+ret_pclose:
+   ret = pclose(fp);
+   if (ret) {
+   ret = errno;
+   ERROR("pclose: %s", strerror(errno));
+   errno = old_err;
+   return ret;
+   }
+   return ret;
+}
+
 static int
 parse_device_param(struct rte_eth_dev *dev, const char *param,
uint8_t head)
@@ -146,6 +220,14 @@ parse_device_param(struct rte_eth_dev *dev, const char 
*param,
ret = parse_device(sdev, args);
if (ret)
goto free_args;
+   } else if (strncmp(param, "exec", 4) == 0) {
+   ret = execute_

[dpdk-dev] [PATCH v2 10/13] net/failsafe: support offload capabilities

2017-03-08 Thread Gaetan Rivet
Signed-off-by: Gaetan Rivet 
Acked-by: Olga Shern 
---
 doc/guides/nics/features/failsafe.ini |   6 ++
 drivers/net/failsafe/failsafe_ops.c   | 125 +-
 2 files changed, 129 insertions(+), 2 deletions(-)

diff --git a/doc/guides/nics/features/failsafe.ini 
b/doc/guides/nics/features/failsafe.ini
index 9167b59..257f579 100644
--- a/doc/guides/nics/features/failsafe.ini
+++ b/doc/guides/nics/features/failsafe.ini
@@ -14,6 +14,12 @@ Unicast MAC filter   = Y
 Multicast MAC filter = Y
 VLAN filter  = Y
 Flow API = Y
+VLAN offload = Y
+QinQ offload = Y
+L3 checksum offload  = Y
+L4 checksum offload  = Y
+Inner L3 checksum= Y
+Inner L4 checksum= Y
 Packet type parsing  = Y
 Basic stats  = Y
 Stats per queue  = Y
diff --git a/drivers/net/failsafe/failsafe_ops.c 
b/drivers/net/failsafe/failsafe_ops.c
index d837280..2a4d102 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -63,22 +63,143 @@ static struct rte_eth_dev_info default_infos = {
.nb_seg_max = UINT16_MAX,
.nb_mtu_seg_max = UINT16_MAX,
},
-   /* Set of understood capabilities */
-   .rx_offload_capa = 0x0,
+   /*
+* Set of capabilities that can be verified upon
+* configuring a sub-device.
+*/
+   .rx_offload_capa =
+   DEV_RX_OFFLOAD_VLAN_STRIP |
+   DEV_RX_OFFLOAD_QINQ_STRIP |
+   DEV_RX_OFFLOAD_IPV4_CKSUM |
+   DEV_RX_OFFLOAD_UDP_CKSUM |
+   DEV_RX_OFFLOAD_TCP_CKSUM |
+   DEV_RX_OFFLOAD_TCP_LRO,
.tx_offload_capa = 0x0,
.flow_type_rss_offloads = 0x0,
 };
 
+/**
+ * Check whether a specific offloading capability
+ * is supported by a sub_device.
+ *
+ * @return
+ *   0: all requested capabilities are supported by the sub_device
+ *   positive value: This flag at least is not supported by the sub_device
+ */
+static int
+fs_port_offload_validate(struct rte_eth_dev *dev,
+struct sub_device *sdev)
+{
+   struct rte_eth_dev_info infos = {0};
+   struct rte_eth_conf *cf;
+   uint32_t cap;
+
+   cf = &dev->data->dev_conf;
+   SUBOPS(sdev, dev_infos_get)(ETH(sdev), &infos);
+   /* RX capabilities */
+   cap = infos.rx_offload_capa;
+   if (cf->rxmode.hw_vlan_strip &&
+   ((cap & DEV_RX_OFFLOAD_VLAN_STRIP) == 0)) {
+   WARN("VLAN stripping offload requested but not supported by 
sub_device %d",
+ SUB_ID(sdev));
+   return DEV_RX_OFFLOAD_VLAN_STRIP;
+   }
+   if (cf->rxmode.hw_ip_checksum &&
+   ((cap & (DEV_RX_OFFLOAD_IPV4_CKSUM |
+DEV_RX_OFFLOAD_UDP_CKSUM |
+DEV_RX_OFFLOAD_TCP_CKSUM)) !=
+(DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM))) {
+   WARN("IP checksum offload requested but not supported by 
sub_device %d",
+ SUB_ID(sdev));
+   return DEV_RX_OFFLOAD_IPV4_CKSUM |
+  DEV_RX_OFFLOAD_UDP_CKSUM |
+  DEV_RX_OFFLOAD_TCP_CKSUM;
+   }
+   if (cf->rxmode.enable_lro &&
+   ((cap & DEV_RX_OFFLOAD_TCP_LRO) == 0)) {
+   WARN("TCP LRO offload requested but not supported by sub_device 
%d",
+ SUB_ID(sdev));
+   return DEV_RX_OFFLOAD_TCP_LRO;
+   }
+   if (cf->rxmode.hw_vlan_extend &&
+   ((cap & DEV_RX_OFFLOAD_QINQ_STRIP) == 0)) {
+   WARN("Stacked VLAN stripping offload requested but not 
supported by sub_device %d",
+ SUB_ID(sdev));
+   return DEV_RX_OFFLOAD_QINQ_STRIP;
+   }
+   /* TX capabilities */
+   /* Nothing to do, no tx capa supported */
+   return 0;
+}
+
+/*
+ * Disable the dev_conf flag related to an offload capability flag
+ * within an ethdev configuration.
+ */
+static int
+fs_port_disable_offload(struct rte_eth_conf *cf,
+   uint32_t ol_cap)
+{
+   switch (ol_cap) {
+   case DEV_RX_OFFLOAD_VLAN_STRIP:
+   INFO("Disabling VLAN stripping offload");
+   cf->rxmode.hw_vlan_strip = 0;
+   break;
+   case DEV_RX_OFFLOAD_IPV4_CKSUM:
+   case DEV_RX_OFFLOAD_UDP_CKSUM:
+   case DEV_RX_OFFLOAD_TCP_CKSUM:
+   case (DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM):
+   INFO("Disabling IP checksum offload");
+   cf->rxmode.hw_ip_checksum = 0;
+   break;
+   case DEV_RX_OFFLOAD_TCP_LRO:
+   INFO("Disabling TCP LRO offload");
+   cf->rxmode.enable_lro = 0;
+   break;
+   case DEV_RX_OFFLOAD_QINQ_STRIP:
+   INFO("Disabling stacked VLAN stripping offload");
+   cf->rxmode.hw_vlan_extend =

[dpdk-dev] [PATCH v2 11/13] net/failsafe: add fast burst functions

2017-03-08 Thread Gaetan Rivet
Signed-off-by: Gaetan Rivet 
Acked-by: Olga Shern 
---
 drivers/net/failsafe/failsafe_private.h |   8 +++
 drivers/net/failsafe/failsafe_rxtx.c| 117 ++--
 2 files changed, 105 insertions(+), 20 deletions(-)

diff --git a/drivers/net/failsafe/failsafe_private.h 
b/drivers/net/failsafe/failsafe_private.h
index c636199..faf0e71 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -148,11 +148,18 @@ int failsafe_plugin_alarm_cancel(struct rte_eth_dev *dev);
 
 /* RX / TX */
 
+void set_burst_fn(struct rte_eth_dev *dev);
+
 uint16_t failsafe_rx_burst(void *rxq,
struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
 uint16_t failsafe_tx_burst(void *txq,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
 
+uint16_t failsafe_rx_burst_fast(void *rxq,
+   struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+uint16_t failsafe_tx_burst_fast(void *txq,
+   struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+
 /* ARGS */
 
 int failsafe_args_parse(struct rte_eth_dev *dev, const char *params);
@@ -306,6 +313,7 @@ fs_switch_dev(struct rte_eth_dev *dev)
} else {
return;
}
+   set_burst_fn(dev);
 }
 
 #endif /* _RTE_ETH_FAILSAFE_PRIVATE_H_ */
diff --git a/drivers/net/failsafe/failsafe_rxtx.c 
b/drivers/net/failsafe/failsafe_rxtx.c
index a45b4e5..796a9ad 100644
--- a/drivers/net/failsafe/failsafe_rxtx.c
+++ b/drivers/net/failsafe/failsafe_rxtx.c
@@ -36,11 +36,53 @@
 
 #include "failsafe_private.h"
 
-/*
- * TODO: write fast version,
- * without additional checks, to be activated once
- * everything has been verified to comply.
- */
+static inline int
+sdev_rx_unsafe(struct sub_device *sdev)
+{
+   return (ETH(sdev) == NULL) ||
+   (ETH(sdev)->rx_pkt_burst == NULL) ||
+   (sdev->state != DEV_STARTED);
+}
+
+static inline int
+sdev_tx_unsafe(struct sub_device *sdev)
+{
+   return (sdev == NULL) ||
+   (ETH(sdev) == NULL) ||
+   (ETH(sdev)->tx_pkt_burst == NULL) ||
+   (sdev->state != DEV_STARTED);
+}
+
+void
+set_burst_fn(struct rte_eth_dev *dev)
+{
+   struct sub_device *sdev;
+   uint8_t i;
+   int need_safe;
+   int safe_set;
+
+   need_safe = 0;
+   FOREACH_SUBDEV(sdev, i, dev)
+   need_safe |= sdev_rx_unsafe(sdev);
+   safe_set = (dev->rx_pkt_burst == &failsafe_rx_burst);
+   if (need_safe && !safe_set) {
+   DEBUG("Using safe RX bursts");
+   dev->rx_pkt_burst = &failsafe_rx_burst;
+   } else if (!need_safe && safe_set) {
+   DEBUG("Using fast RX bursts");
+   dev->rx_pkt_burst = &failsafe_rx_burst_fast;
+   }
+   need_safe = sdev_tx_unsafe(TX_SUBDEV(dev));
+   safe_set = (dev->tx_pkt_burst == &failsafe_tx_burst);
+   if (need_safe && !safe_set) {
+   DEBUG("Using safe TX bursts");
+   dev->tx_pkt_burst = &failsafe_tx_burst;
+   } else if (!need_safe && safe_set) {
+   DEBUG("Using fast TX bursts");
+   dev->tx_pkt_burst = &failsafe_tx_burst_fast;
+   }
+}
+
 uint16_t
 failsafe_rx_burst(void *queue,
  struct rte_mbuf **rx_pkts,
@@ -63,11 +105,7 @@ failsafe_rx_burst(void *queue,
if (i == priv->subs_tail)
i = priv->subs_head;
sdev = &priv->subs[i];
-   if (unlikely(ETH(sdev) == NULL))
-   continue;
-   if (unlikely(ETH(sdev)->rx_pkt_burst == NULL))
-   continue;
-   if (unlikely(sdev->state != DEV_STARTED))
+   if (unlikely(sdev_rx_unsafe(sdev)))
continue;
sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
nb_rx = ETH(sdev)->
@@ -80,11 +118,39 @@ failsafe_rx_burst(void *queue,
return 0;
 }
 
-/*
- * TODO: write fast version,
- * without additional checks, to be activated once
- * everything has been verified to comply.
- */
+uint16_t
+failsafe_rx_burst_fast(void *queue,
+struct rte_mbuf **rx_pkts,
+uint16_t nb_pkts)
+{
+   struct fs_priv *priv;
+   struct sub_device *sdev;
+   struct rxq *rxq;
+   void *sub_rxq;
+   uint16_t nb_rx;
+   uint8_t nb_polled, nb_subs;
+   uint8_t i;
+
+   rxq = queue;
+   priv = rxq->priv;
+   nb_subs = priv->subs_tail - priv->subs_head;
+   nb_polled = 0;
+   for (i = rxq->last_polled; nb_polled < nb_subs; nb_polled++) {
+   i++;
+   if (i == priv->subs_tail)
+   i = priv->subs_head;
+   sdev = &priv->subs[i];
+   sub_rxq = ETH(sdev)->data->rx_queues[rxq->qid];
+   nb_rx = ETH(sdev)->
+   rx_pkt_burst(sub_rxq, rx_pkts, nb_pkts);
+   if (nb_rx) {
+   rxq->last_polled = i;
+  

[dpdk-dev] [PATCH v2 12/13] net/failsafe: support device removal

2017-03-08 Thread Gaetan Rivet
Signed-off-by: Gaetan Rivet 
Acked-by: Olga Shern 
---
 doc/guides/nics/fail_safe.rst   | 14 +++
 drivers/net/failsafe/failsafe_args.c| 22 +++
 drivers/net/failsafe/failsafe_eal.c |  2 +
 drivers/net/failsafe/failsafe_ether.c   | 67 -
 drivers/net/failsafe/failsafe_ops.c | 21 +++
 drivers/net/failsafe/failsafe_private.h |  7 
 6 files changed, 132 insertions(+), 1 deletion(-)

diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst
index bb8a221..8811ed3 100644
--- a/doc/guides/nics/fail_safe.rst
+++ b/doc/guides/nics/fail_safe.rst
@@ -51,6 +51,12 @@ The Fail-safe PMD only supports a limited set of features. 
If you plan to use a
 device underneath the Fail-safe PMD with a specific feature, this feature must
 be supported by the Fail-safe PMD to avoid throwing any error.
 
+A notable exception is the device removal feature. The fail-safe PMD being a
+virtual device, it cannot currently be removed in the sense of a specific bus
+hotplug, as with PCI for example. It will however enable this feature for its
+sub-devices automatically, detecting those that are capable and registering
+the relevant callback for such events.
+
 Check the feature matrix for the complete set of supported features.
 
 Compilation options
@@ -166,3 +172,11 @@ emit and receive packets. It will store any applied 
configuration, and try to
 apply it upon the probing of its missing sub-device. After this configuration
 pass, the new sub-device will be synchronized with other sub-devices, i.e. be
 started if the fail-safe PMD has been started by the user before.
+
+Plug-out feature
+----------------
+
+A sub-device supporting the device removal event can be removed from its bus at
+any time. The fail-safe PMD will register a callback for such an event and
+react accordingly. It will try to safely stop, close and uninit the sub-device
+that emitted this event, allowing it to free any resources it holds.
diff --git a/drivers/net/failsafe/failsafe_args.c 
b/drivers/net/failsafe/failsafe_args.c
index 839831f..62033c4 100644
--- a/drivers/net/failsafe/failsafe_args.c
+++ b/drivers/net/failsafe/failsafe_args.c
@@ -462,6 +462,26 @@ failsafe_args_count_subdevice(struct rte_eth_dev *dev,
dev, params);
 }
 
+static int
+parse_sub_device(struct sub_device *sdev)
+{
+   struct rte_devargs *da;
+   char params[DEVARGS_MAXLEN] = "";
+
+   da = &sdev->devargs;
+   if (da->type == RTE_DEVTYPE_VIRTUAL)
+   snprintf(params, sizeof(params) - 1,
+   "%s,%s", da->virt.drv_name, da->args);
+   else
+   snprintf(params, sizeof(params) - 1,
+   PCI_PRI_FMT ",%s",
+   da->pci.addr.domain, da->pci.addr.bus,
+   da->pci.addr.devid, da->pci.addr.function,
+   da->args);
+
+   return parse_device(sdev, params);
+}
+
 int
 failsafe_args_parse_subs(struct rte_eth_dev *dev)
 {
@@ -474,6 +494,8 @@ failsafe_args_parse_subs(struct rte_eth_dev *dev)
continue;
if (sdev->cmdline)
ret = execute_cmd(sdev, sdev->cmdline);
+   else
+   ret = parse_sub_device(sdev);
if (ret == 0)
sdev->state = DEV_PARSED;
}
diff --git a/drivers/net/failsafe/failsafe_eal.c 
b/drivers/net/failsafe/failsafe_eal.c
index 9817fc9..8bb8d45 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -140,6 +140,7 @@ dev_init(struct rte_eth_dev *dev)
}
ETH(sdev)->state = RTE_ETH_DEV_DEFERRED;
SUB_ID(sdev) = i;
+   sdev->fs_dev = dev;
sdev->state = DEV_PROBED;
}
}
@@ -191,6 +192,7 @@ pci_probe(struct rte_eth_dev *dev)
}
ETH(sdev)->state = RTE_ETH_DEV_DEFERRED;
SUB_ID(sdev) = i;
+   sdev->fs_dev = dev;
sdev->state = DEV_PROBED;
}
}
diff --git a/drivers/net/failsafe/failsafe_ether.c 
b/drivers/net/failsafe/failsafe_ether.c
index 8c73b4c..f12b8d7 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -33,6 +33,7 @@
 
 #include 
 
+#include 
 #include 
 #include 
 
@@ -256,6 +257,43 @@ eth_dev_conf_apply(struct rte_eth_dev *dev,
return 0;
 }
 
+static void
+fs_dev_remove(void *arg)
+{
+   struct sub_device *sdev = arg;
+   struct rte_devargs *da;
+   struct rte_pci_device *pdev;
+
+   switch (sdev->state) {
+   case DEV_STARTED:
+   rte_eth_dev_stop(PORT_ID(sdev));
+   sdev->state = DEV_ACTIVE;
+   /* fallthrough */
+   case DEV_ACTIVE:
+   rte_eth_dev_close(PORT_ID(sdev));
+

[dpdk-dev] [PATCH v2 13/13] net/failsafe: support link status change event

2017-03-08 Thread Gaetan Rivet
Signed-off-by: Gaetan Rivet 
---
 doc/guides/nics/features/failsafe.ini   |  1 +
 drivers/net/failsafe/failsafe.c |  1 +
 drivers/net/failsafe/failsafe_ether.c   | 18 ++
 drivers/net/failsafe/failsafe_ops.c | 24 
 drivers/net/failsafe/failsafe_private.h |  3 +++
 5 files changed, 47 insertions(+)

diff --git a/doc/guides/nics/features/failsafe.ini 
b/doc/guides/nics/features/failsafe.ini
index 257f579..251ce55 100644
--- a/doc/guides/nics/features/failsafe.ini
+++ b/doc/guides/nics/features/failsafe.ini
@@ -5,6 +5,7 @@
 ;
 [Features]
 Link status  = Y
+Link status event= Y
 Queue start/stop = Y
 MTU update   = Y
 Jumbo frame  = Y
diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 6151736..f885c19 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -238,6 +238,7 @@ eth_dev_create(const char *name,
mac->addr_bytes[0], mac->addr_bytes[1],
mac->addr_bytes[2], mac->addr_bytes[3],
mac->addr_bytes[4], mac->addr_bytes[5]);
+   dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
return 0;
 free_args:
failsafe_args_free(dev);
diff --git a/drivers/net/failsafe/failsafe_ether.c 
b/drivers/net/failsafe/failsafe_ether.c
index f12b8d7..5c2e118 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -374,3 +374,21 @@ failsafe_eth_rmv_event_callback(uint8_t port_id 
__rte_unused,
  cb_arg))
ERROR("Could not set up deferred sub_device removal");
 }
+
+void
+failsafe_eth_lsc_event_callback(uint8_t port_id __rte_unused,
+   enum rte_eth_event_type event,
+   void *cb_arg)
+{
+   struct rte_eth_dev *dev = cb_arg;
+   int ret;
+
+   if (event != RTE_ETH_EVENT_INTR_LSC) {
+   ERROR("Incorrect event");
+   return;
+   }
+   ret = dev->dev_ops->link_update(dev, 0);
+   /* We must pass on the LSC event */
+   if (ret)
+   _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, 
NULL);
+}
diff --git a/drivers/net/failsafe/failsafe_ops.c 
b/drivers/net/failsafe/failsafe_ops.c
index 8d0e7a2..695e7b3 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -199,6 +199,8 @@ fs_dev_configure(struct rte_eth_dev *dev)
}
FOREACH_SUBDEV(sdev, i, dev) {
int rmv_interrupt = 0;
+   int lsc_interrupt = 0;
+   int lsc_enabled;
 
if (sdev->state != DEV_PROBED)
continue;
@@ -211,6 +213,18 @@ fs_dev_configure(struct rte_eth_dev *dev)
} else {
DEBUG("sub_device %d does not support RMV event", i);
}
+
+   lsc_enabled = dev->data->dev_conf.intr_conf.lsc;
+   lsc_interrupt = lsc_enabled &&
+   (ETH(sdev)->data->dev_flags &
+RTE_ETH_DEV_INTR_LSC);
+   if (lsc_interrupt) {
+   DEBUG("Enabling LSC interrupts for sub_device %d", i);
+   dev->data->dev_conf.intr_conf.lsc = 1;
+   } else if (lsc_enabled && !lsc_interrupt) {
+   DEBUG("Disabling LSC interrupts for sub_device %d", i);
+   dev->data->dev_conf.intr_conf.lsc = 0;
+   }
DEBUG("Configuring sub-device %d", i);
ret = rte_eth_dev_configure(PORT_ID(sdev),
dev->data->nb_rx_queues,
@@ -230,6 +244,16 @@ fs_dev_configure(struct rte_eth_dev *dev)
 SUB_ID(sdev));
}
dev->data->dev_conf.intr_conf.rmv = 0;
+   if (lsc_interrupt) {
+   ret = rte_eth_dev_callback_register(PORT_ID(sdev),
+   RTE_ETH_EVENT_INTR_LSC,
+   
failsafe_eth_lsc_event_callback,
+   dev);
+   if (ret)
+   WARN("Failed to register LSC callback for 
sub_device %d",
+SUB_ID(sdev));
+   }
+   dev->data->dev_conf.intr_conf.lsc = lsc_enabled;
sdev->state = DEV_ACTIVE;
}
if (PRIV(dev)->state < DEV_ACTIVE)
diff --git a/drivers/net/failsafe/failsafe_private.h 
b/drivers/net/failsafe/failsafe_private.h
index 5efd084..27e2a0c 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -182,6 +182,9 @@ int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);
 void failsafe_eth_rmv_event_callback(uint8_t port_id,
 enum rt

[dpdk-dev] [PATCH v2 01/13] ethdev: save VLAN filter setting

2017-03-08 Thread Gaetan Rivet
Other configuration items (e.g. MAC addresses) are stored within
rte_eth_dev_data, but not this one.

Signed-off-by: Gaetan Rivet 
---
 lib/librte_ether/rte_ethdev.c | 19 ++-
 lib/librte_ether/rte_ethdev.h | 10 ++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 6c4b796..61a63b7 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -1764,6 +1764,7 @@ int
 rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on)
 {
struct rte_eth_dev *dev;
+   int ret;
 
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
dev = &rte_eth_devices[port_id];
@@ -1779,7 +1780,23 @@ rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t 
vlan_id, int on)
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_filter_set, -ENOTSUP);
 
-   return (*dev->dev_ops->vlan_filter_set)(dev, vlan_id, on);
+   ret = (*dev->dev_ops->vlan_filter_set)(dev, vlan_id, on);
+   if (ret == 0) {
+   struct rte_vlan_filter_conf *vfc;
+   int vidx;
+   int vbit;
+
+   vfc = &dev->data->vlan_filter_conf;
+   vidx = vlan_id / 64;
+   vbit = vlan_id % 64;
+
+   if (on)
+   vfc->ids[vidx] |= UINT64_C(1) << vbit;
+   else
+   vfc->ids[vidx] &= ~(UINT64_C(1) << vbit);
+   }
+
+   return ret;
 }
 
 int
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index ed99660..f01d140 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -375,6 +375,14 @@ enum rte_vlan_type {
 };
 
 /**
+ * A structure used to describe a vlan filter.
+ * If the bit corresponding to a VID is set, such VID is on.
+ */
+struct rte_vlan_filter_conf {
+   uint64_t ids[64];
+};
+
+/**
  * A structure used to configure the Receive Side Scaling (RSS) feature
  * of an Ethernet port.
  * If not NULL, the *rss_key* pointer of the *rss_conf* structure points
@@ -1716,6 +1724,8 @@ struct rte_eth_dev_data {
enum rte_kernel_driver kdrv;/**< Kernel driver passthrough */
int numa_node;  /**< NUMA node connection */
const char *drv_name;   /**< Driver name */
+   struct rte_vlan_filter_conf vlan_filter_conf;
+   /**< VLAN filter configuration. */
 };
 
 /** Device supports hotplug detach */
-- 
2.1.4



[dpdk-dev] [PATCH v2 00/13] introduce fail-safe PMD

2017-03-08 Thread Gaetan Rivet
This PMD intercepts and manages Ethernet device removal events issued by
slave PMDs and re-initializes them transparently when brought back so that
existing applications do not need to be modified to benefit from true
hot-plugging support.

The stacked PMD approach shares many similarities with the bonding PMD but
with a different purpose. While bonding provides the ability to group
several links into a single logical device for enhanced throughput and
supports fail-over at link level, this one manages the sudden disappearance
of the underlying device; it guarantees applications face a valid device in
working order at all times.

Each fail-safe instance is configured to run atop one or several
devices, with one defined as the preferred device. Hot-plug events are
handled on all of them, and Tx is always directed to the preferred device
if present or to the next available failover device (Rx is always performed
on all devices for simplicity).

Moreover, the configured slaves (preferred or failover) do not need to be
present at initialization time and may appear later.

Slave configuration is continuously synchronized with that of the virtual
device, which exposes their common set of capabilities to the application.
Failure to apply the current configuration state to a slave for any reason
simply reschedules its initialization.

This series depends on the series
[PATCH 0/4] clarify eth_dev state management
[PATCH 0/5] add device removal event

v1 --> v2:

  - Wrote documentation
  - Fixed commit logs, signed-off-by
  - Added LSC event support
  - A few minor fixes

Gaetan Rivet (13):
  ethdev: save VLAN filter setting
  ethdev: add flow API rule copy function
  ethdev: add deferred intermediate device state
  pci: expose device detach routine
  pci: expose parse and probe routines
  net/failsafe: add fail-safe PMD
  net/failsafe: add plug-in support
  net/failsafe: add flexible device definition
  net/failsafe: support flow API
  net/failsafe: support offload capabilities
  net/failsafe: add fast burst functions
  net/failsafe: support device removal
  net/failsafe: support link status change event

 MAINTAINERS |   5 +
 app/test-pmd/config.c   | 263 ++-
 app/test-pmd/testpmd.h  |   5 +-
 config/common_base  |   5 +
 doc/guides/nics/fail_safe.rst   | 182 +
 doc/guides/nics/features/failsafe.ini   |  32 +
 doc/guides/nics/index.rst   |   1 +
 drivers/net/Makefile|   1 +
 drivers/net/failsafe/Makefile   |  74 ++
 drivers/net/failsafe/failsafe.c | 299 
 drivers/net/failsafe/failsafe_args.c| 503 ++
 drivers/net/failsafe/failsafe_eal.c | 300 
 drivers/net/failsafe/failsafe_ether.c   | 394 +++
 drivers/net/failsafe/failsafe_flow.c| 230 +++
 drivers/net/failsafe/failsafe_ops.c | 867 
 drivers/net/failsafe/failsafe_private.h | 329 +
 drivers/net/failsafe/failsafe_rxtx.c| 184 +
 lib/librte_eal/bsdapp/eal/rte_eal_version.map   |   6 +
 lib/librte_eal/common/eal_common_pci.c  |  16 +-
 lib/librte_eal/common/include/rte_pci.h |  40 ++
 lib/librte_eal/linuxapp/eal/eal_pci.c   |  39 +-
 lib/librte_eal/linuxapp/eal/rte_eal_version.map |   7 +
 lib/librte_ether/rte_ethdev.c   |  22 +-
 lib/librte_ether/rte_ethdev.h   |  11 +
 lib/librte_ether/rte_flow.c | 283 
 lib/librte_ether/rte_flow.h |  59 ++
 mk/rte.app.mk   |   1 +
 27 files changed, 3910 insertions(+), 248 deletions(-)
 create mode 100644 doc/guides/nics/fail_safe.rst
 create mode 100644 doc/guides/nics/features/failsafe.ini
 create mode 100644 drivers/net/failsafe/Makefile
 create mode 100644 drivers/net/failsafe/failsafe.c
 create mode 100644 drivers/net/failsafe/failsafe_args.c
 create mode 100644 drivers/net/failsafe/failsafe_eal.c
 create mode 100644 drivers/net/failsafe/failsafe_ether.c
 create mode 100644 drivers/net/failsafe/failsafe_flow.c
 create mode 100644 drivers/net/failsafe/failsafe_ops.c
 create mode 100644 drivers/net/failsafe/failsafe_private.h
 create mode 100644 drivers/net/failsafe/failsafe_rxtx.c

-- 
2.1.4



[dpdk-dev] [PATCH v2 09/13] net/failsafe: support flow API

2017-03-08 Thread Gaetan Rivet
Signed-off-by: Gaetan Rivet 
Acked-by: Olga Shern 
---
 doc/guides/nics/features/failsafe.ini   |   1 +
 drivers/net/failsafe/Makefile   |   1 +
 drivers/net/failsafe/failsafe.c |   1 +
 drivers/net/failsafe/failsafe_eal.c |   2 +
 drivers/net/failsafe/failsafe_ether.c   |  76 +++
 drivers/net/failsafe/failsafe_flow.c| 230 
 drivers/net/failsafe/failsafe_ops.c |  29 
 drivers/net/failsafe/failsafe_private.h |  20 +++
 8 files changed, 360 insertions(+)
 create mode 100644 drivers/net/failsafe/failsafe_flow.c

diff --git a/doc/guides/nics/features/failsafe.ini 
b/doc/guides/nics/features/failsafe.ini
index 3c52823..9167b59 100644
--- a/doc/guides/nics/features/failsafe.ini
+++ b/doc/guides/nics/features/failsafe.ini
@@ -13,6 +13,7 @@ Allmulticast mode= Y
 Unicast MAC filter   = Y
 Multicast MAC filter = Y
 VLAN filter  = Y
+Flow API = Y
 Packet type parsing  = Y
 Basic stats  = Y
 Stats per queue  = Y
diff --git a/drivers/net/failsafe/Makefile b/drivers/net/failsafe/Makefile
index 4567961..a53bb75 100644
--- a/drivers/net/failsafe/Makefile
+++ b/drivers/net/failsafe/Makefile
@@ -41,6 +41,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_eal.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ops.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_rxtx.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ether.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_flow.c
 
 # No exported include files
 
diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 2063393..6151736 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -180,6 +180,7 @@ eth_dev_create(const char *name,
dev->data->mac_addrs = &PRIV(dev)->mac_addrs[0];
dev->data->dev_link = eth_link;
PRIV(dev)->nb_mac_addr = 1;
+   TAILQ_INIT(&PRIV(dev)->flow_list);
dev->rx_pkt_burst = (eth_rx_burst_t)&failsafe_rx_burst;
dev->tx_pkt_burst = (eth_tx_burst_t)&failsafe_tx_burst;
if (params == NULL) {
diff --git a/drivers/net/failsafe/failsafe_eal.c 
b/drivers/net/failsafe/failsafe_eal.c
index a5e8c3c..9817fc9 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -139,6 +139,7 @@ dev_init(struct rte_eth_dev *dev)
continue;
}
ETH(sdev)->state = RTE_ETH_DEV_DEFERRED;
+   SUB_ID(sdev) = i;
sdev->state = DEV_PROBED;
}
}
@@ -189,6 +190,7 @@ pci_probe(struct rte_eth_dev *dev)
continue;
}
ETH(sdev)->state = RTE_ETH_DEV_DEFERRED;
+   SUB_ID(sdev) = i;
sdev->state = DEV_PROBED;
}
}
diff --git a/drivers/net/failsafe/failsafe_ether.c 
b/drivers/net/failsafe/failsafe_ether.c
index a6ccf8f..8c73b4c 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -33,8 +33,46 @@
 
 #include 
 
+#include 
+#include 
+
 #include "failsafe_private.h"
 
+/** Print a message out of a flow error. */
+static int
+fs_flow_complain(struct rte_flow_error *error)
+{
+   static const char *const errstrlist[] = {
+   [RTE_FLOW_ERROR_TYPE_NONE] = "no error",
+   [RTE_FLOW_ERROR_TYPE_UNSPECIFIED] = "cause unspecified",
+   [RTE_FLOW_ERROR_TYPE_HANDLE] = "flow rule (handle)",
+   [RTE_FLOW_ERROR_TYPE_ATTR_GROUP] = "group field",
+   [RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY] = "priority field",
+   [RTE_FLOW_ERROR_TYPE_ATTR_INGRESS] = "ingress field",
+   [RTE_FLOW_ERROR_TYPE_ATTR_EGRESS] = "egress field",
+   [RTE_FLOW_ERROR_TYPE_ATTR] = "attributes structure",
+   [RTE_FLOW_ERROR_TYPE_ITEM_NUM] = "pattern length",
+   [RTE_FLOW_ERROR_TYPE_ITEM] = "specific pattern item",
+   [RTE_FLOW_ERROR_TYPE_ACTION_NUM] = "number of actions",
+   [RTE_FLOW_ERROR_TYPE_ACTION] = "specific action",
+   };
+   const char *errstr;
+   char buf[32];
+   int err = rte_errno;
+
+   if ((unsigned int)error->type >= RTE_DIM(errstrlist) ||
+   !errstrlist[error->type])
+   errstr = "unknown type";
+   else
+   errstr = errstrlist[error->type];
+   ERROR("Caught error type %d (%s): %s%s\n",
+   error->type, errstr,
+   error->cause ? (snprintf(buf, sizeof(buf), "cause: %p, ",
+   error->cause), buf) : "",
+   error->message ? error->message : "(no stated reason)");
+   return -err;
+}
+
 static int
 eth_dev_conf_apply(struct rte_eth_dev *dev,
struct sub_device *sdev)
@@ -42,6 +80,8 @@ eth_dev_conf_apply(struct rte_eth_dev *dev,

[dpdk-dev] [PATCH v2 03/13] ethdev: add deferred intermediate device state

2017-03-08 Thread Gaetan Rivet
This device state means that the device is managed externally, by
whichever party has set this state (PMD or application).

Note: this new device state is purely informational. The related device
structure and operations are still valid and can be used normally.

It is however made private by device management helpers within ethdev,
making the device invisible to applications.

Signed-off-by: Gaetan Rivet 
---
 lib/librte_ether/rte_ethdev.c | 3 ++-
 lib/librte_ether/rte_ethdev.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 61a63b7..7824f87 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -382,7 +382,8 @@ int
 rte_eth_dev_is_valid_port(uint8_t port_id)
 {
if (port_id >= RTE_MAX_ETHPORTS ||
-   rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED)
+   (rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED &&
+rte_eth_devices[port_id].state != RTE_ETH_DEV_DEFERRED))
return 0;
else
return 1;
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index f01d140..ae1e9e6 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1629,6 +1629,7 @@ struct rte_eth_rxtx_callback {
 enum rte_eth_dev_state {
RTE_ETH_DEV_UNUSED = 0,
RTE_ETH_DEV_ATTACHED,
+   RTE_ETH_DEV_DEFERRED,
 };
 
 /**
-- 
2.1.4



[dpdk-dev] [PATCH v2 07/13] net/failsafe: add plug-in support

2017-03-08 Thread Gaetan Rivet
Periodically check for the existence of a device.
If a device has not been initialized and exists on the system, then it
is probed and configured.

The configuration process strives to synchronize the states between the
plugged-in sub-device and the fail-safe device.

Signed-off-by: Gaetan Rivet 
Acked-by: Olga Shern 
---
 doc/guides/nics/fail_safe.rst   |  19 +++
 drivers/net/failsafe/Makefile   |   1 +
 drivers/net/failsafe/failsafe.c |  68 ++
 drivers/net/failsafe/failsafe_args.c|  32 +
 drivers/net/failsafe/failsafe_eal.c |  30 +
 drivers/net/failsafe/failsafe_ether.c   | 228 
 drivers/net/failsafe/failsafe_ops.c |  25 ++--
 drivers/net/failsafe/failsafe_private.h |  57 +++-
 8 files changed, 418 insertions(+), 42 deletions(-)
 create mode 100644 drivers/net/failsafe/failsafe_ether.c

diff --git a/doc/guides/nics/fail_safe.rst b/doc/guides/nics/fail_safe.rst
index 056f85f..74bd807 100644
--- a/doc/guides/nics/fail_safe.rst
+++ b/doc/guides/nics/fail_safe.rst
@@ -102,6 +102,11 @@ Fail-safe command line parameters
   address is generated, that will be subsequently applied to all sub-device 
once
   they are probed.
 
+- **plug_in_poll** parameter [UINT64] (default **2000**)
+
+  This parameter allows the user to configure the amount of time in 
milliseconds
+  between two sub-device probing attempts.
+
 Usage example
 ~~~~~~~~~~~~~
 
@@ -131,3 +136,17 @@ Care must be taken, however, to respect the **ether** API 
concerning device
 access, and in particular, using the ``RTE_ETH_FOREACH_DEV`` macro to iterate
 over ethernet devices, instead of directly accessing them or by writing one's
 own device iterator.
+
+Plug-in feature
+---------------
+
+A sub-device can be defined without existing on the system when the fail-safe
+PMD is initialized. Upon probing this device, the fail-safe PMD will detect its
+absence and postpone its use. It will then register for a periodic check on any
+missing sub-device.
+
+During this time, the fail-safe PMD can be used normally, configured and told 
to
+emit and receive packets. It will store any applied configuration, and try to
+apply it upon the probing of its missing sub-device. After this configuration
+pass, the new sub-device will be synchronized with other sub-devices, i.e. be
+started if the fail-safe PMD has been started by the user before.
diff --git a/drivers/net/failsafe/Makefile b/drivers/net/failsafe/Makefile
index 06199ad..4567961 100644
--- a/drivers/net/failsafe/Makefile
+++ b/drivers/net/failsafe/Makefile
@@ -40,6 +40,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_args.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_eal.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ops.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe_ether.c
 
 # No exported include files
 
diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index cd60193..2063393 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -79,6 +79,69 @@ sub_device_free(struct rte_eth_dev *dev)
rte_free(PRIV(dev)->subs);
 }
 
+static void failsafe_plugin_alarm(void *arg);
+
+int
+failsafe_plugin_alarm_install(struct rte_eth_dev *dev)
+{
+   struct sub_device *sdev;
+   uint8_t i;
+   int ret;
+
+   if (PRIV(dev)->pending_alarm)
+   return 0;
+   FOREACH_SUBDEV(sdev, i, dev)
+   if (sdev->state != PRIV(dev)->state)
+   break;
+   if (i != PRIV(dev)->subs_tail) {
+   ret = rte_eal_alarm_set(plug_in_poll * 1000,
+   failsafe_plugin_alarm,
+   dev);
+   if (ret) {
+   ERROR("Could not set up plug-in event detection");
+   return ret;
+   }
+   PRIV(dev)->pending_alarm = 1;
+   }
+   return 0;
+}
+
+int
+failsafe_plugin_alarm_cancel(struct rte_eth_dev *dev)
+{
+   int ret = 0;
+
+   if (PRIV(dev)->pending_alarm) {
+   rte_errno = 0;
+   rte_eal_alarm_cancel(failsafe_plugin_alarm, dev);
+   if (rte_errno) {
+   ERROR("rte_eal_alarm_cancel failed (errno: %s)",
+ strerror(rte_errno));
+   ret = -rte_errno;
+   } else {
+   PRIV(dev)->pending_alarm = 0;
+   }
+   }
+   return ret;
+}
+
+static void
+failsafe_plugin_alarm(void *arg)
+{
+   struct rte_eth_dev *dev = arg;
+   int ret;
+
+   if (!PRIV(dev)->pending_alarm)
+   return;
+   PRIV(dev)->pending_alarm = 0;
+   ret = failsafe_eth_dev_state_sync(dev);
+   if (ret)
+   ERROR("Unable to synchronize sub_device state");
+   ret = failsafe_plugin_alarm_install(dev);
+   if (ret)
+

Re: [dpdk-dev] Issues with ixgbe and rte_flow

2017-03-08 Thread Adrien Mazarguil
CC'ing us...@dpdk.org since this issue primarily affects rte_flow users, and
several PMD maintainers to get their opinion on the matter, see below.

On Wed, Mar 08, 2017 at 09:24:26AM +, Le Scouarnec Nicolas wrote:
> My response is inline below, with further comments on the code excerpt as well.
> 
> 
> From: Lu, Wenzhuo 
> Sent: Wednesday, March 8, 2017 4:16 AM
> To: Le Scouarnec Nicolas; dev@dpdk.org; Adrien Mazarguil 
> (adrien.mazarg...@6wind.com)
> Cc: Yigit, Ferruh
> Subject: RE: Issues with ixgbe and rte_flow
>     
> >> I have been using the new API rte_flow to program filtering on an X540 
> >> (ixgbe)
> >> NIC. My goal is to send packets from different VLANs to different queues
> >> (filtering which should be supported by flow director as far as I 
> >> understand). I
> >> enclosed the setup code at the bottom of this email.
> >> For reference, here is the setup code I use
> >>
> >>   vlan_spec.tci = vlan_be;
> >>   vlan_spec.tpid = 0;
> >>
> >>   vlan_mask.tci = rte_cpu_to_be_16(0x0fff);
> >>   vlan_mask.tpid =  0;
> 
> >In my opinion, this setting is not right. As we know, the VLAN tag is inserted
> >between the MAC source address and the EtherType.
> >So if we have a MAC+VLAN+IPv4 packet, the vlan_spec.tpid should be 0x8100, 
> >the eth_spec.type should be 0x0800.
> >+ Adrien, the author. He can correct me if I'm wrong.

That's right; however, the confusion is understandable, and perhaps the
documentation should be clearer. It currently states the following without
explaining the reason:

 /**
  * RTE_FLOW_ITEM_TYPE_VLAN
  *
  * Matches an 802.1Q/ad VLAN tag.
  *
  * This type normally follows either RTE_FLOW_ITEM_TYPE_ETH or
  * RTE_FLOW_ITEM_TYPE_VLAN.
  */

> Ok, I apologize, you're right. Being more used to the software-side than to
> the hardware-side, I misunderstood struct rte_flow_item_vlan and thought it
> was the "equivalent" of struct vlan_hdr, in which case the vlan_hdr contains
> the type of the encapsulated frame.
> 
> (  /**
>  * Ethernet VLAN Header.
>  * Contains the 16-bit VLAN Tag Control Identifier and the Ethernet type
>  * of the encapsulated frame.
>  */
> struct vlan_hdr {
>   uint16_t vlan_tci; /**< Priority (3) + CFI (1) + Identifier Code (12) */
>   uint16_t eth_proto;/**< Ethernet type of encapsulated frame. */
> } __attribute__((__packed__));)

Indeed, struct vlan_hdr and struct rte_flow_item_vlan are not mapped at the
same offset; the former includes EtherType of the inner packet (eth_proto),
while the latter describes the inserted VLAN header itself starting with
TPID.

This approach was chosen for rte_flow for consistency with the fact each
pattern item describes exactly one protocol header, even though in the case
of VLAN and other layer 2.5 protocols, some happen to be embedded.
IPv4/IPv6 options will be provided as separate items in a similar fashion.

It also allows adding/removing VLAN tags to an existing rule without
modifying the EtherType of the inner frame.

Now assuming you're not the only one facing that issue, if the current
definition does not make sense, perhaps we can update the API before it's
too late. I'll attempt to summarize it with an example below.

In any case, matching nonspecific VLAN-tagged and QinQ UDPv4 packets in
testpmd is written as:

 flow create 0 pattern eth / vlan / ipv4 / udp / end actions queue 1 / end
 flow create 0 pattern eth / vlan / vlan / ipv4 / udp / end actions queue 1 / 
end

However, with the current API described above, specifying inner/outer
EtherTypes for the above packets yields (as a reminder, 0x8100 stands for
VLAN, 0x0800 for IPv4 and 0x88A8 for QinQ):

#1

 flow create 0 pattern eth type is 0x0800 / vlan tpid is 0x8100 / ipv4 / udp / end actions queue 1 / end
 flow create 0 pattern eth type is 0x0800 / vlan tpid is 0x88A8 / vlan tpid is 0x8100 / ipv4 / udp / end actions queue 1 / end

Instead of the arguably more accurate (renaming "tpid" to "inner_type" for
clarity):

#2

 flow create 0 pattern eth type is 0x8100 / vlan inner_type is 0x0800 / ipv4 / udp / end actions queue 1 / end
 flow create 0 pattern eth type is 0x88A8 / vlan inner_type is 0x8100 / vlan inner_type is 0x0800 / ipv4 / udp / end actions queue 1 / end

So, should the VLAN item be updated to behave as described in #2?

Note: doing so will cause a serious API/ABI breakage, I know it was not
supposed to happen according to the rte_flow sales pitch, but hey.

-- 
Adrien Mazarguil
6WIND


Re: [dpdk-dev] [PATCHv8 19/46] pool/dpaa2: add DPAA2 hardware offloaded mempool

2017-03-08 Thread Thomas Monjalon
2017-03-08 18:22, Hemant Agrawal:
> > On Fri, 3 Mar 2017 18:16:36 +0530, Hemant Agrawal
> >  wrote:
> > I think the current mempool handlers should be moved first in a
> > separate patch.

Yes it should have been done earlier.

> Are you seeing any benefit by making it a separate patch series?

A separate patchset for moving mempool handlers will be easy to review
and accept.
If integrated in this series, it is kind of hidden, which prevents the
visibility and review it deserves.
By the way the mempool move should be directly committed in the main
repository, while this series targets next-net.

> it will be difficult and tricky for us. The dpaa2_pool has a dependency 
> on mc bus patches. dpaa2_pmd has dependency on dpaa2_pool and mc buses.

You will just have to rebase this series on top of the new one.


Re: [dpdk-dev] [PATCH 3/4] net/i40e: support tunnel filter to VF

2017-03-08 Thread Ferruh Yigit
On 3/3/2017 9:31 AM, Beilei Xing wrote:
> This patch is to support tunnel filter to VF.
> 
> Signed-off-by: Bernard Iremonger 
> Signed-off-by: Yong Liu 
> Signed-off-by: Beilei Xing 
> ---

<...>

> +/**
> + * Tunneling Packet filter configuration.
> + */
> +struct i40e_tunnel_filter_conf {

There is already "rte_eth_tunnel_filter_conf"; why is the driver creating
its own version of the structure instead of using / updating the public one?

> + struct ether_addr outer_mac;/**< Outer MAC address to match. */
> + struct ether_addr inner_mac;/**< Inner MAC address to match. */
> + uint16_t inner_vlan;/**< Inner VLAN to match. */
> + uint32_t outer_vlan;/**< Outer VLAN to match */
> + enum rte_tunnel_iptype ip_type; /**< IP address type. */
> + /**
> +  * Outer destination IP address to match if ETH_TUNNEL_FILTER_OIP
> +  * is set in filter_type, or inner destination IP address to match
> +  * if ETH_TUNNEL_FILTER_IIP is set in filter_type.
> +  */
> + union {
> + uint32_t ipv4_addr; /**< IPv4 address in big endian. */
> + uint32_t ipv6_addr[4];  /**< IPv6 address in big endian. */
> + } ip_addr;
> + /** Flags from ETH_TUNNEL_FILTER_XX - see above. */
> + uint16_t filter_type;
> + enum rte_eth_tunnel_type tunnel_type; /**< Tunnel Type. */
> + uint32_t tenant_id; /**< Tenant ID to match. VNI, GRE key... */
> + uint16_t queue_id;  /**< Queue assigned to if match. */
> + uint8_t is_to_vf;   /**< 0 - to PF, 1 - to VF */
> + uint16_t vf_id; /**< VF id for tunnel filter insertion. */
> +};
> +

<...>


Re: [dpdk-dev] [PATCH 0/4] support replace filter function

2017-03-08 Thread Ferruh Yigit
On 3/3/2017 9:31 AM, Beilei Xing wrote:
> This patchset adds replace filter function according
> to DCR288 and supports cloud filter to VF.

Is "cloud filter" a common term, or is it specific to i40e?

Can you please provide a short description of "cloud filter" here and
provide links to more detailed information? Please feel free to update
the i40e documentation if required.

> This patchset serves for QinQ and MPLSoUDP/MPLSoGRE.
> 
> Beilei Xing (4):
>   net/i40e: support replace filter type
>   net/i40e: rework tunnel filter functions
>   net/i40e: support tunnel filter to VF
>   net/i40e: refine consistent tunnel filter
> 
>  drivers/net/i40e/i40e_ethdev.c | 222 +++
>  drivers/net/i40e/i40e_ethdev.h | 153 +
>  drivers/net/i40e/i40e_flow.c   | 292 
> -
>  3 files changed, 574 insertions(+), 93 deletions(-)
> 



Re: [dpdk-dev] [PATCH 1/4] net/i40e: support replace filter type

2017-03-08 Thread Ferruh Yigit
On 3/3/2017 9:31 AM, Beilei Xing wrote:
> Add new admin queue function and extended fields
> in DCR 288:
>  - Add admin queue function for Replace filter
>command (Opcode: 0x025F)
>  - Add General fields for Add/Remove Cloud filters
>command
> 
> This patch will be removed to base driver in future.
> 
> Signed-off-by: Bernard Iremonger 
> Signed-off-by: Stroe Laura 
> Signed-off-by: Jingjing Wu 
> Signed-off-by: Beilei Xing 
> ---
>  drivers/net/i40e/i40e_ethdev.h | 106 
>  drivers/net/i40e/i40e_flow.c   | 152 
> +
>  2 files changed, 258 insertions(+)
> 
> diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
> index f545850..3a49865 100644
> --- a/drivers/net/i40e/i40e_ethdev.h
> +++ b/drivers/net/i40e/i40e_ethdev.h
> @@ -729,6 +729,100 @@ struct i40e_valid_pattern {
>   parse_filter_t parse_filter;
>  };
>  
> +/* Support replace filter */
> +
> +/* i40e_aqc_add_remove_cloud_filters_element_big_data is used when
> + * I40E_AQC_ADD_REM_CLOUD_CMD_BIG_BUFFER flag is set. refer to
> + * DCR288

Please do not refer to DCR, unless you can provide a public link for it.

> + */
> +struct i40e_aqc_add_remove_cloud_filters_element_big_data {
> + struct i40e_aqc_add_remove_cloud_filters_element_data element;

What is the difference between
"i40e_aqc_add_remove_cloud_filters_element_big_data" and
"i40e_aqc_add_remove_cloud_filters_element_data", and why is the big_data one needed?

> + uint16_t general_fields[32];

Not very useful variable name.

<...>

> +/* Replace filter Command 0x025F
> + * uses the i40e_aqc_replace_cloud_filters,
> + * and the generic indirect completion structure
> + */
> +struct i40e_filter_data {
> + uint8_t filter_type;
> + uint8_t input[3];
> +};
> +
> +struct i40e_aqc_replace_cloud_filters_cmd {

Does replace do anything different from removing the old cloud filter and
adding a new one?

<...>

> +enum i40e_status_code i40e_aq_add_cloud_filters_big_buffer(struct i40e_hw 
> *hw,
> +uint16_t seid,
> +struct i40e_aqc_add_remove_cloud_filters_element_big_data *filters,
> +uint8_t filter_count);
> +enum i40e_status_code i40e_aq_remove_cloud_filters_big_buffer(
> + struct i40e_hw *hw, uint16_t seid,
> + struct i40e_aqc_add_remove_cloud_filters_element_big_data *filters,
> + uint8_t filter_count);
> +enum i40e_status_code i40e_aq_replace_cloud_filters(struct i40e_hw *hw,
> + struct i40e_aqc_replace_cloud_filters_cmd *filters,
> + struct i40e_aqc_replace_cloud_filters_cmd_buf *cmd_buf);
> +

Do you need these function declarations?

>  #define I40E_DEV_TO_PCI(eth_dev) \
>   RTE_DEV_TO_PCI((eth_dev)->device)
>  
> diff --git a/drivers/net/i40e/i40e_flow.c b/drivers/net/i40e/i40e_flow.c
> index f163ce5..3c49228 100644
> --- a/drivers/net/i40e/i40e_flow.c
> +++ b/drivers/net/i40e/i40e_flow.c
> @@ -1874,3 +1874,155 @@ i40e_flow_flush_tunnel_filter(struct i40e_pf *pf)
>  
>   return ret;
>  }
> +
> +#define i40e_aqc_opc_replace_cloud_filters 0x025F
> +#define I40E_AQC_ADD_REM_CLOUD_CMD_BIG_BUFFER 1
> +/**
> + * i40e_aq_add_cloud_filters_big_buffer
> + * @hw: pointer to the hardware structure
> + * @seid: VSI seid to add cloud filters from
> + * @filters: Buffer which contains the filters in big buffer to be added
> + * @filter_count: number of filters contained in the buffer
> + *
> + * Set the cloud filters for a given VSI.  The contents of the
> + * i40e_aqc_add_remove_cloud_filters_element_big_data are filled
> + * in by the caller of the function.
> + *
> + **/
> +enum i40e_status_code i40e_aq_add_cloud_filters_big_buffer(

There are already non-big_buffer versions of these functions, like
"i40e_aq_add_cloud_filters()"; why is the big_buffer version required, and
what does it do differently?

And is there a reason that these functions are not static? (For this
patch they are not used at all and will cause build error, but my
question is after they started to be used)

<...>


Re: [dpdk-dev] [PATCH 4/4] net/i40e: refine consistent tunnel filter

2017-03-08 Thread Ferruh Yigit
On 3/3/2017 9:31 AM, Beilei Xing wrote:
> Add i40e_tunnel_type enumeration type to refine consistent
> tunnel filter, it will be esay to add new tunnel type for

s/esay/easy

> i40e.
> 
> Signed-off-by: Beilei Xing 

<...>

>  /**
> + * Tunnel type.
> + */
> +enum i40e_tunnel_type {
> + I40E_TUNNEL_TYPE_NONE = 0,
> + I40E_TUNNEL_TYPE_VXLAN,
> + I40E_TUNNEL_TYPE_GENEVE,
> + I40E_TUNNEL_TYPE_TEREDO,
> + I40E_TUNNEL_TYPE_NVGRE,
> + I40E_TUNNEL_TYPE_IP_IN_GRE,
> + I40E_L2_TUNNEL_TYPE_E_TAG,
> + I40E_TUNNEL_TYPE_MAX,
> +};

Same question here: there is already "rte_eth_tunnel_type", so why is the
driver duplicating the enumeration?

<...>


Re: [dpdk-dev] [PATCH 2/4] net/i40e: rework tunnel filter functions

2017-03-08 Thread Ferruh Yigit
On 3/3/2017 9:31 AM, Beilei Xing wrote:
> Rework tunnel filter functions to align with the
> new command buffer for add/remove cloud filter.

> This patch also changes tunnel filter function
> name to VXLAN filter function, it will be easy to
> add other tunnel type parsing function.

Can you please do the renaming in a separate patch in this patchset? That
change should be trivial, but it looks more confusing when mixed with the
other changes.

> 
> Signed-off-by: Beilei Xing 
<...>



[dpdk-dev] ip_pipeline firewall customization

2017-03-08 Thread Shyam Shrivastav
Hi All

I am using the ip_pipeline firewall as the base for our project and need
comments/suggestions/corrections regarding the following:

1) We cannot configure a firewall rule to drop packets, as the port id is
mandatory in the command. I am planning to allow this for our requirement with
the following code changes:
  a) Allow "port" as optional parameter (pipeline_firewall.c), pass -1
as port id if "port" is not specified to app_pipeline_firewall_add_rule and
change that parameter to int32_t.
  b) Make required changes in pipeline_firewall_msg_req_add_handler if
portid is -1, that is table entry action to be .action =
RTE_PIPELINE_ACTION_DROP.

2) I am registering a f_action_hit function for firewall table to perform
certain translations if action is pass (RTE_PIPELINE_ACTION_PORT).
Configured a rule like following

pipeline>p 1 firewall add priority 1 ipv4 0.0.0.0 0 0.0.0.0 0 0 65535 0
65535 0 0 port 0

which should be hit only by ipv4 packets. However, even ARP packets hit
this ACL and my routine is called. If I configure a specific src or dst
IP then everything works fine and ARP packets do not hit it; for example,
the following rule is hit only by ipv4 icmp packets

pipeline>  p 1 firewall add priority 1 ipv4 0.0.0.0 0 45.35.70.12 32 0
65535 0 65535 1 0xf port 0

Is this a bug or am I missing something ?


Thanks
Shyam


Re: [dpdk-dev] [PATCH 3/3] net/i40e: enable cloud filter for MPLS

2017-03-08 Thread Ferruh Yigit
On 3/3/2017 9:43 AM, Beilei Xing wrote:
> This patch enables MPLSoUDP and MPLSoGRE
> cloud filter with replace cloud filter.
> 
> Signed-off-by: Beilei Xing 

<...>

> @@ -1286,6 +1286,9 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
>   if (ret < 0)
>   goto err_init_fdir_filter_list;
>  
> + i40e_replace_mpls_l1_filter(pf);
> + i40e_replace_mpls_cloud_filter(pf);

Hi Beilei,

Can you please help me to understand what has been replaced here, and why?

Thanks,
ferruh

<...>


Re: [dpdk-dev] [PATCH 1/2] net/sfc/base: add MCDI agnostic wrapper for MAC stats clear

2017-03-08 Thread Ferruh Yigit
On 3/3/2017 12:49 PM, Andrew Rybchenko wrote:
> From: Ivan Malov 
> 
> If a libefx-based driver needs some way to clear port statistics,
> then an MCDI agnostic method is required.
> 
> Signed-off-by: Ivan Malov 
> Signed-off-by: Andrew Rybchenko 

Series applied to dpdk-next-net/master, thanks.



Re: [dpdk-dev] [dpdk-stable] [PATCH] net/i40e: fix incorrect packet index reference

2017-03-08 Thread Ferruh Yigit
On 3/5/2017 5:17 PM, Jianbo Liu wrote:
> On 4 March 2017 at 13:00, Jerin Jacob  wrote:
>> Fixes: ae0eb310f253 ("net/i40e: implement vector PMD for ARM")
>>
>> CC: sta...@dpdk.org
>> Signed-off-by: Jerin Jacob 
>> Signed-off-by: Sunil Kulkarni 

> Acked-by: Jianbo Liu 

Applied to dpdk-next-net/master, thanks.



Re: [dpdk-dev] [PATCH v2 00/13] introduce fail-safe PMD

2017-03-08 Thread Neil Horman
On Wed, Mar 08, 2017 at 04:15:33PM +0100, Gaetan Rivet wrote:
> This PMD intercepts and manages Ethernet device removal events issued by
> slave PMDs and re-initializes them transparently when brought back so that
> existing applications do not need to be modified to benefit from true
> hot-plugging support.
> 
> The stacked PMD approach shares many similarities with the bonding PMD but
> with a different purpose. While bonding provides the ability to group
> several links into a single logical device for enhanced throughput and
> supports fail-over at link level, this one manages the sudden disappearance
> of the underlying device; it guarantees applications face a valid device in
> working order at all times.
> 
Why not just add this feature to the bonding pmd then?  A bond is perfectly
capable of handling the trivial case of a single underlying device, and adding
an option to make the underlying slave 'persistent' seems much simpler, both in
terms of implementation and code size, than adding an entire new pmd, along with
its supporting code.

Neil



[dpdk-dev] [PATCH v3] app/crypto-perf: fix avoid wrong optype for AEAD algorithms

2017-03-08 Thread Daniel Mrzyglod
When somebody uses a wrong --optype with AEAD algorithms,
a segmentation fault can happen.

Fixes: f8be1786b1b8 ("app/crypto-perf: introduce performance test application")

Signed-off-by: Daniel Mrzyglod 
---
Changes for v3:
  - fix headline of this patch
  - add CCM algorithm

Changes for v2:
  - fix checkpatch error related with whitespace
  - fix spelling error

How to reproduce:
 AESNI_GCM:
./build/app/dpdk-test-crypto-perf -c 0xc0 --vdev crypto_aesni_gcm_pmd
-w 0000:5e:00.0 -w 0000:3d:01.0 -- --ptest throughput
--devtype crypto_aesni_gcm --optype cipher-then-auth  --cipher-algo aes-gcm
--cipher-op encrypt --cipher-key-sz 16 --cipher-iv-sz 12 --auth-algo aes-gcm
--auth-op generate --auth-key-sz 16 --auth-aad-sz 4 --auth-digest-sz 8
--total-ops 1000 --burst-sz 32 --buffer-sz 1024

---
 app/test-crypto-perf/cperf_options_parsing.c | 11 +++
 app/test-crypto-perf/main.c  |  6 --
 doc/guides/tools/cryptoperf.rst  |  2 ++
 3 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/app/test-crypto-perf/cperf_options_parsing.c 
b/app/test-crypto-perf/cperf_options_parsing.c
index c1d5ffc..c87ba67 100644
--- a/app/test-crypto-perf/cperf_options_parsing.c
+++ b/app/test-crypto-perf/cperf_options_parsing.c
@@ -829,6 +829,17 @@ cperf_options_check(struct cperf_options *options)
}
}
 
+   if (options->cipher_algo == RTE_CRYPTO_CIPHER_AES_GCM ||
+   options->cipher_algo == RTE_CRYPTO_CIPHER_AES_CCM ||
+   options->auth_algo == RTE_CRYPTO_AUTH_AES_GCM ||
+   options->auth_algo == RTE_CRYPTO_AUTH_AES_CCM ||
+   options->auth_algo == RTE_CRYPTO_AUTH_AES_GMAC) {
+   if (options->op_type != CPERF_AEAD) {
+   RTE_LOG(ERR, USER1, "Use --optype aead\n");
+   return -EINVAL;
+   }
+   }
+
return 0;
 }
 
diff --git a/app/test-crypto-perf/main.c b/app/test-crypto-perf/main.c
index c1eaaff..fb3f72e 100644
--- a/app/test-crypto-perf/main.c
+++ b/app/test-crypto-perf/main.c
@@ -118,7 +118,8 @@ cperf_verify_devices_capabilities(struct cperf_options 
*opts,
 
if (opts->op_type == CPERF_AUTH_ONLY ||
opts->op_type == CPERF_CIPHER_THEN_AUTH ||
-   opts->op_type == CPERF_AUTH_THEN_CIPHER)  {
+   opts->op_type == CPERF_AUTH_THEN_CIPHER ||
+   opts->op_type == CPERF_AEAD)  {
 
cap_idx.type = RTE_CRYPTO_SYM_XFORM_AUTH;
cap_idx.algo.auth = opts->auth_algo;
@@ -139,7 +140,8 @@ cperf_verify_devices_capabilities(struct cperf_options 
*opts,
 
if (opts->op_type == CPERF_CIPHER_ONLY ||
opts->op_type == CPERF_CIPHER_THEN_AUTH ||
-   opts->op_type == CPERF_AUTH_THEN_CIPHER) {
+   opts->op_type == CPERF_AUTH_THEN_CIPHER ||
+   opts->op_type == CPERF_AEAD) {
 
cap_idx.type = RTE_CRYPTO_SYM_XFORM_CIPHER;
cap_idx.algo.cipher = opts->cipher_algo;
diff --git a/doc/guides/tools/cryptoperf.rst b/doc/guides/tools/cryptoperf.rst
index 1117ebf..478d256 100644
--- a/doc/guides/tools/cryptoperf.rst
+++ b/doc/guides/tools/cryptoperf.rst
@@ -181,6 +181,8 @@ The following are the appication command-line options:
auth-then-cipher
aead
 
+For GCM/CCM algorithms you should use aead flag.
+
 * ``--sessionless``
 
 Enable session-less crypto operations mode.
-- 
2.7.4



Re: [dpdk-dev] [PATCH 1/2] net/sfc/base: separate limitations on Tx DMA descriptors

2017-03-08 Thread Ferruh Yigit
On 3/6/2017 1:05 PM, Andrew Rybchenko wrote:
> Siena has limitation on maximum byte count and 4k boundary crosssing
> (which is stricter than maximum byte count).
> EF10 has limitation on maximum byte count only.
> 
> Fixes: f7dc06bf35f2 ("net/sfc/base: import 5xxx/6xxx family support")
> Fixes: e7cd430c864f ("net/sfc/base: import SFN7xxx family support")
> Fixes: 94190e3543bf ("net/sfc/base: import SFN8xxx family support")
> 
> Signed-off-by: Andrew Rybchenko 

Series applied to dpdk-next-net/master, thanks.



Re: [dpdk-dev] [PATCH v3] net/tap: fix dev name look-up

2017-03-08 Thread Ferruh Yigit
On 3/6/2017 4:14 PM, Wiles, Keith wrote:
> 
>> On Mar 6, 2017, at 10:03 AM, Yigit, Ferruh  wrote:
>>
>> On 3/6/2017 3:42 PM, Ferruh Yigit wrote:
>>> On 3/6/2017 3:16 PM, Wiles, Keith wrote:

> On Mar 6, 2017, at 9:13 AM, Pascal Mazon  wrote:
>
> Store the device name in dev->data->name, to have symmetrical behavior
> between rte_pmd_tap_probe(name) and rte_pmd_tap_remove(name).
>
> The netdevice name (linux interface name) is stored in the name field of
> struct pmd_internals.
>
> snprintf(data->name) has been moved closer to the rte_ethdev_allocate()
> as it should use the same name.
>
> Signed-off-by: Pascal Mazon 

> Acked-by: Keith Wiles 

Applied to dpdk-next-net/master, thanks.


Re: [dpdk-dev] [PATCH v5 09/12] doc: add NXP dpaa2 sec in cryptodev

2017-03-08 Thread Mcnamara, John
> -Original Message-
> From: Akhil Goyal [mailto:akhil.go...@nxp.com]
> Sent: Friday, March 3, 2017 7:50 PM
> To: dev@dpdk.org
> Cc: thomas.monja...@6wind.com; Doherty, Declan ;
> De Lara Guarch, Pablo ; Mcnamara, John
> ; nhor...@tuxdriver.com; hemant.agra...@nxp.com;
> Akhil Goyal 
> Subject: [PATCH v5 09/12] doc: add NXP dpaa2 sec in cryptodev

Hi,

thanks for the doc. Some minor comments below.


> +
> +NXP(R) DPAA2 CAAM Accelerator Based (DPAA2_SEC) Crypto Poll Mode Driver
> +===

This title is quite long and the "Crypto Poll Mode Driver" part is probably
unnecessary in the context of the doc. Maybe something like:

NXP DPAA2 CAAM Accelerator
==========================



> +
> +The DPAA2_SEC PMD provides poll mode crypto driver support for NXP
> +DPAA2 CAAM hardware accelerator.
> +
> +Architecture
> +
> +
> +SEC is the SOC's security engine, which serves as NXP's latest
> +cryptographic acceleration and offloading hardware. It combines
> +functions previously implemented in separate modules to create a
> +modular and scalable acceleration and assurance engine. It also
> +implements block encryption algorithms, stream cipher algorithms,
> +hashing algorithms, public key algorithms, run-time integrity checking,
> +and a hardware random number generator. SEC performs higher-level
> +cryptographic operations than previous NXP cryptographic accelerators.
> This provides significant improvement to system level performance.
> +
> +DPAA2_SEC is one of the hardware resource in DPAA2 Architecture. More
> +information on DPAA2 Architecture is described in
> +docs/guides/nics/dpaa2.rst


This needs to be an RST link to the dpaa2.rst doc, which means it will
also require a target in dpaa2.rst. See the following section of the
contributors guide:

http://dpdk.org/doc/guides/contributing/documentation.html#hyperlinks


> +
> +DPAA2_SEC PMD is one of DPAA2 drivers which interacts with Management
> +Complex (MC) portal to access the hardware object - DPSECI. The MC
> +provides access to create, discover, connect, configure and destroy
> dpseci object in DPAA2_SEC PMD.

s/object/objects/


> +
> +DPAA2_SEC PMD also uses some of the other hardware resources like
> +buffer pools, queues, queue portals to store and to enqueue/dequeue data
> to the hardware SEC.
> +
> +DPSECI objects are detected by PMD using a resource container called
> +DPRC(like in docs/guides/nics/dpaa2.rst).

This requires a space before the bracket and a real link, as above.


> +
> +For example:
> +
> +.. code-block:: console
> +
> +DPRC.1 (bus)
> +  |
> +  +--++---+---+---+-+
> + ||   |   |   |  |
> +   DPMCP.1  DPIO.1  DPBP.1  DPNI.1  DPMAC.1  DPSECI.1
> +   DPMCP.2  DPIO.2   DPNI.2  DPMAC.2  DPSECI.2
> +   DPMCP.3

There are tabs in this figure that break the alignment. Also in the
other figure.


> +Supported DPAA2 SoCs
> +
> +
> +- LS2080A/LS2040A
> +- LS2084A/LS2044A
> +- LS2088A/LS2048A
> +- LS1088A/LS1048A

Use * for bullet list, for consistency with the doc guidelines and the
rest of the doc. Here and elsewhere.


> +
> +Limitations
> +---
> +
> +* Chained mbufs are not supported.
> +* Hash followed by Cipher mode is not supported
> +* Only supports the session-oriented API implementation (session-less
> APIs are not supported).
> +
> +Prerequisites
> +-
> +
> +DPAA2_SEC driver has similar pre-requisites as listed in dpaa2
> pmd(docs/guides/nics/dpaa2.rst).

Same space and link comment as above.


> +The following dependencies are not part of DPDK and must be installed
> separately:
> +
> +- **NXP Linux SDK**
> +
> +  NXP Linux software development kit (SDK) includes support for family

s/family/the family/



> + of QorIQ® ARM-Architecture-based system on chip (SoC) processors  and
> + corresponding boards.
> +
> +  It includes the Linux board support packages (BSPs) for NXP SoCs,  a
> + fully operational tool chain, kernel and board specific modules.
> +
> +  SDK and related information can be obtained from:  `NXP QorIQ SDK
>  sdk/linux-sdk-for-qoriq-processors:SDKLINUX>`_.
> +
> +- **DPDK Helper Scripts**
> +
> +  DPAA2 based resources can be configured easily with the help of ready
> + scripts  as provided in the DPDK helper repository.
> +
> +  `DPDK Helper Scripts  helper>`_.
> +
> +Currently supported by DPDK:
> +
> +- NXP SDK **2.0+**.
> +- MC Firmware version **10.0.0** and higher.
> +- Supported architectures:  **arm64 LE**.
> +
> +- Follow the DPDK :ref:`Getting Started Guide for Linux ` to
> setup the basic DPDK environment.
> +
> +Pre-Installation Configuration
> +--
> +
> +Config File Options
> +~~~
> +
> +Basic DPAA2 config file options are descr

[dpdk-dev] [PATCH v5 0/3] examples/l3fwd: merge l3fwd-acl code into l3fwd

2017-03-08 Thread Ravi Kerur
This patchset merges l3fwd-acl and l3fwd code into common directory.
Adds file read options to build LPM and EM tables.

Ravi Kerur (3):
  examples/l3fwd: Merge l3fwd-acl code into l3fwd
  examples/l3fwd: add config file support for LPM
  examples/l3fwd: add config file support for EM

 examples/l3fwd-acl/Makefile   |   56 -
 examples/l3fwd-acl/main.c | 2079 -
 examples/l3fwd/Makefile   |2 +-
 examples/l3fwd/l3fwd.h|   77 ++
 examples/l3fwd/l3fwd_acl.c| 1033 ++
 examples/l3fwd/l3fwd_acl.h|  234 +
 examples/l3fwd/l3fwd_acl_scalar.h |  182 
 examples/l3fwd/l3fwd_em.c |  390 +--
 examples/l3fwd/l3fwd_lpm.c|  323 --
 examples/l3fwd/main.c |  250 +++--
 10 files changed, 2286 insertions(+), 2340 deletions(-)
 delete mode 100644 examples/l3fwd-acl/Makefile
 delete mode 100644 examples/l3fwd-acl/main.c
 create mode 100644 examples/l3fwd/l3fwd_acl.c
 create mode 100644 examples/l3fwd/l3fwd_acl.h
 create mode 100644 examples/l3fwd/l3fwd_acl_scalar.h

-- 
2.7.4



[dpdk-dev] [PATCH v5 2/3] examples/l3fwd: add config file support for LPM

2017-03-08 Thread Ravi Kerur
Add config file support for v4 and v6 to build forwarding
tables.

v5:
> Change is_bypass_line from inline to non-inline.

v4:
> No changes.

v3:
> Fix additional checkpatch coding style issues.

v2:
> Fix checkpatch warnings related to code
> MACRO GET_CB_FIELD checkpatch warning not fixed

v1:
> Remove static array configuration of Destination IP, MASK
and IF_OUT for LPM and LPM6 config.
> Add reading configuration from a file.
> Format of configuration file is as follows
#LPM route entries Dest-IP/Mask IF_OUT
L1.1.1.0/24 0
L2.1.1.0/24 1
L3.1.1.0/24 2
...

#LPM6 route entries Dest-IP/Mask IF_OUT
L1111:0000:0000:0000:0000:0000:0000:0000/48 0
L2111:0000:0000:0000:0000:0000:0000:0000/48 1
L3111:0000:0000:0000:0000:0000:0000:0000/48 2
...

Signed-off-by: Ravi Kerur 
---
 examples/l3fwd/l3fwd.h |  28 +
 examples/l3fwd/l3fwd_acl.c |  39 +-
 examples/l3fwd/l3fwd_acl.h |  29 -
 examples/l3fwd/l3fwd_lpm.c | 308 +
 examples/l3fwd/main.c  |  47 ++-
 5 files changed, 332 insertions(+), 119 deletions(-)

diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
index 93e08f6..35f7b72 100644
--- a/examples/l3fwd/l3fwd.h
+++ b/examples/l3fwd/l3fwd.h
@@ -94,6 +94,29 @@
 #define ACL_LEAD_CHAR  ('@')
 #define ROUTE_LEAD_CHAR('R')
 #define COMMENT_LEAD_CHAR  ('#')
+#define LPM_LEAD_CHAR  ('L')
+#define EM_LEAD_CHAR   ('E')
+
+#defineIPV6_ADDR_LEN   16
+#defineIPV6_ADDR_U16   (IPV6_ADDR_LEN / sizeof(uint16_t))
+#defineIPV6_ADDR_U32   (IPV6_ADDR_LEN / sizeof(uint32_t))
+
+#define GET_CB_FIELD(in, fd, base, lim, dlm)   do {\
+   unsigned long val;  \
+   char *end;  \
+   errno = 0;  \
+   val = strtoul((in), &end, (base));  \
+   if (errno != 0 || end[0] != (dlm) || val > (lim))   \
+   return -EINVAL;   \
+   (fd) = (typeof(fd))val; \
+   (in) = end + 1; \
+} while (0)
+
+struct parm_cfg {
+   const char *rule_ipv4_name;
+   const char *rule_ipv6_name;
+   int scalar;
+};
 
 struct mbuf_table {
uint16_t len;
@@ -134,6 +157,8 @@ extern xmm_t val_eth[RTE_MAX_ETHPORTS];
 
 extern struct lcore_conf lcore_conf[RTE_MAX_LCORE];
 
+extern struct parm_cfg parm_config;
+
 extern int numa_on; /**< NUMA is enabled by default. */
 
 /* Send burst of packets on an output interface */
@@ -287,4 +312,7 @@ l3fwd_acl_set_rule_ipv6_name(const char *optarg);
 void
 l3fwd_acl_set_rule_ipv4_name(const char *optarg);
 
+int
+is_bypass_line(char *buff);
+
 #endif  /* __L3_FWD_H__ */
diff --git a/examples/l3fwd/l3fwd_acl.c b/examples/l3fwd/l3fwd_acl.c
index 388b978..66ed23d 100644
--- a/examples/l3fwd/l3fwd_acl.c
+++ b/examples/l3fwd/l3fwd_acl.c
@@ -147,10 +147,6 @@ struct rte_acl_field_def ipv4_defs[NUM_FIELDS_IPV4] = {
},
 };
 
-#defineIPV6_ADDR_LEN   16
-#defineIPV6_ADDR_U16   (IPV6_ADDR_LEN / sizeof(uint16_t))
-#defineIPV6_ADDR_U32   (IPV6_ADDR_LEN / sizeof(uint32_t))
-
 enum {
PROTO_FIELD_IPV6,
SRC1_FIELD_IPV6,
@@ -297,12 +293,6 @@ static struct {
 
 const char cb_port_delim[] = ":";
 
-static struct {
-   const char *rule_ipv4_name;
-   const char *rule_ipv6_name;
-   int scalar;
-} parm_config;
-
 /*
  * Print and dump ACL/Route rules functions are defined in
  * following header file.
@@ -316,27 +306,6 @@ static struct {
 #include "l3fwd_acl_scalar.h"
 
 /*
- * API's called during initialization to setup ACL rules.
- */
-void
-l3fwd_acl_set_rule_ipv4_name(const char *optarg)
-{
-   parm_config.rule_ipv4_name = optarg;
-}
-
-void
-l3fwd_acl_set_rule_ipv6_name(const char *optarg)
-{
-   parm_config.rule_ipv6_name = optarg;
-}
-
-void
-l3fwd_acl_set_scalar(void)
-{
-   parm_config.scalar = 1;
-}
-
-/*
  * Parses IPV6 address, exepcts the following format:
  * XXXX:XXXX:XXXX:XXXX:XXXX:XXXX:XXXX:XXXX (where X - is a hexedecimal digit).
  */
@@ -566,7 +535,7 @@ parse_cb_ipv4vlan_rule(char *str, struct rte_acl_rule *v, 
int has_userdata)
 }
 
 static int
-add_rules(const char *rule_path,
+acl_add_rules(const char *rule_path,
struct rte_acl_rule **proute_base,
unsigned int *proute_num,
struct rte_acl_rule **pacl_base,
@@ -764,8 +733,8 @@ setup_acl(const int socket_id __attribute__((unused)))
 
dump_acl_config();
 
-   /* Load  rules from the input file */
-   if (add_rules(parm_config.rule_ipv4_name, &route_base_ipv4,
+   /* L

[dpdk-dev] [PATCH v5 3/3] examples/l3fwd: add config file support for EM

2017-03-08 Thread Ravi Kerur
Add config file support for v4 and v6 to build exact match
forwarding tables.

v5:
> No changes.

v4:
> No changes.

v3:
> Fix additional checkpatch coding style issues.

v2:
> Fix checkpatch warnings.

v1:
> Remove static array configuration of Dest IP,Src IP, Dest
port, Src port, Proto and IF_OUT for EM and EM6 config.
> Add reading configuration from a file.
> Format of configuration file is as follows
#EM route entries,
#Dest-IP Src-IP Dest-port Src-port Proto IF_OUT
E101.0.0.0 100.10.0.0 101 11 0x06 0
E201.0.0.0 200.20.0.0 102 12 0x06 1
E111.0.0.0 211.30.0.0 101 11 0x06 2
...

#EM6 route entries
#Dest-IP Src-IP Dest-port Src-port Proto IF_OUT
Efe80:0000:0000:0000:021e:67ff:fe00:0000
fe80:0000:0000:0000:021b:21ff:fe91:3805 101 11 0x06 0
Efe90:0000:0000:0000:021e:67ff:fe00:0000
fe90:0000:0000:0000:021b:21ff:fe91:3805 102 12 0x06 1
...

Signed-off-by: Ravi Kerur 
---
 examples/l3fwd/l3fwd_em.c | 376 +-
 1 file changed, 303 insertions(+), 73 deletions(-)

diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index 6fdabf7..cd6b443 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -95,8 +95,14 @@ union ipv4_5tuple_host {
 #define XMM_NUM_IN_IPV6_5TUPLE 3
 
 struct ipv6_5tuple {
-   uint8_t  ip_dst[IPV6_ADDR_LEN];
-   uint8_t  ip_src[IPV6_ADDR_LEN];
+   union {
+   uint8_t  ip_dst[IPV6_ADDR_LEN];
+   uint32_t ip32_dst[4];
+   };
+   union {
+   uint8_t  ip_src[IPV6_ADDR_LEN];
+   uint32_t ip32_src[4];
+   };
uint16_t port_dst;
uint16_t port_src;
uint8_t  proto;
@@ -116,47 +122,24 @@ union ipv6_5tuple_host {
xmm_t xmm[XMM_NUM_IN_IPV6_5TUPLE];
 };
 
-
-
-struct ipv4_l3fwd_em_route {
-   struct ipv4_5tuple key;
-   uint8_t if_out;
+enum {
+   CB_FLD_DST_ADDR,
+   CB_FLD_SRC_ADDR,
+   CB_FLD_DST_PORT,
+   CB_FLD_SRC_PORT,
+   CB_FLD_PROTO,
+   CB_FLD_IF_OUT,
+   CB_FLD_MAX
 };
 
-struct ipv6_l3fwd_em_route {
-   struct ipv6_5tuple key;
+struct em_rule {
+   union {
+   struct ipv4_5tuple v4_key;
+   struct ipv6_5tuple v6_key;
+   };
uint8_t if_out;
 };
 
-static struct ipv4_l3fwd_em_route ipv4_l3fwd_em_route_array[] = {
-   {{IPv4(101, 0, 0, 0), IPv4(100, 10, 0, 1),  101, 11, IPPROTO_TCP}, 0},
-   {{IPv4(201, 0, 0, 0), IPv4(200, 20, 0, 1),  102, 12, IPPROTO_TCP}, 1},
-   {{IPv4(111, 0, 0, 0), IPv4(100, 30, 0, 1),  101, 11, IPPROTO_TCP}, 2},
-   {{IPv4(211, 0, 0, 0), IPv4(200, 40, 0, 1),  102, 12, IPPROTO_TCP}, 3},
-};
-
-static struct ipv6_l3fwd_em_route ipv6_l3fwd_em_route_array[] = {
-   {{
-   {0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
-   {0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 
0x38, 0x05},
-   101, 11, IPPROTO_TCP}, 0},
-
-   {{
-   {0xfe, 0x90, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
-   {0xfe, 0x90, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 
0x38, 0x05},
-   102, 12, IPPROTO_TCP}, 1},
-
-   {{
-   {0xfe, 0xa0, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
-   {0xfe, 0xa0, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 
0x38, 0x05},
-   101, 11, IPPROTO_TCP}, 2},
-
-   {{
-   {0xfe, 0xb0, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
-   {0xfe, 0xb0, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 
0x38, 0x05},
-   102, 12, IPPROTO_TCP}, 3},
-};
-
 struct rte_hash *ipv4_l3fwd_em_lookup_struct[NB_SOCKETS];
 struct rte_hash *ipv6_l3fwd_em_lookup_struct[NB_SOCKETS];
 
@@ -233,12 +216,6 @@ ipv6_hash_crc(const void *data, __rte_unused uint32_t 
data_len,
return init_val;
 }
 
-#define IPV4_L3FWD_EM_NUM_ROUTES \
-   (sizeof(ipv4_l3fwd_em_route_array) / 
sizeof(ipv4_l3fwd_em_route_array[0]))
-
-#define IPV6_L3FWD_EM_NUM_ROUTES \
-   (sizeof(ipv6_l3fwd_em_route_array) / 
sizeof(ipv6_l3fwd_em_route_array[0]))
-
 static uint8_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
 static uint8_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
 
@@ -338,6 +315,224 @@ em_get_ipv6_dst_port(void *ipv6_hdr,  uint8_t portid, 
void *lookup_struct)
 #include "l3fwd_em.h"
 #endif
 
+static int
+em_parse_v6_addr(const char *in, const char **end, uint32_t v[IPV6_ADDR_U32],
+   char dlm)
+{
+   uint32_t addr[IPV6_ADDR_U16];
+
+   GET_CB_FIELD(in, addr[0], 16, UINT16_MAX, ':');
+   GET_CB_FIELD(in, addr[1], 16, UINT16_MAX, ':');
+   GET_CB_FIELD(in, addr[2], 16, UINT16_MAX, ':');
+   GET_CB_FIELD(in, addr[3], 16, UINT16_MA

[dpdk-dev] [PATCH v5 1/3] examples/l3fwd: merge l3fwd-acl code into l3fwd

2017-03-08 Thread Ravi Kerur
Merge l3fwd-acl code into l3fwd with '-A' cmdline option to run ACL.

v5:
> None.

v4:
> Initialize rss_hf to IP for LPM, EM and ACL.
> Update rss_hf with l4 in parse_args for ACL.
> Fix pending checkpatch code indentation warning.

v3:
> Fix additional checkpatch coding style issues.

v2:
> Fix checkpatch errors and warnings related to non strings
greater than 80 characters.
> MACRO GET_CB_FIELD and strings greater than 80 characters
warnings are not fixed.

v1:
l3fwd-acl changes:
> Merge common init code in l3fwd-acl and l3fwd into
main.c.
> Move non-critical inline functions to l3fwd_acl.h.
> Move critical packet processing inline functions to
l3fwd_acl_scalar.h
> Move l3fwd-acl init code to l3fwd_acl.c.
> Delete l3fwd-acl directory.

l3fwd changes:
> Add '-A' as an option for ACL processing.
> Merge parsing options from l3fwd-acl and l3fwd. Retain
l3fwd-acl definitions.
> Move specific setup functions (setup_acl, setup_lpm
and setup_hash).
Testing:
> Compiled successfully for x86_64-native-linuxapp-gcc
> Tested LPM, EM and ACL basic functionality.

Signed-off-by: Ravi Kerur 
---
 examples/l3fwd-acl/Makefile   |   56 -
 examples/l3fwd-acl/main.c | 2079 -
 examples/l3fwd/Makefile   |2 +-
 examples/l3fwd/l3fwd.h|   49 +
 examples/l3fwd/l3fwd_acl.c| 1064 +++
 examples/l3fwd/l3fwd_acl.h|  263 +
 examples/l3fwd/l3fwd_acl_scalar.h |  182 
 examples/l3fwd/l3fwd_em.c |   14 +-
 examples/l3fwd/l3fwd_lpm.c|   23 +-
 examples/l3fwd/main.c |  209 ++--
 10 files changed, 1722 insertions(+), 2219 deletions(-)
 delete mode 100644 examples/l3fwd-acl/Makefile
 delete mode 100644 examples/l3fwd-acl/main.c
 create mode 100644 examples/l3fwd/l3fwd_acl.c
 create mode 100644 examples/l3fwd/l3fwd_acl.h
 create mode 100644 examples/l3fwd/l3fwd_acl_scalar.h

diff --git a/examples/l3fwd-acl/Makefile b/examples/l3fwd-acl/Makefile
deleted file mode 100644
index a3473a8..000
--- a/examples/l3fwd-acl/Makefile
+++ /dev/null
@@ -1,56 +0,0 @@
-#   BSD LICENSE
-#
-#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
-#   All rights reserved.
-#
-#   Redistribution and use in source and binary forms, with or without
-#   modification, are permitted provided that the following conditions
-#   are met:
-#
-# * Redistributions of source code must retain the above copyright
-#   notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-#   notice, this list of conditions and the following disclaimer in
-#   the documentation and/or other materials provided with the
-#   distribution.
-# * Neither the name of Intel Corporation nor the names of its
-#   contributors may be used to endorse or promote products derived
-#   from this software without specific prior written permission.
-#
-#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-ifeq ($(RTE_SDK),)
-$(error "Please define RTE_SDK environment variable")
-endif
-
-# Default target, can be overriden by command line or environment
-RTE_TARGET ?= x86_64-native-linuxapp-gcc
-
-include $(RTE_SDK)/mk/rte.vars.mk
-
-# binary name
-APP = l3fwd-acl
-
-# all source are stored in SRCS-y
-SRCS-y := main.c
-
-CFLAGS += -O3
-CFLAGS += $(WERROR_FLAGS)
-
-# workaround for a gcc bug with noreturn attribute
-# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
-ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
-CFLAGS_main.o += -Wno-return-type
-endif
-
-include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/l3fwd-acl/main.c b/examples/l3fwd-acl/main.c
deleted file mode 100644
index 3cfbb40..000
--- a/examples/l3fwd-acl/main.c
+++ /dev/null
@@ -1,2079 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2016 Intel Corporation. Al

Re: [dpdk-dev] [PATCH v6 06/26] eal-common: introduce a way to query cpu support

2017-03-08 Thread Thomas Monjalon
2017-02-28 13:52, Aaron Conole:
> +/**
> + * This function checks that the currently used CPU supports the CPU features
> + * that were specified at compile time. It is called automatically within the
> + * EAL, so does not need to be used by applications.  This version returns a
> + * result so that decisions may be made (for instance, graceful shutdowns).
> + */
> +int
> +rte_cpu_is_supported(void);
>  #endif /* _RTE_CPUFLAGS_H_ */

A blank line is missing.



Re: [dpdk-dev] [v4 0/3] Merge l3fwd-acl and l3fwd

2017-03-08 Thread Ravi Kerur
John, Konstantin,

Kindly let me know whether the new 'v5' patch follows the DPDK guidelines.

I just followed what's in the dpdk contributing code guidelines.


   - git format-patch -3 -o patch/ -v 5 --cover-letter
   - git send-email --to dev@dpdk.org --in-reply-to  patch/*

Thanks.


On Mon, Mar 6, 2017 at 3:20 PM, Ravi Kerur  wrote:

> Hi John,
>
> Should I work with documentation team to update the document? If yes,
> please let me know the contact information.
>
> Thanks.
>
> On Mon, Mar 6, 2017 at 5:33 AM, Mcnamara, John 
> wrote:
>
>> > -Original Message-
>> > From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Ravi Kerur
>> > Sent: Sunday, March 5, 2017 7:47 PM
>> > To: dev@dpdk.org
>> > Cc: Ananyev, Konstantin ; Richardson,
>> Bruce
>> > ; Ravi Kerur 
>> > Subject: [dpdk-dev] [v4 0/3] Merge l3fwd-acl and l3fwd
>> >
>> > This patchset merges l3fwd-acl and l3fwd code into common directory.
>> > Adds file read options to build LPM and EM tables.
>>
>> Hi Ravi,
>>
>> Thanks for this. It seems like a good change. There are probably too many
>> L2/L3 variants and some of them should be merged.
>>
>> Note, you will also have to merge the sample app guides in some sensible
>> way:
>>
>> http://dpdk.org/doc/guides/sample_app_ug/l3_forward.html
>> http://dpdk.org/doc/guides/sample_app_ug/l3_forward_access_ctrl.html
>>
>> John
>>
>>
>


Re: [dpdk-dev] [PATCH v6 00/26] linux/eal: Remove most causes of panic on init

2017-03-08 Thread Thomas Monjalon
Hi,

Thanks for the work.
I think it needs to be completed to have the same behaviour on bsdapp.

As another version is required, I add some small comments about
the formatting.

I think you should use the form "do not panic on " for most
of the commits.

Some commits may be squashed (see below):

2017-02-28 13:52, Aaron Conole:
> Aaron Conole (26):
>   eal: cpu init will no longer panic
>   eal: return error instead of panic for cpu init
squashed?

>   eal: do not panic on hugepage info init
>   eal: do not panic on failed hugepage query
squashed?

>   eal: do not panic if parsing args returns error
>   eal-common: introduce a way to query cpu support
>   eal: do not panic when CPU isn't supported
squashed?

>   eal: do not panic on memzone initialization fails
>   eal: set errno when exiting for already called
>   eal: do not panic on log failures
>   eal: do not panic on PCI-probe
It is not really the probe here

>   eal: do not panic on vfio failure
>   eal: do not panic on memory init
>   eal: do not panic on tailq init
>   eal: do not panic on alarm init
>   eal: convert timer init not to call panic
>   eal: change the private pipe call to reflect errno
>   eal: do not panic on interrupt thread init
>   eal: do not error if plugins fail to init
>   eal_pci: continue probing even on failures
>   eal: do not panic on failed PCI-probe
squashed?

>   eal_common_dev: continue initializing vdevs
>   eal: do not panic (or abort) if vdev init fails
squashed?

>   eal: do not panic when bus probe fails
>   eal: do not panic on failed bus scan
>   rte_eal_init: add info about various error codes



Re: [dpdk-dev] [PATCH v6 20/26] eal_pci: continue probing even on failures

2017-03-08 Thread Thomas Monjalon
2017-02-28 13:53, Aaron Conole:
> + int ret_1 = 0;

You do not need to add a new variable.

>   int ret = 0;
>  
>   if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_PCI) == 0)
> @@ -430,17 +432,20 @@ rte_eal_pci_probe(void)
>  
>   /* probe all or only whitelisted devices */
>   if (probe_all)
> - ret = pci_probe_all_drivers(dev);
> + ret_1 = pci_probe_all_drivers(dev);
>   else if (devargs != NULL &&
>   devargs->type == RTE_DEVTYPE_WHITELISTED_PCI)
> - ret = pci_probe_all_drivers(dev);
> - if (ret < 0)
> - rte_exit(EXIT_FAILURE, "Requested device " PCI_PRI_FMT
> + ret_1 = pci_probe_all_drivers(dev);
> + if (ret_1 < 0) {
> + RTE_LOG(ERR, EAL, "Requested device " PCI_PRI_FMT
>" cannot be used\n", dev->addr.domain, 
> dev->addr.bus,
>dev->addr.devid, dev->addr.function);
> + rte_errno = errno;
> + ret = 1;
> + }
>   }
>  
> - return 0;
> + return -ret;

It may be more explicit to use only one variable ret and filter
the positive values:
ret < 0 ? -1 : 0


Re: [dpdk-dev] [PATCH v3] eal: sPAPR IOMMU support in pci probing for vfio-pci in ppc64le

2017-03-08 Thread Chao Zhu
> From: Gowrishankar [mailto:gowrishanka...@linux.vnet.ibm.com]
> Sent: 2017年3月6日 23:04
> To: dev@dpdk.org
> Cc: Chao Zhu ; Anatoly Burakov
> ; Thomas Monjalon
> ; Gowrishankar Muthukrishnan
> 
> Subject: [PATCH v3] eal: sPAPR IOMMU support in pci probing for vfio-pci
in
> ppc64le
> 
> From: Gowrishankar Muthukrishnan 
> 
> Below changes adds pci probing support for vfio-pci devices in power8.
> 
> v3 - better validation for kernel not implementing few iocts called
> v2 - kernel version checked and doc updated
> 
> Signed-off-by: Gowrishankar Muthukrishnan
> 
> ---
>  doc/guides/rel_notes/release_17_05.rst |  4 ++
> lib/librte_eal/linuxapp/eal/eal_vfio.c | 90
> ++
>  lib/librte_eal/linuxapp/eal/eal_vfio.h | 25 ++
>  3 files changed, 119 insertions(+)
> 
> diff --git a/doc/guides/rel_notes/release_17_05.rst
> b/doc/guides/rel_notes/release_17_05.rst
> index e25ea9f..4b90036 100644
> --- a/doc/guides/rel_notes/release_17_05.rst
> +++ b/doc/guides/rel_notes/release_17_05.rst
> @@ -42,6 +42,10 @@ New Features
>   =
> 
> 
> +* **Added powerpc support in pci probing for vfio-pci devices.**
> +
> +  sPAPR IOMMU based pci probing enabled for vfio-pci devices.
> +
>  Resolved Issues
>  ---
> 
> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c
> b/lib/librte_eal/linuxapp/eal/eal_vfio.c
> index 702f7a2..9377a66 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
> @@ -50,12 +50,15 @@
>  static struct vfio_config vfio_cfg;
> 
>  static int vfio_type1_dma_map(int);
> +static int vfio_spapr_dma_map(int);
>  static int vfio_noiommu_dma_map(int);
> 
>  /* IOMMU types we support */
>  static const struct vfio_iommu_type iommu_types[] = {
>   /* x86 IOMMU, otherwise known as type 1 */
>   { RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map},
> + /* ppc64 IOMMU, otherwise known as spapr */
> + { RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map},
>   /* IOMMU-less mode */
>   { RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map},  };
> @@ -540,6 +543,93 @@ int vfio_setup_device(const char *sysfs_base, const
> char *dev_addr,  }
> 
>  static int
> +vfio_spapr_dma_map(int vfio_container_fd) {
> + const struct rte_memseg *ms = rte_eal_get_physmem_layout();
> + int i, ret;
> +
> + struct vfio_iommu_spapr_register_memory reg = {
> + .argsz = sizeof(reg),
> + .flags = 0
> + };
> + struct vfio_iommu_spapr_tce_info info = {
> + .argsz = sizeof(info),
> + };
> + struct vfio_iommu_spapr_tce_create create = {
> + .argsz = sizeof(create),
> + };
> + struct vfio_iommu_spapr_tce_remove remove = {
> + .argsz = sizeof(remove),
> + };
> +
> + /* query spapr iommu info */
> + ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO,
> &info);
> + if (ret) {
> + RTE_LOG(ERR, EAL, "  cannot get iommu info, "
> + "error %i (%s)\n", errno, strerror(errno));
> + return -1;
> + }
> +
> + /* remove default DMA of 32 bit window */
> + remove.start_addr = info.dma32_window_start;
> + ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_REMOVE,
> &remove);
> + if (ret) {
> + RTE_LOG(ERR, EAL, "  cannot remove default DMA window, "
> + "error %i (%s)\n", errno, strerror(errno));
> + return -1;
> + }
> +
> + /* calculate window size based on number of hugepages configured */
> + create.window_size = rte_eal_get_physmem_size();
> + create.page_shift = __builtin_ctzll(ms->hugepage_sz);
> + create.levels = 2;
> +
> + ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE,
> &create);
> + if (ret) {
> + RTE_LOG(ERR, EAL, "  cannot create new DMA window, "
> + "error %i (%s)\n", errno, strerror(errno));
> + return -1;
> + }
> +
> + /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
> + for (i = 0; i < RTE_MAX_MEMSEG; i++) {
> + struct vfio_iommu_type1_dma_map dma_map;
> +
> + if (ms[i].addr == NULL)
> + break;
> +
> + reg.vaddr = (uintptr_t) ms[i].addr;
> + reg.size = ms[i].len;
> + ret = ioctl(vfio_container_fd,
> + VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg);
> + if (ret) {
> + RTE_LOG(ERR, EAL, "  cannot register vaddr for
IOMMU, "
> + "error %i (%s)\n", errno, strerror(errno));
> + return -1;
> + }
> +
> + memset(&dma_map, 0, sizeof(dma_map));
> + dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
> + dma_map.vaddr = ms[i].addr_64;
> + dma_map.size = ms[i].len;
> + dma_map.iova = ms[i].phys_

Re: [dpdk-dev] [PATCH v3] eal: sPAPR IOMMU support in pci probing for vfio-pci in ppc64le

2017-03-08 Thread Chao Zhu
Thomas,

Thanks for the reminder! I changed the mailer settings and acked again.

> -Original Message-
> From: Thomas Monjalon [mailto:thomas.monja...@6wind.com]
> Sent: 2017年3月7日 21:08
> To: Chao Zhu 
> Cc: 'Gowrishankar' ; dev@dpdk.org;
> 'Anatoly Burakov' 
> Subject: Re: [PATCH v3] eal: sPAPR IOMMU support in pci probing for
vfio-pci in
> ppc64le
> 
> Chao, there is an issue with your mailer: it is not quoting original
email.
> Please check html is disabled and remove useless context when replying.
> 
> 2017-03-07 20:03, Chao Zhu:
> > From: Gowrishankar Muthukrishnan 
> >
> > Below changes adds pci probing support for vfio-pci devices in power8.
> >
> > v3 - better validation for kernel not implementing few iocts called
> > v2 - kernel version checked and doc updated
> >
> > Signed-off-by: Gowrishankar Muthukrishnan
> > 
> [...]
> > Acked-by: Chao Zhu 
> 




[dpdk-dev] dpdk 0005-net-bonding-reconfigure-all-slave-queues-every-time.patch issue

2017-03-08 Thread Wen Chiu

Hi,

0005-net-bonding-reconfigure-all-slave-queues-every-time.patch is now 
officially in dpdk 17.02. But, it caused segmentation fault every time 
when I configured bonding. In slave_configure(), "Setup Tx Queues" logic 
change from for q_id=old_nb_tx_queues to qid=0 which always enters the 
for loop and calls rte_eth_tx_queue_setup. After that, 
rte_eth_dev_start() is called to start the device. In 
rte_eth_dev_start(), vmxnet3_dev_start() is called which calls 
vmxnet3_dev_rxtx_init(). In vmxnet3_dev_rxtx_init(), after for loop for 
rx_queues; dev->data->tx_queues[0] is overwritten with a value like 
0x121b20600 which is an invalid memory address that caused the fault.


Without this 0005 patch, looks like rte_eth_tx_queue_setup() is never 
called as q_id=old_nb_tx_queues never < nb_tx_queues. So, I suspect the 
calls to queue_setup() somehow makes the queues to be setup incorrectly 
or incompletely which causes the fault. Has anyone else encountered the 
same issue?


Regards,

Wen Chiu





Re: [dpdk-dev] [PATCH 2/2] app/testpmd: enable VF untag drop in testpmd

2017-03-08 Thread Zhang, Qi Z


> -Original Message-
> From: Yigit, Ferruh
> Sent: Tuesday, March 7, 2017 7:14 PM
> To: Zhang, Qi Z ; Wu, Jingjing ;
> Zhang, Helin 
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 2/2] app/testpmd: enable VF untag drop in
> testpmd
> 
> On 3/3/2017 1:59 AM, Qi Zhang wrote:
> > Add command line to support untag drop to specific VF in testpmd.
> >
> > Signed-off-by: Qi Zhang 
> > ---
> >  app/test-pmd/cmdline.c | 104
> > +
> >  1 file changed, 104 insertions(+)
> >
> > diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index
> > 43fc636..4ddc2c9 100644
> > --- a/app/test-pmd/cmdline.c
> > +++ b/app/test-pmd/cmdline.c
> > @@ -311,6 +311,10 @@ static void cmd_help_long_parsed(void
> > *parsed_result,
> >
> > "set vf vlan antispoof (port_id) (vf_id) (on|off)\n"
> > "Set VLAN antispoof for a VF from the PF.\n\n"
> > +#ifdef RTE_LIBRTE_I40E_PMD
> > +   "set vf vlan untagdrop (port_id) (vf_id) (on|off)\n"
> > +   "Set VLAN untag drop for a VF from the PF.\n\n"
> > +#endif
> 
> We should decide how to implement PMD specific APIs in testpmd, and be
> consistent about it.
> 
> Currently there are two approaches:
> 
> 1- Wrap PMD specific feature and API with a PMD #ifdef, as done here.
> 
> 2- Enable feature by default, return -ENOTSUP for port_id that does not 
> support
> it. Ex: cmd_vf_rxvlan_filter.
>
> I am for second option. And explicitly not disabling I40E driver does not mean
> you should have those NICs in your runtime environment, so the effect will be
> same as always enabling it.
>
Yes, I noticed this problem: during implementation I saw that both patterns
exist, so I had to choose one of them.
We'd better align this.
Both options are OK for me, but I slightly prefer option 1, since it is not
necessary to expose a command when there is no backend device; that makes the
hint cleaner.
> 
> And since number of PMD specific APIs are increasing, perhaps we should find a
> better approach for testpmd to prevent them corrupting testpmd.
Will think about this, also like to know if you or anyone have any good 
suggestion.
> 
> Also it may worth to discuss why number of PMD specific APIs are increasing.
> 
> >
> > "set vf vlan tag (port_id) (vf_id) (on|off)\n"
> > "Set VLAN tag for a VF from the PF.\n\n"
> > @@ -10995,6 +10999,103 @@ cmdline_parse_inst_t
> cmd_set_vf_vlan_anti_spoof = {
> > },
> >  };
> >
> <...>


Re: [dpdk-dev] [PATCH v2 0/5] net/i40e: support pipeline personalization profile

2017-03-08 Thread Xing, Beilei

> -Original Message-
> From: Yigit, Ferruh
> Sent: Wednesday, March 8, 2017 7:43 PM
> To: Xing, Beilei ; Wu, Jingjing 
> Cc: Zhang, Helin ; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v2 0/5] net/i40e: support pipeline
> personalization profile
> 
> On 3/3/2017 7:39 AM, Beilei Xing wrote:
> > Add APIs and driver to support load/get i40e PPP (Pipeline
> > Personalization Profile)
> 
> Can you please describe what "Pipeline Personalisation Profile" is?
> If possible please provide some links to documents. And please feel free to
> update NIC document about these details.

Due to limited resources of X*710 (parser and analyzer configuration tables, 
number of packet classification types,
number of packet types, filters configuration tables, etc.), it's impossible to 
simultaneously support all protocols/filters
required for different parts on network.
To enable protocols/filters extensions for X*710, new Admin Command for loading 
user defined configurations is added.
PPP is an extended configuration format for X*710; it allows the user to load a 
user-defined configuration to X*710.

Actually I have no released doc to share, the documents in my hand are draft 
and can't be forwarded.

> 
> What are the use cases, what are the benefits of this feature?

List of possible use cases for extended X*710 configuration using profiles 
could include following:
Configuring Analyzer/Parser to support new protocols, for example:
*   IP L2TPv3 tunneling protocol
*   IPSec ESP/AH protocols
*   MPLSoGRE, MPLSoUDP tunnels
*   GTP-C/GTP-U tunnels
New PCTYPEs for offloading packet classification to X*710. For example:
*   new IP Protocol in addition to TCP/UDP/SCTP
*   new TCP/UDP subtypes, like TCP SYN, TCP FIN 
*   new PCTYPE for tunneled packets like GTP-C, GTP-U
New PTYPEs for packets identification, for example:
*   MAC, MPLS, IP4, UDP
*   MAC, MPLS, MPLS, IP6, TCP
Fixes for NVM configuration, for example:
*   list of enabled stat counters to improve throughput
*   parser/analyzer configuration for some corner cases

> 
> And can you please update release notes to announce about added feature?
Yes, will update in next version.

> 
> > since PPP will be supported from FVL6 NVM.
> >
> > v2 change:
> >  Correct patch num.
> >
> > Beilei Xing (5):
> >   net/i40e: support pipeline personalization profile
> >   net/i40e: add ppp processing
> >   app/testpmd: add command for writing personalization profile
> >   net/i40e: add get all loaded profiles
> >   app/testpmd: add command for getting loaded profiles
> 
> <...>



Re: [dpdk-dev] Reg DPDK & PMD

2017-03-08 Thread raman geetha gopalakrishnan
Thanks a lot, Bruce, for correcting my assumption. This is what I was looking for.

Thanks
Raman

On Wed, Mar 8, 2017 at 7:46 PM, Bruce Richardson  wrote:

> On Wed, Mar 08, 2017 at 07:05:03PM +0530, raman geetha gopalakrishnan
> wrote:
> > Hi All,
> >
> > I have the following basic question. Hope to get an answer / link where i
> > can get myself clear.
> >
> > 1. In DPDK PMD is optimized driver for an given NIC to get maximum
> > performance.
> > That being the case why we are talking about DPDK supported NICs.
> >
> >  A) My assumption is that NIC interface is standardized so that PMD
> > should actually work with any NIC (barring some NIC specific performance
> > tweaks)
> >   is that correct?
>
> No, that assumption is not correct, which is the reason we have so many
> NIC drivers in DPDK. Each NIC uses a different method of talking to SW,
> both in terms of the registers needed to be accessed to initialize the
> NIC and then in terms of the metadata format used to receive or transmit
> packets.
>
> Regards,
> /Bruce
>
> >
> >  B) if #A is correct , how can i make changes to PMD to support any
> NIC
> > ?
> >
> > if i have to put the above question in different way then it is
> >
> > 2. what is preventing us from having a common PMD layer for all NICs and
> > additional PMD specific to each NIC???
> >
> > Thanks
> > Raman
>


Re: [dpdk-dev] [PATCH 1/2] net/i40e: enable VF untag drop

2017-03-08 Thread Zhang, Qi Z
Hi Ferruh:

> -Original Message-
> From: Yigit, Ferruh
> Sent: Tuesday, March 7, 2017 6:51 PM
> To: Zhang, Qi Z ; Wu, Jingjing ;
> Zhang, Helin 
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 1/2] net/i40e: enable VF untag drop
> 
> On 3/3/2017 1:59 AM, Qi Zhang wrote:
> > Add a new private API to support the untag drop enable/disable for
> > specific VF.
> >
> > Signed-off-by: Qi Zhang 
> > ---
> >  drivers/net/i40e/i40e_ethdev.c  | 49
> > +
> >  drivers/net/i40e/rte_pmd_i40e.h | 18 +++
> 
> The shared library build is giving an error because the API is missing in the
> *version.map file
> 
> >  2 files changed, 67 insertions(+)
> >
> 
> <...>
> 
> > diff --git a/drivers/net/i40e/rte_pmd_i40e.h
> > b/drivers/net/i40e/rte_pmd_i40e.h index a0ad88c..895e2cc 100644
> > --- a/drivers/net/i40e/rte_pmd_i40e.h
> > +++ b/drivers/net/i40e/rte_pmd_i40e.h
> > @@ -332,4 +332,22 @@ int rte_pmd_i40e_get_vf_stats(uint8_t port,  int
> > rte_pmd_i40e_reset_vf_stats(uint8_t port,
> > uint16_t vf_id);
> >
> > +/**
> > + * Enable/Disable VF untag drop
> > + *
> > + * @param port
> > + *The port identifier of the Ethernet device.
> > + * @param vf_id
> > + *VF on witch to enable/disable
> > + * @param on
> > + *Enable or Disable
> > + * @retura
> 
> @return
> 
> > + *  - (0) if successful.
> > + *  -(-ENODEVE) if *port* invalid
> > + *  -(-EINVAL) if bad parameter.
> > + */
> > +int rte_pmd_i40e_set_vf_vlan_untag_drop(uint8_t port,
> > +   uint16_t vf_id,
> > +   uint8_t on);
> 
> As discussed previously, I believe it is good to keep following syntax in API:
> __, for this API it becomes:
I think, current naming rule is __ right? See below
rte_pmd_i40e_set_vf_vlan_anti_spoof;
rte_pmd_i40e_set_vf_vlan_filter;
rte_pmd_i40e_set_vf_vlan_insert;
rte_pmd_i40e_set_vf_vlan_stripq;
rte_pmd_i40e_set_vf_vlan_tag;
so what's wrong with this?
> 
> rte_pmd_i40e_vf_vlan_untag_drop_set(), and perhaps "set" can be removed?
> 
> > +
> >  #endif /* _PMD_I40E_H_ */
> >



  1   2   >