date:20170925

Re: [dpdk-dev] [PATCH v2 0/6] NXP DPAA2: Refactor bus scan/probe code

2017-09-25 Thread Hemant Agrawal

HI Thomas,

Ferruh has reviewed it. 
If you don't have any further comments on it, will you please apply 
this patchset to the main tree, so that other dependent patches can be applied 
to respective next trees?

Regards,
Hemant


> -Original Message-
> From: Shreyansh Jain [mailto:shreyansh.j...@nxp.com]
> Sent: Monday, September 11, 2017 7:56 PM
> To: Ferruh Yigit ; dev@dpdk.org
> Cc: Hemant Agrawal ; tho...@monjalon.net
> Subject: Re: [PATCH v2 0/6] NXP DPAA2: Refactor bus scan/probe code
> 
> On Monday 11 September 2017 07:36 PM, Ferruh Yigit wrote:
> > On 8/25/2017 11:19 AM, Shreyansh Jain wrote:
> >> Change Log:
> >> ~~~
> >>   v2:
> >> - Minor updates for logging (removed some logs and changed others
> >>   to make it cleaner when application starts)
> >>
> >> Brief:
> >> ~~
> >>
> >>   -- v1 is at [3] --
> >>
> >> In [1], during the IOVA Mapping patch set [2] discussion, it was
> >> observed that DPAA2 scan was actually doing work meant for probing.
> >>
> >> This patchset demarcates the roles of FSLMC bus scan and probe
> >> functions much more clearly than before:
> >>
> >> 1. scan now only add devices into a list
> >>   unlike previously, scan doesn't initialize the devices using the VFIO
> >>   operations. Now, scan would only add the devices onto a local device
> >>   list after marking their type and filling in device name.
> >>
> >> 2. probe would now perform VFIO operations
> >>   in dpaa2, for the device added in list, an initialization needs
> >>   to be done so as to enable the devices - before actually API calls
> >>   can be served. Probe function now initializes the devices as well
> >>   as links then to the Eth/Crypto drivers.
> >>
> >> 3. Refactoring some VFIO code
> >>   This patch improves the overall code contained within DPAA2 bus
> >>   for DPAA2 VFIO layer.
> >>
> >> [1] http://dpdk.org/ml/archives/dev/2017-July/071270.html
> >> [2] http://dpdk.org/ml/archives/dev/2017-July/070833.html
> >> [3] http://dpdk.org/ml/archives/dev/2017-August/073011.html
> >>
> >> Shreyansh Jain (6):
> >>bus/fslmc: support only single group and container
> >>bus/fslmc: introduce new device type enumerator
> >>crypto/dpaa2_sec: update driver type field
> >>net/dpaa2: update driver type field
> >>drivers: refactor DPAA2 object definition
> >>bus/fslmc: refactor scan and probe functions
> >
> > Series Reviewed-by: Ferruh Yigit 
> >
> > I guess this set needs to go in to the main tree, because of both
> > - as far as I get IOVA patchset breaks dpaa2, so this should go in
> > before that patchset
> > - This patchset has both crypto and net patches, hard to get by
> > sub-trees
> >
> 
> I agree.
> I don't think much patches have come on 17.11 tree yet - still, I can send a
> rebased set again.
> Thanks a lot for taking out time for this.
> 
> -
> Shreyansh

Re: [dpdk-dev] [PATCH v4 4/4] net/softnic: add TM hierarchy related ops

2017-09-25 Thread Lu, Wenzhuo

Hi Jasvinder,


> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Jasvinder Singh
> Sent: Monday, September 18, 2017 5:10 PM
> To: dev@dpdk.org
> Cc: Dumitrescu, Cristian ; Yigit, Ferruh
> ; tho...@monjalon.net
> Subject: [dpdk-dev] [PATCH v4 4/4] net/softnic: add TM hierarchy related ops
> 
> Implement ethdev TM hierarchy related APIs in SoftNIC PMD.
> 
> Signed-off-by: Cristian Dumitrescu 
> Signed-off-by: Jasvinder Singh 
> ---
>  drivers/net/softnic/rte_eth_softnic_internals.h |   41 +
>  drivers/net/softnic/rte_eth_softnic_tm.c| 2776
> ++-
>  2 files changed, 2813 insertions(+), 4 deletions(-)


> +
> +static uint32_t
> +tm_node_subport_id(struct rte_eth_dev *dev, struct tm_node
> *subport_node)
> +{
> + struct pmd_internals *p = dev->data->dev_private;
> + struct tm_node_list *nl = &p->soft.tm.h.nodes;
> + struct tm_node *ns;
> + uint32_t subport_id;
> +
> + subport_id = 0;
> + TAILQ_FOREACH(ns, nl, node) {
> + if (ns->level != TM_NODE_LEVEL_SUBPORT)
> + continue;
> +
> + if (ns->node_id == subport_node->node_id)
> + return subport_id;
> +
> + subport_id++;
> + }
> +
> + return UINT32_MAX;
UINT32_MAX means invalid number, right? Better define a specific MACRO for the 
invalid number in case you may not want to use 0xff.. or uint32.
The same suggestion for the below functions.


> +static int
> +shaper_profile_check(struct rte_eth_dev *dev,
> + uint32_t shaper_profile_id,
> + struct rte_tm_shaper_params *profile,
> + struct rte_tm_error *error)
> +{
> + struct tm_shaper_profile *sp;
> +
> + /* Shaper profile ID must not be NONE. */
> + if (shaper_profile_id == RTE_TM_SHAPER_PROFILE_ID_NONE)
> + return -rte_tm_error_set(error,
> + EINVAL,
> + RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID,
> + NULL,
> + rte_strerror(EINVAL));
> +
> + /* Shaper profile must not exist. */
> + sp = tm_shaper_profile_search(dev, shaper_profile_id);
> + if (sp)
> + return -rte_tm_error_set(error,
> + EEXIST,
> + RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID,
> + NULL,
> + rte_strerror(EEXIST));
> +
> + /* Profile must not be NULL. */
> + if (profile == NULL)
> + return -rte_tm_error_set(error,
> + EINVAL,
> + RTE_TM_ERROR_TYPE_SHAPER_PROFILE,
> + NULL,
> + rte_strerror(EINVAL));
A slight suggestion. We can do the easiest check at first.


> +
> +/* Traffic manager shaper profile add */
> +static int
> +pmd_tm_shaper_profile_add(struct rte_eth_dev *dev,
> + uint32_t shaper_profile_id,
> + struct rte_tm_shaper_params *profile,
> + struct rte_tm_error *error)
> +{
> + struct pmd_internals *p = dev->data->dev_private;
> + struct tm_shaper_profile_list *spl = &p->soft.tm.h.shaper_profiles;
> + struct tm_shaper_profile *sp;
> + int status;
> +
> + /* Check input params */
> + status = shaper_profile_check(dev, shaper_profile_id, profile, error);
> + if (status)
> + return status;
> +
> + /* Memory allocation */
> + sp = calloc(1, sizeof(struct tm_shaper_profile));
Just curious, why not use rte_zmalloc?

> + if (sp == NULL)
> + return -rte_tm_error_set(error,
> + ENOMEM,
> + RTE_TM_ERROR_TYPE_UNSPECIFIED,
> + NULL,
> + rte_strerror(ENOMEM));
> +
> + /* Fill in */
> + sp->shaper_profile_id = shaper_profile_id;
> + memcpy(&sp->params, profile, sizeof(sp->params));
> +
> + /* Add to list */
> + TAILQ_INSERT_TAIL(spl, sp, node);
> + p->soft.tm.h.n_shaper_profiles++;
> +
> + return 0;
> +}
> +

> +
> +static struct tm_node *
> +tm_shared_shaper_get_tc(struct rte_eth_dev *dev,
> + struct tm_shared_shaper *ss)
> +{
> + struct pmd_internals *p = dev->data->dev_private;
> + struct tm_node_list *nl = &p->soft.tm.h.nodes;
> + struct tm_node *n;
> +
> + TAILQ_FOREACH(n, nl, node) {
> + if ((n->level != TM_NODE_LEVEL_TC) ||
> + (n->params.n_shared_shapers == 0) ||
> + (n->params.shared_shaper_id[0] != ss-
> >shared_shaper_id))
According to node_add_check_tc, only one shared shaper supported, right? Better 
adding some comments here?

> + continue;
> +
> + return n;
> + }
> +
> + return NULL;
> +}



> +
> +static void
> +pipe_profile_build(struct rte_eth_dev *dev,
> + struct tm_node *np,
> + struct rte_sched_pipe_params *pp)
> +{
> + struct pmd_internals *p = dev->data->dev_private;
> + struct tm_hierarchy *h = &p->soft.tm.h;
> + struct tm_node_list *nl = &h->nodes;
> +

Re: [dpdk-dev] [PATCH v4 0/2] Dynamically configure mempool handle

2017-09-25 Thread Olivier MATZ

Hi Santosh,

On Tue, Sep 19, 2017 at 01:58:14PM +0530, santosh wrote:
> Hi Olivier,
> 
> 
> On Wednesday 13 September 2017 03:30 PM, santosh wrote:
> > Hi Olivier,
> >
> >
> > On Monday 11 September 2017 08:48 PM, Santosh Shukla wrote:
> >> v4:
> >> - Includes v3 review coment changes.
> >>   Patches rebased on 06791a4bce: ethdev: get the supported pools for a 
> >> port 
> > are you fine with v4? Thanks.
> >
> Ping? Thanks.

Really sorry for the late review.
I'm sending some minor comments, hoping the patches can be
integrated in RC1.

Olivier

Re: [dpdk-dev] [PATCH v4 1/2] eal: allow user to override default pool handle

2017-09-25 Thread Olivier MATZ

On Mon, Sep 11, 2017 at 08:48:36PM +0530, Santosh Shukla wrote:
> DPDK has support for both sw and hw mempool and
> currently user is limited to use ring_mp_mc pool.
> In case user want to use other pool handle,
> need to update config RTE_MEMPOOL_OPS_DEFAULT, then
> build and run with desired pool handle.
> 
> Introducing eal option to override default pool handle.
> 
> Now user can override the RTE_MEMPOOL_OPS_DEFAULT by passing
> pool handle to eal `--mbuf-pool-ops=""`.
> 
> Signed-off-by: Santosh Shukla 
> Acked-by: Hemant Agrawal 
>
> [...]
>
> --- a/lib/librte_eal/common/eal_internal_cfg.h
> +++ b/lib/librte_eal/common/eal_internal_cfg.h
> @@ -82,7 +82,7 @@ struct internal_config {
>   volatile enum rte_intr_mode vfio_intr_mode;
>   const char *hugefile_prefix;  /**< the base filename of hugetlbfs 
> files */
>   const char *hugepage_dir; /**< specific hugetlbfs directory to 
> use */
> -
> + const char *mbuf_pool_name;   /**< mbuf pool name */
>   unsigned num_hugepage_sizes;  /**< how many sizes on this system */
>   struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES];
>  };

What do you think about mbuf_pool_ops_name instead?

I'm afraid of the confusion we could have with the name
of the mempool.

[dpdk-dev] [PATCH v3] librte_eal: fix wrong assert for arm and ppc

2017-09-25 Thread Lukasz Majczak

The assertion of return value from the open() function is done against
0, while it is a correct value - open() returns -1 in case of an error.
It causes problems while trying to run as a daemon, in which case, this
call to open() will return 0 as a valid descriptor.

Fixes: b94e5c9406b5 ("eal/arm: add CPU flags for ARMv7")
Fixes: 97523f822ba9 ("eal/arm: add CPU flags for ARMv8")
Fixes: 9ae155385686 ("eal/ppc: cpu flag checks for IBM Power")

Signed-off-by: Lukasz Majczak 
---
 lib/librte_eal/common/arch/arm/rte_cpuflags.c| 2 +-
 lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/librte_eal/common/arch/arm/rte_cpuflags.c 
b/lib/librte_eal/common/arch/arm/rte_cpuflags.c
index 5636e9c1d..88f1cbe37 100644
--- a/lib/librte_eal/common/arch/arm/rte_cpuflags.c
+++ b/lib/librte_eal/common/arch/arm/rte_cpuflags.c
@@ -137,7 +137,7 @@ rte_cpu_get_features(hwcap_registers_t out)
_Elfx_auxv_t auxv;
 
auxv_fd = open("/proc/self/auxv", O_RDONLY);
-   assert(auxv_fd);
+   assert(auxv_fd != -1);
while (read(auxv_fd, &auxv, sizeof(auxv)) == sizeof(auxv)) {
if (auxv.a_type == AT_HWCAP) {
out[REG_HWCAP] = auxv.a_un.a_val;
diff --git a/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c 
b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c
index fcf96e045..970a61c5e 100644
--- a/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c
+++ b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c
@@ -108,7 +108,7 @@ rte_cpu_get_features(hwcap_registers_t out)
Elf64_auxv_t auxv;
 
auxv_fd = open("/proc/self/auxv", O_RDONLY);
-   assert(auxv_fd);
+   assert(auxv_fd != -1);
while (read(auxv_fd, &auxv,
sizeof(Elf64_auxv_t)) == sizeof(Elf64_auxv_t)) {
if (auxv.a_type == AT_HWCAP)
-- 
2.14.1

Re: [dpdk-dev] [PATCH v4 2/2] ethdev: get the supported pools for a port

2017-09-25 Thread Olivier MATZ

Hi,

On Mon, Sep 11, 2017 at 08:48:37PM +0530, Santosh Shukla wrote:
> Now that dpdk supports more than one mempool drivers and
> each mempool driver works best for specific PMD, example:
> - sw ring based mempool for Intel PMD drivers.
> - dpaa2 HW mempool manager for dpaa2 PMD driver.
> - fpa HW mempool manager for Octeontx PMD driver.
> 
> Application would like to know the best mempool handle
> for any port.
> 
> Introducing rte_eth_dev_pools_ops_supported() API,
> which allows PMD driver to advertise
> his supported pools capability to the application.
> 
> Supported pools are categorized in below priority:-
> - Best mempool handle for this port (Highest priority '0')
> - Port supports this mempool handle (Priority '1')
> 
> Signed-off-by: Santosh Shukla 
>
> [...]
>
> +int
> +rte_eth_dev_pools_ops_supported(uint8_t port_id, const char *pool)

pools -> pool?

> +{
> + struct rte_eth_dev *dev;
> + const char *tmp;
> +
> + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
> +
> + if (pool == NULL)
> + return -EINVAL;
> +
> + dev = &rte_eth_devices[port_id];
> +
> + if (*dev->dev_ops->pools_ops_supported == NULL) {
> + tmp = rte_eal_mbuf_default_mempool_ops();
> + if (!strcmp(tmp, pool))
> + return 0;
> + else
> + return -ENOTSUP;

I don't understand why we are comparing with
rte_eal_mbuf_default_mempool_ops().

It means that the result of the function would be influenced
by the parameter given by the user.

I think that a PMD that does not implement ->pools_ops_supported
should always return 1 (mempool is supported).


> + }
> +
> + return (*dev->dev_ops->pools_ops_supported)(dev, pool);
> +}
> diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
> index 0adf3274a..d90029b1e 100644
> --- a/lib/librte_ether/rte_ethdev.h
> +++ b/lib/librte_ether/rte_ethdev.h
> @@ -1425,6 +1425,10 @@ typedef int (*eth_get_dcb_info)(struct rte_eth_dev 
> *dev,
>struct rte_eth_dcb_info *dcb_info);
>  /**< @internal Get dcb information on an Ethernet device */
>  
> +typedef int (*eth_pools_ops_supported_t)(struct rte_eth_dev *dev,
> + const char *pool);
> +/**< @internal Get the supported pools for a port */
> +

The comment should be something like:
Test if a port supports specific mempool ops.

>  /**
>   * @internal A structure containing the functions exported by an Ethernet 
> driver.
>   */
> @@ -1544,6 +1548,8 @@ struct eth_dev_ops {
>  
>   eth_tm_ops_get_t tm_ops_get;
>   /**< Get Traffic Management (TM) operations. */
> + eth_pools_ops_supported_t pools_ops_supported;
> + /**< Get the supported pools for a port */

Same

>  };
>  
>  /**
> @@ -4436,6 +4442,24 @@ int rte_eth_dev_adjust_nb_rx_tx_desc(uint8_t port_id,
>uint16_t *nb_rx_desc,
>uint16_t *nb_tx_desc);
>  
> +
> +/**
> + * Get the supported pools for a port

Same

> + *
> + * @param port_id
> + *   Port identifier of the Ethernet device.
> + * @param [in] pool
> + *   The supported pool handle for this port.

The name of the pool operations to test

> + *   Maximum length of pool handle name is RTE_MEMPOOL_OPS_NAMESIZE.

I don't think we should keep this



Thanks,
Olivier

Re: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and flush

2017-09-25 Thread Zhao1, Wei

Hi, Ferruh



> -Original Message-

> From: Yigit, Ferruh

> Sent: Wednesday, September 20, 2017 6:36 PM

> To: Zhao1, Wei ; dev@dpdk.org

> Subject: Re: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and flush

>

> On 9/15/2017 4:13 AM, Wei Zhao wrote:

> > This feature enable queue regions configuration for RSS in PF/VF, so

> > that different traffic classes or different packet classification

> > types can be separated to different queues in different queue

> > regions.This patch can set queue region range, it include queue number

> > in a region and the index of first queue.

>

> Is following correct:

> So instead of distributing packets to the multiple queues, this will 
> distribute

> packets into queue reqions which may consists of multiple queues.

>

> If so, is there a way to control how packets distributed within same queue

> region to multiple queues?

>



distributed within same queue region is based on a rss algorithm, it is 
implemented by NIC self, software can not control it.





> And is this feature only supported with RSS? Can it be part of RSS

> configuration instead of PMD specific API?

>



Yes, only valid after rss enable.May be in the feature, in rte_flow mode, we 
can combine them together.



> > This patch enable mapping between different priorities (UP) and

>

> User priorities (UP)



oK, change in v4



>

> > different traffic classes.It also enable mapping between a region

> > index and a sepcific flowtype(PCTYPE).It also provide the solution of

> > flush all configuration about queue region the above described.

> >

> > Signed-off-by: Wei Zhao mailto:wei.zh...@intel.com>>

> > ---

> >  drivers/net/i40e/i40e_ethdev.c|  19 +-

> >  drivers/net/i40e/i40e_ethdev.h|  30 ++

> >  drivers/net/i40e/rte_pmd_i40e.c   | 482

> ++

> >  drivers/net/i40e/rte_pmd_i40e.h   |  38 +++

> >  drivers/net/i40e/rte_pmd_i40e_version.map |   1 +

> >  5 files changed, 566 insertions(+), 4 deletions(-)

> >

>

> <...>

>

> > +static int

> > +i40e_vsi_update_queue_region_mapping(struct i40e_hw *hw,

> > +struct i40e_pf *pf)

> > +{

> > +  uint16_t i;

> > +  struct i40e_vsi *vsi = pf->main_vsi;

> > +  uint16_t queue_offset, bsf, tc_index;

> > +  struct i40e_vsi_context ctxt;

> > +  struct i40e_aqc_vsi_properties_data *vsi_info;

> > +  struct i40e_queue_region_info *region_info =

> > +  &pf->queue_region;

> > +  uint32_t ret = -EINVAL;

> > +

> > +  if (!region_info->queue_region_number) {

> > +  PMD_INIT_LOG(ERR, "there is no that region id been 
> > set

> before");

>

> Can you please re-check the log message.



oK, change in v4



>

> > +  return ret;

> > +  }

> > +

> > +  memset(&ctxt, 0, sizeof(struct i40e_vsi_context));

> > +

> > +  /* Update Queue Pairs Mapping for currently enabled UPs */

> > +  ctxt.seid = vsi->seid;

> > +  ctxt.pf_num = hw->pf_id;

> > +  ctxt.vf_num = 0;

> > +  ctxt.uplink_seid = vsi->uplink_seid;

> > +  ctxt.info = vsi->info;

> > +  vsi_info = &ctxt.info;

> > +

> > +  memset(vsi_info->tc_mapping, 0, sizeof(uint16_t) * 8);

> > +  memset(vsi_info->queue_mapping, 0, sizeof(uint16_t) * 16);

> > +

> > +  /**

> > +  * Configure queue region and queue mapping parameters,

> > +  * for enabled queue region, allocate queues to this region.

> > +  */

> > +

> > +  for (i = 0; i < region_info->queue_region_number; i++) {

> > +  tc_index = region_info->region[i].region_id;

> > +  bsf = rte_bsf32(region_info->region[i].queue_num);

> > +  queue_offset = 
> > region_info->region[i].queue_start_index;

> > +  vsi_info->tc_mapping[tc_index] = rte_cpu_to_le_16(

> > +  (queue_offset <<

> I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |

> > +  (bsf <<

> I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT));

> > +  }

> > +

> > +  /* Associate queue number with VSI, Keep vsi->nb_qps unchanged

> */

> > +  if (vsi->type == I40E_VSI_SRIOV) {

> > +  vsi_info->mapping_flags |=

> > +

> rte_cpu_to_le_16(I40E_AQ_VSI_QUE_MAP_NONCONTIG);

> > +  for (i = 0; i < vsi->nb_qps; i++)

> > +  vsi_info->queue_mapping[i] =

> > +  
> > rte_cpu_to_le_16(vsi->base_queue + i);

> > +  } else {

> > +  vsi_info->mapping_flags |=

> > +

> rte_cpu_to_le_16(I40E_AQ_VSI_QUE_MAP_CONTIG);

> > +  vsi_info->queue_mapping[0] = rte_cpu_to_le_16(vsi-

> >base_queue);

> > +  }

> > +  vsi_info->valid_secti

Re: [dpdk-dev] [PATCH] app/testpmd: adds mlockall() to fix pages

2017-09-25 Thread Olivier MATZ

On Thu, Sep 21, 2017 at 02:24:28PM +0200, Eelco Chaudron wrote:
> On 19/09/17 09:28, Olivier MATZ wrote:
> > Hi,
> > 
> > On Thu, Sep 14, 2017 at 09:22:38AM +0200, Eelco Chaudron wrote:
> > > On 13/09/17 11:39, Thomas Monjalon wrote:
> > > > 12/09/2017 15:08, Eelco Chaudron:
> > > > > Call the mlockall() function, to attempt to lock all of its process
> > > > > memory into physical RAM, and preventing the kernel from paging any
> > > > > of its memory to disk.
> > > > > 
> > > > > When using testpmd for performance testing, depending on the code path
> > > > > taken, we see a couple of page faults in a row. These faults effect
> > > > > the overall drop-rate of testpmd. On Linux the mlockall() call will
> > > > > prefault all the pages of testpmd (and the DPDK libraries if linked
> > > > > dynamically), even without LD_BIND_NOW.
> > > > Does it work on FreeBSD?
> > > I do not have a FreeBSD setup, but from the documentation I've read the 
> > > call
> > > is supported by FreeBSD.
> > > If some one has a working setup, please give this patch a quick try.
> > > > Is there any drawback?
> > > > Do we need to add an option for it?
> > > The only drawback I can think of is that with this change memory phyiscal
> > > memory is consumed as pages are pre-loaded.
> > > For testpmd (just loaded not doing anything) this is 2MB vs 35MB of memory
> > > used. I do not think this yields an extra option.
> > One small comment: the call to mlockall() will fail if we don't have
> > the permissions. I guess it's not an issue, but I wonder if we should
> > log it?
> I did not at add a log, as the rte log subsystem is not yet initialized.
> However we could add a printf("WARNING: mlockall() failed with error %s")
> like message. What do you think?

Since it's not critical, maybe NOTICE is better than WARNING.

One more question: what would be the drawback of calling
mlockall() after eal_init()? (rte_log would be initialized)


Thanks,
Olivier

Re: [dpdk-dev] Cannot allocate mempool with --no-huge since DPDK 16.07

2017-09-25 Thread Olivier MATZ

Hi Tom,

On Thu, Sep 21, 2017 at 05:21:08PM +0200, tom.barbe...@ulg.ac.be wrote:
> Hi all,
> 
> I get a EINVAL since DPDK 16.07 when trying to allocate a mempool with 
> rte_pktmbuf_pool_create. This is when I use --no-huge --vdev=eth_ring0. But 
> this happens before accessing the virtual device in any ways.
> 
> Which, btw shows the documentation is wrong as it indicates : 
> EINVAL - cache size provided is too large, or priv_size is not aligned.
> Which is not the problem here (tried cache_size of 0, 64,256 and priv_size is 
> 0). EINVAL can obviously come from elswhere.
> 
> Any idea? Something changed in involved subsystems since 16.07?

I first though your issue was related to this one:
http://dpdk.org/ml/archives/dev/2017-June/067678.html

But maybe it's just a memory issue. Can you try with less mbufs?
  testpmd --no-huge [...] -- --total-num-mbufs=4096 [...]

If that fixes the issue, I'll check if we can change the return
value to something clearer (ENOMEM?).

Thanks,
Olivier

Re: [dpdk-dev] [PATCH] net/af_packet: fix build failure because of unused parameter

2017-09-25 Thread Ferruh Yigit

On 9/25/2017 7:50 AM, Andrew Rybchenko wrote:
> Failure happens on build using:
> gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-4)

Yes, that case is missed. What do you think about following one:

  @@ -561,7 +561,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
  unsigned int blockcnt,
  unsigned int framesize,
  unsigned int framecnt,
  -  unsigned int qdisc_bypass,
  +  unsigned int qdisc_bypass __rte_unused,
  struct pmd_internals **internals,
  struct rte_eth_dev **eth_dev,
  struct rte_kvargs *kvlist)

> 
> Fixes: 0d16c17ae7a4 ("net/af_packet: make qdisc bypass configurable")
> 
> Signed-off-by: Andrew Rybchenko 
> ---
> May be the right solution in fact remove PACKET_QDISC_BYPASS conditional
> completely. If below solution is accepted, feel free to squash it into
> the original patch.

It is a little to late for this, I already sent a pull-request with this
patch. So fix will need to be a separate patch.

<...>

Re: [dpdk-dev] [PATCH] net/af_packet: fix build failure because of unused parameter

2017-09-25 Thread Andrew Rybchenko


On 09/25/2017 11:42 AM, Ferruh Yigit wrote:

On 9/25/2017 7:50 AM, Andrew Rybchenko wrote:

Failure happens on build using:
gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-4)

Yes, that case is missed. What do you think about following one:

   @@ -561,7 +561,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
   unsigned int blockcnt,
   unsigned int framesize,
   unsigned int framecnt,
   -  unsigned int qdisc_bypass,
   +  unsigned int qdisc_bypass __rte_unused,
   struct pmd_internals **internals,
   struct rte_eth_dev **eth_dev,
   struct rte_kvargs *kvlist)


It is OK for me as well, but I've not chosen it since it could be treated as
always unused.


Fixes: 0d16c17ae7a4 ("net/af_packet: make qdisc bypass configurable")

Signed-off-by: Andrew Rybchenko 
---
May be the right solution in fact remove PACKET_QDISC_BYPASS conditional
completely. If below solution is accepted, feel free to squash it into
the original patch.

It is a little to late for this, I already sent a pull-request with this
patch. So fix will need to be a separate patch.


Sad, if it means that the build will be broken in main DPDK repo as well.
Not critical, but a bit inconvenient.


<...>

Re: [dpdk-dev] [PATCH v4] devtools: rework abi checker script

2017-09-25 Thread Olivier MATZ

On Thu, Sep 21, 2017 at 11:40:35AM -0400, Neil Horman wrote:
> On Wed, Sep 20, 2017 at 11:12:53AM +0200, Olivier Matz wrote:
> > The initial version of the script had some limitations:
> > - cannot work on a non-clean workspace
> > - environment variables are not documented
> > - no compilation log in case of failure
> > - return success even it abi is incompatible
> >
> > This patch addresses these issues and rework the code.
> >
> > Signed-off-by: Olivier Matz 
> > ---
> >
> > v3->v4:
> > - clarify logs on incompatible abi
> > - log when an error returned an error
> > - [really] fix the report path
> > - log the output of make config in the proper file
> >
> > v2->v3:
> > - fix when not launched from dpdk root dir
> > - use "-Og -Wno-error" instead of "-O0"
> > - fix typo in commit log
> >
> > v1->v2:
> > - use /usr/bin/env to find bash (which is required)
> > - fix displayed path to html reports
> > - reword help for -f option
> >
> >
> >  devtools/validate-abi.sh | 397 
> > ---
> >  1 file changed, 205 insertions(+), 192 deletions(-)
> >
> This looks better, thank you for the iterations.  One last note: The abi 
> dumper
> utility errors out with error code of 12 if a given object has no exported
> symbols, and I see a few of those.  You may want to consider catching that
> error, logging an appropriate message and skipping the error emit.  That can 
> be
> handled later though, as its a corner case.  I'd go with this patch, and then
> do a incremental improvement later

Unfortunately the error code 12 does not exist on my version of abi-dumper
(debian stable, v0.99.16). I'm currently doing this as a workaround:

cmd $abidump ${i} -o $dst/${1}/${i}.dump -lver ${1} || true
# hack to ignore empty SymbolsInfo section (no public ABI)
if grep -q "'SymbolInfo' => {}," $dst/${1}/${i}.dump 2> /dev/null; then
log "INFO" "${i} has no public ABI, remove dump file"
cmd rm -f $dst/${1}/${i}.dump
fi

I tested with the latest abi-dumper version, and I indeed see
these errors in the logs. It seems we don't go inside the 'if'
above with a recent abi-dumper, and the .dump file is not generated.

I can add a check to display the same additional log
"INFO" "${i} has no public ABI, remove dump file" if abi-dumper
returns 12. Something like this:

ret=0
cmd $abidump ${i} -o $dst/${1}/${i}.dump -lver ${1} || ret=$?
# hack to ignore empty SymbolsInfo section (no public ABI)
if [ ${ret} = 12 ]; then
log "INFO" "${i} has no public ABI"
fi
if grep -q "'SymbolInfo' => {}," $dst/${1}/${i}.dump 2> /dev/null; then
log "INFO" "${i} has no public ABI, remove dump file"
cmd rm -f $dst/${1}/${i}.dump
fi


Olivier

Re: [dpdk-dev] [PATCH v3 1/8] mbuf: support GTP in software packet type parser

2017-09-25 Thread Olivier MATZ

On Sat, Sep 23, 2017 at 06:35:07AM +0800, Beilei Xing wrote:
> Add support of GTP-C and GTP-U tunnels in rte_net_get_ptype().
> 
> Signed-off-by: Beilei Xing 

Acked-by: Olivier Matz

Re: [dpdk-dev] [PATCH v3 2/2] app/testpmd: add API for configuration of queue region

2017-09-25 Thread Zhao1, Wei

Hi, Ferruh

> -Original Message-
> From: Yigit, Ferruh
> Sent: Wednesday, September 20, 2017 6:46 PM
> To: Zhao1, Wei ; dev@dpdk.org; Wu, Jingjing
> 
> Subject: Re: [dpdk-dev] [PATCH v3 2/2] app/testpmd: add API for
> configuration of queue region
> 
> On 9/15/2017 4:13 AM, Wei Zhao wrote:
> > This patch add a API configuration of queue region in rss.
> > It can parse the parameters of region index, queue number, queue start
> > index, user priority, traffic classes and so on.
> > According to commands from command line, it will call i40e private API
> > and start the process of set or flush queue region configure. As this
> > feature is specific for i40e, so private API will be used.
> >
> > Signed-off-by: Wei Zhao 
> > ---
> >  app/test-pmd/cmdline.c | 328
> > +
> 
> Testpmd documentation also needs to be updated.

Do you mean the following doc or others?
dpdk\doc\guides\testpmd_app_ug.rst


> 
> >  1 file changed, 328 insertions(+)
> >
> > diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index
> > 0144191..060fcb1 100644
> > --- a/app/test-pmd/cmdline.c
> > +++ b/app/test-pmd/cmdline.c
> > @@ -637,6 +637,21 @@ static void cmd_help_long_parsed(void
> *parsed_result,
> > "ptype mapping update (port_id) (hw_ptype)
> (sw_ptype)\n"
> > "Update a ptype mapping item on a port\n\n"
> >
> > +   "queue-region set port (port_id) region_id (value) "
> > +   "queue_start_index (value) queue_num (value)\n"
> > +   "Set a queue region on a port\n\n"
> > +
> > +   "queue-region set (pf|vf) port (port_id) region_id
> (value) "
> > +   "flowtype (value)\n"
> > +   "Set a flowtype region index on a port\n\n"
> > +
> > +   "queue-region set port (port_id) UP (value)
> region_id (value)\n"
> > +   "Set the mapping of User Priority to "
> > +   "queue region on a port\n\n"
> > +
> > +   "queue-region flush (on|off) port (port_id)\n"
> > +   "flush all queue region related configuration\n\n"
> 
> I keep doing same comment but I will do it again...
> 
> Each patch adding a new feature looking from its own context and adding a
> new root level command and this is making overall testpmd confusing.
> 
> Since this is to set an option of the port, what do you think making this
> command part of existing commands, like:
> "port config #P queue-region "
> OR
> "set port #P queue-region ..." ?

What you said is very meaningful, but other feature liake ptype mapping use the 
same mode  and so on.
maybe we should do a whole work to make CLI command style consistent.


> 
> > +
> > , list_pkt_forwarding_modes()
> > );
> > }
> > @@ -8224,6 +8239,315 @@ cmdline_parse_inst_t cmd_syn_filter = {
> > NULL,
> > },
> >  };
> > +/* *** queue region set *** */
> > +struct cmd_queue_region_result {
> > +   cmdline_fixed_string_t cmd;
> > +   cmdline_fixed_string_t set;
> > +   cmdline_fixed_string_t port;
> > +   uint8_t  port_id;
> > +   cmdline_fixed_string_t region;
> > +   uint8_t  region_id;
> > +   cmdline_fixed_string_t queue_start_index;
> > +   uint8_t  queue_id;
> > +   cmdline_fixed_string_t queue_num;
> > +   uint8_t  queue_num_value;
> > +};
> > +
> > +static void
> > +cmd_queue_region_parsed(void *parsed_result,
> > +   __attribute__((unused)) struct cmdline *cl,
> > +   __attribute__((unused)) void *data) {
> > +   struct cmd_queue_region_result *res = parsed_result;
> > +   struct rte_i40e_rss_region_conf region_conf;
> > +   int ret = 0;
> > +
> > +   memset(®ion_conf, 0, sizeof(region_conf));
> > +   region_conf.op = RTE_PMD_I40E_QUEUE_REGION_SET;
> > +   region_conf.region_id = res->region_id;
> > +   region_conf.queue_num = res->queue_num_value;
> > +   region_conf.queue_start_index = res->queue_id;
> > +
> > +   ret = rte_pmd_i40e_queue_region_conf(res->port_id,
> ®ion_conf);
> 
> It is not safe to directly call PMD specific APIs from testpmd? What if that
> PMD is not enabled? There are samples how to do this, can you please check
> them?
> 

Good idea, I will add check code in PMD code to make sure RSS is enable.
If not, code will return fail.


> <...>

Re: [dpdk-dev] [dpdk-stable] [PATCH] net/vmxnet3: fix dereference before null check

2017-09-25 Thread Jastrzebski, MichalX K

> -Original Message-
> From: Yigit, Ferruh
> Sent: Friday, September 22, 2017 6:39 PM
> To: Jastrzebski, MichalX K ;
> skh...@vmware.com
> Cc: dev@dpdk.org; Jain, Deepak K ; Kulasek,
> TomaszX ; yongw...@vmware.com;
> sta...@dpdk.org
> Subject: Re: [dpdk-stable] [PATCH] net/vmxnet3: fix dereference before null
> check
> 
> On 9/22/2017 1:39 PM, Michal Jastrzebski wrote:
> > From: Tomasz Kulasek 
> >
> > Coverity error:
> >
> > check_after_deref: Null-checking rq suggests that it may be null, but it
> >has already been dereferenced on all paths leading to
> >the check.
> >
> > This patch moves NULL checking of "rq" at the very beginning of the path
> > before any dereference.
> >
> > Coverity issue: 143468
> > Fixes: 5aecdc17a97d ("vmxnet3: fix stop/restart")
> > Cc: yongw...@vmware.com
> > Cc: sta...@dpdk.org
> >
> > Signed-off-by: Tomasz Kulasek 
> > ---
> >  drivers/net/vmxnet3/vmxnet3_rxtx.c | 17 -
> >  1 file changed, 8 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c
> b/drivers/net/vmxnet3/vmxnet3_rxtx.c
> > index d9cf437..4fcceb4 100644
> > --- a/drivers/net/vmxnet3/vmxnet3_rxtx.c
> > +++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c
> > @@ -259,17 +259,16 @@
> >  {
> > int i;
> > vmxnet3_rx_queue_t *rq = rxq;
> > -   struct vmxnet3_hw *hw = rq->hw;
> > struct vmxnet3_cmd_ring *ring0, *ring1;
> > struct vmxnet3_comp_ring *comp_ring;
> > -   struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
> > int size;
> >
> > -   if (rq != NULL) {
> 
> vmxnet3_dev_rx_queue_reset() is static function and only called from
> single function [1], which already checks if the parameter is NULL.
> 
> What do you think just removing this check and keep rest same?
> 
> [1]
> vmxnet3_dev_clear_queues()
> 

Hi Ferruh,
Ok, we can try to do this as You suggest - we will see 
what coverity tells about that in the next build.
As long as vmxnet3_dev_clear_queues() checks if the parameter is NULL,
We can also classify this issue as False/Positive 
if our solution in this iteration doesn't help.

Michal.

> > -   /* Release both the cmd_rings mbufs */
> > -   for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
> > -   vmxnet3_rx_cmd_ring_release_mbufs(&rq-
> >cmd_ring[i]);
> > -   }
> > +   if (rq == NULL)
> > +   return;
> > +
> > +   /* Release both the cmd_rings mbufs */
> > +   for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
> > +   vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
> >
> > ring0 = &rq->cmd_ring[0];
> > ring1 = &rq->cmd_ring[1];
> > @@ -287,8 +286,8 @@
> >
> > size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
> > size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
> > -   if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
> > -   size += rq->data_desc_size * data_ring->size;
> > +   if (VMXNET3_VERSION_GE_3(rq->hw) && rq->data_desc_size)
> > +   size += rq->data_desc_size * rq->data_ring.size;
> >
> > memset(ring0->base, 0, size);
> >  }
> >

Re: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and flush

2017-09-25 Thread Ferruh Yigit

On 9/25/2017 8:40 AM, Zhao1, Wei wrote:
> Hi, Ferruh
> 
>> -Original Message-
>> From: Yigit, Ferruh
>> Sent: Wednesday, September 20, 2017 6:36 PM
>> To: Zhao1, Wei ; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and
> flush
>>
>> On 9/15/2017 4:13 AM, Wei Zhao wrote:
>> > This feature enable queue regions configuration for RSS in PF/VF, so
>> > that different traffic classes or different packet classification
>> > types can be separated to different queues in different queue
>> > regions.This patch can set queue region range, it include queue number
>> > in a region and the index of first queue.
>>
>> Is following correct:
>> So instead of distributing packets to the multiple queues, this will
> distribute
>> packets into queue reqions which may consists of multiple queues.
>>
>> If so, is there a way to control how packets distributed within same queue
>> region to multiple queues?
>>
> 
> distributed within same queue region is based on a rss algorithm, it is
> implemented by NIC self, software can not control it.

I was thinking RSS used to select queue region, but no first queue
region selected, later RSS used to select queue within the region.
Thanks for clarification.

<...>
   
> sizeof(vsi->info.tc_mapping));
>> > +  (void)rte_memcpy(&vsi->info.queue_mapping,
>> > +  &ctxt.info.queue_mapping,
>> > +  sizeof(vsi->info.queue_mapping));
>>
>> Please keep line allignment same with above line.
> 
> oK, change in v4, but can not the same.
> (void)rte_memcpy(&vsi->info.queue_mapping, &ctxt.info.queue_mapping,
>        
> sizeof(vsi->info.queue_mapping));
> will over 80 maxnumber.

Someting like following, new lines aligned:

(void)rte_memcpy(&vsi->info.queue_mapping,
&ctxt.info.queue_mapping,
sizeof(vsi->info.queue_mapping));

<...>

>> > +  if ((i == info->queue_region_number) &&
>> > +  (i <= I40E_REGION_MAX_INDEX)) {
>>
>>
>> Please use one more level indentaion here, and pharantesis looks extra can
>> you please double check?
> 
> You mean,you want the following moede ?
>     if (i == info->queue_region_number) {
>   if(i <= I40E_REGION_MAX_INDEX) {
>   ...
>   }
>}
> 

No, continuation of the "if" should be easy to recognized from body of
the "if", something like:

if ((i == info->queue_region_number) &&
(i <= I40E_REGION_MAX_INDEX)) {
info->region[i].region_id = conf_ptr->region_id;
info->region[i].queue_num = conf_ptr->queue_num;

<...>


>> > +static int
>> > +i40e_set_region_flowtype_pf(struct i40e_hw *hw,
>> > +  struct i40e_pf *pf, struct
> rte_i40e_rss_region_conf
>> *conf_ptr)
>>
>> What do you think starting all functions, even they are static, with
>> "i40e_queue_region_" prefix? 
>     ret = i40e_set_queue_region(pf, conf_ptr);
>     break;
>     case RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET:
>     ret = i40e_set_region_flowtype_pf(hw,
> pf, conf_ptr);
>     break;
>     case RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET:
>     ret = -EINVAL;
>     break;
>     case RTE_PMD_I40E_UP_REGION_SET:
>     ret = i40e_set_up_region(pf, conf_ptr);
> 
> I have use the format ofi40e_set_  as prefix?
> Because many set is not related directly to queue region.

When looking to this patch what functions are doing is clear, but it
also should be clear when looking the source code some time later what
that function is about.

That is why I believe a namespace is useful, like
"i40e_queue_region_xx", so you can immediately say this function is
about "RSS queue region" feature.

This function is "rte_pmd_i40e_queue_region_conf", it is applying
provided queue region configuration based of region op type, so I
assumed all these functions are directly related to the fature.

If you believe function name will be wrong with that prefix, of course
just don't use it, but please try to stick with a name space, that will
make easy to understand these features in all source code.

<...>


>> > +
>> > +  memset(&hw->local_dcbx_config, 0,
>> > +  sizeof(struct i40e_dcbx_config));
>>
>> Wrong indentation.

Reminder of this one incase missed.

<...>

>> > +int rte_pmd_i40e_queue_region_conf(uint8_t port,
>> > +  struct
> rte_i40e_rss_region_conf *conf_ptr) {
>> > +  struct rte_eth_dev *dev = &rte_eth_devices[port];
>>
>> you need to verify port_id, since this is public API now. Please check
> other
>> API

Re: [dpdk-dev] [PATCH] net/af_packet: fix build failure because of unused parameter

2017-09-25 Thread Bruce Richardson

On Mon, Sep 25, 2017 at 09:42:56AM +0100, Ferruh Yigit wrote:
> On 9/25/2017 7:50 AM, Andrew Rybchenko wrote:
> > Failure happens on build using:
> > gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-4)
> 
> Yes, that case is missed. What do you think about following one:
> 
>   @@ -561,7 +561,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
>   unsigned int blockcnt,
>   unsigned int framesize,
>   unsigned int framecnt,
>   -  unsigned int qdisc_bypass,
>   +  unsigned int qdisc_bypass __rte_unused,
>   struct pmd_internals **internals,
>   struct rte_eth_dev **eth_dev,
>   struct rte_kvargs *kvlist)
> 
> > 
> > Fixes: 0d16c17ae7a4 ("net/af_packet: make qdisc bypass configurable")
> > 
> > Signed-off-by: Andrew Rybchenko 
> > ---
> > May be the right solution in fact remove PACKET_QDISC_BYPASS conditional
> > completely. If below solution is accepted, feel free to squash it into
> > the original patch.
> 
> It is a little to late for this, I already sent a pull-request with this
> patch. So fix will need to be a separate patch.
> 
Pull request hasn't actually been pulled yet, so you should be able to
supercede it by a later one, right?

/Bruce

Re: [dpdk-dev] [PATCH v3 2/2] app/testpmd: add API for configuration of queue region

2017-09-25 Thread Ferruh Yigit

On 9/25/2017 10:25 AM, Zhao1, Wei wrote:
> Hi, Ferruh
> 
>> -Original Message-
>> From: Yigit, Ferruh
>> Sent: Wednesday, September 20, 2017 6:46 PM
>> To: Zhao1, Wei ; dev@dpdk.org; Wu, Jingjing
>> 
>> Subject: Re: [dpdk-dev] [PATCH v3 2/2] app/testpmd: add API for
>> configuration of queue region
>>
>> On 9/15/2017 4:13 AM, Wei Zhao wrote:
>>> This patch add a API configuration of queue region in rss.
>>> It can parse the parameters of region index, queue number, queue start
>>> index, user priority, traffic classes and so on.
>>> According to commands from command line, it will call i40e private API
>>> and start the process of set or flush queue region configure. As this
>>> feature is specific for i40e, so private API will be used.
>>>
>>> Signed-off-by: Wei Zhao 
>>> ---
>>>  app/test-pmd/cmdline.c | 328
>>> +
>>
>> Testpmd documentation also needs to be updated.
> 
> Do you mean the following doc or others?
> dpdk\doc\guides\testpmd_app_ug.rst

Yes this one, thanks.

> 
> 
>>
>>>  1 file changed, 328 insertions(+)
>>>
>>> diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index
>>> 0144191..060fcb1 100644
>>> --- a/app/test-pmd/cmdline.c
>>> +++ b/app/test-pmd/cmdline.c
>>> @@ -637,6 +637,21 @@ static void cmd_help_long_parsed(void
>> *parsed_result,
>>> "ptype mapping update (port_id) (hw_ptype)
>> (sw_ptype)\n"
>>> "Update a ptype mapping item on a port\n\n"
>>>
>>> +   "queue-region set port (port_id) region_id (value) "
>>> +   "queue_start_index (value) queue_num (value)\n"
>>> +   "Set a queue region on a port\n\n"
>>> +
>>> +   "queue-region set (pf|vf) port (port_id) region_id
>> (value) "
>>> +   "flowtype (value)\n"
>>> +   "Set a flowtype region index on a port\n\n"
>>> +
>>> +   "queue-region set port (port_id) UP (value)
>> region_id (value)\n"
>>> +   "Set the mapping of User Priority to "
>>> +   "queue region on a port\n\n"
>>> +
>>> +   "queue-region flush (on|off) port (port_id)\n"
>>> +   "flush all queue region related configuration\n\n"
>>
>> I keep doing same comment but I will do it again...
>>
>> Each patch adding a new feature looking from its own context and adding a
>> new root level command and this is making overall testpmd confusing.
>>
>> Since this is to set an option of the port, what do you think making this
>> command part of existing commands, like:
>> "port config #P queue-region "
>> OR
>> "set port #P queue-region ..." ?
> 
> What you said is very meaningful, but other feature liake ptype mapping use 
> the same mode  and so on.
> maybe we should do a whole work to make CLI command style consistent.

Yes ptype does it, is seems it is one of the missed ones. Although we
can do a whole work for CLI commands, meanwhile I think new ones can be
added properly.

This may be good opportunity to remember broken window theory [1] :)

[1]
https://blog.codinghorror.com/the-broken-window-theory/

<...>

Re: [dpdk-dev] [PATCH v3 3/6] net/i40e: implement dynamic mapping of sw flow types to hw pctypes

2017-09-25 Thread Xing, Beilei

> -Original Message-
> From: Rybalchenko, Kirill
> Sent: Wednesday, September 20, 2017 10:33 PM
> To: dev@dpdk.org
> Cc: Rybalchenko, Kirill ; Chilikin, Andrey
> ; Xing, Beilei ; Wu,
> Jingjing 
> Subject: [PATCH v3 3/6] net/i40e: implement dynamic mapping of sw flow
> types to hw pctypes
> 
> Implement dynamic mapping of software flow types to hardware pctypes.
> This allows to add new flow types and pctypes for DDP without changing API
> of the driver. The mapping table is located in private data area for 
> particular
> network adapter and can be individually modified with set of appropriate
> functions.
> 
> v2:
> Re-arrange patchset to avoid compillation errors.
> Remove usage of statically defined flow types and pctypes.
> 
> v3:
> Changed prototypes of some static functions.
> Fixed bugs in i40e_pctype_to_flowtype and i40e_flowtype_to_pctype
> functions.
> Various small modifications after reviewing.
> 
> Signed-off-by: Kirill Rybalchenko 
> ---
>  drivers/net/i40e/i40e_ethdev.c| 343 
> --
>  drivers/net/i40e/i40e_ethdev.h|  17 +-
>  drivers/net/i40e/i40e_ethdev_vf.c |  16 +-
>  drivers/net/i40e/i40e_fdir.c  |  54 +++---
>  drivers/net/i40e/i40e_flow.c  |   5 +-
>  drivers/net/i40e/i40e_rxtx.c  |  57 +++
>  drivers/net/i40e/i40e_rxtx.h  |   1 +
>  7 files changed, 208 insertions(+), 285 deletions(-)
> 
> diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
> index 18eac07..e396f73 100644
> --- a/drivers/net/i40e/i40e_ethdev.c
> +++ b/drivers/net/i40e/i40e_ethdev.c



> 
>  static int
> -i40e_hash_global_config_check(struct rte_eth_hash_global_conf *g_cfg)
> +i40e_hash_global_config_check(struct rte_eth_hash_global_conf *g_cfg,
> +   struct i40e_adapter *adapter)

how about chaning the parameter order?
i40e_hash_global_config_check(struct i40e_adapter *adapter, struct 
rte_eth_hash_global_conf *g_cfg)?

>  {
>   uint32_t i;
> - uint32_t mask0, i40e_mask = I40E_FLOW_TYPES;
> + uint32_t mask0, i40e_mask = adapter->flow_types_mask;
> 
>   if (g_cfg->hash_func != RTE_ETH_HASH_FUNCTION_TOEPLITZ &&
>   g_cfg->hash_func !=
> RTE_ETH_HASH_FUNCTION_SIMPLE_XOR && @@ -7899,64 +7839,32 @@
> static int  i40e_set_hash_filter_global_config(struct i40e_hw *hw,
>  struct rte_eth_hash_global_conf *g_cfg)  {
> + struct i40e_adapter *adapter = (struct i40e_adapter *)hw->back;
>   int ret;
> - uint16_t i;
> + uint16_t i, j;
>   uint32_t reg;
> - uint32_t mask0 = g_cfg->valid_bit_mask[0];
> - enum i40e_filter_pctype pctype;
> + /*
> +  * We work only with lowest 32 bits which is not correct, but to work
> +  * properly the valid_bit_mask size should be increased up to 64 bits
> +  * and this will brake ABI. This modification will be done in next
> release
> +  */
> + uint32_t mask0 = g_cfg->valid_bit_mask[0] &
> +(uint32_t)adapter->flow_types_mask;
> 
>   /* Check the input parameters */
> - ret = i40e_hash_global_config_check(g_cfg);
> + ret = i40e_hash_global_config_check(g_cfg, adapter);
>   if (ret < 0)
>   return ret;
> 
> - for (i = 0; mask0 && i < UINT32_BIT; i++) {
> - if (!(mask0 & (1UL << i)))
> - continue;
> - mask0 &= ~(1UL << i);
> - /* if flowtype is invalid, continue */
> - if (!I40E_VALID_FLOW(i))
> - continue;
> - pctype = i40e_flowtype_to_pctype(i);
> - reg = (g_cfg->sym_hash_enable_mask[0] & (1UL << i)) ?
> - I40E_GLQF_HSYM_SYMH_ENA_MASK : 0;
> - if (hw->mac.type == I40E_MAC_X722) {
> - if (pctype == I40E_FILTER_PCTYPE_NONF_IPV4_UDP)
> {



> -   reg);
> - } else {
> - i40e_write_rx_ctl(hw,
> I40E_GLQF_HSYM(pctype),
> -   reg);
> + for (i = RTE_ETH_FLOW_UNKNOWN + 1; i < UINT32_BIT; i++) {

Should it be like following?
for (i = RTE_ETH_FLOW_UNKNOWN + 1; mask0 && i < UINT32_BIT; i++) {

> + if (mask0 & (1UL << i)) {
> + reg = (g_cfg->sym_hash_enable_mask[0] & (1UL <<
> i)) ?
> +
>   I40E_GLQF_HSYM_SYMH_ENA_MASK : 0;
> +
> + for (j = I40E_FILTER_PCTYPE_INVALID + 1;
> +  j < I40E_FILTER_PCTYPE_MAX; j++) {
> + if (adapter->pctypes_tbl[i] & (1ULL << j))
> + i40e_write_rx_ctl(hw,
> I40E_GLQF_HSYM(j), reg);
>   }
> - } else {
> - i40e_write_rx_ctl(hw, I40E_GLQF_HSYM(pctype),
> reg);
>   }
>   }
> 
> @@ -8581,13 +8489,10 @@ i40e_filter_input_set_init(struct i40e_pf *pf)
> 
>   for (pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
>pctype <= I40E_FILTER_PCTYPE_L

Re: [dpdk-dev] [PATCH v1 10/10] net/i40e: set register for no drop

2017-09-25 Thread Hunt, David




On 25/9/2017 3:50 AM, Wu, Jingjing wrote:



-Original Message-
From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of David Hunt
Sent: Saturday, August 26, 2017 12:02 AM
To: dev@dpdk.org
Cc: Hunt, David ; Marjanovic, Nemanja
; Sexton, Rory 
Subject: [dpdk-dev] [PATCH v1 10/10] net/i40e: set register for no drop

See the XL710 controller datasheet for more information on this register

Signed-off-by: Nemanja Marjanovic 
Signed-off-by: Rory Sexton 
Signed-off-by: David Hunt 
---
  drivers/net/i40e/i40e_ethdev.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index d9806fc..24b713e 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -1156,7 +1156,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
 * in firmware in the future.
 */
i40e_configure_registers(hw);
-
+   I40E_WRITE_REG(hw, I40E_PRTDCB_TC2PFC, 0xff);

What is the relationship with VM power manager?

And about no-drop setting, it is the responsibility of flow control, please 
check http://www.dpdk.org/dev/patchwork/patch/19449/


Thanks
Jingjing


Hi Jingjing,
   Yes, we've removed this now. It's left to flow control. Will be 
removed from next patch set.

Rgds,
Dave.

Re: [dpdk-dev] [PATCH] net/af_packet: fix build failure because of unused parameter

2017-09-25 Thread Ferruh Yigit

On 9/25/2017 10:40 AM, Bruce Richardson wrote:
> On Mon, Sep 25, 2017 at 09:42:56AM +0100, Ferruh Yigit wrote:
>> On 9/25/2017 7:50 AM, Andrew Rybchenko wrote:
>>> Failure happens on build using:
>>> gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-4)
>>
>> Yes, that case is missed. What do you think about following one:
>>
>>   @@ -561,7 +561,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
>>   unsigned int blockcnt,
>>   unsigned int framesize,
>>   unsigned int framecnt,
>>   -  unsigned int qdisc_bypass,
>>   +  unsigned int qdisc_bypass __rte_unused,
>>   struct pmd_internals **internals,
>>   struct rte_eth_dev **eth_dev,
>>   struct rte_kvargs *kvlist)
>>
>>>
>>> Fixes: 0d16c17ae7a4 ("net/af_packet: make qdisc bypass configurable")
>>>
>>> Signed-off-by: Andrew Rybchenko 
>>> ---
>>> May be the right solution in fact remove PACKET_QDISC_BYPASS conditional
>>> completely. If below solution is accepted, feel free to squash it into
>>> the original patch.
>>
>> It is a little to late for this, I already sent a pull-request with this
>> patch. So fix will need to be a separate patch.
>>
> Pull request hasn't actually been pulled yet, so you should be able to
> supercede it by a later one, right?

Technically yes, and easy to do, but it will be confusing. I was taking
pull-request as code freeze in sub-tree.

If there are multiple pull-request for a tree, how can one be sure which
ones has been pulled and what to expect in main repo, and verify what
has been merged?

As above said, if Thomas thinks this is OK, I can send another pull request?

Re: [dpdk-dev] [PATCH v1 01/10] net/i40e: add API to convert VF Id to PF Id

2017-09-25 Thread Hunt, David



On 25/9/2017 3:43 AM, Wu, Jingjing wrote:



-Original Message-
From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of David Hunt
Sent: Saturday, August 26, 2017 12:02 AM
To: dev@dpdk.org
Cc: Hunt, David ; Marjanovic, Nemanja
; Sexton, Rory 
Subject: [dpdk-dev] [PATCH v1 01/10] net/i40e: add API to convert VF Id to PF Id

Need a way to convert a vf id to a pf id on the host so as to query the pf for
relevant statistics which are used for the frequency changes in the
vm_power_manager app. Used when profiles are passed down from the guest
to the host, allowing the host to map the vfs to pfs.

Signed-off-by: Nemanja Marjanovic 
Signed-off-by: Rory Sexton 
Signed-off-by: David Hunt 
---
  drivers/net/i40e/i40e_ethdev.c |  1 +
  drivers/net/i40e/i40e_rxtx.c   | 27 +++
  drivers/net/i40e/i40e_rxtx.h   |  1 +
  lib/librte_ether/rte_ethdev.h  | 11 +++
  4 files changed, 40 insertions(+)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 5f26e24..8fb67d8 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -445,6 +445,7 @@ static const struct rte_pci_id pci_id_i40e_map[] = {  };

  static const struct eth_dev_ops i40e_eth_dev_ops = {
+   .vfid_to_pfid = i40e_vf_mac_to_vsi,
.dev_configure= i40e_dev_configure,
.dev_start= i40e_dev_start,
.dev_stop = i40e_dev_stop,
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index
d42c23c..1379d5e 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -806,6 +806,33 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf
**rx_pkts, uint16_t nb_pkts)
return nb_rx;
  }

+uint64_t
+i40e_vf_mac_to_vsi(struct rte_eth_dev *dev, uint64_t vfid) {
+   struct ether_addr *vf_mac_addr = (struct ether_addr *)&vfid;
+   struct ether_addr *mac;
+   struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data-

dev_private);

+   int vsi_id = 0, i, x;
+   struct i40e_pf_vf *vf;
+   uint16_t vf_num = pf->vf_num;
+
+   for (x = 0; x < vf_num; x++) {
+   int mac_addr_matches = 1;
+   vf = &pf->vfs[x];
+   mac = &vf->mac_addr;
+
+   for (i = 0; i < ETHER_ADDR_LEN; i++) {
+   if (mac->addr_bytes[i] != vf_mac_addr->addr_bytes[i])
+   mac_addr_matches = 0;
+   }
+   if (mac_addr_matches) {
+   vsi_id = vf->vsi->vsi_id;
+   return vsi_id;
+   }

vsi and vsi_id is not a common concept in API level.


Agreed. We're removing from the API level in next patch set version.


How about just return vf_id and rename the function like 
i40e_query_vf_id_by_mac?
In i40e driver, we can get the vsi_id by vf_id.


The next revision will just return the vf_id, and we'll rename the 
function.



+   }
+
+   return -1;

It's an ops to API, you need to use error code but not -1.


Will fix.

Thanks,
Dave,

Re: [dpdk-dev] [PATCH v1 02/10] net/i40e: add API to get received packet count

2017-09-25 Thread Hunt, David




On 25/9/2017 3:47 AM, Wu, Jingjing wrote:



-Original Message-
From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of David Hunt
Sent: Saturday, August 26, 2017 12:02 AM
To: dev@dpdk.org
Cc: Hunt, David ; Marjanovic, Nemanja
; Sexton, Rory 
Subject: [dpdk-dev] [PATCH v1 02/10] net/i40e: add API to get received packet
count

Signed-off-by: Nemanja Marjanovic 
Signed-off-by: Rory Sexton 
Signed-off-by: David Hunt 
---
  drivers/net/i40e/i40e_ethdev.c |  1 +
  drivers/net/i40e/i40e_rxtx.c   | 10 ++
  drivers/net/i40e/i40e_rxtx.h   |  1 +
  lib/librte_ether/rte_ethdev.h  | 19 +++
  4 files changed, 31 insertions(+)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 8fb67d8..d9806fc 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -446,6 +446,7 @@ static const struct rte_pci_id pci_id_i40e_map[] = {

  static const struct eth_dev_ops i40e_eth_dev_ops = {
.vfid_to_pfid = i40e_vf_mac_to_vsi,
+   .read_pf_stats= i40e_vsi_stats_read,
.dev_configure= i40e_dev_configure,
.dev_start= i40e_dev_start,
.dev_stop = i40e_dev_stop,
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index
1379d5e..b7b64d2 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -833,6 +833,16 @@ i40e_vf_mac_to_vsi(struct rte_eth_dev *dev, uint64_t
vfid) {
return -1;
  }

+uint64_t
+i40e_vsi_stats_read(struct rte_eth_dev *dev, uint8_t vsi_id) {
+   struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data-

dev_private);

+
+   uint64_t glv_uprch = I40E_READ_REG(hw,
+   I40E_GLV_UPRCH(vsi_id)) & 0x;
+   uint64_t glv_uprcl = I40E_READ_REG(hw, I40E_GLV_UPRCL(vsi_id));
+   return glv_uprcl + (glv_uprch << 32);
+}

You can change the input to vf_id, and then get the vsi_id internally.
Anyway, the counter registers are cleared when read. It will impact the
Ops like stats_get/ stats_reset.

We have func called i40e_update_vsi_stats which record the packets count. I 
think you can use it.


Thanks
Jingjing


We've changed to using the existing stats functions in the next revision 
of the patch. Simplifies things a bit.


Thanks,
Dave

Re: [dpdk-dev] [dpdk-stable] [PATCH] net/vmxnet3: fix dereference before null check

2017-09-25 Thread Ferruh Yigit

On 9/25/2017 10:27 AM, Jastrzebski, MichalX K wrote:
>> -Original Message-
>> From: Yigit, Ferruh
>> Sent: Friday, September 22, 2017 6:39 PM
>> To: Jastrzebski, MichalX K ;
>> skh...@vmware.com
>> Cc: dev@dpdk.org; Jain, Deepak K ; Kulasek,
>> TomaszX ; yongw...@vmware.com;
>> sta...@dpdk.org
>> Subject: Re: [dpdk-stable] [PATCH] net/vmxnet3: fix dereference before null
>> check
>>
>> On 9/22/2017 1:39 PM, Michal Jastrzebski wrote:
>>> From: Tomasz Kulasek 
>>>
>>> Coverity error:
>>>
>>> check_after_deref: Null-checking rq suggests that it may be null, but it
>>>has already been dereferenced on all paths leading to
>>>the check.
>>>
>>> This patch moves NULL checking of "rq" at the very beginning of the path
>>> before any dereference.
>>>
>>> Coverity issue: 143468
>>> Fixes: 5aecdc17a97d ("vmxnet3: fix stop/restart")
>>> Cc: yongw...@vmware.com
>>> Cc: sta...@dpdk.org
>>>
>>> Signed-off-by: Tomasz Kulasek 
>>> ---
>>>  drivers/net/vmxnet3/vmxnet3_rxtx.c | 17 -
>>>  1 file changed, 8 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c
>> b/drivers/net/vmxnet3/vmxnet3_rxtx.c
>>> index d9cf437..4fcceb4 100644
>>> --- a/drivers/net/vmxnet3/vmxnet3_rxtx.c
>>> +++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c
>>> @@ -259,17 +259,16 @@
>>>  {
>>> int i;
>>> vmxnet3_rx_queue_t *rq = rxq;
>>> -   struct vmxnet3_hw *hw = rq->hw;
>>> struct vmxnet3_cmd_ring *ring0, *ring1;
>>> struct vmxnet3_comp_ring *comp_ring;
>>> -   struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
>>> int size;
>>>
>>> -   if (rq != NULL) {
>>
>> vmxnet3_dev_rx_queue_reset() is static function and only called from
>> single function [1], which already checks if the parameter is NULL.
>>
>> What do you think just removing this check and keep rest same?
>>
>> [1]
>> vmxnet3_dev_clear_queues()
>>
> 
> Hi Ferruh,
> Ok, we can try to do this as You suggest - we will see 
> what coverity tells about that in the next build.

What I understand is, coverity complains about dereferencing variable
(rq) before NULL check, so removing NULL check should fix the warning.

> As long as vmxnet3_dev_clear_queues() checks if the parameter is NULL,
> We can also classify this issue as False/Positive 
> if our solution in this iteration doesn't help.

Agreed, thanks.

> 
> Michal.
> 

<...>

Re: [dpdk-dev] [PATCH] net/af_packet: fix build failure because of unused parameter

2017-09-25 Thread Ferruh Yigit

On 9/25/2017 10:53 AM, Ferruh Yigit wrote:
> On 9/25/2017 10:40 AM, Bruce Richardson wrote:
>> On Mon, Sep 25, 2017 at 09:42:56AM +0100, Ferruh Yigit wrote:
>>> On 9/25/2017 7:50 AM, Andrew Rybchenko wrote:
 Failure happens on build using:
 gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-4)
>>>
>>> Yes, that case is missed. What do you think about following one:
>>>
>>>   @@ -561,7 +561,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
>>>   unsigned int blockcnt,
>>>   unsigned int framesize,
>>>   unsigned int framecnt,
>>>   -  unsigned int qdisc_bypass,
>>>   +  unsigned int qdisc_bypass __rte_unused,
>>>   struct pmd_internals **internals,
>>>   struct rte_eth_dev **eth_dev,
>>>   struct rte_kvargs *kvlist)
>>>

 Fixes: 0d16c17ae7a4 ("net/af_packet: make qdisc bypass configurable")

 Signed-off-by: Andrew Rybchenko 
 ---
 May be the right solution in fact remove PACKET_QDISC_BYPASS conditional
 completely. If below solution is accepted, feel free to squash it into
 the original patch.
>>>
>>> It is a little to late for this, I already sent a pull-request with this
>>> patch. So fix will need to be a separate patch.
>>>
>> Pull request hasn't actually been pulled yet, so you should be able to
>> supercede it by a later one, right?
> 
> Technically yes, and easy to do, but it will be confusing. I was taking
> pull-request as code freeze in sub-tree.
> 
> If there are multiple pull-request for a tree, how can one be sure which
> ones has been pulled and what to expect in main repo, and verify what
> has been merged?
> 
> As above said, if Thomas thinks this is OK, I can send another pull request?

What about Thomas doing the squash while applying to the main repo, so
that main repo won't be broken and there won't be multiple pull requests
around?

Re: [dpdk-dev] [PATCH v3] app/testpmd: enable the heavyweight mode TCP/IPv4 GRO

2017-09-25 Thread Hu, Jiayu

Hi Lei,

> -Original Message-
> From: Yao, Lei A
> Sent: Wednesday, September 20, 2017 3:00 PM
> To: Hu, Jiayu ; dev@dpdk.org
> Cc: Yigit, Ferruh ; Ananyev, Konstantin
> ; Tan, Jianfeng ;
> Wu, Jingjing ; Hu, Jiayu 
> Subject: RE: [dpdk-dev] [PATCH v3] app/testpmd: enable the heavyweight
> mode TCP/IPv4 GRO
> 
> 
> 
> > -Original Message-
> > From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Jiayu Hu
> > Sent: Sunday, September 3, 2017 2:30 PM
> > To: dev@dpdk.org
> > Cc: Yigit, Ferruh ; Ananyev, Konstantin
> > ; Tan, Jianfeng ;
> > Wu, Jingjing ; Hu, Jiayu 
> > Subject: [dpdk-dev] [PATCH v3] app/testpmd: enable the heavyweight
> > mode TCP/IPv4 GRO
> >
> > The GRO library provides two modes to reassemble packets. Currently, the
> > csum forwarding engine has supported to use the lightweight mode to
> > reassemble TCP/IPv4 packets. This patch introduces the heavyweight mode
> > for TCP/IPv4 GRO in the csum forwarding engine.
> >
> > With the command "set port  gro on|off", users can enable
> > TCP/IPv4 GRO for a given port. With the command "set gro flush ",
> > users can determine when the GROed TCP/IPv4 packets are flushed from
> > reassembly tables. With the command "show port  gro", users
> can
> > display GRO configuration.
> >
> > Signed-off-by: Jiayu Hu 
> Tested-by : Lei Yao
> 
> This patch has been tested on my bench, iperf test result  is as following:
> No-GRO: 8 Gbps
> Kernel GRO: 14.3 Gbps
> GRO flush 0 : 12.7 Gbps
> GRO flush 1: 16.8 Gbps
> But when I use 40G NIC and set GRO flush cycle as 2, sometimes
> the iperf traffic will stall for several seconds. Still need investigate.

This issue happens in high link speed environment, like 40Gbps. I am
looking into this issue now.

Current experiment data shows that the large flush cycle value causes
lots of duplicated TCP ACKs with same ACK numbers, which may be one
of the reasons for the poor TCP/IP stack performance.

Thanks,
Jiayu
> 
> > ---
> > changes in v3:
> > - remove "heavyweight mode" and "lightweight mode" from GRO
> > commands
> > - combine two patches into one
> > - use consistent help string for GRO commands
> > - remove the unnecessary command "gro set (max_flow_num)
> > (max_item_num_per_flow) (port_id)"
> > changes in v2:
> > - use "set" and "show" as the root level command
> > - add a new command to show GRO configuration
> > - fix l2_len/l3_len/l4_len unset etc. bugs
> >
> >  app/test-pmd/cmdline.c  | 206 
> > 
> >  app/test-pmd/config.c   |  67 +++--
> >  app/test-pmd/csumonly.c |  31 -
> >  app/test-pmd/testpmd.c  |  18 ++-
> >  app/test-pmd/testpmd.h  |  16 ++-
> >  doc/guides/testpmd_app_ug/testpmd_funcs.rst |  45 --
> >  6 files changed, 263 insertions(+), 120 deletions(-)
> >
> > diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
> > index cd8c358..d628250 100644
> > --- a/app/test-pmd/cmdline.c
> > +++ b/app/test-pmd/cmdline.c
> > @@ -423,13 +423,16 @@ static void cmd_help_long_parsed(void
> > *parsed_result,
> > "tso show (portid)"
> > "Display the status of TCP Segmentation
> > Offload.\n\n"
> >
> > -   "gro (on|off) (port_id)"
> > +   "set port (port_id) gro on|off\n"
> > "Enable or disable Generic Receive Offload in"
> > " csum forwarding engine.\n\n"
> >
> > -   "gro set (max_flow_num)
> > (max_item_num_per_flow) (port_id)\n"
> > -   "Set max flow number and max packet number
> > per-flow"
> > -   " for GRO.\n\n"
> > +   "show port (port_id) gro\n"
> > +   "Display GRO configuration.\n\n"
> > +
> > +   "set gro flush (cycles)\n"
> > +   "Set the cycle to flush GROed packets from"
> > +   " reassembly tables.\n\n"
> >
> > "set fwd (%s)\n"
> > "Set packet forwarding mode.\n\n"
> > @@ -3850,115 +3853,145 @@ cmdline_parse_inst_t
> cmd_tunnel_tso_show
> > = {
> >  };
> >
> >  /* *** SET GRO FOR A PORT *** */
> > -struct cmd_gro_result {
> > +struct cmd_gro_enable_result {
> > +   cmdline_fixed_string_t cmd_set;
> > +   cmdline_fixed_string_t cmd_port;
> > cmdline_fixed_string_t cmd_keyword;
> > -   cmdline_fixed_string_t mode;
> > -   uint8_t port_id;
> > +   cmdline_fixed_string_t cmd_onoff;
> > +   uint8_t cmd_pid;
> >  };
> >
> >  static void
> > -cmd_enable_gro_parsed(void *parsed_result,
> > +cmd_gro_enable_parsed(void *parsed_result,
> > __attribute__((unused)) struct cmdline *cl,
> > __attribute__((unused)) void *data)
> >  {
> > -   struct cmd_gro_result *res;
> > +   struct cmd_gro_enable_result *res;
> >
> > res = parsed_result;
> > -   setup_gro(res->mode, res->port_id);
> > -}
> > -
> > -cmdline_parse_token_stri

Re: [dpdk-dev] [PATCH 0/2] Multiple Pktmbuf mempool support

2017-09-25 Thread Olivier MATZ

Hi Hemant,

On Fri, Sep 22, 2017 at 12:43:36PM +0530, Hemant Agrawal wrote:
> On 7/4/2017 5:52 PM, Hemant Agrawal wrote:
> > This patch is in addition to the patch series[1] submitted by
> > Santosh to allow application to set mempool handle.
> > 
> > The existing pktmbuf pool create api only support the internal use
> > of "CONFIG_RTE_MBUF_DEFAULT_MEMPOOL_OPS", which assumes that the HW
> > can only support one type of mempool for packet mbuf.
> > 
> > There are multiple additional requirements.
> > 
> > 1. The platform independent image detects the underlying bus,
> > based on the bus and resource detected, it will dynamically select
> > the default mempool. This need not to have the application knowlege.
> > e.g. DPAA2 and DPAA are two different NXP platforms, based on the
> > underlying platform the default ops for mbuf can be dpaa or dpaa2.
> > Application should work seemlessly whether it is running on dpaa or dpaa2.
> > 
> > 2.Platform support more than one type of mempool for pktmbuf,
> > depend on the availability of resource, the driver can decide one
> > of the mempool for the current packet mbuf request.
> > 
> > 3. In case of where application is providing the mempool, as proposed
> > in [1], the check preference logic will be bypassed and application
> > config will take priority.
> > 
> > [1]Allow application set mempool handle
> > http://dpdk.org/ml/archives/dev/2017-June/067022.html
> > 
> > Hemant Agrawal (2):
> >   mempool: check the support for the given mempool
> >   mbuf: add support for preferred mempool list
> > 
> >  config/common_base   |  2 ++
> >  lib/librte_mbuf/rte_mbuf.c   | 28 +++-
> >  lib/librte_mempool/rte_mempool.h | 24 
> >  lib/librte_mempool/rte_mempool_ops.c | 32 
> >  4 files changed, 81 insertions(+), 5 deletions(-)
> > 
> 
> Hi Olivier,
>   Any opinion on this patchset?

Sorry for the lack of feedback, for some reason I missed the initial
mails.

I don't quite like the idea of having hardcoded config:
 CONFIG_RTE_MBUF_BACKUP_MEMPOOL_OPS_1=""
 CONFIG_RTE_MBUF_BACKUP_MEMPOOL_OPS_2=""

Also, I'm a bit reserved about rte_mempool_ops_check_support(): it can
return "supported" but the creation of the pool can still fail later due
to the creation parameters (element count/size, mempool flags, ...).

The ordering preference of these mempool ops may also depend on the
configuration (enabled ports for instance) or user choices.

Let me propose you another approach to (I hope) solve your issue, based
on Santosh's patches [1].

We can introduce a new helper that could be used by applications to
dynamically select the best mempool ops. It could be something similar
to the pseudo-code I've written in [3].

  // return an array pool ops name, ordered by preference
  pool_ops = get_ordered_pool_ops_list()

Then try to create the first pool, if it fails, try the next, until
it is succesful.

That said, it is difficult to take the good decision inside
rte_pktmbuf_pool_create() because we don't have all the information.
Sergio and Jerin suggested to introduce a new argument ops_name to
rte_pktmbuf_pool_create() [2]. From what I remember, this is also
something you were in favor of, right?

In that case, we can move the difficult task of choosing the
right mempool inside the application.

Comments?

Olivier

[1] http://dpdk.org/dev/patchwork/patch/28596/
[2] http://dpdk.org/ml/archives/dev/2017-September/074489.html
[3] http://dpdk.org/dev/patchwork/patch/27610/

Re: [dpdk-dev] [PATCH] test/logs: fix dynamic log levels testing

2017-09-25 Thread Olivier MATZ

On Thu, Sep 21, 2017 at 08:44:09PM +0200, Radoslaw Biernacki wrote:
> This patch fixes the dynamic log levels testing in logs_autotest.
> Introduction of rte_log_set_level() in patch c1b5fa94a46f was done
> with parameter RTE_LOG_EMERG which caused all RTE_LOG() calls an
> early return due to all given levels were far below EMERG.
> If first two logs supposed to show up on console, the initial log
> level must be low (DEBUG). It is than changed above ERR when we test
> if TESTAPP2 log type can be filtered by log type log level.
> 
> Fixes: c1b5fa94a46f ("eal: support dynamic log types")
> 
> Signed-off-by: Radoslaw Biernacki 

Acked-by: Olivier Matz

Re: [dpdk-dev] [PATCH v3] sched: make RED scaling configurable

2017-09-25 Thread Dumitrescu, Cristian

...
 
> diff --git a/config/common_base b/config/common_base
> index 5e97a08..2626557 100644
> --- a/config/common_base
> +++ b/config/common_base
> @@ -666,6 +666,13 @@ CONFIG_RTE_SCHED_COLLECT_STATS=n
>  CONFIG_RTE_SCHED_SUBPORT_TC_OV=n
>  CONFIG_RTE_SCHED_PORT_N_GRINDERS=8
>  CONFIG_RTE_SCHED_VECTOR=n
> +#
> +# RTE_RED_SCALING - number of fractional bits used for RED's moving
> average
> +# For every bit that RTE_RED_SCALING is reduced, the max-queue size can
> doubled
> +# RTE_RED_MAX_TH_MAX = (max-queue size - 1), max-queue size must be
> power of two
> +#
> +CONFIG_RTE_RED_SCALING=10
> +CONFIG_RTE_RED_MAX_TH_MAX=1023
> 

Alan, Tomasz,

Thank you for your work!

We generally want to avoid build-time configuration in favour of run-time 
configuration. Can you please rework this as run-time configuration?

I suggest we add a new API function to configure these parameters globally for 
all the RED objects (as opposed to per RED object), with the default values as 
the current values when this function is not called.

So NACK for now.

Regards,
Cristian

Re: [dpdk-dev] [PATCH] rte_sched: don't count RED-drops as tail-drops

2017-09-25 Thread Dumitrescu, Cristian

> 
> Packets that are RED-dropped are not Tail-dropped, so the n_pkts_dropped
> counter should not be incremented when the n_pkts_red_dropped counter
> is.
> 

Hi Alan,

Not really:
* The n_pkts_dropped counter is intended to be incremented every time a packet 
is dropped, regardless of the drop cause (either tail drop or RED), so it is 
cumulative of all drop causes.
* The n_pkts_red_dropped counter is only incremented when the drop cause is 
RED, so it represents a slice of n_pkts_dropped. Therefore, the number of 
packets dropped due to tail drop is (n_pkts_dropped - n_pkts_red_dropped).

So NAK.

Thanks,
Cristian

Re: [dpdk-dev] [PATCH v3] app/testpmd: enable the heavyweight mode TCP/IPv4 GRO

2017-09-25 Thread Ferruh Yigit

On 9/3/2017 7:30 AM, Jiayu Hu wrote:
> The GRO library provides two modes to reassemble packets. Currently, the
> csum forwarding engine has supported to use the lightweight mode to
> reassemble TCP/IPv4 packets. This patch introduces the heavyweight mode
> for TCP/IPv4 GRO in the csum forwarding engine.
> 
> With the command "set port  gro on|off", users can enable
> TCP/IPv4 GRO for a given port. With the command "set gro flush ",
> users can determine when the GROed TCP/IPv4 packets are flushed from
> reassembly tables. With the command "show port  gro", users can
> display GRO configuration.
> 
> Signed-off-by: Jiayu Hu 

Reviewed-by: Ferruh Yigit

Re: [dpdk-dev] [PATCH v4] devtools: rework abi checker script

2017-09-25 Thread Neil Horman

On Mon, Sep 25, 2017 at 11:11:20AM +0200, Olivier MATZ wrote:
> On Thu, Sep 21, 2017 at 11:40:35AM -0400, Neil Horman wrote:
> > On Wed, Sep 20, 2017 at 11:12:53AM +0200, Olivier Matz wrote:
> > > The initial version of the script had some limitations:
> > > - cannot work on a non-clean workspace
> > > - environment variables are not documented
> > > - no compilation log in case of failure
> > > - return success even it abi is incompatible
> > >
> > > This patch addresses these issues and rework the code.
> > >
> > > Signed-off-by: Olivier Matz 
> > > ---
> > >
> > > v3->v4:
> > > - clarify logs on incompatible abi
> > > - log when an error returned an error
> > > - [really] fix the report path
> > > - log the output of make config in the proper file
> > >
> > > v2->v3:
> > > - fix when not launched from dpdk root dir
> > > - use "-Og -Wno-error" instead of "-O0"
> > > - fix typo in commit log
> > >
> > > v1->v2:
> > > - use /usr/bin/env to find bash (which is required)
> > > - fix displayed path to html reports
> > > - reword help for -f option
> > >
> > >
> > >  devtools/validate-abi.sh | 397 
> > > ---
> > >  1 file changed, 205 insertions(+), 192 deletions(-)
> > >
> > This looks better, thank you for the iterations.  One last note: The abi 
> > dumper
> > utility errors out with error code of 12 if a given object has no exported
> > symbols, and I see a few of those.  You may want to consider catching that
> > error, logging an appropriate message and skipping the error emit.  That 
> > can be
> > handled later though, as its a corner case.  I'd go with this patch, and 
> > then
> > do a incremental improvement later
> 
> Unfortunately the error code 12 does not exist on my version of abi-dumper
> (debian stable, v0.99.16). I'm currently doing this as a workaround:
> 
> cmd $abidump ${i} -o $dst/${1}/${i}.dump -lver ${1} || true
> # hack to ignore empty SymbolsInfo section (no public ABI)
> if grep -q "'SymbolInfo' => {}," $dst/${1}/${i}.dump 2> /dev/null; then
> log "INFO" "${i} has no public ABI, remove dump file"
> cmd rm -f $dst/${1}/${i}.dump
> fi
> 
> I tested with the latest abi-dumper version, and I indeed see
> these errors in the logs. It seems we don't go inside the 'if'
> above with a recent abi-dumper, and the .dump file is not generated.
> 
> I can add a check to display the same additional log
> "INFO" "${i} has no public ABI, remove dump file" if abi-dumper
> returns 12. Something like this:
> 
> ret=0
> cmd $abidump ${i} -o $dst/${1}/${i}.dump -lver ${1} || ret=$?
> # hack to ignore empty SymbolsInfo section (no public ABI)
> if [ ${ret} = 12 ]; then
> log "INFO" "${i} has no public ABI"
> fi
> if grep -q "'SymbolInfo' => {}," $dst/${1}/${i}.dump 2> /dev/null; then
> log "INFO" "${i} has no public ABI, remove dump file"
> cmd rm -f $dst/${1}/${i}.dump
> fi
> 
Agreed, that makes sense.
Thanks.
Neil

> 
> Olivier
>

Re: [dpdk-dev] [PATCH] doc: fix a typo in testpmd guide

2017-09-25 Thread Mcnamara, John



> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Rami Rosen
> Sent: Saturday, September 23, 2017 5:55 PM
> To: dev@dpdk.org
> Cc: Xing, Beilei ; Rosen, Rami
> 
> Subject: [dpdk-dev] [PATCH] doc: fix a typo in testpmd guide
> 
> This patch fixes a trivial typo in testpmd guide.
> 
> Signed-off-by: Rami Rosen 

Acked-by: John McNamara

Re: [dpdk-dev] [PATCH] doc: add Linux flower support check in TAP guide

2017-09-25 Thread Mcnamara, John



> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Thomas Monjalon
> Sent: Wednesday, September 20, 2017 2:03 PM
> To: Pascal Mazon 
> Cc: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH] doc: add Linux flower support check in TAP
> guide
> 
> The flow API is supported in TAP PMD if flower is supported in Linux.
> Some commands are combined to suggest a convenient check of its support by
> the running kernel.
> 
> Signed-off-by: Thomas Monjalon 

Acked-by: John McNamara

Re: [dpdk-dev] [PATCH v6 3/8] mempool: add flags arg in xmem size and usage

2017-09-25 Thread Olivier MATZ

On Thu, Sep 07, 2017 at 09:00:37PM +0530, Santosh Shukla wrote:
> xmem_size and xmem_usage need to know the status of mempool flags,
> so add 'flags' arg in _xmem_size/usage() api.
> 
> Following patch will make use of that.
> 
> Signed-off-by: Santosh Shukla 
> Signed-off-by: Jerin Jacob 

Acked-by: Olivier Matz

Re: [dpdk-dev] [PATCH v6 5/8] mempool: get the mempool capability

2017-09-25 Thread Olivier MATZ

On Thu, Sep 07, 2017 at 09:00:39PM +0530, Santosh Shukla wrote:
> Allow the mempool driver to advertise his pool capabilities.
> For that pupose, an api(rte_mempool_ops_get_capabilities)
> and ->get_capabilities() handler has been introduced.
> - Upon ->get_capabilities() call, mempool driver will advertise
> his capabilities to mempool flags param.
> 
> Signed-off-by: Santosh Shukla 
> Signed-off-by: Jerin Jacob 

Acked-by: Olivier Matz

Re: [dpdk-dev] [PATCH v6 7/8] mempool: introduce block size align flag

2017-09-25 Thread Olivier MATZ

On Thu, Sep 07, 2017 at 09:00:41PM +0530, Santosh Shukla wrote:
> Some mempool hw like octeontx/fpa block, demands block size
> (/total_elem_sz) aligned object start address.
> 
> Introducing an MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS flag.
> If this flag is set:
> - Align object start address(vaddr) to a multiple of total_elt_sz.
> - Allocate one additional object. Additional object is needed to make
>   sure that requested 'n' object gets correctly populated.
> 
> Example:
> - Let's say that we get 'x' size of memory chunk from memzone.
> - And application has requested 'n' object from mempool.
> - Ideally, we start using objects at start address 0 to...(x-block_sz)
>   for n obj.
> - Not necessarily first object address i.e. 0 is aligned to block_sz.
> - So we derive 'offset' value for block_sz alignment purpose i.e..'off'.
> - That 'off' makes sure that start address of object is blk_sz aligned.
> - Calculating 'off' may end up sacrificing first block_sz area of
>   memzone area x. So total number of the object which can fit in the
>   pool area is n-1, Which is incorrect behavior.
> 
> Therefore we request one additional object (/block_sz area) from memzone
> when MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS flag is set.
> 
> Signed-off-by: Santosh Shukla 
> Signed-off-by: Jerin Jacob 
>
> [...]
>
> --- a/lib/librte_mempool/rte_mempool.c
> +++ b/lib/librte_mempool/rte_mempool.c
> @@ -239,10 +239,15 @@ rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t 
> flags,
>   */
>  size_t
>  rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t 
> pg_shift,
> -   __rte_unused unsigned int flags)
> +   unsigned int flags)
>  {
>   size_t obj_per_page, pg_num, pg_sz;
>  
> + if (flags & (MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS |
> + MEMPOOL_F_CAPA_PHYS_CONTIG))
> + /* alignment need one additional object */
> + elt_num += 1;
> +

In previous version, we agreed to test both _BLK_ALIGNED_OBJECTS
and _PHYS_CONTIG in _xmem_size()/_usage(). Here, the test will
also be true if only MEMPOOL_F_CAPA_PHYS_CONTIG is set.

If we want to test both, the test should be:

mask = MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS | MEMPOOL_F_CAPA_PHYS_CONTIG;
if ((flags & mask) == mask)

> @@ -265,13 +270,18 @@ rte_mempool_xmem_size(uint32_t elt_num, size_t 
> total_elt_sz, uint32_t pg_shift,
>  ssize_t
>  rte_mempool_xmem_usage(__rte_unused void *vaddr, uint32_t elt_num,
>   size_t total_elt_sz, const phys_addr_t paddr[], uint32_t pg_num,
> - uint32_t pg_shift, __rte_unused unsigned int flags)
> + uint32_t pg_shift, unsigned int flags)
>  {
>   uint32_t elt_cnt = 0;
>   phys_addr_t start, end;
>   uint32_t paddr_idx;
>   size_t pg_sz = (size_t)1 << pg_shift;
>  
> + if (flags & (MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS |
> + MEMPOOL_F_CAPA_PHYS_CONTIG))
> + /* alignment need one additional object */
> + elt_num += 1;
> +

Same here

Re: [dpdk-dev] [PATCH v5 1/5] net/bonding: remove bonding APIs using ABI versioning

2017-09-25 Thread Ferruh Yigit

On 9/25/2017 4:22 AM, Zhiyong Yang wrote:
> There are two bonding APIs using ABI versioning, and both have
> port_id as parameter. Since we are already breaking ABI, no need
> to keep older versions of APIs.
> 
> Signed-off-by: Zhiyong Yang 

Reviewed-by: Ferruh Yigit

Re: [dpdk-dev] [PATCH v5 2/5] ethdev: increase port_id range

2017-09-25 Thread Ferruh Yigit

On 9/25/2017 4:22 AM, Zhiyong Yang wrote:
> Extend port_id definition from uint8_t to uint16_t in lib and drivers
> data structures, specifically rte_eth_dev_data. Modify the APIs,
> drivers and app using port_id at the same time.
> 
> Fix some checkpatch issues from the original code and remove some
> unnecessary cast operations.
> 
> release_17_11 and deprecation have been updated in this patch.
> 
> Signed-off-by: Zhiyong Yang 
> Acked-by: Adrien Mazarguil 

Reviewed-by: Ferruh Yigit 


This is very hard to be sure everything covered. Also this patch will
cause lots of merge conflict against next-net.

I would suggest getting this patch as soon as possible, to give more
time to possible fixes.

Thanks,
ferruh

Re: [dpdk-dev] [PATCH v6 8/8] mempool: notify memory area to pool

2017-09-25 Thread Olivier MATZ

On Thu, Sep 07, 2017 at 09:00:42PM +0530, Santosh Shukla wrote:
> HW pool manager e.g. Octeontx SoC demands s/w to program start and end
> address of pool. Currently, there is no such api in external mempool.
> Introducing rte_mempool_ops_register_memory_area api which will let HW(pool
> manager) to know when common layer selects hugepage:
> For each hugepage - Notify its start/end address to HW pool manager.
> 
> Signed-off-by: Santosh Shukla 
> Signed-off-by: Jerin Jacob 
>
> [...]
>
> +/**
> + * @internal wrapper for mempool_ops register_memory_area callback.
> + * API to notify the mempool handler if a new memory area is added to pool.
> + *

if -> when

> + * Mempool handler usually get notified once for the case of mempool get full
> + * range of memory area. However, if several memory areas exist then mempool
> + * handler gets notified each time.

Not sure I understand this last paragraph.

> + *
> + * @param mp
> + *   Pointer to the memory pool.
> + * @param vaddr
> + *   Pointer to the buffer virtual address
> + * @param paddr
> + *   Pointer to the buffer physical address
> + * @param len
> + *   Pool size

Minor: missing dot at the end

> + * @return
> + *  - 0: Success;
> + *  - ENOTSUP: doesn't support register_memory_area ops (valid error case).

Missing minus before ENOTSUP.
The dot should be a semicolon instead.

Re: [dpdk-dev] [PATCH v3 5/6] app/testpmd: add new commands to manipulate with pctype mapping

2017-09-25 Thread Xing, Beilei



> -Original Message-
> From: Rybalchenko, Kirill
> Sent: Wednesday, September 20, 2017 10:33 PM
> To: dev@dpdk.org
> Cc: Rybalchenko, Kirill ; Chilikin, Andrey
> ; Xing, Beilei ; Wu,
> Jingjing 
> Subject: [PATCH v3 5/6] app/testpmd: add new commands to manipulate
> with pctype mapping
> 
> Add new commands to manipulate with dynamic flow type to pctype
> mapping table in i40e PMD.
> Commands allow to print table, modify it and reset to default value.
> 
> v3:
> changed command syntax from 'pctype mapping...' to 'port config pctype
> mapping...' and 'show port pctype mapping'
> 
> Signed-off-by: Kirill Rybalchenko 
> ---
>  app/test-pmd/cmdline.c  | 335
> +++-
>  doc/guides/testpmd_app_ug/testpmd_funcs.rst |  46 
>  drivers/net/i40e/rte_pmd_i40e_version.map   |   2 +-
>  3 files changed, 372 insertions(+), 11 deletions(-)
> 
> diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index
> 4f2d731..4f68267 100644
> --- a/app/test-pmd/cmdline.c
> +++ b/app/test-pmd/cmdline.c



> +
> +/* show port pctype mapping get */
> +
> +/* Common result structure for show port pctype mapping get */ struct

show port pctype mapping get  ->   show port pctype mapping ?

> +cmd_pctype_mapping_get_result {
> + cmdline_fixed_string_t show;
> + cmdline_fixed_string_t port;
> + uint8_t port_id;
> + cmdline_fixed_string_t pctype;
> + cmdline_fixed_string_t mapping;
> +};
> +
> +/* Common CLI fields for pctype mapping get */

"get" should be removed.

> +cmdline_parse_token_string_t cmd_pctype_mapping_get_show =
> + TOKEN_STRING_INITIALIZER
> + (struct cmd_pctype_mapping_get_result,
> +  show, "show");
> +cmdline_parse_token_string_t cmd_pctype_mapping_get_port =
> + TOKEN_STRING_INITIALIZER
> + (struct cmd_pctype_mapping_get_result,
> +  port, "port");
> +cmdline_parse_token_num_t cmd_pctype_mapping_get_port_id =
> + TOKEN_NUM_INITIALIZER
> + (struct cmd_pctype_mapping_get_result,
> +  port_id, UINT8);
> +cmdline_parse_token_string_t cmd_pctype_mapping_get_pctype =
> + TOKEN_STRING_INITIALIZER
> + (struct cmd_pctype_mapping_get_result,
> +  pctype, "pctype");
> +cmdline_parse_token_string_t cmd_pctype_mapping_get_mapping =
> + TOKEN_STRING_INITIALIZER
> + (struct cmd_pctype_mapping_get_result,
> +  mapping, "mapping");
> +
> +static void
> +cmd_pctype_mapping_get_parsed(
> + void *parsed_result,
> + __attribute__((unused)) struct cmdline *cl,
> + __attribute__((unused)) void *data)
> +{
> + struct cmd_pctype_mapping_get_result *res = parsed_result;
> + int ret = -ENOTSUP;
> +#ifdef RTE_LIBRTE_I40E_PMD
> + struct rte_pmd_i40e_flow_type_mapping
> mapping[RTE_PMD_I40E_FLOW_TYPE_MAX];
> + int i, j;
> +#endif
> +
> + if (port_id_is_invalid(res->port_id, ENABLED_WARN))
> + return;
> +
> +#ifdef RTE_LIBRTE_I40E_PMD
> + ret = rte_pmd_i40e_flow_type_mapping_get(res->port_id,
> mapping);
> +#endif
> +
> + switch (ret) {
> + case 0:
> + break;
> + case -ENODEV:
> + printf("invalid port_id %d\n", res->port_id);
> + return;
> + case -ENOTSUP:
> + printf("function not implemented\n");
> + return;
> + default:
> + printf("programming error: (%s)\n", strerror(-ret));
> + return;
> + }
> +
> +#ifdef RTE_LIBRTE_I40E_PMD
> + for (i = 0; i < RTE_PMD_I40E_FLOW_TYPE_MAX; i++) {
> + if (mapping[i].pctype != 0ULL) {
> + int first_pctype = 1;
> +

first_pctype should be moved to the beginning of the function.

> + printf("pctype: ");
> + for (j = 0; j < RTE_PMD_I40E_PCTYPE_MAX; j++) {
> + if (mapping[i].pctype & (1ULL << j)) {
> + printf(first_pctype ? "%02d" : ",%02d",
> j);
> + first_pctype = 0;
> + }
> + }
> + printf("  ->  flowtype: %02d\n",
> mapping[i].flow_type);
> + }
> + }
> +#endif
> +}
> +




> +static void
> +cmd_pctype_mapping_update_parsed(
> + void *parsed_result,
> + __attribute__((unused)) struct cmdline *cl,
> + __attribute__((unused)) void *data)
> +{
> + struct cmd_pctype_mapping_update_result *res = parsed_result;
> + int ret = -ENOTSUP;
> +#ifdef RTE_LIBRTE_I40E_PMD
> + struct rte_pmd_i40e_flow_type_mapping mapping; #endif
> + unsigned int nb_item, i;

Should "i" be defined when RTE_LIBRTE_I40E_PMD is defined? otherwise 
it will be defined but not used when RTE_LIBRTE_I40E_PMD is not defined.

> + unsigned int pctype_list[RTE_PMD_I40E_PCTYPE_MAX];
> +
> + if (port_id_is_invalid(res->port_id, ENABLED_WARN))
> + return;
> +
> + nb_item = parse_item_lis

Re: [dpdk-dev] [PATCH v5 2/5] ethdev: increase port_id range

2017-09-25 Thread Ferruh Yigit

On 9/25/2017 4:22 AM, Zhiyong Yang wrote:
> Extend port_id definition from uint8_t to uint16_t in lib and drivers
> data structures, specifically rte_eth_dev_data. Modify the APIs,
> drivers and app using port_id at the same time.
> 
> Fix some checkpatch issues from the original code and remove some
> unnecessary cast operations.
> 
> release_17_11 and deprecation have been updated in this patch.
> 
> Signed-off-by: Zhiyong Yang 
> Acked-by: Adrien Mazarguil 

<...>

> diff --git a/drivers/net/af_packet/Makefile b/drivers/net/af_packet/Makefile
> index 70d517c16..4d62b7dbd 100644
> --- a/drivers/net/af_packet/Makefile
> +++ b/drivers/net/af_packet/Makefile
> @@ -40,7 +40,7 @@ LIB = librte_pmd_af_packet.a
>  
>  EXPORT_MAP := rte_pmd_af_packet_version.map
>  
> -LIBABIVER := 1
> +LIBABIVER := 2

I have just recognized this one. This shouldn't be updated. Only
LIABIVER of the libraries that their API modified should be updated.

The release notes "Shared Library Versions" updates and Makefile
LIBABIVER updates should be compatible. I mean either both needs to be
updated or both not updated.

There is no way to split this patch more without breaking build right?
It would be possible to do it if port_t used, but that has been decided
not to...

<...>

Re: [dpdk-dev] [PATCH v3 7/7] doc: add membership documentation

2017-09-25 Thread De Lara Guarch, Pablo



> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Yipeng Wang
> Sent: Wednesday, September 6, 2017 1:00 AM
> To: dev@dpdk.org
> Cc: Tai, Charlie ; Gobriel, Sameh
> ; Wang, Ren ; Wang,
> Yipeng1 ; Mcnamara, John
> 
> Subject: [dpdk-dev] [PATCH v3 7/7] doc: add membership documentation
> 
> This patch adds the documentation for membership library.
> 
> Signed-off-by: Yipeng Wang 
> Reviewed-by: John McNamara 

...

> diff --git a/doc/guides/prog_guide/member_lib.rst
> b/doc/guides/prog_guide/member_lib.rst
> new file mode 100644
> index 000..2b32535
> --- /dev/null
> +++ b/doc/guides/prog_guide/member_lib.rst


> +Membership Library
> +==
> +
> +Introduction
> +
> +
> +The DPDK Membership Library provides an API for DPDK applications to
> insert a
> +new member, delete an existing member, or query the existence of a
> member in a
> +given set, or a group of sets. For the case of a group of sets the library

"group of sets, the library".

...

> +We are including a configurable Membership Library in DPDK to cover set

This looks like more like a cover letter than part of programmer's guide.
Reword this loke "This Membership Library covers set membership...",
and avoid "include... in DPDK", as it is implicit.

> +membership functionality for both a single set and multi-set scenarios.
> The
> +library is optimized to support the customer network applications which
> require
> +membership checking functionality. In this guide, we will cover two set-
> summary
> +schemes including a vector of Bloom Filters and Hash-Table based
> +set-summary schemes with and without false negative probability,
> followed by
> +a brief discussion of the Membership Library API.
> +
> +Vector of Bloom Filters
> +---
> +
> +Bloom Filter (BF) [Member-bloom] is a well-known space-efficient
> +probabilistic data structure that answers set membership queries (test
> whether
> +an element is a member of a set) with some probability of false positives
> and
> +zero false negatives; a query for an element returns either it is "possibly 
> in
> +a set" (with very high probability) or "definitely not in a set".
> +
> +The BF is a method for representing a set of ``n`` elements (for example
> flow keys
> +in network applications domain) to support membership queries. The idea
> of BF is
> +to allocate a bit-vector ``v`` with ``m`` bits, which are initially all set 
> to 0.
> Then
> +it chooses ``k`` independent hash functions ``h1``, ``h2``, ... ``hk`` with
> hash values range from
> +``1`` to ``m`` to perform hashing calculations on each element. 

From "0" to "m-1"?

...

> +
> +  Vector Bloom Filter (vBF) Overview
> +
> +To support membership test for both multiple sets and a single set,
> +the library implements a Vector Bloom Filter (vBF) scheme.
> +vBF basically composes multiple bloom filters into a vector of bloom filers.
> +The membership test is conducted on all of the
> +bloom filters concurrently to determine which set(s) it belongs to or none
> of
> +them. The basic idea of vBF is shown in the above figure where an element
> is
> +used to address multiple bloom filters concurrently and the bloom filter
> +index(es) with a hit is returned.
> +
> +.. _figure_membership5:
> +.. figure:: img/member_i5.*
> +
> +  vBF for Flow Scheduling to Worker Thread
> +
> +As previously mentioned, there are many usages of such structures. vBF is
> used
> +for applications that needs to check membership against multiple sets
> +simultaneously. 

"needs" -> "need".

...

> +The major usage of HTSS with false negative is to use it as a cache for
> +distributing elements to different target sets. By allowing HTSS to evict old
> +elements, the set-summary can keep track of the most recent elements
> +(i.e. active) as a cache typically does. Old inactive elements (infrequently
> +used elements) will automatically and eventually get evicted from the
> +set-summary. It worth noting that the set-summary still has false positive

"It is worth".

...

> +We also pass two seeds: ``prim_hash_seed`` and
> +``sec_hash_seed`` for the primary and secondary hash functions to
> calculate two independent hash values.
> +``socket_id`` parameter is the NUMA socket ID for the memory used to
> create the
> +set-summary. For HTSS, another parameter ``iscache`` is used to indicate

In order to keep consistency, I would use "is_cache".

> +if this set-summary is a cache (i.e. with false negative probability) or not.
> +For vBF, extra parameters are needed. For example, ``num_set`` is the
> number of
> +sets needed to initialize the vector bloom filters. This number is equal to
> the
> +number of bloom filters will be created.
> +``false_pos_rate`` is the false positive rate. num_keys and false_pos_rate
> will be used to determine
> +the number of hash functions and the bloom filter size.
> +
> +
> +Set-summary Element Insertion
> +~
> +
> +The ``rte_member_add(

[dpdk-dev] [PATCH v2 1/8] net/i40e: add API to convert VF MAC to VSI index

2017-09-25 Thread David Hunt

From: "Sexton, Rory" 

Need a way to convert a vf id to a pf id on the host so as to query the pf
for relevant statistics which are used for the frequency changes in the
vm_power_manager app. Used when profiles are passed down from the guest
to the host, allowing the host to map the vfs to pfs.

Signed-off-by: Nemanja Marjanovic 
Signed-off-by: Rory Sexton 
Signed-off-by: David Hunt 
---
 drivers/net/i40e/rte_pmd_i40e.c | 35 +++
 drivers/net/i40e/rte_pmd_i40e.h | 13 +
 2 files changed, 48 insertions(+)

diff --git a/drivers/net/i40e/rte_pmd_i40e.c b/drivers/net/i40e/rte_pmd_i40e.c
index f12b7f4..85b540f 100644
--- a/drivers/net/i40e/rte_pmd_i40e.c
+++ b/drivers/net/i40e/rte_pmd_i40e.c
@@ -2115,3 +2115,38 @@ int rte_pmd_i40e_ptype_mapping_replace(uint8_t port,
 
return 0;
 }
+
+uint64_t
+rte_pmd_i40e_query_vfid_by_mac(uint8_t port, uint64_t vf_mac) {
+   struct rte_eth_dev *dev;
+   struct ether_addr *vf_mac_addr = (struct ether_addr *)&vf_mac;
+   struct ether_addr *mac;
+   struct i40e_pf *pf;
+   int i, x;
+   struct i40e_pf_vf *vf;
+   uint16_t vf_num;
+
+   RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+   dev = &rte_eth_devices[port];
+
+   if (!is_i40e_supported(dev))
+   return -ENOTSUP;
+
+   pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+   vf_num = pf->vf_num;
+
+   for (x = 0; x < vf_num; x++) {
+   int mac_addr_matches = 1;
+   vf = &pf->vfs[x];
+   mac = &vf->mac_addr;
+
+   for (i = 0; i < ETHER_ADDR_LEN; i++) {
+   if (mac->addr_bytes[i] != vf_mac_addr->addr_bytes[i])
+   mac_addr_matches = 0;
+   }
+   if (mac_addr_matches)
+   return x;
+   }
+
+   return -EINVAL;
+}
diff --git a/drivers/net/i40e/rte_pmd_i40e.h b/drivers/net/i40e/rte_pmd_i40e.h
index 356fa89..6984105 100644
--- a/drivers/net/i40e/rte_pmd_i40e.h
+++ b/drivers/net/i40e/rte_pmd_i40e.h
@@ -637,4 +637,17 @@ int rte_pmd_i40e_ptype_mapping_replace(uint8_t port,
   uint8_t mask,
   uint32_t pkt_type);
 
+/**
+ * Translate a VF mac address into VF index in array of pf->vfs
+ *
+ * @param port
+ *pointer to port identifier of the device
+ * @param vf_mac
+ *the mac address of the vf to determine index of
+ * @return
+ *-(-22 EINVAL) the vf mac does not exist on this port
+ *-(!-22) the index of vfid in pf->vfs
+ */
+uint64_t rte_pmd_i40e_query_vfid_by_mac(uint8_t port, uint64_t vf_mac);
+
 #endif /* _PMD_I40E_H_ */
-- 
2.7.4

[dpdk-dev] [PATCH v2] Policy Based Power Control for Guest

2017-09-25 Thread David Hunt

Policy Based Power Control for Guest

This patchset adds the facility for a guest VM to send a policy down to the
host that will allow the host to scale up/down cpu frequencies
depending on the policy criteria independently of the DPDK app running in
the guest.  This differs from the previous vm_power implementation where
individual scale up/down requests were send from the guest to the host via
virtio-serial.

V2 patchet changes:
  * Removed API's in ethdev layer.
  * Now just a single new API in the i40e driver for mapping VF MAC to
VF index.
  * Moved new function from rte_rxtx.c to rte_pmd_i40e.c
  * Removed function for reading i40e register, moved to using the
standard stats API.
  * Renamed i40e function to rte_pmd_i40e_query_vfid_by_mac
  * Cleaned up policy generation code.

It's a modification of the vm_power_manager app that runs in the host, and
the guest_vm_power_app example app that runs in the guest. This allows the
guest to send down a policy to the host via virtio-serial, which then allows
the host to scale up/down based on the criteria in the policy, resulting in
quicker scale up/down than individual requests coming from the guest.
It also means that the DPDK application running in the guest does not need
to be modified in any way, it is unaware that it's cores are being scaled
up/down, reducing the effort in implementing a power-aware infrastructure.

The usage model is as follows:
1. Set up the VF's and assign to the guest in the usual way.
2. run vm_power_manager on the host, creating a channel to the guest.
3. Start the guest_vm_power_mgr app on the guest, which establishes
   a virtio-serial channel to the host.
4. Send down the profile for the guest using the "send_profile now" command.
   There is an example profile hard-coded into guest_vm_power_mgr.
5. Stop the guest_vm_power_mgr and run your normal power-unaware application.
6. Send traffic into the VFs at varying traffic rates.
   Observe the frequency change on the host (turbostat -i 1)

The sequence of code changes are as follows:

A new function has been aded to the i40e driver to allow mapping of
a VF MAC to VF index.

Next we make an addition to librte_power that adds an extra command to allow
the passing of a policy structure from the guest to the host. This struct
contains information like busy/quiet hour, packet throughput thresholds, etc.

The next addition adds functionality to convert the virtual CPU (vcpU0 IDs to
physical CPU (pcpu) IDs so that the host can scale up/down the cores used
in the guest.

The remaining patches are functionality to process the policy, and take action
when the relevant trigger occurs to cause a frequency change.

[1/8] net/i40e: add API to convert VF MAC to VSI index
[2/8] lib/librte_power: add extra msg type for policies
[3/8] examples/vm_power_mgr: add vcpu to pcpu mapping
[4/8] examples/vm_power_mgr: add scale to medium freq fn
[5/8] examples/vm_power_mgr: add policy to channels
[6/8] examples/vm_power_mgr: add port initialisation
[7/8] examples/guest_cli: add send policy to host
[8/8] examples/vm_power_mgr: set MAC address of VF

[dpdk-dev] [PATCH v2 2/8] lib/librte_power: add extra msg type for policies

2017-09-25 Thread David Hunt

Signed-off-by: Nemanja Marjanovic 
Signed-off-by: Rory Sexton 
Signed-off-by: David Hunt 
---
 lib/librte_power/channel_commands.h | 52 +
 1 file changed, 52 insertions(+)

diff --git a/lib/librte_power/channel_commands.h 
b/lib/librte_power/channel_commands.h
index 484085b..1599706 100644
--- a/lib/librte_power/channel_commands.h
+++ b/lib/librte_power/channel_commands.h
@@ -46,6 +46,7 @@ extern "C" {
 /* Valid Commands */
 #define CPU_POWER   1
 #define CPU_POWER_CONNECT   2
+#define PKT_POLICY  3
 
 /* CPU Power Command Scaling */
 #define CPU_POWER_SCALE_UP  1
@@ -54,11 +55,62 @@ extern "C" {
 #define CPU_POWER_SCALE_MIN 4
 #define CPU_POWER_ENABLE_TURBO  5
 #define CPU_POWER_DISABLE_TURBO 6
+#define HOURS 24
+
+#ifdef RTE_LIBRTE_I40E_PMD
+#define MAX_VFS 10
+#endif
+
+#define MAX_VCPU_PER_VM 8
+
+typedef enum {false, true} bool;
+
+struct t_boost_status {
+   bool tbEnabled;
+};
+
+struct timer_profile {
+   int busy_hours[HOURS];
+   int quiet_hours[HOURS];
+#ifdef RTE_LIBRTE_I40E_PMD
+   int hours_to_use_traffic_profile[HOURS];
+#endif
+};
+
+enum workload {HIGH, MEDIUM, LOW};
+enum policy_to_use {
+#ifdef RTE_LIBRTE_I40E_PMD
+   TRAFFIC,
+#endif
+   TIME,
+   WORKLOAD
+};
+
+#ifdef RTE_LIBRTE_I40E_PMD
+struct traffic {
+   uint32_t min_packet_thresh;
+   uint32_t avg_max_packet_thresh;
+   uint32_t max_max_packet_thresh;
+};
+#endif
 
 struct channel_packet {
uint64_t resource_id; /**< core_num, device */
uint32_t unit;/**< scale down/up/min/max */
uint32_t command; /**< Power, IO, etc */
+   char vm_name[32];
+
+#ifdef RTE_LIBRTE_I40E_PMD
+   uint64_t vfid[MAX_VFS];
+   int nb_mac_to_monitor;
+   struct traffic traffic_policy;
+#endif
+   uint8_t vcpu_to_control[MAX_VCPU_PER_VM];
+   uint8_t num_vcpu;
+   struct timer_profile timer_policy;
+   enum workload workload;
+   enum policy_to_use policy_to_use;
+   struct t_boost_status t_boost_status;
 };
 
 
-- 
2.7.4

[dpdk-dev] [PATCH v2 3/8] examples/vm_power_mgr: add vcpu to pcpu mapping

2017-09-25 Thread David Hunt

Signed-off-by: Nemanja Marjanovic 
Signed-off-by: Rory Sexton 
Signed-off-by: David Hunt 
---
 examples/vm_power_manager/channel_manager.c | 61 +
 examples/vm_power_manager/channel_manager.h | 25 
 2 files changed, 86 insertions(+)

diff --git a/examples/vm_power_manager/channel_manager.c 
b/examples/vm_power_manager/channel_manager.c
index e068ae2..d5eda89 100644
--- a/examples/vm_power_manager/channel_manager.c
+++ b/examples/vm_power_manager/channel_manager.c
@@ -574,6 +574,67 @@ set_channel_status(const char *vm_name, unsigned 
*channel_list,
return num_channels_changed;
 }
 
+void
+get_all_vm(int *num_vm, int *num_cpu) {
+
+   virNodeInfo node_info;
+   virDomainPtr *domptr;
+   uint64_t mask;
+   int i, ii, numVcpus[MAX_VCPUS], cpu, n_vcpus;
+   unsigned int jj;
+   const char *vm_name;
+   unsigned int flags = VIR_CONNECT_LIST_DOMAINS_RUNNING |
+   VIR_CONNECT_LIST_DOMAINS_PERSISTENT;
+   unsigned int flag = VIR_DOMAIN_VCPU_CONFIG;
+
+
+   memset(global_cpumaps, 0, CHANNEL_CMDS_MAX_CPUS*global_maplen);
+   if (virNodeGetInfo(global_vir_conn_ptr, &node_info))
+   RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to retrieve node Info\n");
+
+   /* Returns number of pcpus */
+   global_n_host_cpus = (unsigned int)node_info.cpus;
+
+   /* Returns number of active domains */
+   *num_vm = virConnectListAllDomains(global_vir_conn_ptr, &domptr, flags);
+   if (*num_vm <= 0)
+   RTE_LOG(ERR, CHANNEL_MANAGER, "No Active Domains Running\n");
+
+   for (i = 0; i < *num_vm; i++) {
+
+   /* Get Domain Names */
+   vm_name = virDomainGetName(domptr[i]);
+   lvm_info[i].vm_name = vm_name;
+
+   /* Get Number of Vcpus */
+   numVcpus[i] = virDomainGetVcpusFlags(domptr[i], flag);
+
+   /* Get Number of VCpus & VcpuPinInfo */
+   n_vcpus = virDomainGetVcpuPinInfo(domptr[i],
+   numVcpus[i], global_cpumaps,
+   global_maplen, flag);
+
+   if ((int)n_vcpus > 0) {
+   *num_cpu = n_vcpus;
+   lvm_info[i].num_cpus = n_vcpus;
+   }
+
+   /* Save pcpu in use by libvirt VMs */
+   for (ii = 0; ii < n_vcpus; ii++) {
+   mask = 0;
+   for (jj = 0; jj < global_n_host_cpus; jj++) {
+   if (VIR_CPU_USABLE(global_cpumaps,
+   global_maplen, ii, jj) > 0) {
+   mask |= 1ULL << jj;
+   }
+   }
+   ITERATIVE_BITMASK_CHECK_64(mask, cpu) {
+   lvm_info[i].pcpus[ii] = cpu;
+   }
+   }
+   }
+}
+
 int
 get_info_vm(const char *vm_name, struct vm_info *info)
 {
diff --git a/examples/vm_power_manager/channel_manager.h 
b/examples/vm_power_manager/channel_manager.h
index 47c3b9c..788c1e6 100644
--- a/examples/vm_power_manager/channel_manager.h
+++ b/examples/vm_power_manager/channel_manager.h
@@ -66,6 +66,17 @@ struct sockaddr_un _sockaddr_un;
 #define UNIX_PATH_MAX sizeof(_sockaddr_un.sun_path)
 #endif
 
+#define MAX_VMS 4
+#define MAX_VCPUS 20
+
+
+struct libvirt_vm_info {
+   const char *vm_name;
+   unsigned int pcpus[MAX_VCPUS];
+   uint8_t num_cpus;
+};
+
+struct libvirt_vm_info lvm_info[MAX_VMS];
 /* Communication Channel Status */
 enum channel_status { CHANNEL_MGR_CHANNEL_DISCONNECTED = 0,
CHANNEL_MGR_CHANNEL_CONNECTED,
@@ -319,6 +330,20 @@ int set_channel_status(const char *vm_name, unsigned 
*channel_list,
  */
 int get_info_vm(const char *vm_name, struct vm_info *info);
 
+/**
+ * Populates a table with all domains running and their physical cpu.
+ * All information is gathered through libvirt api.
+ *
+ * @param noVms
+ *  modified to store number of active VMs
+ *
+ * @param noVcpus
+modified to store number of vcpus active
+ *
+ * @return
+ *   void
+ */
+void get_all_vm(int *noVms, int *noVcpus);
 #ifdef __cplusplus
 }
 #endif
-- 
2.7.4

[dpdk-dev] [PATCH v2 4/8] examples/vm_power_mgr: add scale to medium freq fn

2017-09-25 Thread David Hunt

Signed-off-by: Nemanja Marjanovic 
Signed-off-by: Rory Sexton 
Signed-off-by: David Hunt 
---
 examples/vm_power_manager/power_manager.c | 15 +++
 examples/vm_power_manager/power_manager.h | 13 +
 2 files changed, 28 insertions(+)

diff --git a/examples/vm_power_manager/power_manager.c 
b/examples/vm_power_manager/power_manager.c
index 80705f9..c021c1d 100644
--- a/examples/vm_power_manager/power_manager.c
+++ b/examples/vm_power_manager/power_manager.c
@@ -286,3 +286,18 @@ power_manager_disable_turbo_core(unsigned int core_num)
POWER_SCALE_CORE(disable_turbo, core_num, ret);
return ret;
 }
+
+int
+power_manager_scale_core_med(unsigned int core_num)
+{
+   int ret = 0;
+
+   if (core_num >= POWER_MGR_MAX_CPUS)
+   return -1;
+   if (!(global_enabled_cpus & (1ULL << core_num)))
+   return -1;
+   rte_spinlock_lock(&global_core_freq_info[core_num].power_sl);
+   ret = rte_power_set_freq(core_num, 5);
+   rte_spinlock_unlock(&global_core_freq_info[core_num].power_sl);
+   return ret;
+}
diff --git a/examples/vm_power_manager/power_manager.h 
b/examples/vm_power_manager/power_manager.h
index b74d09b..b52fb4c 100644
--- a/examples/vm_power_manager/power_manager.h
+++ b/examples/vm_power_manager/power_manager.h
@@ -231,6 +231,19 @@ int power_manager_disable_turbo_core(unsigned int 
core_num);
  */
 uint32_t power_manager_get_current_frequency(unsigned core_num);
 
+/**
+ * Scale to medium frequency for the core specified by core_num.
+ * It is thread-safe.
+ *
+ * @param core_num
+ *  The core number to change frequency
+ *
+ * @return
+ *  - 1 on success.
+ *  - 0 if frequency not changed.
+ *  - Negative on error.
+ */
+int power_manager_scale_core_med(unsigned int core_num);
 
 #ifdef __cplusplus
 }
-- 
2.7.4

[dpdk-dev] [PATCH v2 5/8] examples/vm_power_mgr: add policy to channels

2017-09-25 Thread David Hunt

From: "Sexton, Rory" 

Signed-off-by: Nemanja Marjanovic 
Signed-off-by: Rory Sexton 
Signed-off-by: David Hunt 
---
 examples/vm_power_manager/channel_monitor.c | 331 +++-
 examples/vm_power_manager/channel_monitor.h |  20 ++
 2 files changed, 345 insertions(+), 6 deletions(-)

diff --git a/examples/vm_power_manager/channel_monitor.c 
b/examples/vm_power_manager/channel_monitor.c
index ac40dac..3c8f72d 100644
--- a/examples/vm_power_manager/channel_monitor.c
+++ b/examples/vm_power_manager/channel_monitor.c
@@ -41,13 +41,20 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
 #include 
 #include 
+#include 
+#include 
 
+#ifdef RTE_LIBRTE_I40E_PMD
+#include 
+#endif
 
+#include 
 #include "channel_monitor.h"
 #include "channel_commands.h"
 #include "channel_manager.h"
@@ -57,10 +64,17 @@
 
 #define MAX_EVENTS 256
 
+#ifdef RTE_LIBRTE_I40E_PMD
+uint64_t vsi_pkt_count_prev[384];
+uint64_t rdtsc_prev[384];
+#endif
 
+double time_period_s = 1;
 static volatile unsigned run_loop = 1;
 static int global_event_fd;
+static unsigned int policy_is_set;
 static struct epoll_event *global_events_list;
+static struct policy policies[MAX_VMS];
 
 void channel_monitor_exit(void)
 {
@@ -68,6 +82,293 @@ void channel_monitor_exit(void)
rte_free(global_events_list);
 }
 
+static void
+core_share(int pNo, int z, int x, int t)
+{
+   if (policies[pNo].core_share[z].pcpu == lvm_info[x].pcpus[t]) {
+   if (strcmp(policies[pNo].pkt.vm_name,
+   lvm_info[x].vm_name) != 0) {
+   policies[pNo].core_share[z].status = 1;
+   power_manager_scale_core_max(
+   policies[pNo].core_share[z].pcpu);
+   }
+   }
+}
+
+static void
+core_share_status(int pNo) {
+
+   int noVms, noVcpus, z, x, t;
+
+   get_all_vm(&noVms, &noVcpus);
+
+   /* Reset Core Share Status. */
+   for (z = 0; z < noVcpus; z++)
+   policies[pNo].core_share[z].status = 0;
+
+   /* Foreach vcpu in a policy. */
+   for (z = 0; z < policies[pNo].pkt.num_vcpu; z++) {
+   /* Foreach VM on the platform. */
+   for (x = 0; x < noVms; x++) {
+   /* Foreach vcpu of VMs on platform. */
+   for (t = 0; t < lvm_info[x].num_cpus; t++)
+   core_share(pNo, z, x, t);
+   }
+   }
+}
+
+static void
+get_pcpu_to_control(struct policy *pol) {
+
+   /* Convert vcpu to pcpu. */
+   struct vm_info info;
+   int pcpu, count;
+   uint64_t mask_u64b;
+
+   RTE_LOG(INFO, CHANNEL_MONITOR, "Looking for pcpu for %s\n",
+   pol->pkt.vm_name);
+   get_info_vm(pol->pkt.vm_name, &info);
+
+   for (count = 0; count < pol->pkt.num_vcpu; count++) {
+   mask_u64b = info.pcpu_mask[pol->pkt.vcpu_to_control[count]];
+   for (pcpu = 0; mask_u64b; mask_u64b &= ~(1ULL << pcpu++)) {
+   if ((mask_u64b >> pcpu) & 1)
+   pol->core_share[count].pcpu = pcpu;
+   }
+   }
+}
+
+#ifdef RTE_LIBRTE_I40E_PMD
+static int
+get_pfid(struct policy *pol) {
+
+   int i, x, ret = 0, nb_ports;
+
+   nb_ports = rte_eth_dev_count();
+   for (i = 0; i < pol->pkt.nb_mac_to_monitor; i++) {
+
+   for (x = 0; x < nb_ports; x++) {
+   ret = rte_pmd_i40e_query_vfid_by_mac(x,
+   pol->pkt.vfid[i]);
+   if (ret != -EINVAL) {
+   pol->port[i] = x;
+   break;
+   }
+   }
+   if (ret == -EINVAL) {
+   RTE_LOG(INFO, CHANNEL_MONITOR,
+   "Error with Policy. MAC not found on "
+   "attached ports ");
+   pol->enabled = 0;
+   return ret;
+   }
+   pol->pfid[i] = ret;
+   }
+   return 1;
+}
+#endif
+
+static int
+update_policy(struct channel_packet *pkt) {
+
+   unsigned int updated = 0;
+
+   for (int i = 0; i < MAX_VMS; i++) {
+   if (strcmp(policies[i].pkt.vm_name, pkt->vm_name) == 0) {
+   policies[i].pkt = *pkt;
+   get_pcpu_to_control(&policies[i]);
+#ifdef RTE_LIBRTE_I40E_PMD
+   if (get_pfid(&policies[i]) == -1) {
+   updated = 1;
+   break;
+   }
+#endif
+   core_share_status(i);
+   policies[i].enabled = 1;
+   updated = 1;
+   }
+   }
+   if (!updated) {
+   for (int i = 0; i < MAX_VMS; i++) {
+   if (policies[i].enabled == 0) {
+   policies[i].pkt =

[dpdk-dev] [PATCH v2 6/8] examples/vm_power_mgr: add port initialisation

2017-09-25 Thread David Hunt

We need to initialise the port's we're monitoring to be able to see
the throughput.

Signed-off-by: Nemanja Marjanovic 
Signed-off-by: David Hunt 
---
 examples/vm_power_manager/main.c | 220 +++
 1 file changed, 220 insertions(+)

diff --git a/examples/vm_power_manager/main.c b/examples/vm_power_manager/main.c
index c33fcc9..698abca 100644
--- a/examples/vm_power_manager/main.c
+++ b/examples/vm_power_manager/main.c
@@ -49,6 +49,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 #include 
 
 #include "channel_manager.h"
@@ -56,6 +59,192 @@
 #include "power_manager.h"
 #include "vm_power_cli.h"
 
+#define RX_RING_SIZE 512
+#define TX_RING_SIZE 512
+
+#define NUM_MBUFS 8191
+#define MBUF_CACHE_SIZE 250
+#define BURST_SIZE 32
+
+static uint32_t enabled_port_mask;
+static volatile bool force_quit;
+
+//
+static const struct rte_eth_conf port_conf_default = {
+   .rxmode = { .max_rx_pkt_len = ETHER_MAX_LEN }
+};
+
+static inline int
+port_init(uint8_t port, struct rte_mempool *mbuf_pool)
+{
+   struct rte_eth_conf port_conf = port_conf_default;
+   const uint16_t rx_rings = 1, tx_rings = 1;
+   int retval;
+   uint16_t q;
+
+   if (port >= rte_eth_dev_count())
+   return -1;
+
+   /* Configure the Ethernet device. */
+   retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
+   if (retval != 0)
+   return retval;
+
+   /* Allocate and set up 1 RX queue per Ethernet port. */
+   for (q = 0; q < rx_rings; q++) {
+   retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE,
+   rte_eth_dev_socket_id(port), NULL, mbuf_pool);
+   if (retval < 0)
+   return retval;
+   }
+
+   /* Allocate and set up 1 TX queue per Ethernet port. */
+   for (q = 0; q < tx_rings; q++) {
+   retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE,
+   rte_eth_dev_socket_id(port), NULL);
+   if (retval < 0)
+   return retval;
+   }
+
+   /* Start the Ethernet port. */
+   retval = rte_eth_dev_start(port);
+   if (retval < 0)
+   return retval;
+
+   /* Display the port MAC address. */
+   struct ether_addr addr;
+   rte_eth_macaddr_get(port, &addr);
+   printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8
+  " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n",
+   (unsigned int)port,
+   addr.addr_bytes[0], addr.addr_bytes[1],
+   addr.addr_bytes[2], addr.addr_bytes[3],
+   addr.addr_bytes[4], addr.addr_bytes[5]);
+
+   /* Enable RX in promiscuous mode for the Ethernet device. */
+   rte_eth_promiscuous_enable(port);
+
+
+   return 0;
+}
+
+static int
+parse_portmask(const char *portmask)
+{
+   char *end = NULL;
+   unsigned long pm;
+
+   /* parse hexadecimal string */
+   pm = strtoul(portmask, &end, 16);
+   if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+   return -1;
+
+   if (pm == 0)
+   return -1;
+
+   return pm;
+}
+/* Parse the argument given in the command line of the application */
+static int
+parse_args(int argc, char **argv)
+{
+   int opt, ret;
+   char **argvopt;
+   int option_index;
+   char *prgname = argv[0];
+   static struct option lgopts[] = {
+   { "mac-updating", no_argument, 0, 1},
+   { "no-mac-updating", no_argument, 0, 0},
+   {NULL, 0, 0, 0}
+   };
+   argvopt = argv;
+
+   while ((opt = getopt_long(argc, argvopt, "p:q:T:",
+ lgopts, &option_index)) != EOF) {
+
+   switch (opt) {
+   /* portmask */
+   case 'p':
+   enabled_port_mask = parse_portmask(optarg);
+   if (enabled_port_mask == 0) {
+   printf("invalid portmask\n");
+   return -1;
+   }
+   break;
+   /* long options */
+   case 0:
+   break;
+
+   default:
+   return -1;
+   }
+   }
+
+   if (optind >= 0)
+   argv[optind-1] = prgname;
+
+   ret = optind-1;
+   optind = 0; /* reset getopt lib */
+   return ret;
+}
+
+static void
+check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
+{
+#define CHECK_INTERVAL 100 /* 100ms */
+#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
+   uint8_t portid, count, all_ports_up, print_flag = 0;
+   struct rte_eth_link link;
+
+   printf("\nChecking link status");
+   fflush(stdout);
+   for (count = 0; count <= MAX_CHECK_TIME; count++) {
+   if (force_q

[dpdk-dev] [PATCH v2 7/8] examples/guest_cli: add send policy to host

2017-09-25 Thread David Hunt

From: "Sexton, Rory" 

Here we're adding an example of setting up a policy, and allowing the
vm_cli_guest app to send it to the host using the cli command
"send_policy now"

Signed-off-by: Nemanja Marjanovic 
Signed-off-by: Rory Sexton 
Signed-off-by: David Hunt 
---
 .../guest_cli/vm_power_cli_guest.c | 104 +
 1 file changed, 104 insertions(+)

diff --git a/examples/vm_power_manager/guest_cli/vm_power_cli_guest.c 
b/examples/vm_power_manager/guest_cli/vm_power_cli_guest.c
index 4e982bd..33663f6 100644
--- a/examples/vm_power_manager/guest_cli/vm_power_cli_guest.c
+++ b/examples/vm_power_manager/guest_cli/vm_power_cli_guest.c
@@ -45,6 +45,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -139,8 +140,111 @@ cmdline_parse_inst_t cmd_set_cpu_freq_set = {
},
 };
 
+struct cmd_send_policy_result {
+   cmdline_fixed_string_t send_policy;
+   cmdline_fixed_string_t cmd;
+};
+
+#ifdef RTE_LIBRTE_I40E_PMD
+union PFID {
+   struct ether_addr addr;
+   uint64_t pfid;
+};
+#endif
+
+static inline int
+send_policy(void)
+{
+   struct channel_packet pkt;
+   int ret;
+
+#ifdef RTE_LIBRTE_I40E_PMD
+   union PFID pfid;
+   /* Use port MAC address as the vfid */
+   rte_eth_macaddr_get(0, &pfid.addr);
+   printf("Port %u MAC: %02" PRIx8 ":%02" PRIx8 ":%02" PRIx8 ":"
+   "%02" PRIx8 ":%02" PRIx8 ":%02" PRIx8 "\n",
+   1,
+   pfid.addr.addr_bytes[0], pfid.addr.addr_bytes[1],
+   pfid.addr.addr_bytes[2], pfid.addr.addr_bytes[3],
+   pfid.addr.addr_bytes[4], pfid.addr.addr_bytes[5]);
+   pkt.vfid[0] = pfid.pfid;
+
+   pkt.nb_mac_to_monitor = 1;
+#endif
+   pkt.t_boost_status.tbEnabled = false;
+
+   pkt.vcpu_to_control[0] = 0;
+   pkt.vcpu_to_control[1] = 1;
+   pkt.num_vcpu = 2;
+   /* Dummy Population. */
+#ifdef RTE_LIBRTE_I40E_PMD
+   pkt.traffic_policy.min_packet_thresh = 96000;
+   pkt.traffic_policy.avg_max_packet_thresh = 180;
+   pkt.traffic_policy.max_max_packet_thresh = 200;
+#endif
+
+   pkt.timer_policy.busy_hours[0] = 3;
+   pkt.timer_policy.busy_hours[1] = 4;
+   pkt.timer_policy.busy_hours[2] = 5;
+   pkt.timer_policy.quiet_hours[0] = 11;
+   pkt.timer_policy.quiet_hours[1] = 12;
+   pkt.timer_policy.quiet_hours[2] = 13;
+
+#ifdef RTE_LIBRTE_I40E_PMD
+   pkt.timer_policy.hours_to_use_traffic_profile[0] = 8;
+   pkt.timer_policy.hours_to_use_traffic_profile[1] = 10;
+#endif
+
+   pkt.workload = LOW;
+   pkt.policy_to_use = TIME;
+   pkt.command = PKT_POLICY;
+   strcpy(pkt.vm_name, "ubintu2");
+   ret = guest_channel_send_msg(&pkt, 1);
+   if (ret == 0)
+   return 1;
+   RTE_LOG(DEBUG, POWER, "Error sending message: %s\n",
+   ret > 0 ? strerror(ret) : "channel not connected");
+   return -1;
+}
+
+static void
+cmd_send_policy_parsed(void *parsed_result, struct cmdline *cl,
+  __attribute__((unused)) void *data)
+{
+   int ret = -1;
+   struct cmd_send_policy_result *res = parsed_result;
+
+   if (!strcmp(res->cmd, "now")) {
+   printf("Sending Policy down now!\n");
+   ret = send_policy();
+   }
+   if (ret != 1)
+   cmdline_printf(cl, "Error sending message: %s\n",
+   strerror(ret));
+}
+
+cmdline_parse_token_string_t cmd_send_policy =
+   TOKEN_STRING_INITIALIZER(struct cmd_send_policy_result,
+   send_policy, "send_policy");
+cmdline_parse_token_string_t cmd_send_policy_cmd_cmd =
+   TOKEN_STRING_INITIALIZER(struct cmd_send_policy_result,
+   cmd, "now");
+
+cmdline_parse_inst_t cmd_send_policy_set = {
+   .f = cmd_send_policy_parsed,
+   .data = NULL,
+   .help_str = "send_policy now",
+   .tokens = {
+   (void *)&cmd_send_policy,
+   (void *)&cmd_send_policy_cmd_cmd,
+   NULL,
+   },
+};
+
 cmdline_parse_ctx_t main_ctx[] = {
(cmdline_parse_inst_t *)&cmd_quit,
+   (cmdline_parse_inst_t *)&cmd_send_policy_set,
(cmdline_parse_inst_t *)&cmd_set_cpu_freq_set,
NULL,
 };
-- 
2.7.4

[dpdk-dev] [PATCH v2 8/8] examples/vm_power_mgr: set MAC address of VF

2017-09-25 Thread David Hunt

We need to set vf mac from the host, so that they will be in sync on the
guest and the host. Otherwise, we'll have a random mac on the guest, and
a 00:00:00:00:00:00 mac on the host.

Signed-off-by: David Hunt 
---
 examples/vm_power_manager/main.c | 60 +++-
 1 file changed, 59 insertions(+), 1 deletion(-)

diff --git a/examples/vm_power_manager/main.c b/examples/vm_power_manager/main.c
index 698abca..18f5e7f 100644
--- a/examples/vm_power_manager/main.c
+++ b/examples/vm_power_manager/main.c
@@ -58,6 +58,15 @@
 #include "channel_monitor.h"
 #include "power_manager.h"
 #include "vm_power_cli.h"
+#ifdef RTE_LIBRTE_IXGBE_PMD
+#include 
+#endif
+#ifdef RTE_LIBRTE_I40E_PMD
+#include 
+#endif
+#ifdef RTE_LIBRTE_BNXT_PMD
+#include 
+#endif
 
 #define RX_RING_SIZE 512
 #define TX_RING_SIZE 512
@@ -222,7 +231,7 @@ check_all_ports_link_status(uint8_t port_num, uint32_t 
port_mask)
(uint8_t)portid);
continue;
}
-  /* clear all_ports_up flag if any link down */
+   /* clear all_ports_up flag if any link down */
if (link.link_status == ETH_LINK_DOWN) {
all_ports_up = 0;
break;
@@ -273,7 +282,9 @@ main(int argc, char **argv)
unsigned lcore_id;
unsigned int nb_ports;
struct rte_mempool *mbuf_pool;
+#ifdef RTE_LIBRTE_I40E_PMD
uint8_t portid;
+#endif
 
 
ret = rte_eal_init(argc, argv);
@@ -300,13 +311,60 @@ main(int argc, char **argv)
rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
 
/* Initialize ports. */
+#ifdef RTE_LIBRTE_I40E_PMD
for (portid = 0; portid < nb_ports; portid++) {
+   struct ether_addr eth;
+   int w, j;
+   int ret = -ENOTSUP;
+
if ((enabled_port_mask & (1 << portid)) == 0)
continue;
+
+   eth.addr_bytes[0] = 0xe0;
+   eth.addr_bytes[1] = 0xe0;
+   eth.addr_bytes[2] = 0xe0;
+   eth.addr_bytes[3] = 0xe0;
+   eth.addr_bytes[4] = portid + 0xf0;
+
if (port_init(portid, mbuf_pool) != 0)
rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8 "\n",
portid);
+
+   for (w = 0; w < MAX_VFS; w++) {
+   eth.addr_bytes[5] = w + 0xf0;
+
+#ifdef RTE_LIBRTE_IXGBE_PMD
+   if (ret == -ENOTSUP)
+   ret = rte_pmd_ixgbe_set_vf_mac_addr(portid,
+   w, ð);
+#endif
+#ifdef RTE_LIBRTE_I40E_PMD
+   if (ret == -ENOTSUP)
+   ret = rte_pmd_i40e_set_vf_mac_addr(portid,
+   w, ð);
+#endif
+#ifdef RTE_LIBRTE_BNXT_PMD
+   if (ret == -ENOTSUP)
+   ret = rte_pmd_bnxt_set_vf_mac_addr(portid,
+   w, ð);
+#endif
+
+
+   switch (ret) {
+   case 0:
+   printf("Port %d VF %d MAC: ",
+   portid, w);
+   for (j = 0; j < 6; j++) {
+   printf("%02x", eth.addr_bytes[j]);
+   if (j < 5)
+   printf(":");
+   }
+   printf("\n");
+   break;
+   }
+   }
}
+#endif
 
lcore_id = rte_get_next_lcore(-1, 1, 0);
if (lcore_id == RTE_MAX_LCORE) {
-- 
2.7.4

Re: [dpdk-dev] [PATCH] net/af_packet: fix build failure because of unused parameter

2017-09-25 Thread Ferruh Yigit

On 9/25/2017 11:06 AM, Ferruh Yigit wrote:
> On 9/25/2017 10:53 AM, Ferruh Yigit wrote:
>> On 9/25/2017 10:40 AM, Bruce Richardson wrote:
>>> On Mon, Sep 25, 2017 at 09:42:56AM +0100, Ferruh Yigit wrote:
 On 9/25/2017 7:50 AM, Andrew Rybchenko wrote:
> Failure happens on build using:
> gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-4)

 Yes, that case is missed. What do you think about following one:

   @@ -561,7 +561,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
   unsigned int blockcnt,
   unsigned int framesize,
   unsigned int framecnt,
   -  unsigned int qdisc_bypass,
   +  unsigned int qdisc_bypass __rte_unused,
   struct pmd_internals **internals,
   struct rte_eth_dev **eth_dev,
   struct rte_kvargs *kvlist)

>
> Fixes: 0d16c17ae7a4 ("net/af_packet: make qdisc bypass configurable")
>
> Signed-off-by: Andrew Rybchenko 
> ---
> May be the right solution in fact remove PACKET_QDISC_BYPASS conditional
> completely. If below solution is accepted, feel free to squash it into
> the original patch.

 It is a little to late for this, I already sent a pull-request with this
 patch. So fix will need to be a separate patch.

>>> Pull request hasn't actually been pulled yet, so you should be able to
>>> supercede it by a later one, right?
>>
>> Technically yes, and easy to do, but it will be confusing. I was taking
>> pull-request as code freeze in sub-tree.
>>
>> If there are multiple pull-request for a tree, how can one be sure which
>> ones has been pulled and what to expect in main repo, and verify what
>> has been merged?
>>
>> As above said, if Thomas thinks this is OK, I can send another pull request?
> 
> What about Thomas doing the squash while applying to the main repo, so
> that main repo won't be broken and there won't be multiple pull requests
> around?

I will get this patch to next-net, so next-net won't be broken anymore
(thanks to Nelio for suggesting). And this leaves the decision to Thomas
to squash the fix (so main repo won't be broken) or get as separate
patch while merging.

Re: [dpdk-dev] [PATCH] net/af_packet: fix build failure because of unused parameter

2017-09-25 Thread Ferruh Yigit

On 9/25/2017 7:50 AM, Andrew Rybchenko wrote:
> Failure happens on build using:
> gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-4)
> 
> Fixes: 0d16c17ae7a4 ("net/af_packet: make qdisc bypass configurable")
> 
> Signed-off-by: Andrew Rybchenko 

Reviewed-by: Ferruh Yigit

Re: [dpdk-dev] [PATCH] net/af_packet: fix build failure because of unused parameter

2017-09-25 Thread Ferruh Yigit

On 9/25/2017 1:52 PM, Ferruh Yigit wrote:
> On 9/25/2017 7:50 AM, Andrew Rybchenko wrote:
>> Failure happens on build using:
>> gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-4)
>>
>> Fixes: 0d16c17ae7a4 ("net/af_packet: make qdisc bypass configurable")
>>
>> Signed-off-by: Andrew Rybchenko 
> 
> Reviewed-by: Ferruh Yigit 

Applied to dpdk-next-net/master, thanks.

Re: [dpdk-dev] [PATCH] doc: add Linux flower support check in TAP guide

2017-09-25 Thread Ferruh Yigit

On 9/25/2017 12:23 PM, Mcnamara, John wrote:
> 
> 
>> -Original Message-
>> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Thomas Monjalon
>> Sent: Wednesday, September 20, 2017 2:03 PM
>> To: Pascal Mazon 
>> Cc: dev@dpdk.org
>> Subject: [dpdk-dev] [PATCH] doc: add Linux flower support check in TAP
>> guide
>>
>> The flow API is supported in TAP PMD if flower is supported in Linux.
>> Some commands are combined to suggest a convenient check of its support by
>> the running kernel.
>>
>> Signed-off-by: Thomas Monjalon 

Acked-by: Pascal Mazon 

> Acked-by: John McNamara 

Applied to dpdk-next-net/master, thanks.

Re: [dpdk-dev] [dpdk-stable] [PATCH] net/i40e: fix assignment of enum values

2017-09-25 Thread Ferruh Yigit

On 9/23/2017 3:05 AM, Wu, Jingjing wrote:
> 
> 
>> -Original Message-
>> From: Yigit, Ferruh
>> Sent: Saturday, September 23, 2017 12:48 AM
>> To: Jastrzebski, MichalX K ; Wu, Jingjing
>> ; Xing, Beilei 
>> Cc: dev@dpdk.org; Jain, Deepak K ; Kulasek, TomaszX
>> ; sta...@dpdk.org
>> Subject: Re: [dpdk-stable] [PATCH] net/i40e: fix assignment of enum values
>>
>> On 9/22/2017 1:36 PM, Michal Jastrzebski wrote:
>>> From: Tomasz Kulasek 
>>>
>>> mixed_enums: Mixing enum types enum i40e_vsi_type and enum
>>>  virtchnl_vsi_type for type
>>>
>>> Coverity issue 158651
>>> Fixes: a58860f68929 ("net/i40e/base: use new virtchnl header file")
>>> Cc: jingjing...@intel.com
>>> Cc: sta...@dpdk.org
>>>
>>> Signed-off-by: Tomasz Kulasek 

> Acked-by: Jingjing Wu  

Applied to dpdk-next-net/master, thanks.

[dpdk-dev] [PATCH] example: add new service cores sample application

2017-09-25 Thread Harry van Haaren

This commit adds a new sample app, which showcases the value
of running services. In particular it allows the application
to dynamically schedule services to service-cores.

The sample app itself registers a number of dummy services,
and applies different profiles to them at runtime. Note that
this sample application does not forward any traffic - it
demonstrates advanced usage of the service cores API.

Signed-off-by: Harry van Haaren 
---
 examples/service_cores/Makefile |  54 +
 examples/service_cores/main.c   | 245 
 2 files changed, 299 insertions(+)
 create mode 100644 examples/service_cores/Makefile
 create mode 100644 examples/service_cores/main.c

diff --git a/examples/service_cores/Makefile b/examples/service_cores/Makefile
new file mode 100644
index 000..bd4a345
--- /dev/null
+++ b/examples/service_cores/Makefile
@@ -0,0 +1,54 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2017 Intel Corporation. All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# * Neither the name of Intel Corporation nor the names of its
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = service_cores
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += $(WERROR_FLAGS)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/service_cores/main.c b/examples/service_cores/main.c
new file mode 100644
index 000..c1f125a
--- /dev/null
+++ b/examples/service_cores/main.c
@@ -0,0 +1,245 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY O

Re: [dpdk-dev] [PATCH 1/2] net/mlx4: get back RX flow functionality

2017-09-25 Thread Ferruh Yigit

On 8/3/2017 9:49 AM, Vasily Philipov wrote:
> Getting hw directly on RX fast path without verbs call.
> 
> Now the number of scatters is calculating on the fly, according to the
> maximum expected packet size.
> 
> Signed-off-by: Vasily Philipov 

I will update series as superseded with [1], please shout if this is wrong.

Thanks,
ferruh

[1]
http://dpdk.org/dev/patchwork/patch/27881/
Moti's mlx4 patchset with 5 patches.

Re: [dpdk-dev] [PATCH v2] net/ark:add null point check

2017-09-25 Thread Ed Czeck

Thank you.


> In function ark_config_device(), there are several malloc without null
> point check. Fix it by adding null point check.



Acked-by: Ed Czeck

Re: [dpdk-dev] [PATCH v2] net/ark:add null point check

2017-09-25 Thread Ferruh Yigit

On 9/25/2017 2:47 PM, Ed Czeck wrote:
<...>
>> In function ark_config_device(), there are several malloc without null
>> point check. Fix it by adding null point check.
> 

Signed-off-by: Yong Wang 

> Acked-by: Ed Czeck 

Applied to dpdk-next-net/master, thanks.

Re: [dpdk-dev] [PATCH v3 1/7] member: implement main API

2017-09-25 Thread De Lara Guarch, Pablo



> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Yipeng Wang
> Sent: Wednesday, September 6, 2017 1:00 AM
> To: dev@dpdk.org
> Cc: Tai, Charlie ; Gobriel, Sameh
> ; Wang, Ren ; Wang,
> Yipeng1 ; Mcnamara, John
> 
> Subject: [dpdk-dev] [PATCH v3 1/7] member: implement main API
> 
> Membership library is an extension and generalization of a traditional filter
> (for example Bloom Filter) structure. In general, the Membership library is a
> data structure that provides a "set-summary" and responds to set-
> membership queries of whether a certain element belongs to a set(s). A
> membership test for an element will return the set this element belongs to
> or not-found if the element is never inserted into the set-summary.
> 
> The results of the membership test is not 100% accurate. Certain false

Is -> are.
> positive or false negative probability could exist. However, comparing to a
> "full-blown" complete list of elements, a "set-summary"
> is memory efficient and fast on lookup.
> 
> This patch add the main API definition.
> 
> Signed-off-by: Yipeng Wang 
> ---
>  lib/Makefile |   2 +
>  lib/librte_eal/common/eal_common_log.c   |   1 +
>  lib/librte_eal/common/include/rte_log.h  |   1 +
>  lib/librte_member/Makefile   |  49 +++
>  lib/librte_member/rte_member.c   | 342 
>  lib/librte_member/rte_member.h   | 518
> +++
>  lib/librte_member/rte_member_version.map |  16 +
>  7 files changed, 929 insertions(+)
>  create mode 100644 lib/librte_member/Makefile  create mode 100644
> lib/librte_member/rte_member.c  create mode 100644
> lib/librte_member/rte_member.h  create mode 100644
> lib/librte_member/rte_member_version.map
> 

...

> diff --git a/lib/librte_member/rte_member.c
> b/lib/librte_member/rte_member.c new file mode 100644 index
> 000..71b066d
> --- /dev/null
> +++ b/lib/librte_member/rte_member.c

...

> +
> +#include 
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include "rte_member.h"
> +#include "rte_member_ht.h"
> +#include "rte_member_vbf.h"
> +
> +TAILQ_HEAD(rte_member_list, rte_tailq_entry); static struct
> +rte_tailq_elem rte_member_tailq = {
> + .name = "RTE_MEMBER",
> +};
> +EAL_REGISTER_TAILQ(rte_member_tailq)
> +
> +
> +void *
> +rte_member_find_existing(const char *name) {
> + struct rte_member_setsum *setsum = NULL;
> + struct rte_tailq_entry *te;
> + struct rte_member_list *member_list;
> +
> + member_list = RTE_TAILQ_CAST(rte_member_tailq.head,
> rte_member_list);
> +
> + rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
> + TAILQ_FOREACH(te, member_list, next) {
> + setsum = (struct rte_member_setsum *) te->data;
> + if (strncmp(name, setsum->name,
> RTE_MEMBER_NAMESIZE) == 0)
> + break;
> + }
> + rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
> +
> + if (te == NULL) {
> + rte_errno = ENOENT;
> + return NULL;
> + }
> + return setsum;
> +}
> +
> +void
> +rte_member_free(void *ss)

Why not using directly "struct rte_member_setsum", so you can use it directly?
This applies to other functions that are using "void *".
I see that the content of this structure is internal, but you can still use a 
pointer to that structure.

...

> +void *
> +rte_member_create(const struct rte_member_parameters *params) {
> + struct rte_tailq_entry *te;
> + struct rte_member_list *member_list = NULL;
> + struct rte_member_setsum *setsum = NULL;
> + int ret;
> +
> + if (params == NULL) {
> + rte_errno = EINVAL;
> + return NULL;
> + }
> +
> + if ((params->key_len == 0)) {
> + rte_errno = EINVAL;
> + RTE_LOG(ERR, MEMBER,
> + "Memship create with invalid parameters\n");

rte_member_create has invalid parameters?


> diff --git a/lib/librte_member/rte_member.h
> b/lib/librte_member/rte_member.h new file mode 100644 index
> 000..de44b1b
> --- /dev/null
> +++ b/lib/librte_member/rte_member.h
> @@ -0,0 +1,518 @@

...

> +enum rte_member_setsum_type {
> + RTE_MEMBER_TYPE_HT = 0,  /**< Hash table based set summary.
> */
> + RTE_MEMBER_TYPE_VBF, /**< vector of bloom filters. */

"vector" -> "Vector".

> + RTE_MEMBER_NUM_TYPE
> +};
> +
> +/** @internal compare function for different arch. */ enum
> +rte_member_sig_compare_function {
> + RTE_MEMBER_COMPARE_SCALAR = 0,
> + RTE_MEMBER_COMPARE_AVX2,
> + RTE_MEMBER_COMPARE_NUM
> +};
> +
> +/** @internal setsummary structure. */
> +struct rte_member_setsum {
> + enum rte_member_setsum_type type;
> + const char *name;
> + uint32_t key_len;
> + uint32_t socket_id;  /* NUMA Socket ID for memory. */

Either comment all the fields or do not do it (for this case, because the
structure is internal, otherwise, all fields would require a comm

Re: [dpdk-dev] [PATCH v4 02/41] bus/dpaa: introduce NXP DPAA Bus driver skeleton

2017-09-25 Thread Shreyansh Jain


On Tuesday 19 September 2017 06:44 PM, Shreyansh Jain wrote:

Hello Ferruh,

On Monday 18 September 2017 08:17 PM, Ferruh Yigit wrote:

On 9/9/2017 12:20 PM, Shreyansh Jain wrote:

Signed-off-by: Shreyansh Jain 
Signed-off-by: Hemant Agrawal 


<...>

diff --git a/drivers/bus/dpaa/rte_bus_dpaa_version.map 
b/drivers/bus/dpaa/rte_bus_dpaa_version.map

new file mode 100644
index 000..d97a009
--- /dev/null
+++ b/drivers/bus/dpaa/rte_bus_dpaa_version.map
@@ -0,0 +1,7 @@
+DPDK_17.11 {
+    global:
+
+    rte_dpaa_driver_register;
+    rte_dpaa_driver_unregister;


"local *;" ?


Agree. I will change this.
Currently rte_dpaa_driver_* functions are being used locally within 
bus/dpaa.




Even though I agree earlier that I will change this (append 'local *:' 
to the file), probably I will have to skip this.
Further in the patch series, there are some symbols which are added 
which are required by the mempool and net drivers (and crypto, in 
future). Shared compilation fails for them if I add 'local: *;' here.

Re: [dpdk-dev] [PATCH] net/af_packet: fix build failure because of unused parameter

2017-09-25 Thread Thomas Monjalon

25/09/2017 12:06, Ferruh Yigit:
> On 9/25/2017 10:53 AM, Ferruh Yigit wrote:
> > On 9/25/2017 10:40 AM, Bruce Richardson wrote:
> >> On Mon, Sep 25, 2017 at 09:42:56AM +0100, Ferruh Yigit wrote:
> >>> It is a little to late for this, I already sent a pull-request with this
> >>> patch. So fix will need to be a separate patch.
> >>>
> >> Pull request hasn't actually been pulled yet, so you should be able to
> >> supercede it by a later one, right?
> > 
> > Technically yes, and easy to do, but it will be confusing. I was taking
> > pull-request as code freeze in sub-tree.
> > 
> > If there are multiple pull-request for a tree, how can one be sure which
> > ones has been pulled and what to expect in main repo, and verify what
> > has been merged?
> > 
> > As above said, if Thomas thinks this is OK, I can send another pull request?
> 
> What about Thomas doing the squash while applying to the main repo, so
> that main repo won't be broken and there won't be multiple pull requests
> around?

Yes, no need of sending a new pull request.

[dpdk-dev] [PATCH v2] vfio: refactor PCI BAR mapping

2017-09-25 Thread Jonas Pfefferle

Split pci_vfio_map_resource for primary and secondary processes.
Save all relevant mapping data in primary process to allow
the secondary process to perform mappings.

Signed-off-by: Jonas Pfefferle 
---
v2:
* fix zero size and offset when trying to mmap non msix bar

 lib/librte_eal/common/include/rte_pci.h|   7 +
 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 446 +
 2 files changed, 271 insertions(+), 182 deletions(-)

diff --git a/lib/librte_eal/common/include/rte_pci.h 
b/lib/librte_eal/common/include/rte_pci.h
index 8b12339..0821af9 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -214,6 +214,12 @@ struct pci_map {
uint64_t phaddr;
 };
 
+struct pci_msix_table {
+   int bar_index;
+   uint32_t offset;
+   uint32_t size;
+};
+
 /**
  * A structure describing a mapped PCI resource.
  * For multi-process we need to reproduce all PCI mappings in secondary
@@ -226,6 +232,7 @@ struct mapped_pci_resource {
char path[PATH_MAX];
int nb_maps;
struct pci_map maps[PCI_MAX_RESOURCE];
+   struct pci_msix_table msix_table;
 };
 
 /** mapped pci device list */
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c 
b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
index aa9d96e..6443bd5 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -88,8 +88,7 @@ pci_vfio_write_config(const struct rte_intr_handle 
*intr_handle,
 
 /* get PCI BAR number where MSI-X interrupts are */
 static int
-pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset,
- uint32_t *msix_table_size)
+pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
 {
int ret;
uint32_t reg;
@@ -161,9 +160,10 @@ pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t 
*msix_table_offset,
return -1;
}
 
-   *msix_bar = reg & RTE_PCI_MSIX_TABLE_BIR;
-   *msix_table_offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
-   *msix_table_size = 16 * (1 + (flags & 
RTE_PCI_MSIX_FLAGS_QSIZE));
+   msix_table->bar_index = reg & RTE_PCI_MSIX_TABLE_BIR;
+   msix_table->offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
+   msix_table->size =
+   16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE));
 
return 0;
}
@@ -300,25 +300,150 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, 
int vfio_dev_fd)
return -1;
 }
 
-/*
- * map the PCI resources of a PCI device in virtual memory (VFIO version).
- * primary and secondary processes follow almost exactly the same path
- */
-int
-pci_vfio_map_resource(struct rte_pci_device *dev)
+static int
+pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index)
+{
+   uint32_t ioport_bar;
+   int ret;
+
+   ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar),
+ VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)
+ + PCI_BASE_ADDRESS_0 + bar_index*4);
+   if (ret != sizeof(ioport_bar)) {
+   RTE_LOG(ERR, EAL, "Cannot read command (%x) from config 
space!\n",
+   PCI_BASE_ADDRESS_0 + bar_index*4);
+   return -1;
+   }
+
+   return ioport_bar & PCI_BASE_ADDRESS_SPACE_IO;
+}
+
+static int
+pci_vfio_setup_device(struct rte_pci_device *dev, int vfio_dev_fd)
+{
+   if (pci_vfio_setup_interrupts(dev, vfio_dev_fd) != 0) {
+   RTE_LOG(ERR, EAL, "Error setting up interrupts!\n");
+   return -1;
+   }
+
+   /* set bus mastering for the device */
+   if (pci_vfio_set_bus_master(vfio_dev_fd, true)) {
+   RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n");
+   return -1;
+   }
+
+   /* Reset the device */
+   ioctl(vfio_dev_fd, VFIO_DEVICE_RESET);
+
+   return 0;
+}
+
+static int
+pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
+   int bar_index, int additional_flags)
+{
+   struct memreg {
+   unsigned long offset, size;
+   } memreg[2] = {};
+   void *bar_addr;
+   struct pci_msix_table *msix_table = &vfio_res->msix_table;
+   struct pci_map *bar = &vfio_res->maps[bar_index];
+
+   if (bar->size == 0)
+   /* Skip this BAR */
+   return 0;
+
+   if (msix_table->bar_index == bar_index) {
+   /*
+* VFIO will not let us map the MSI-X table,
+* but we can map around it.
+*/
+   uint32_t table_start = msix_table->offset;
+   uint32_t table_end = table_start + msix_table->size;
+   table_end = (table_end + ~PAGE_MASK) & PAGE_MASK;
+   table_start &= PAGE_MASK;
+
+   if (table_sta

Re: [dpdk-dev] [PATCH v4 02/41] bus/dpaa: introduce NXP DPAA Bus driver skeleton

2017-09-25 Thread Ferruh Yigit

On 9/25/2017 3:32 PM, Shreyansh Jain wrote:
> On Tuesday 19 September 2017 06:44 PM, Shreyansh Jain wrote:
>> Hello Ferruh,
>>
>> On Monday 18 September 2017 08:17 PM, Ferruh Yigit wrote:
>>> On 9/9/2017 12:20 PM, Shreyansh Jain wrote:
 Signed-off-by: Shreyansh Jain 
 Signed-off-by: Hemant Agrawal 
>>>
>>> <...>
>>>
 diff --git a/drivers/bus/dpaa/rte_bus_dpaa_version.map 
 b/drivers/bus/dpaa/rte_bus_dpaa_version.map
 new file mode 100644
 index 000..d97a009
 --- /dev/null
 +++ b/drivers/bus/dpaa/rte_bus_dpaa_version.map
 @@ -0,0 +1,7 @@
 +DPDK_17.11 {
 +    global:
 +
 +    rte_dpaa_driver_register;
 +    rte_dpaa_driver_unregister;
>>>
>>> "local *;" ?
>>
>> Agree. I will change this.
>> Currently rte_dpaa_driver_* functions are being used locally within 
>> bus/dpaa.
>>
> 
> Even though I agree earlier that I will change this (append 'local *:' 
> to the file), probably I will have to skip this.
> Further in the patch series, there are some symbols which are added 
> which are required by the mempool and net drivers (and crypto, in 
> future). Shared compilation fails for them if I add 'local: *;' here.

It should be OK if this is last item in the first group.

Technically I believe it will be OK to remove that line, but not quite sure.

Lets be consistent with exiting usage and keep it, there are many sample
map files.

[dpdk-dev] [PATCH v3 00/13] Move PCI away from the EAL

2017-09-25 Thread Gaetan Rivet

Hi all,

Here is a new version of the PCI bus move out of the EAL.

The EAL PCI implementation is divided in two parts:

  - librte_pci: library offering helpers to handle PCI objects
  - librte_bus_pci: bus driver for PCI devices

This allows other libraries / tools to use PCI elements (location, mappings,
parsing operations, etc) without forcing a dependency on a bus driver.

The latter should not have to export helpers that others might need. It
is focused on defining the rte_pci_device, rte_pci_driver objects and
their handling.

The cryptodev library has hard dependencies on rte_pci_devices (used by
generic probe function). Other similar libs (ether and eventdev) avoided
the issue by inlining such functions and expecting users to include the
relevant headers once the PCI bus has already been built.

@Declan:
I proposed a solution that would avoid inlining those functions,
which does not feel right. Let me know what you think of it or if you
think of a better solution. I think it would be best to have cryptodev
completely independent from PCI / vdev as far as the lib in concerned
(the vdev bus will move as well).

v2:

  + Made rte_eal_using_phys_addrs common to both linux and bsd interfaces.
  + Added documentation of EAL API changes in release note.
  + Fixed a few rebase-related mistakes.
  + Fixed parallel build race condition reported by Luca Boccassi.
  + Grouped together commits breaking compilation:

-> pci: introduce PCI lib and bus
-> lib: include rte_bus_pci
-> drivers: include rte_bus_pci
-> test: include rte_bus_pci
-> app/testpmd: include rte_bus_pci
-> cryptodev: move PCI specific helpers to drivers/crypto

  Until all of them have been applied, compilation is broken.
  I am currently wondering whether merging some of them might
  be sensible.

  + Not included in this series:

Several filesystem-related functions are currently
private to the EAL and directly linked. This is not good,
but the solution seems to be to have a new lib offering an FS abstraction.
This seems an overreach for this patchset and should probably come in a
second step.

v3:

  + Fixed .map versioning
  + merged one commit breaking the build into the main commit moving
code around.

Other such commits are still present, as they only break specific subsystems
(lib, drivers, apps, cryptodev). Merging them all within the one main commit
does not seem right.

As such, build is still broken from

   * pci: introduce PCI lib and bus

until

   * cryptodev: move PCI specific helpers to drivers/crypto

Gaetan Rivet (13):
  eal: expose rte_eal_using_phys_addrs
  ethdev: remove useless PCI dependency
  bus: properly include rte_debug
  pci: introduce PCI lib and bus
  lib: include rte_bus_pci
  drivers: include rte_bus_pci
  test: include rte_bus_pci
  app/testpmd: include rte_bus_pci
  cryptodev: move PCI specific helpers to drivers/crypto
  pci: avoid inlining functions
  pci: avoid over-complicated macro
  pci: deprecate misnamed functions
  doc: add notes on EAL PCI API update

 app/test-pmd/testpmd.h   |   1 +
 config/common_base   |  10 +
 doc/guides/rel_notes/deprecation.rst |  10 +
 doc/guides/rel_notes/release_17_11.rst   |  28 +
 drivers/Makefile |   2 +-
 drivers/bus/Makefile |   2 +
 drivers/bus/pci/Makefile |  59 ++
 drivers/bus/pci/bsd/Makefile |  32 +
 drivers/bus/pci/bsd/rte_pci.c| 671 +
 drivers/bus/pci/include/rte_bus_pci.h| 387 
 drivers/bus/pci/linux/Makefile   |  37 ++
 drivers/bus/pci/linux/rte_pci.c  | 723 +++
 drivers/bus/pci/linux/rte_pci_init.h |  97 +++
 drivers/bus/pci/linux/rte_pci_uio.c  | 568 ++
 drivers/bus/pci/linux/rte_pci_vfio.c | 675 +
 drivers/bus/pci/linux/rte_vfio_mp_sync.c | 425 +
 drivers/bus/pci/private.h| 174 ++
 drivers/bus/pci/rte_bus_pci_version.map  |  21 +
 drivers/bus/pci/rte_pci_common.c | 543 +
 drivers/bus/pci/rte_pci_common_uio.c | 235 
 drivers/crypto/Makefile  |   4 +-
 drivers/crypto/pci/Makefile  |  52 ++
 drivers/crypto/pci/rte_cryptodev_pci.c   | 128 
 drivers/crypto/pci/rte_cryptodev_pci.h   |  94 +++
 drivers/crypto/pci/rte_cryptodev_pci_version.map |   7 +
 drivers/crypto/qat/qat_qp.c  |   1 +
 drivers/event/octeontx/ssovf_probe.c |   1 +
 drivers/net/ark/ark_ethdev.c |   1 +
 drivers/net/avp/avp_ethdev.c |   2 +
 drivers/net/bnxt/bnxt.h  |   1 +
 drivers/net/bonding/rte

[dpdk-dev] [PATCH v3 01/13] eal: expose rte_eal_using_phys_addrs

2017-09-25 Thread Gaetan Rivet

This function was previously private to the EAL layer.
Other subsystems requires it, such as the PCI bus.

In order not to force other components to include stdbool, which is
incompatible with several NIC drivers, the return type has
been changed from bool to int.

Signed-off-by: Gaetan Rivet 
---
 lib/librte_eal/bsdapp/eal/eal_memory.c  |  6 ++
 lib/librte_eal/bsdapp/eal/rte_eal_version.map   |  9 -
 lib/librte_eal/common/eal_private.h | 11 ---
 lib/librte_eal/common/include/rte_memory.h  | 11 +++
 lib/librte_eal/linuxapp/eal/eal_memory.c|  2 +-
 lib/librte_eal/linuxapp/eal/rte_eal_version.map |  9 -
 6 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/lib/librte_eal/bsdapp/eal/eal_memory.c 
b/lib/librte_eal/bsdapp/eal/eal_memory.c
index 3614da8..65c96b0 100644
--- a/lib/librte_eal/bsdapp/eal/eal_memory.c
+++ b/lib/librte_eal/bsdapp/eal/eal_memory.c
@@ -192,3 +192,9 @@ rte_eal_hugepage_attach(void)
close(fd_hugepage);
return -1;
 }
+
+int
+rte_eal_using_phys_addrs(void)
+{
+   return 0;
+}
diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map 
b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
index 47a09ea..2065c53 100644
--- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
@@ -204,6 +204,13 @@ DPDK_17.08 {
 
 } DPDK_17.05;
 
+DPDK_17.11 {
+   global;
+
+   rte_eal_using_phys_addrs;
+
+} DPDK_17.08;
+
 EXPERIMENTAL {
global:
 
@@ -237,4 +244,4 @@ EXPERIMENTAL {
rte_service_set_stats_enable;
rte_service_start_with_defaults;
 
-} DPDK_17.08;
+} DPDK_17.11;
diff --git a/lib/librte_eal/common/eal_private.h 
b/lib/librte_eal/common/eal_private.h
index 597d82e..10a7078 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -333,17 +333,6 @@ int rte_eal_hugepage_init(void);
 int rte_eal_hugepage_attach(void);
 
 /**
- * Returns true if the system is able to obtain
- * physical addresses. Return false if using DMA
- * addresses through an IOMMU.
- *
- * Drivers based on uio will not load unless physical
- * addresses are obtainable. It is only possible to get
- * physical addresses when running as a privileged user.
- */
-bool rte_eal_using_phys_addrs(void);
-
-/**
  * Find a bus capable of identifying a device.
  *
  * @param str
diff --git a/lib/librte_eal/common/include/rte_memory.h 
b/lib/librte_eal/common/include/rte_memory.h
index 4aa5d1f..5568931 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -195,6 +195,17 @@ unsigned rte_memory_get_nchannel(void);
  */
 unsigned rte_memory_get_nrank(void);
 
+/**
+ * Drivers based on uio will not load unless physical
+ * addresses are obtainable. It is only possible to get
+ * physical addresses when running as a privileged user.
+ *
+ * @return
+ *   1 if the system is able to obtain physical addresses.
+ *   0 if using DMA addresses through an IOMMU.
+ */
+int rte_eal_using_phys_addrs(void);
+
 #ifdef RTE_LIBRTE_XEN_DOM0
 
 /**< Internal use only - should DOM0 memory mapping be used */
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c 
b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 5279128..af8719b 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -1542,7 +1542,7 @@ rte_eal_hugepage_attach(void)
return -1;
 }
 
-bool
+int
 rte_eal_using_phys_addrs(void)
 {
return phys_addrs_available;
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map 
b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index 8c08b8d..18bce25 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -209,6 +209,13 @@ DPDK_17.08 {
 
 } DPDK_17.05;
 
+DPDK_17.11 {
+   global;
+
+   rte_eal_using_phys_addrs;
+
+} DPDK_17.08;
+
 EXPERIMENTAL {
global:
 
@@ -242,4 +249,4 @@ EXPERIMENTAL {
rte_service_set_stats_enable;
rte_service_start_with_defaults;
 
-} DPDK_17.08;
+} DPDK_17.11;
-- 
2.1.4

[dpdk-dev] [PATCH v3 02/13] ethdev: remove useless PCI dependency

2017-09-25 Thread Gaetan Rivet

Signed-off-by: Gaetan Rivet 
---
 lib/librte_ether/rte_ethdev.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 1849a3b..5092b82 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -47,7 +47,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
-- 
2.1.4

[dpdk-dev] [PATCH v3 03/13] bus: properly include rte_debug

2017-09-25 Thread Gaetan Rivet

Signed-off-by: Gaetan Rivet 
---
 lib/librte_eal/common/eal_common_bus.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/librte_eal/common/eal_common_bus.c 
b/lib/librte_eal/common/eal_common_bus.c
index 08bec2d..9d1be8a 100644
--- a/lib/librte_eal/common/eal_common_bus.c
+++ b/lib/librte_eal/common/eal_common_bus.c
@@ -35,6 +35,7 @@
 #include 
 
 #include 
+#include 
 
 #include "eal_private.h"
 
-- 
2.1.4

[dpdk-dev] [PATCH v3 05/13] lib: include rte_bus_pci

2017-09-25 Thread Gaetan Rivet

Devices and drivers are now defined within the bus-specific PCI header.
Update the libraries, as structuraly unsound as it may be.

Signed-off-by: Gaetan Rivet 
---
 lib/librte_ether/rte_ethdev_pci.h  | 1 +
 lib/librte_eventdev/rte_eventdev_pmd_pci.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev_pci.h 
b/lib/librte_ether/rte_ethdev_pci.h
index 56b1072..722075e 100644
--- a/lib/librte_ether/rte_ethdev_pci.h
+++ b/lib/librte_ether/rte_ethdev_pci.h
@@ -36,6 +36,7 @@
 
 #include 
 #include 
+#include 
 #include 
 
 /**
diff --git a/lib/librte_eventdev/rte_eventdev_pmd_pci.h 
b/lib/librte_eventdev/rte_eventdev_pmd_pci.h
index b6bd731..ade32b5 100644
--- a/lib/librte_eventdev/rte_eventdev_pmd_pci.h
+++ b/lib/librte_eventdev/rte_eventdev_pmd_pci.h
@@ -50,6 +50,7 @@ extern "C" {
 #include 
 #include 
 #include 
+#include 
 
 #include "rte_eventdev_pmd.h"
 
-- 
2.1.4

[dpdk-dev] [PATCH v3 07/13] test: include rte_bus_pci

2017-09-25 Thread Gaetan Rivet

Devices and drivers are now defined within the bus-specific PCI header.
Update test applications.

Signed-off-by: Gaetan Rivet 
---
 test/test/test_kni.c| 1 +
 test/test/virtual_pmd.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/test/test/test_kni.c b/test/test/test_kni.c
index db17fdf..b2f05ec 100644
--- a/test/test/test_kni.c
+++ b/test/test/test_kni.c
@@ -42,6 +42,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
diff --git a/test/test/virtual_pmd.c b/test/test/virtual_pmd.c
index 9d46ad5..72e784a 100644
--- a/test/test/virtual_pmd.c
+++ b/test/test/virtual_pmd.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
-- 
2.1.4

[dpdk-dev] [PATCH v3 06/13] drivers: include rte_bus_pci

2017-09-25 Thread Gaetan Rivet

Devices and drivers are now defined within the bus-specific PCI header.
Update drivers.

Signed-off-by: Gaetan Rivet 
---
 drivers/bus/pci/bsd/rte_pci.c| 1 +
 drivers/bus/pci/linux/rte_pci.c  | 1 +
 drivers/bus/pci/linux/rte_pci_uio.c  | 1 +
 drivers/bus/pci/linux/rte_pci_vfio.c | 1 +
 drivers/bus/pci/linux/rte_vfio_mp_sync.c | 1 +
 drivers/bus/pci/private.h| 1 +
 drivers/bus/pci/rte_pci_common.c | 1 +
 drivers/bus/pci/rte_pci_common_uio.c | 1 +
 drivers/crypto/qat/qat_qp.c  | 1 +
 drivers/event/octeontx/ssovf_probe.c | 1 +
 drivers/net/ark/ark_ethdev.c | 1 +
 drivers/net/avp/avp_ethdev.c | 2 ++
 drivers/net/bnxt/bnxt.h  | 1 +
 drivers/net/bonding/rte_eth_bond_args.c  | 1 +
 drivers/net/cxgbe/base/adapter.h | 1 +
 drivers/net/cxgbe/cxgbe_ethdev.c | 1 +
 drivers/net/e1000/em_ethdev.c| 1 +
 drivers/net/e1000/igb_ethdev.c   | 1 +
 drivers/net/e1000/igb_pf.c   | 1 +
 drivers/net/ena/ena_ethdev.h | 1 +
 drivers/net/enic/base/vnic_dev.h | 4 +++-
 drivers/net/enic/enic_ethdev.c   | 1 +
 drivers/net/enic/enic_main.c | 1 +
 drivers/net/i40e/i40e_ethdev.c   | 1 +
 drivers/net/i40e/i40e_ethdev_vf.c| 1 +
 drivers/net/ixgbe/ixgbe_ethdev.c | 1 +
 drivers/net/ixgbe/ixgbe_ethdev.h | 1 +
 drivers/net/mlx5/mlx5.c  | 1 +
 drivers/net/mlx5/mlx5_ethdev.c   | 1 +
 drivers/net/sfc/sfc.h| 1 +
 drivers/net/sfc/sfc_ethdev.c | 1 +
 drivers/net/thunderx/nicvf_ethdev.c  | 1 +
 drivers/net/virtio/virtio_ethdev.c   | 1 +
 drivers/net/virtio/virtio_pci.h  | 1 +
 drivers/net/vmxnet3/vmxnet3_ethdev.c | 1 +
 35 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/drivers/bus/pci/bsd/rte_pci.c b/drivers/bus/pci/bsd/rte_pci.c
index 5bd0f4b..6c7ab81 100644
--- a/drivers/bus/pci/bsd/rte_pci.c
+++ b/drivers/bus/pci/bsd/rte_pci.c
@@ -57,6 +57,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/drivers/bus/pci/linux/rte_pci.c b/drivers/bus/pci/linux/rte_pci.c
index c2ac82b..ba32d8b 100644
--- a/drivers/bus/pci/linux/rte_pci.c
+++ b/drivers/bus/pci/linux/rte_pci.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/drivers/bus/pci/linux/rte_pci_uio.c 
b/drivers/bus/pci/linux/rte_pci_uio.c
index eed6d0f..1f0dacd 100644
--- a/drivers/bus/pci/linux/rte_pci_uio.c
+++ b/drivers/bus/pci/linux/rte_pci_uio.c
@@ -47,6 +47,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/drivers/bus/pci/linux/rte_pci_vfio.c 
b/drivers/bus/pci/linux/rte_pci_vfio.c
index 81b67c9..ab63423 100644
--- a/drivers/bus/pci/linux/rte_pci_vfio.c
+++ b/drivers/bus/pci/linux/rte_pci_vfio.c
@@ -42,6 +42,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 
diff --git a/drivers/bus/pci/linux/rte_vfio_mp_sync.c 
b/drivers/bus/pci/linux/rte_vfio_mp_sync.c
index 2c1654d..d24eba1 100644
--- a/drivers/bus/pci/linux/rte_vfio_mp_sync.c
+++ b/drivers/bus/pci/linux/rte_vfio_mp_sync.c
@@ -50,6 +50,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 
diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h
index 7ff1fc4..fdc2c81 100644
--- a/drivers/bus/pci/private.h
+++ b/drivers/bus/pci/private.h
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include 
 
 struct rte_pci_driver;
 struct rte_pci_device;
diff --git a/drivers/bus/pci/rte_pci_common.c b/drivers/bus/pci/rte_pci_common.c
index c37b85b..459ae42 100644
--- a/drivers/bus/pci/rte_pci_common.c
+++ b/drivers/bus/pci/rte_pci_common.c
@@ -45,6 +45,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/drivers/bus/pci/rte_pci_common_uio.c 
b/drivers/bus/pci/rte_pci_common_uio.c
index 4365660..544c606 100644
--- a/drivers/bus/pci/rte_pci_common_uio.c
+++ b/drivers/bus/pci/rte_pci_common_uio.c
@@ -40,6 +40,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/drivers/crypto/qat/qat_qp.c b/drivers/crypto/qat/qat_qp.c
index 5048d21..a22d6ef 100644
--- a/drivers/crypto/qat/qat_qp.c
+++ b/drivers/crypto/qat/qat_qp.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
diff --git a/drivers/event/octeontx/ssovf_probe.c 
b/drivers/event/octeontx/ssovf_probe.c
index e1c0c6d..1cac4bc 100644
--- a/drivers/event/octeontx/ssovf_probe.c
+++ b/drivers/event/octeontx/ssovf_probe.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "ssovf_evdev.h"
 
diff --git a/drivers/net/ark/ark_ethdev.c b/drivers/net/ark/ark_ethdev.c
index 6db362b..78a4d79 100644
--- a/drivers/net/ark/ark_ethdev.c
+++ b/drivers/net/ark/ark_ethdev.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 
diff --git a/drivers/net/avp/avp_ethdev.c b/drivers/net/avp/avp_eth

[dpdk-dev] [PATCH v3 08/13] app/testpmd: include rte_bus_pci

2017-09-25 Thread Gaetan Rivet

Devices and drivers are now defined within the bus-specific PCI header.
Update applications.

Signed-off-by: Gaetan Rivet 
---
 app/test-pmd/testpmd.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 1d1ee75..d0613b5 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -35,6 +35,7 @@
 #define _TESTPMD_H_
 
 #include 
+#include 
 #include 
 
 #define RTE_PORT_ALL(~(portid_t)0x0)
-- 
2.1.4

[dpdk-dev] [PATCH v3 09/13] cryptodev: move PCI specific helpers to drivers/crypto

2017-09-25 Thread Gaetan Rivet

Those helpers rely on the PCI bus driver implementation.
Other similar libraries relied on the bus-specifics being handled in
inlined functions, to be compiled on demand by drivers, once the proper
PCI dependency has been settled. This seems unsafe.

Move the PCI-specific helpers out of the lib directory to the
drivers/crypto directory, properly following the dependency hierarchy.

Signed-off-by: Gaetan Rivet 
---
 drivers/Makefile |   2 +-
 drivers/crypto/Makefile  |   4 +-
 drivers/crypto/pci/Makefile  |  52 +
 drivers/crypto/pci/rte_cryptodev_pci.c   | 128 +++
 drivers/crypto/pci/rte_cryptodev_pci.h   |  94 +
 drivers/crypto/pci/rte_cryptodev_pci_version.map |   7 ++
 lib/librte_cryptodev/Makefile|   1 -
 lib/librte_cryptodev/rte_cryptodev_pci.h |  92 
 lib/librte_cryptodev/rte_cryptodev_pmd.c |  94 -
 lib/librte_cryptodev/rte_cryptodev_version.map   |   2 -
 10 files changed, 285 insertions(+), 191 deletions(-)
 create mode 100644 drivers/crypto/pci/Makefile
 create mode 100644 drivers/crypto/pci/rte_cryptodev_pci.c
 create mode 100644 drivers/crypto/pci/rte_cryptodev_pci.h
 create mode 100644 drivers/crypto/pci/rte_cryptodev_pci_version.map
 delete mode 100644 lib/librte_cryptodev/rte_cryptodev_pci.h

diff --git a/drivers/Makefile b/drivers/Makefile
index 7fef66d..a5d3fa0 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -37,7 +37,7 @@ DEPDIRS-mempool := bus
 DIRS-y += net
 DEPDIRS-net := bus mempool
 DIRS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += crypto
-DEPDIRS-crypto := mempool
+DEPDIRS-crypto := bus mempool
 DIRS-$(CONFIG_RTE_LIBRTE_EVENTDEV) += event
 DEPDIRS-event := bus
 
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 7a719b9..cfd6cb6 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -33,6 +33,8 @@ include $(RTE_SDK)/mk/rte.vars.mk
 
 core-libs := librte_eal librte_mbuf librte_mempool librte_ring librte_cryptodev
 
+DIRS-$(CONFIG_RTE_LIBRTE_PCI_BUS) += pci
+DEPDIRS-pci = $(core-libs)
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_GCM) += aesni_gcm
 DEPDIRS-aesni_gcm = $(core-libs)
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_MB) += aesni_mb
@@ -42,7 +44,7 @@ DEPDIRS-armv8 = $(core-libs)
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_OPENSSL) += openssl
 DEPDIRS-openssl = $(core-libs)
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_QAT) += qat
-DEPDIRS-qat = $(core-libs)
+DEPDIRS-qat = $(core-libs) librte_cryptodev_pci
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_CRYPTO_SCHEDULER) += scheduler
 DEPDIRS-scheduler = $(core-libs) librte_kvargs librte_reorder
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_SNOW3G) += snow3g
diff --git a/drivers/crypto/pci/Makefile b/drivers/crypto/pci/Makefile
new file mode 100644
index 000..da819f2
--- /dev/null
+++ b/drivers/crypto/pci/Makefile
@@ -0,0 +1,52 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2017 6WIND S.A. All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# * Neither the name of 6WIND S.A. nor the names of its
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_cryptodev_pci.a
+
+# library version
+LIBABIVER := 1
+
+# build flags
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+# library source files
+SRCS-y += rte_cryptodev_pci.c
+
+# export include files
+SYMLINK-y-include += rte_cryptodev_pci.h
+
+# versioning export map
+EXPORT_MAP := rte_cryptodev_pci_version.map
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/crypto/pci/r

[dpdk-dev] [PATCH v3 10/13] pci: avoid inlining functions

2017-09-25 Thread Gaetan Rivet

Parsing operations should not happen in performance critical sections.
Headers should not propose implementations unless duly required.

Signed-off-by: Gaetan Rivet 
---
 lib/librte_pci/include/rte_pci.h   | 69 --
 lib/librte_pci/rte_pci.c   | 65 +++
 lib/librte_pci/rte_pci_version.map |  4 +++
 3 files changed, 75 insertions(+), 63 deletions(-)

diff --git a/lib/librte_pci/include/rte_pci.h b/lib/librte_pci/include/rte_pci.h
index 3858e80..09a609a 100644
--- a/lib/librte_pci/include/rte_pci.h
+++ b/lib/librte_pci/include/rte_pci.h
@@ -126,19 +126,6 @@ struct mapped_pci_resource {
 /** mapped pci device list */
 TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource);
 
-/**< Internal use only - Macro used by pci addr parsing functions **/
-#define GET_PCIADDR_FIELD(in, fd, lim, dlm)   \
-do {   \
-   unsigned long val;  \
-   char *end;  \
-   errno = 0;  \
-   val = strtoul((in), &end, 16);  \
-   if (errno != 0 || end[0] != (dlm) || val > (lim))   \
-   return -EINVAL; \
-   (fd) = (typeof (fd))val;\
-   (in) = end + 1; \
-} while(0)
-
 /**
  * Utility function to produce a PCI Bus-Device-Function value
  * given a string representation. Assumes that the BDF is provided without
@@ -152,15 +139,7 @@ do {   
\
  * @return
  *  0 on success, negative on error.
  */
-static inline int
-eal_parse_pci_BDF(const char *input, struct rte_pci_addr *dev_addr)
-{
-   dev_addr->domain = 0;
-   GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':');
-   GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.');
-   GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0);
-   return 0;
-}
+int eal_parse_pci_BDF(const char *input, struct rte_pci_addr *dev_addr);
 
 /**
  * Utility function to produce a PCI Bus-Device-Function value
@@ -174,16 +153,7 @@ eal_parse_pci_BDF(const char *input, struct rte_pci_addr 
*dev_addr)
  * @return
  *  0 on success, negative on error.
  */
-static inline int
-eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr)
-{
-   GET_PCIADDR_FIELD(input, dev_addr->domain, UINT16_MAX, ':');
-   GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':');
-   GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.');
-   GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0);
-   return 0;
-}
-#undef GET_PCIADDR_FIELD
+int eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr);
 
 /**
  * Utility function to write a pci device name, this device name can later be
@@ -197,17 +167,9 @@ eal_parse_pci_DomBDF(const char *input, struct 
rte_pci_addr *dev_addr)
  * @param size
  * The output buffer size
  */
-static inline void
-rte_pci_device_name(const struct rte_pci_addr *addr,
-   char *output, size_t size)
-{
-   RTE_VERIFY(size >= PCI_PRI_STR_SIZE);
-   RTE_VERIFY(snprintf(output, size, PCI_PRI_FMT,
-   addr->domain, addr->bus,
-   addr->devid, addr->function) >= 0);
-}
+void rte_pci_device_name(const struct rte_pci_addr *addr,
+char *output, size_t size);
 
-/* Compare two PCI device addresses. */
 /**
  * Utility function to compare two PCI device addresses.
  *
@@ -220,27 +182,8 @@ rte_pci_device_name(const struct rte_pci_addr *addr,
  * Positive on addr is greater than addr2.
  * Negative on addr is less than addr2, or error.
  */
-static inline int
-rte_eal_compare_pci_addr(const struct rte_pci_addr *addr,
-const struct rte_pci_addr *addr2)
-{
-   uint64_t dev_addr, dev_addr2;
-
-   if ((addr == NULL) || (addr2 == NULL))
-   return -1;
-
-   dev_addr = ((uint64_t)addr->domain << 24) |
-   (addr->bus << 16) | (addr->devid << 8) | addr->function;
-   dev_addr2 = ((uint64_t)addr2->domain << 24) |
-   (addr2->bus << 16) | (addr2->devid << 8) | addr2->function;
-
-   if (dev_addr > dev_addr2)
-   return 1;
-   else if (dev_addr < dev_addr2)
-   return -1;
-   else
-   return 0;
-}
+int rte_eal_compare_pci_addr(const struct rte_pci_addr *addr,
+const struct rte_pci_addr *addr2);
 
 /**
  * Map a particular resource from a file.
diff --git a/lib/librte_pci/rte_pci.c b/lib/librte_pci/rte_pci.c
index 9dfdd3f..8584b55 100644
--- a/lib/librte_pci/rte_pci.c
+++ b/lib/librte_pci/rte_pci.c
@@ -53,6 +53,71 @@
 
 #include "rte_pci.h"
 
+/* Macro used by pci addr parsing functions. **/
+#d

[dpdk-dev] [PATCH v3 11/13] pci: avoid over-complicated macro

2017-09-25 Thread Gaetan Rivet

Using a macro helps writing the code to the detriment of the reader in
this case. This is backward. Write once, read many.

The few LOCs gained is not worth the opacity of the implementation.

Signed-off-by: Gaetan Rivet 
---
 lib/librte_pci/rte_pci.c | 65 ++--
 1 file changed, 46 insertions(+), 19 deletions(-)

diff --git a/lib/librte_pci/rte_pci.c b/lib/librte_pci/rte_pci.c
index 8584b55..cbb5359 100644
--- a/lib/librte_pci/rte_pci.c
+++ b/lib/librte_pci/rte_pci.c
@@ -53,36 +53,63 @@
 
 #include "rte_pci.h"
 
-/* Macro used by pci addr parsing functions. **/
-#define GET_PCIADDR_FIELD(in, fd, lim, dlm) \
-do {\
-   unsigned long val;  \
-   char *end;  \
-   errno = 0;  \
-   val = strtoul((in), &end, 16);  \
-   if (errno != 0 || end[0] != (dlm) || val > (lim))   \
-   return -EINVAL; \
-   (fd) = (typeof (fd))val;\
-   (in) = end + 1; \
-} while(0)
+static inline const char *
+get_u8_pciaddr_field(const char *in, void *_u8, char dlm)
+{
+   unsigned long val;
+   uint8_t *u8 = _u8;
+   char *end;
+
+   errno = 0;
+   val = strtoul(in, &end, 16);
+   if (errno != 0 || end[0] != dlm || val > UINT8_MAX) {
+   errno = errno ? errno : EINVAL;
+   return NULL;
+   }
+   *u8 = (uint8_t)val;
+   return end + 1;
+}
 
 int
 eal_parse_pci_BDF(const char *input, struct rte_pci_addr *dev_addr)
 {
+   const char *in = input;
+
dev_addr->domain = 0;
-   GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':');
-   GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.');
-   GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0);
+   in = get_u8_pciaddr_field(in, &dev_addr->bus, ':');
+   if (in == NULL)
+   return -EINVAL;
+   in = get_u8_pciaddr_field(in, &dev_addr->devid, '.');
+   if (in == NULL)
+   return -EINVAL;
+   in = get_u8_pciaddr_field(in, &dev_addr->function, '\0');
+   if (in == NULL)
+   return -EINVAL;
return 0;
 }
 
 int
 eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr)
 {
-   GET_PCIADDR_FIELD(input, dev_addr->domain, UINT16_MAX, ':');
-   GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':');
-   GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.');
-   GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0);
+   const char *in = input;
+   unsigned long val;
+   char *end;
+
+   errno = 0;
+   val = strtoul(in, &end, 16);
+   if (errno != 0 || end[0] != ':' || val > UINT16_MAX)
+   return -EINVAL;
+   dev_addr->domain = (uint16_t)val;
+   in = end + 1;
+   in = get_u8_pciaddr_field(in, &dev_addr->bus, ':');
+   if (in == NULL)
+   return -EINVAL;
+   in = get_u8_pciaddr_field(in, &dev_addr->devid, '.');
+   if (in == NULL)
+   return -EINVAL;
+   in = get_u8_pciaddr_field(in, &dev_addr->function, '\0');
+   if (in == NULL)
+   return -EINVAL;
return 0;
 }
 
-- 
2.1.4

[dpdk-dev] [PATCH v3 12/13] pci: deprecate misnamed functions

2017-09-25 Thread Gaetan Rivet

Rename misnamed functions and describe the change in a deprecation
notice.

Signed-off-by: Gaetan Rivet 
---
 doc/guides/rel_notes/deprecation.rst | 10 ++
 lib/librte_pci/include/rte_pci.h | 63 
 lib/librte_pci/rte_pci.c | 26 +++
 lib/librte_pci/rte_pci_version.map   |  4 +++
 4 files changed, 103 insertions(+)

diff --git a/doc/guides/rel_notes/deprecation.rst 
b/doc/guides/rel_notes/deprecation.rst
index 3362f33..1828e57 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -120,3 +120,13 @@ Deprecation Notices
   The non-"do-sig" versions of the hash tables will be removed
   (including the ``signature_offset`` parameter)
   and the "do-sig" versions renamed accordingly.
+
+* pci: Several exposed functions are misnamed.
+  The following functions are deprecated starting from v17.11 and are replaced:
+
+  - ``eal_parse_pci_BDF`` replaced by ``pci_parse_BDF``
+  - ``eal_parse_pci_DomBDF`` replaced by ``pci_parse_DomBDF``
+  - ``rte_eal_compare_pci_addr`` replaced by ``pci_addr_cmp``
+  - ``rte_pci_device_name`` replaced by ``pci_device_name``
+
+  The functions are only renamed. Their behavior is not affected.
diff --git a/lib/librte_pci/include/rte_pci.h b/lib/librte_pci/include/rte_pci.h
index 09a609a..224cea0 100644
--- a/lib/librte_pci/include/rte_pci.h
+++ b/lib/librte_pci/include/rte_pci.h
@@ -127,6 +127,7 @@ struct mapped_pci_resource {
 TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource);
 
 /**
+ * @deprecated
  * Utility function to produce a PCI Bus-Device-Function value
  * given a string representation. Assumes that the BDF is provided without
  * a domain prefix (i.e. domain returned is always 0)
@@ -143,6 +144,22 @@ int eal_parse_pci_BDF(const char *input, struct 
rte_pci_addr *dev_addr);
 
 /**
  * Utility function to produce a PCI Bus-Device-Function value
+ * given a string representation. Assumes that the BDF is provided without
+ * a domain prefix (i.e. domain returned is always 0)
+ *
+ * @param input
+ * The input string to be parsed. Should have the format XX:XX.X
+ * @param dev_addr
+ * The PCI Bus-Device-Function address to be returned. Domain will always 
be
+ * returned as 0
+ * @return
+ *  0 on success, negative on error.
+ */
+int pci_parse_BDF(const char *input, struct rte_pci_addr *dev_addr);
+
+/**
+ * @deprecated
+ * Utility function to produce a PCI Bus-Device-Function value
  * given a string representation. Assumes that the BDF is provided including
  * a domain prefix.
  *
@@ -156,6 +173,21 @@ int eal_parse_pci_BDF(const char *input, struct 
rte_pci_addr *dev_addr);
 int eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr);
 
 /**
+ * Utility function to produce a PCI Bus-Device-Function value
+ * given a string representation. Assumes that the BDF is provided including
+ * a domain prefix.
+ *
+ * @param input
+ * The input string to be parsed. Should have the format :XX:XX.X
+ * @param dev_addr
+ * The PCI Bus-Device-Function address to be returned
+ * @return
+ *  0 on success, negative on error.
+ */
+int pci_parse_DomBDF(const char *input, struct rte_pci_addr *dev_addr);
+
+/**
+ * @deprecated
  * Utility function to write a pci device name, this device name can later be
  * used to retrieve the corresponding rte_pci_addr using eal_parse_pci_*
  * BDF helpers.
@@ -171,6 +203,22 @@ void rte_pci_device_name(const struct rte_pci_addr *addr,
 char *output, size_t size);
 
 /**
+ * Utility function to write a pci device name, this device name can later be
+ * used to retrieve the corresponding rte_pci_addr using eal_parse_pci_*
+ * BDF helpers.
+ *
+ * @param addr
+ * The PCI Bus-Device-Function address
+ * @param output
+ * The output buffer string
+ * @param size
+ * The output buffer size
+ */
+void pci_device_name(const struct rte_pci_addr *addr,
+char *output, size_t size);
+
+/**
+ * @deprecated
  * Utility function to compare two PCI device addresses.
  *
  * @param addr
@@ -186,6 +234,21 @@ int rte_eal_compare_pci_addr(const struct rte_pci_addr 
*addr,
 const struct rte_pci_addr *addr2);
 
 /**
+ * Utility function to compare two PCI device addresses.
+ *
+ * @param addr
+ * The PCI Bus-Device-Function address to compare
+ * @param addr2
+ * The PCI Bus-Device-Function address to compare
+ * @return
+ * 0 on equal PCI address.
+ * Positive on addr is greater than addr2.
+ * Negative on addr is less than addr2, or error.
+ */
+int pci_addr_cmp(const struct rte_pci_addr *addr,
+const struct rte_pci_addr *addr2);
+
+/**
  * Map a particular resource from a file.
  *
  * @param requested_addr
diff --git a/lib/librte_pci/rte_pci.c b/lib/librte_pci/rte_pci.c
index cbb5359..54ce10d 100644
--- a/lib/librte_pci/rte_pci.c
+++ b/lib/librte_pci/rte_pci.c
@@ -73,6 +73,12 @@ get_u8_pciaddr_field(const char

[dpdk-dev] [PATCH v3 13/13] doc: add notes on EAL PCI API update

2017-09-25 Thread Gaetan Rivet

Add a section related to EAL API changes to 17.11 release notes.

Signed-off-by: Gaetan Rivet 
Acked-by: John McNamara 
---
 doc/guides/rel_notes/release_17_11.rst | 28 
 1 file changed, 28 insertions(+)

diff --git a/doc/guides/rel_notes/release_17_11.rst 
b/doc/guides/rel_notes/release_17_11.rst
index 8bf91bd..d5546ba 100644
--- a/doc/guides/rel_notes/release_17_11.rst
+++ b/doc/guides/rel_notes/release_17_11.rst
@@ -130,6 +130,34 @@ API Changes
   * Rework start and stop APIs into ``rte_service_runstate_set``
   * Added API to set runstate of service implementation to indicate readyness
 
+* **PCI bus API moved outside of the EAL**
+
+  The PCI bus previously implemented within the EAL has been moved.
+  A first part has been added as an RTE library providing PCI helpers to
+  parse device locations or other such utilities.
+  A second part consisting in the actual bus driver has been moved to its
+  proper subdirectory, without changing its functionalities.
+
+  As such, several PCI-related functions are not proposed by the EAL anymore:
+
+  * rte_pci_detach
+  * rte_pci_dump
+  * rte_pci_ioport_map
+  * rte_pci_ioport_read
+  * rte_pci_ioport_unmap
+  * rte_pci_ioport_write
+  * rte_pci_map_device
+  * rte_pci_probe
+  * rte_pci_probe_one
+  * rte_pci_read_config
+  * rte_pci_register
+  * rte_pci_scan
+  * rte_pci_unmap_device
+  * rte_pci_unregister
+  * rte_pci_write_config
+
+  These functions are made available either as part of ``librte_pci`` or
+  ``librte_bus_pci``.
 
 ABI Changes
 ---
-- 
2.1.4

Re: [dpdk-dev] [PATCH] example: add new service cores sample application

2017-09-25 Thread Eads, Gage

Neat example. Looks good overall, I just have a few questions.

> +#define PROFILE_SERVICE_PER_CORE 8

Any reason not to use 5 here? That would remove a few zeroes from the 
profiles[] initializers.

> +static int
> +apply_profile(int profile_id)
> +{
> +  uint32_t i;
> +  uint32_t s;
> +  int ret;
> +  struct profile *p = &profiles[profile_id];
> +  const uint8_t core_off = 1;
> +
> +  for (i = 0; i < p->num_cores; i++) {
> + ret = rte_service_lcore_add(i + core_off);
> + if (ret && ret != -EALREADY)
> +printf("core %d added ret %d\n", i + 
> core_off, ret);

I'm wondering if this and the other printfs in this function should be 
rte_panics? These seem like fatal errors.

> +
> + ret = rte_service_lcore_start(i + core_off);
> + if (ret && ret != -EALREADY)
> +printf("core %d start ret %d\n", i + 
> core_off, ret);
> +
> + for (s = 0; s < NUM_SERVICES; s++) {
> +if (rte_service_map_lcore_set(s, i + 
> core_off,
> +  
> p->cores[i].mapped_services[s]))
> +   rte_panic("failed to 
> map lcore to 1\n");

What does '1' refer to here? Perhaps it should be: rte_panic("failed to map 
lcore %d to %s\n", i + core_off, services[s].name);

[dpdk-dev] Issue with pktgen-dpdk replaying >1500bytes pcap on MCX4

2017-09-25 Thread Damien Clabaut


Hello DPDK devs,

I am sending this message here as I did not find a bugtracker on the 
website.


If this is the wrong place, I would kindly apologize and ask you to 
redirect me to the proper place,


Thank you.

Description of the issue:

I am using pktgen-dpdk to replay a pcap file containing exactly 1 packet.

The packet in question is generated using this Scapy command:

pkt=(Ether(src="ec:0d:9a:37:d1:ab",dst="7c:fe:90:31:0d:52")/Dot1Q(vlan=2)/IP(dst="192.168.0.254")/UDP(sport=1020,dport=1021)/Raw(RandBin(size=8500)))

The pcap is then replayed in pktgen-dpdk:

./app/app/x86_64-native-linuxapp-gcc/pktgen -l 0-7 -- -m [1-7].0 -s 
0:pcap/8500Bpp.pcap


When I run this on a machine with Mellanox ConnectX-4 NIC (MCX4), the 
switch towards which I generate traffic gets a strange behaviour


#sh int et29/1 | i rate
  5 seconds input rate 39.4 Gbps (98.4% with framing overhead), 0 
packets/sec


A capture of this traffic (I used a monitor session to redirect all to a 
different port, connected to a machine on which I ran tcpdump) gives me 
this:


19:04:50.210792 00:00:00:00:00:00 (oui Ethernet) > 00:00:00:00:00:00 
(oui Ethernet) Null Unnumbered, ef, Flags [Poll], length 1500
19:04:50.210795 00:00:00:00:00:00 (oui Ethernet) > 00:00:00:00:00:00 
(oui Ethernet) Null Unnumbered, ef, Flags [Poll], length 1500
19:04:50.210796 00:00:00:00:00:00 (oui Ethernet) > 00:00:00:00:00:00 
(oui Ethernet) Null Unnumbered, ef, Flags [Poll], length 1500
19:04:50.210797 00:00:00:00:00:00 (oui Ethernet) > 00:00:00:00:00:00 
(oui Ethernet) Null Unnumbered, ef, Flags [Poll], length 1500
19:04:50.210799 00:00:00:00:00:00 (oui Ethernet) > 00:00:00:00:00:00 
(oui Ethernet) Null Unnumbered, ef, Flags [Poll], length 1500


The issue cannot be reproduced if any of the following conditions is met:

- Set the size in the Raw(RandBin()) to a value lower than 1500

- Send the packet from a Mellanox ConnectX-3 (MCX3) NIC (both machines 
are indentical in terms of software).


Is this a known problem ?

I remain available for any question you may have.

Regards,

--
Damien Clabaut
R&D vRouter
ovh.qc.ca

Re: [dpdk-dev] [PATCH v4 1/2] eal: allow user to override default pool handle

2017-09-25 Thread santosh


On Monday 25 September 2017 08:28 AM, Olivier MATZ wrote:
> On Mon, Sep 11, 2017 at 08:48:36PM +0530, Santosh Shukla wrote:
>> DPDK has support for both sw and hw mempool and
>> currently user is limited to use ring_mp_mc pool.
>> In case user want to use other pool handle,
>> need to update config RTE_MEMPOOL_OPS_DEFAULT, then
>> build and run with desired pool handle.
>>
>> Introducing eal option to override default pool handle.
>>
>> Now user can override the RTE_MEMPOOL_OPS_DEFAULT by passing
>> pool handle to eal `--mbuf-pool-ops=""`.
>>
>> Signed-off-by: Santosh Shukla 
>> Acked-by: Hemant Agrawal 
>>
>> [...]
>>
>> --- a/lib/librte_eal/common/eal_internal_cfg.h
>> +++ b/lib/librte_eal/common/eal_internal_cfg.h
>> @@ -82,7 +82,7 @@ struct internal_config {
>>  volatile enum rte_intr_mode vfio_intr_mode;
>>  const char *hugefile_prefix;  /**< the base filename of hugetlbfs 
>> files */
>>  const char *hugepage_dir; /**< specific hugetlbfs directory to 
>> use */
>> -
>> +const char *mbuf_pool_name;   /**< mbuf pool name */
>>  unsigned num_hugepage_sizes;  /**< how many sizes on this system */
>>  struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES];
>>  };
> What do you think about mbuf_pool_ops_name instead?
>
> I'm afraid of the confusion we could have with the name
> of the mempool.
>
Hoping that we're doing final renaming on handle, Its the third time and so for
other mempool series.

queued for v5.

Re: [dpdk-dev] [PATCH v4 2/2] ethdev: get the supported pools for a port

2017-09-25 Thread santosh

Hi Olivier,


On Monday 25 September 2017 08:37 AM, Olivier MATZ wrote:
> Hi,
>
> On Mon, Sep 11, 2017 at 08:48:37PM +0530, Santosh Shukla wrote:
>> Now that dpdk supports more than one mempool drivers and
>> each mempool driver works best for specific PMD, example:
>> - sw ring based mempool for Intel PMD drivers.
>> - dpaa2 HW mempool manager for dpaa2 PMD driver.
>> - fpa HW mempool manager for Octeontx PMD driver.
>>
>> Application would like to know the best mempool handle
>> for any port.
>>
>> Introducing rte_eth_dev_pools_ops_supported() API,
>> which allows PMD driver to advertise
>> his supported pools capability to the application.
>>
>> Supported pools are categorized in below priority:-
>> - Best mempool handle for this port (Highest priority '0')
>> - Port supports this mempool handle (Priority '1')
>>
>> Signed-off-by: Santosh Shukla 
>>
>> [...]
>>
>> +int
>> +rte_eth_dev_pools_ops_supported(uint8_t port_id, const char *pool)
> pools -> pool?

ok.

>> +{
>> +struct rte_eth_dev *dev;
>> +const char *tmp;
>> +
>> +RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
>> +
>> +if (pool == NULL)
>> +return -EINVAL;
>> +
>> +dev = &rte_eth_devices[port_id];
>> +
>> +if (*dev->dev_ops->pools_ops_supported == NULL) {
>> +tmp = rte_eal_mbuf_default_mempool_ops();
>> +if (!strcmp(tmp, pool))
>> +return 0;
>> +else
>> +return -ENOTSUP;
> I don't understand why we are comparing with
> rte_eal_mbuf_default_mempool_ops().
>
> It means that the result of the function would be influenced
> by the parameter given by the user.

But that will be only for ops not supported case and in that case,
function _must_ make sure that if inputted param is _default_ops_name
then function should return ops supported correct info (whether 
returning '0' : Best ops or '1': ops does support 
, this part is arguable.. meaning One can say that default_ops ='handle-name' 
is best possible handle Or 
one of handle which platform supports).

> I think that a PMD that does not implement ->pools_ops_supported
> should always return 1 (mempool is supported).

Return 1 says: PMD support this ops.. 

So if ops is not supported and func returns with 1, then which ops application 
will use?
If that ops is default_ops.. then How application will distinguish when to use 
default ops or
param ops?.. as because in both cases func will return with value 1.

The approach in the patch takes care of that condition and func will return 
-ENOTSUP 
if (ops not support || inputted param not matching with default ops) otherwise 
will return
0 or 1.

At application side; 
For error case: In case of -ENOTSUP, its upto application to use _default_ops 
or exit.
For good case: 0 or 1 case, func gaurantee that handle is either best handle 
for pool or pool supports
that handle.. However in your suggestion if ops not supported case returns 1 
then application is not 
sure which ops to use.. default_ops Or input ops given to func.

make sense? 

>> +}
>> +
>> +return (*dev->dev_ops->pools_ops_supported)(dev, pool);
>> +}
>> diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
>> index 0adf3274a..d90029b1e 100644
>> --- a/lib/librte_ether/rte_ethdev.h
>> +++ b/lib/librte_ether/rte_ethdev.h
>> @@ -1425,6 +1425,10 @@ typedef int (*eth_get_dcb_info)(struct rte_eth_dev 
>> *dev,
>>   struct rte_eth_dcb_info *dcb_info);
>>  /**< @internal Get dcb information on an Ethernet device */
>>  
>> +typedef int (*eth_pools_ops_supported_t)(struct rte_eth_dev *dev,
>> +const char *pool);
>> +/**< @internal Get the supported pools for a port */
>> +
> The comment should be something like:
> Test if a port supports specific mempool ops.
>
>>  /**
>>   * @internal A structure containing the functions exported by an Ethernet 
>> driver.
>>   */
>> @@ -1544,6 +1548,8 @@ struct eth_dev_ops {
>>  
>>  eth_tm_ops_get_t tm_ops_get;
>>  /**< Get Traffic Management (TM) operations. */
>> +eth_pools_ops_supported_t pools_ops_supported;
>> +/**< Get the supported pools for a port */
> Same
>
>>  };
>>  
>>  /**
>> @@ -4436,6 +4442,24 @@ int rte_eth_dev_adjust_nb_rx_tx_desc(uint8_t port_id,
>>   uint16_t *nb_rx_desc,
>>   uint16_t *nb_tx_desc);
>>  
>> +
>> +/**
>> + * Get the supported pools for a port
> Same
>
>> + *
>> + * @param port_id
>> + *   Port identifier of the Ethernet device.
>> + * @param [in] pool
>> + *   The supported pool handle for this port.
> The name of the pool operations to test

ok, v5.

>> + *   Maximum length of pool handle name is RTE_MEMPOOL_OPS_NAMESIZE.
> I don't think we should keep this
>
Ok

>
> Thanks,
> Olivier

Re: [dpdk-dev] [PATCH v4 0/2] Dynamically configure mempool handle

2017-09-25 Thread santosh

On Monday 25 September 2017 08:24 AM, Olivier MATZ wrote:
> Hi Santosh,
>
> On Tue, Sep 19, 2017 at 01:58:14PM +0530, santosh wrote:
>> Hi Olivier,
>>
>>
>> On Wednesday 13 September 2017 03:30 PM, santosh wrote:
>>> Hi Olivier,
>>>
>>>
>>> On Monday 11 September 2017 08:48 PM, Santosh Shukla wrote:
 v4:
 - Includes v3 review coment changes.
   Patches rebased on 06791a4bce: ethdev: get the supported pools for a 
 port 
>>> are you fine with v4? Thanks.
>>>
>> Ping? Thanks.
> Really sorry for the late review.
> I'm sending some minor comments, hoping the patches can be
> integrated in RC1.

Mempool nat_align and this series blocking octeontx pmd, must to get in -rc1.

Appreciate if you spend little time in reviewing, as those series design part 
addressed,
only style and renaming is under discussion.. few style comments are comming 
over weeks of time.. it is blocking 
patch progress.. appreciate if you review little quickly.

Thanks.

> Olivier

Re: [dpdk-dev] [PATCH v6 7/8] mempool: introduce block size align flag

2017-09-25 Thread santosh


On Monday 25 September 2017 12:32 PM, Olivier MATZ wrote:
> On Thu, Sep 07, 2017 at 09:00:41PM +0530, Santosh Shukla wrote:
>> Some mempool hw like octeontx/fpa block, demands block size
>> (/total_elem_sz) aligned object start address.
>>
>> Introducing an MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS flag.
>> If this flag is set:
>> - Align object start address(vaddr) to a multiple of total_elt_sz.
>> - Allocate one additional object. Additional object is needed to make
>>   sure that requested 'n' object gets correctly populated.
>>
>> Example:
>> - Let's say that we get 'x' size of memory chunk from memzone.
>> - And application has requested 'n' object from mempool.
>> - Ideally, we start using objects at start address 0 to...(x-block_sz)
>>   for n obj.
>> - Not necessarily first object address i.e. 0 is aligned to block_sz.
>> - So we derive 'offset' value for block_sz alignment purpose i.e..'off'.
>> - That 'off' makes sure that start address of object is blk_sz aligned.
>> - Calculating 'off' may end up sacrificing first block_sz area of
>>   memzone area x. So total number of the object which can fit in the
>>   pool area is n-1, Which is incorrect behavior.
>>
>> Therefore we request one additional object (/block_sz area) from memzone
>> when MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS flag is set.
>>
>> Signed-off-by: Santosh Shukla 
>> Signed-off-by: Jerin Jacob 
>>
>> [...]
>>
>> --- a/lib/librte_mempool/rte_mempool.c
>> +++ b/lib/librte_mempool/rte_mempool.c
>> @@ -239,10 +239,15 @@ rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t 
>> flags,
>>   */
>>  size_t
>>  rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t 
>> pg_shift,
>> -  __rte_unused unsigned int flags)
>> +  unsigned int flags)
>>  {
>>  size_t obj_per_page, pg_num, pg_sz;
>>  
>> +if (flags & (MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS |
>> +MEMPOOL_F_CAPA_PHYS_CONTIG))
>> +/* alignment need one additional object */
>> +elt_num += 1;
>> +
> In previous version, we agreed to test both _BLK_ALIGNED_OBJECTS
> and _PHYS_CONTIG in _xmem_size()/_usage(). Here, the test will
> also be true if only MEMPOOL_F_CAPA_PHYS_CONTIG is set.
>
> If we want to test both, the test should be:
>
> mask = MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS | MEMPOOL_F_CAPA_PHYS_CONTIG;
> if ((flags & mask) == mask)

queued for v7. agree strict check. Thanks.

>> @@ -265,13 +270,18 @@ rte_mempool_xmem_size(uint32_t elt_num, size_t 
>> total_elt_sz, uint32_t pg_shift,
>>  ssize_t
>>  rte_mempool_xmem_usage(__rte_unused void *vaddr, uint32_t elt_num,
>>  size_t total_elt_sz, const phys_addr_t paddr[], uint32_t pg_num,
>> -uint32_t pg_shift, __rte_unused unsigned int flags)
>> +uint32_t pg_shift, unsigned int flags)
>>  {
>>  uint32_t elt_cnt = 0;
>>  phys_addr_t start, end;
>>  uint32_t paddr_idx;
>>  size_t pg_sz = (size_t)1 << pg_shift;
>>  
>> +if (flags & (MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS |
>> +MEMPOOL_F_CAPA_PHYS_CONTIG))
>> +/* alignment need one additional object */
>> +elt_num += 1;
>> +
> Same here
>

Re: [dpdk-dev] [PATCH v6 8/8] mempool: notify memory area to pool

2017-09-25 Thread santosh


On Monday 25 September 2017 12:41 PM, Olivier MATZ wrote:
> On Thu, Sep 07, 2017 at 09:00:42PM +0530, Santosh Shukla wrote:
>> HW pool manager e.g. Octeontx SoC demands s/w to program start and end
>> address of pool. Currently, there is no such api in external mempool.
>> Introducing rte_mempool_ops_register_memory_area api which will let HW(pool
>> manager) to know when common layer selects hugepage:
>> For each hugepage - Notify its start/end address to HW pool manager.
>>
>> Signed-off-by: Santosh Shukla 
>> Signed-off-by: Jerin Jacob 
>>
>> [...]
>>
>> +/**
>> + * @internal wrapper for mempool_ops register_memory_area callback.
>> + * API to notify the mempool handler if a new memory area is added to pool.
>> + *
> if -> when

ok.

>> + * Mempool handler usually get notified once for the case of mempool get 
>> full
>> + * range of memory area. However, if several memory areas exist then mempool
>> + * handler gets notified each time.
> Not sure I understand this last paragraph.

Refer v5 history [1] for same.

[1] http://dpdk.org/dev/patchwork/patch/28419/

there will be a case where mempool handler may have more than one memory 
example, no-hugepage case.
In that case _register_memory_area() ops will be called for more than once.

In v5, you suggested to mention this case explicitly in api description.

If your not clear with write up then could you propose one and also are you fine
with [8/8] patch beside above note? planning to send v7 by tomorrow, appreciate 
if you answer question.

>> + *
>> + * @param mp
>> + *   Pointer to the memory pool.
>> + * @param vaddr
>> + *   Pointer to the buffer virtual address
>> + * @param paddr
>> + *   Pointer to the buffer physical address
>> + * @param len
>> + *   Pool size
> Minor: missing dot at the end

ok.

>> + * @return
>> + *  - 0: Success;
>> + *  - ENOTSUP: doesn't support register_memory_area ops (valid error case).
> Missing minus before ENOTSUP.
> The dot should be a semicolon instead.
>
ok.

Thanks.

Re: [dpdk-dev] Issue with pktgen-dpdk replaying >1500bytes pcap on MCX4

2017-09-25 Thread Yongseok Koh

Hi, Damien

Can you please let me know the versions of your SW - pktgen-dpdk, DPDK and 
MLNX_OFED?
Also Firmware version if available.

Thanks,
Yongseok

> On Sep 25, 2017, at 10:19 AM, Damien Clabaut  
> wrote:
> 
> Hello DPDK devs,
> 
> I am sending this message here as I did not find a bugtracker on the website.
> 
> If this is the wrong place, I would kindly apologize and ask you to redirect 
> me to the proper place,
> 
> Thank you.
> 
> Description of the issue:
> 
> I am using pktgen-dpdk to replay a pcap file containing exactly 1 packet.
> 
> The packet in question is generated using this Scapy command:
> 
> pkt=(Ether(src="ec:0d:9a:37:d1:ab",dst="7c:fe:90:31:0d:52")/Dot1Q(vlan=2)/IP(dst="192.168.0.254")/UDP(sport=1020,dport=1021)/Raw(RandBin(size=8500)))
> 
> The pcap is then replayed in pktgen-dpdk:
> 
> ./app/app/x86_64-native-linuxapp-gcc/pktgen -l 0-7 -- -m [1-7].0 -s 
> 0:pcap/8500Bpp.pcap
> 
> When I run this on a machine with Mellanox ConnectX-4 NIC (MCX4), the switch 
> towards which I generate traffic gets a strange behaviour
> 
> #sh int et29/1 | i rate
>   5 seconds input rate 39.4 Gbps (98.4% with framing overhead), 0 packets/sec
> 
> A capture of this traffic (I used a monitor session to redirect all to a 
> different port, connected to a machine on which I ran tcpdump) gives me this:
> 
> 19:04:50.210792 00:00:00:00:00:00 (oui Ethernet) > 00:00:00:00:00:00 (oui 
> Ethernet) Null Unnumbered, ef, Flags [Poll], length 1500
> 19:04:50.210795 00:00:00:00:00:00 (oui Ethernet) > 00:00:00:00:00:00 (oui 
> Ethernet) Null Unnumbered, ef, Flags [Poll], length 1500
> 19:04:50.210796 00:00:00:00:00:00 (oui Ethernet) > 00:00:00:00:00:00 (oui 
> Ethernet) Null Unnumbered, ef, Flags [Poll], length 1500
> 19:04:50.210797 00:00:00:00:00:00 (oui Ethernet) > 00:00:00:00:00:00 (oui 
> Ethernet) Null Unnumbered, ef, Flags [Poll], length 1500
> 19:04:50.210799 00:00:00:00:00:00 (oui Ethernet) > 00:00:00:00:00:00 (oui 
> Ethernet) Null Unnumbered, ef, Flags [Poll], length 1500
> 
> The issue cannot be reproduced if any of the following conditions is met:
> 
> - Set the size in the Raw(RandBin()) to a value lower than 1500
> 
> - Send the packet from a Mellanox ConnectX-3 (MCX3) NIC (both machines are 
> indentical in terms of software).
> 
> Is this a known problem ?
> 
> I remain available for any question you may have.
> 
> Regards,
> 
> -- 
> Damien Clabaut
> R&D vRouter
> ovh.qc.ca
>

Re: [dpdk-dev] [RFC] kni: remove single mempool, single mem_chunk restriction

2017-09-25 Thread Ajeet Gill (ajgill)

Hello Ferruh,
 I see there might be issue with multi-chunk mempools in the KNI kernel driver.
It looks like the logic of calculating the addresses is as following the below 
method:

Va-Pa=Vb-Pb.

where V’s are the Virtual addresses and P’s are the physical addresses.
But this formula may not be true for multi-chunk mempools.

I have seen kernel crashes when trying to access the multi-segmented buffers on 
KNI interface.
I have a working patch for the issue.
I will submit it for review.

Thanks
Ajeet

Re: [dpdk-dev] Issue with pktgen-dpdk replaying >1500bytes pcap on MCX4

2017-09-25 Thread Wiles, Keith


> On Sep 25, 2017, at 6:19 PM, Damien Clabaut  
> wrote:
> 
> Hello DPDK devs,
> 
> I am sending this message here as I did not find a bugtracker on the website.
> 
> If this is the wrong place, I would kindly apologize and ask you to redirect 
> me to the proper place,
> 
> Thank you.
> 
> Description of the issue:
> 
> I am using pktgen-dpdk to replay a pcap file containing exactly 1 packet.
> 
> The packet in question is generated using this Scapy command:
> 
> pkt=(Ether(src="ec:0d:9a:37:d1:ab",dst="7c:fe:90:31:0d:52")/Dot1Q(vlan=2)/IP(dst="192.168.0.254")/UDP(sport=1020,dport=1021)/Raw(RandBin(size=8500)))
> 
> The pcap is then replayed in pktgen-dpdk:
> 
> ./app/app/x86_64-native-linuxapp-gcc/pktgen -l 0-7 -- -m [1-7].0 -s 
> 0:pcap/8500Bpp.pcap

This could be the issue as I can not setup your system (no cards). The pktgen 
command line is using 1-7 cores for TX/RX of packets. This means to pktgen to 
send the pcap from each core and this means the packet will be sent from each 
core. If you set the number of TX/RX cores to 1.0 then you should only see one. 
I assume you are using 1-7 cores to increase the bit rate closer to the 
performance of the card.

> 
> When I run this on a machine with Mellanox ConnectX-4 NIC (MCX4), the switch 
> towards which I generate traffic gets a strange behaviour
> 
> #sh int et29/1 | i rate
>   5 seconds input rate 39.4 Gbps (98.4% with framing overhead), 0 packets/sec
> 
> A capture of this traffic (I used a monitor session to redirect all to a 
> different port, connected to a machine on which I ran tcpdump) gives me this:
> 
> 19:04:50.210792 00:00:00:00:00:00 (oui Ethernet) > 00:00:00:00:00:00 (oui 
> Ethernet) Null Unnumbered, ef, Flags [Poll], length 1500
> 19:04:50.210795 00:00:00:00:00:00 (oui Ethernet) > 00:00:00:00:00:00 (oui 
> Ethernet) Null Unnumbered, ef, Flags [Poll], length 1500
> 19:04:50.210796 00:00:00:00:00:00 (oui Ethernet) > 00:00:00:00:00:00 (oui 
> Ethernet) Null Unnumbered, ef, Flags [Poll], length 1500
> 19:04:50.210797 00:00:00:00:00:00 (oui Ethernet) > 00:00:00:00:00:00 (oui 
> Ethernet) Null Unnumbered, ef, Flags [Poll], length 1500
> 19:04:50.210799 00:00:00:00:00:00 (oui Ethernet) > 00:00:00:00:00:00 (oui 
> Ethernet) Null Unnumbered, ef, Flags [Poll], length 1500
> 
> The issue cannot be reproduced if any of the following conditions is met:
> 
> - Set the size in the Raw(RandBin()) to a value lower than 1500
> 
> - Send the packet from a Mellanox ConnectX-3 (MCX3) NIC (both machines are 
> indentical in terms of software).
> 
> Is this a known problem ?
> 
> I remain available for any question you may have.
> 
> Regards,
> 
> -- 
> Damien Clabaut
> R&D vRouter
> ovh.qc.ca
> 

Regards,
Keith

Re: [dpdk-dev] [PATCH v5 1/2] net/i40e: get information about protocols defined in ddp profile

2017-09-25 Thread Xing, Beilei



> -Original Message-
> From: Rybalchenko, Kirill
> Sent: Wednesday, September 20, 2017 1:54 AM
> To: dev@dpdk.org
> Cc: Rybalchenko, Kirill ; Chilikin, Andrey
> ; Xing, Beilei ; Wu,
> Jingjing 
> Subject: [PATCH v5 1/2] net/i40e: get information about protocols defined in
> ddp profile
> 
> This patch adds new package info types to get list of protocols, pctypes and
> ptypes defined in a profile
> 
> ---
> v3
> info_size parameter always represents size of the info buffer in bytes
> 
> Signed-off-by: Kirill Rybalchenko 
> ---
>  drivers/net/i40e/rte_pmd_i40e.c | 171
> 
>  drivers/net/i40e/rte_pmd_i40e.h |  25 ++
>  2 files changed, 196 insertions(+)
> 
> diff --git a/drivers/net/i40e/rte_pmd_i40e.c
> b/drivers/net/i40e/rte_pmd_i40e.c index c08e07a..80af18b 100644
> --- a/drivers/net/i40e/rte_pmd_i40e.c
> +++ b/drivers/net/i40e/rte_pmd_i40e.c
> @@ -1706,6 +1706,26 @@ rte_pmd_i40e_process_ddp_package(uint8_t
> port, uint8_t *buff,
>   return status;
>  }
> 
> +/* Get number of tvl records in the section */ static unsigned int
> +i40e_get_tlv_section_size(struct i40e_profile_section_header *sec) {
> + unsigned int i, nb_rec, nb_tlv = 0;
> + struct i40e_profile_tlv_section_record *tlv;
> +
> + if (!sec)
> + return nb_tlv;
> +
> + /* get number of records in the section */
> + nb_rec = sec->section.size / sizeof(struct
> i40e_profile_tlv_section_record);
> + for (i = 0; i < nb_rec; ) {
> + tlv = (struct i40e_profile_tlv_section_record *)&sec[1 + i];
> + i += tlv->len;
> + nb_tlv++;
> + }
> + return nb_tlv;

In my opinion, nb_rec should be the same with nb_tlv, why need the for loop to 
get and return nb_tlv? Please correct me if I'm wrong.



> +
> + /* get list of protocols */
> + if (type == RTE_PMD_I40E_PKG_INFO_PROTOCOL_LIST) {
> + uint32_t i, j, nb_rec;
> + struct rte_pmd_i40e_proto_info *pinfo;
> + struct i40e_profile_section_header *proto;
> + struct i40e_profile_tlv_section_record *tlv;
> +
> + proto = i40e_find_section_in_profile(SECTION_TYPE_PROTO,
> +  (struct
> i40e_profile_segment *)
> +  i40e_seg_hdr);
> + nb_rec = i40e_get_tlv_section_size(proto);
> + if (info_size < nb_rec * sizeof(struct
> rte_pmd_i40e_proto_info)) {
> + PMD_DRV_LOG(ERR, "Invalid information buffer
> size");
> + return -EINVAL;
> + }
> + pinfo = (struct rte_pmd_i40e_proto_info *)info_buff;
> + for (i = 0; i < info_size; i++) {

You changed info_size represents the size of the info buffer, then should use 
'i < nb_rec' instead of ' i < info_size ' in for loop, otherwise there will be 
unexpected issue.

> + pinfo[i].proto_id = RTE_PMD_I40E_PROTO_UNUSED;
> + memset(pinfo[i].name, 0,
> RTE_PMD_I40E_DDP_NAME_SIZE);
> + }
> + if (nb_rec == 0)
> + return I40E_SUCCESS;

Why not adding the condition just after 'nb_rec = 
i40e_get_tlv_section_size(proto);' ?

> + /* get number of records in the section */
> + nb_rec = proto->section.size /
> + sizeof(struct
> i40e_profile_tlv_section_record);

Why do we need to get the nb_rec again? I think the value should be the same as 
the last value.

> + tlv = (struct i40e_profile_tlv_section_record *)&proto[1];
> + for (i = j = 0; i < nb_rec; j++) {
> + pinfo[j].proto_id = tlv->data[0];
> + strncpy(pinfo[j].name, (const char *)&tlv->data[1],
> + I40E_DDP_NAME_SIZE);
> + i += tlv->len;
> + tlv = &tlv[tlv->len];
> + }
> + return I40E_SUCCESS;
> + }
> +



Same comments for get RTE_PMD_I40E_PKG_INFO_PCTYPE_LIST and 
RTE_PMD_I40E_PKG_INFO_PTYPE_LIST.

Re: [dpdk-dev] [PATCH v3] app/testpmd: enable the heavyweight mode TCP/IPv4 GRO

2017-09-25 Thread Hu, Jiayu

Hi Lei,

> -Original Message-
> From: Hu, Jiayu
> Sent: Monday, September 25, 2017 6:20 PM
> To: Yao, Lei A ; dev@dpdk.org
> Cc: Yigit, Ferruh ; Ananyev, Konstantin
> ; Tan, Jianfeng ;
> Wu, Jingjing 
> Subject: RE: [dpdk-dev] [PATCH v3] app/testpmd: enable the heavyweight
> mode TCP/IPv4 GRO
> 
> Hi Lei,
> 
> > -Original Message-
> > From: Yao, Lei A
> > Sent: Wednesday, September 20, 2017 3:00 PM
> > To: Hu, Jiayu ; dev@dpdk.org
> > Cc: Yigit, Ferruh ; Ananyev, Konstantin
> > ; Tan, Jianfeng ;
> > Wu, Jingjing ; Hu, Jiayu 
> > Subject: RE: [dpdk-dev] [PATCH v3] app/testpmd: enable the heavyweight
> > mode TCP/IPv4 GRO
> >
> >
> >
> > > -Original Message-
> > > From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Jiayu Hu
> > > Sent: Sunday, September 3, 2017 2:30 PM
> > > To: dev@dpdk.org
> > > Cc: Yigit, Ferruh ; Ananyev, Konstantin
> > > ; Tan, Jianfeng
> ;
> > > Wu, Jingjing ; Hu, Jiayu 
> > > Subject: [dpdk-dev] [PATCH v3] app/testpmd: enable the heavyweight
> > > mode TCP/IPv4 GRO
> > >
> > > The GRO library provides two modes to reassemble packets. Currently,
> the
> > > csum forwarding engine has supported to use the lightweight mode to
> > > reassemble TCP/IPv4 packets. This patch introduces the heavyweight
> mode
> > > for TCP/IPv4 GRO in the csum forwarding engine.
> > >
> > > With the command "set port  gro on|off", users can enable
> > > TCP/IPv4 GRO for a given port. With the command "set gro flush
> ",
> > > users can determine when the GROed TCP/IPv4 packets are flushed from
> > > reassembly tables. With the command "show port  gro", users
> > can
> > > display GRO configuration.
> > >
> > > Signed-off-by: Jiayu Hu 
> > Tested-by : Lei Yao
> >
> > This patch has been tested on my bench, iperf test result  is as following:
> > No-GRO: 8 Gbps
> > Kernel GRO: 14.3 Gbps
> > GRO flush 0 : 12.7 Gbps
> > GRO flush 1: 16.8 Gbps
> > But when I use 40G NIC and set GRO flush cycle as 2, sometimes
> > the iperf traffic will stall for several seconds. Still need investigate.

The default descriptor number per queue is 128. Given the flush
cycle is 3, the demanded descriptor number is up to 3x32, even if we
perform GRO. This is because each mbuf of a merged packet demands
one descriptor. Therefore, we'd better set large descriptor number
per-queue when launch testpmd, instead of using the default value 128.

After setting rxd and txd to 256, the iperf performance is stable at
16Gbps with different flush cycle settings.

Thanks,
Jiayu

> 
> This issue happens in high link speed environment, like 40Gbps. I am
> looking into this issue now.
> 
> Current experiment data shows that the large flush cycle value causes
> lots of duplicated TCP ACKs with same ACK numbers, which may be one
> of the reasons for the poor TCP/IP stack performance.
> 
> Thanks,
> Jiayu
> >
> > > ---
> > > changes in v3:
> > > - remove "heavyweight mode" and "lightweight mode" from GRO
> > > commands
> > > - combine two patches into one
> > > - use consistent help string for GRO commands
> > > - remove the unnecessary command "gro set (max_flow_num)
> > >   (max_item_num_per_flow) (port_id)"
> > > changes in v2:
> > > - use "set" and "show" as the root level command
> > > - add a new command to show GRO configuration
> > > - fix l2_len/l3_len/l4_len unset etc. bugs
> > >
> > >  app/test-pmd/cmdline.c  | 206 
> > > 
> > >  app/test-pmd/config.c   |  67 +++--
> > >  app/test-pmd/csumonly.c |  31 -
> > >  app/test-pmd/testpmd.c  |  18 ++-
> > >  app/test-pmd/testpmd.h  |  16 ++-
> > >  doc/guides/testpmd_app_ug/testpmd_funcs.rst |  45 --
> > >  6 files changed, 263 insertions(+), 120 deletions(-)
> > >
> > > diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
> > > index cd8c358..d628250 100644
> > > --- a/app/test-pmd/cmdline.c
> > > +++ b/app/test-pmd/cmdline.c
> > > @@ -423,13 +423,16 @@ static void cmd_help_long_parsed(void
> > > *parsed_result,
> > >   "tso show (portid)"
> > >   "Display the status of TCP Segmentation
> > > Offload.\n\n"
> > >
> > > - "gro (on|off) (port_id)"
> > > + "set port (port_id) gro on|off\n"
> > >   "Enable or disable Generic Receive Offload in"
> > >   " csum forwarding engine.\n\n"
> > >
> > > - "gro set (max_flow_num)
> > > (max_item_num_per_flow) (port_id)\n"
> > > - "Set max flow number and max packet number
> > > per-flow"
> > > - " for GRO.\n\n"
> > > + "show port (port_id) gro\n"
> > > + "Display GRO configuration.\n\n"
> > > +
> > > + "set gro flush (cycles)\n"
> > > + "Set the cycle to flush GROed packets from"
> > > + " reassembly tables.\n\n"
> > >
> > >   "set fwd (%s)

Re: [dpdk-dev] [PATCH v9 0/9] Infrastructure to detect iova mapping on the bus

2017-09-25 Thread santosh

Hi Thomas,

On Wednesday 20 September 2017 12:23 PM, Santosh Shukla wrote:
> v9:
> - Added Tested-By: to series.
> - Includes minor changes related to linuxapp api stub in [02/09]
>   (Suggested by Anatoly)
> - Series rebased on tip commit : aee62e90

imo, series is ready to merge, note that octeontx pmd needs this + other 
mempool series,
we need them in -rc1 release. Can you pl. plan to merge this series in -rc1?

Thanks.

Re: [dpdk-dev] [PATCH v3 2/2] app/testpmd: add API for configuration of queue region

2017-09-25 Thread Zhao1, Wei


Hi，Ferruh

> -Original Message-
> From: Yigit, Ferruh
> Sent: Monday, September 25, 2017 5:43 PM
> To: Zhao1, Wei ; dev@dpdk.org; Wu, Jingjing
> 
> Subject: Re: [dpdk-dev] [PATCH v3 2/2] app/testpmd: add API for
> configuration of queue region
> 
> On 9/25/2017 10:25 AM, Zhao1, Wei wrote:
> > Hi, Ferruh
> >
> >> -Original Message-
> >> From: Yigit, Ferruh
> >> Sent: Wednesday, September 20, 2017 6:46 PM
> >> To: Zhao1, Wei ; dev@dpdk.org; Wu, Jingjing
> >> 
> >> Subject: Re: [dpdk-dev] [PATCH v3 2/2] app/testpmd: add API for
> >> configuration of queue region
> >>
> >> On 9/15/2017 4:13 AM, Wei Zhao wrote:
> >>> This patch add a API configuration of queue region in rss.
> >>> It can parse the parameters of region index, queue number, queue
> >>> start index, user priority, traffic classes and so on.
> >>> According to commands from command line, it will call i40e private
> >>> API and start the process of set or flush queue region configure. As
> >>> this feature is specific for i40e, so private API will be used.
> >>>
> >>> Signed-off-by: Wei Zhao 
> >>> ---
> >>>  app/test-pmd/cmdline.c | 328
> >>> +
> >>
> >> Testpmd documentation also needs to be updated.
> >
> > Do you mean the following doc or others?
> > dpdk\doc\guides\testpmd_app_ug.rst
> 
> Yes this one, thanks.
> 
> >
> >
> >>
> >>>  1 file changed, 328 insertions(+)
> >>>
> >>> diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index
> >>> 0144191..060fcb1 100644
> >>> --- a/app/test-pmd/cmdline.c
> >>> +++ b/app/test-pmd/cmdline.c
> >>> @@ -637,6 +637,21 @@ static void cmd_help_long_parsed(void
> >> *parsed_result,
> >>>   "ptype mapping update (port_id) (hw_ptype)
> >> (sw_ptype)\n"
> >>>   "Update a ptype mapping item on a port\n\n"
> >>>
> >>> + "queue-region set port (port_id) region_id (value) "
> >>> + "queue_start_index (value) queue_num (value)\n"
> >>> + "Set a queue region on a port\n\n"
> >>> +
> >>> + "queue-region set (pf|vf) port (port_id) region_id
> >> (value) "
> >>> + "flowtype (value)\n"
> >>> + "Set a flowtype region index on a port\n\n"
> >>> +
> >>> + "queue-region set port (port_id) UP (value)
> >> region_id (value)\n"
> >>> + "Set the mapping of User Priority to "
> >>> + "queue region on a port\n\n"
> >>> +
> >>> + "queue-region flush (on|off) port (port_id)\n"
> >>> + "flush all queue region related configuration\n\n"
> >>
> >> I keep doing same comment but I will do it again...
> >>
> >> Each patch adding a new feature looking from its own context and
> >> adding a new root level command and this is making overall testpmd
> confusing.
> >>
> >> Since this is to set an option of the port, what do you think making
> >> this command part of existing commands, like:
> >> "port config #P queue-region "
> >> OR
> >> "set port #P queue-region ..." ?
> >
> > What you said is very meaningful, but other feature liake ptype mapping
> use the same mode  and so on.
> > maybe we should do a whole work to make CLI command style consistent.
> 
> Yes ptype does it, is seems it is one of the missed ones. Although we can do a
> whole work for CLI commands, meanwhile I think new ones can be added
> properly.
> 
> This may be good opportunity to remember broken window theory [1] :)


But  this type of CLI for queue region has been discussion when this feature 
skype meeting
We have do a ppt, which  review by DPDK-ENG-TECH-COMMITTEE, they have support 
and approve this type of command for this feature.
I think we should respect their review wok and decision  of other committee 
member.
AND also, The minority is subordinate to the majority, do you think so ?
I do not want do hold a second meeting later to  persuade them accept the new 
type of CLI command.
They may not also not change their idea too.


> 
> [1]
> https://blog.codinghorror.com/the-broken-window-theory/
> 
> <...>

[dpdk-dev] [PATCH v4] app/testpmd: enable the heavyweight mode TCP/IPv4 GRO

2017-09-25 Thread Jiayu Hu

The GRO library provides two modes to reassemble packets. Currently, the
csum forwarding engine has supported to use the lightweight mode to
reassemble TCP/IPv4 packets. This patch introduces the heavyweight mode
for TCP/IPv4 GRO in the csum forwarding engine.

With the command "set port  gro on|off", users can enable
TCP/IPv4 GRO for a given port. With the command "set gro flush ",
users can determine when the GROed TCP/IPv4 packets are flushed from
reassembly tables. With the command "show port  gro", users can
display GRO configuration.

The GRO library doesn't re-calculate checksums for merged packets. If
users want the merged packets to have correct IP and TCP checksums,
please select HW IP checksum calculation and HW TCP checksum calculation
for the port which the merged packets are transmitted to.

Signed-off-by: Jiayu Hu 
Reviewed-by: Ferruh Yigit 
---
changes in v4:
- fix unchecking the min value of 'cycle' bug in setup_gro_flush_cycles
- update the context of the testpmd document and commit logs
changes in v3:
- remove "heavyweight mode" and "lightweight mode" from GRO commands
- combine two patches into one
- use consistent help string for GRO commands 
- remove the unnecessary command "gro set (max_flow_num)
(max_item_num_per_flow) (port_id)"
changes in v2:
- use "set" and "show" as the root level command
- add a new command to show GRO configuration
- fix l2_len/l3_len/l4_len unset etc. bugs

 app/test-pmd/cmdline.c  | 206 
 app/test-pmd/config.c   |  68 +++--
 app/test-pmd/csumonly.c |  31 -
 app/test-pmd/testpmd.c  |  19 ++-
 app/test-pmd/testpmd.h  |  16 ++-
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |  50 +--
 6 files changed, 270 insertions(+), 120 deletions(-)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index ccdf239..e44c02e 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -423,13 +423,16 @@ static void cmd_help_long_parsed(void *parsed_result,
"tso show (portid)"
"Display the status of TCP Segmentation 
Offload.\n\n"
 
-   "gro (on|off) (port_id)"
+   "set port (port_id) gro on|off\n"
"Enable or disable Generic Receive Offload in"
" csum forwarding engine.\n\n"
 
-   "gro set (max_flow_num) (max_item_num_per_flow) 
(port_id)\n"
-   "Set max flow number and max packet number per-flow"
-   " for GRO.\n\n"
+   "show port (port_id) gro\n"
+   "Display GRO configuration.\n\n"
+
+   "set gro flush (cycles)\n"
+   "Set the cycle to flush GROed packets from"
+   " reassembly tables.\n\n"
 
"set fwd (%s)\n"
"Set packet forwarding mode.\n\n"
@@ -3854,115 +3857,145 @@ cmdline_parse_inst_t cmd_tunnel_tso_show = {
 };
 
 /* *** SET GRO FOR A PORT *** */
-struct cmd_gro_result {
+struct cmd_gro_enable_result {
+   cmdline_fixed_string_t cmd_set;
+   cmdline_fixed_string_t cmd_port;
cmdline_fixed_string_t cmd_keyword;
-   cmdline_fixed_string_t mode;
-   uint8_t port_id;
+   cmdline_fixed_string_t cmd_onoff;
+   uint8_t cmd_pid;
 };
 
 static void
-cmd_enable_gro_parsed(void *parsed_result,
+cmd_gro_enable_parsed(void *parsed_result,
__attribute__((unused)) struct cmdline *cl,
__attribute__((unused)) void *data)
 {
-   struct cmd_gro_result *res;
+   struct cmd_gro_enable_result *res;
 
res = parsed_result;
-   setup_gro(res->mode, res->port_id);
-}
-
-cmdline_parse_token_string_t cmd_gro_keyword =
-   TOKEN_STRING_INITIALIZER(struct cmd_gro_result,
+   if (!strcmp(res->cmd_keyword, "gro"))
+   setup_gro(res->cmd_onoff, res->cmd_pid);
+}
+
+cmdline_parse_token_string_t cmd_gro_enable_set =
+   TOKEN_STRING_INITIALIZER(struct cmd_gro_enable_result,
+   cmd_set, "set");
+cmdline_parse_token_string_t cmd_gro_enable_port =
+   TOKEN_STRING_INITIALIZER(struct cmd_gro_enable_result,
+   cmd_keyword, "port");
+cmdline_parse_token_num_t cmd_gro_enable_pid =
+   TOKEN_NUM_INITIALIZER(struct cmd_gro_enable_result,
+   cmd_pid, UINT8);
+cmdline_parse_token_string_t cmd_gro_enable_keyword =
+   TOKEN_STRING_INITIALIZER(struct cmd_gro_enable_result,
cmd_keyword, "gro");
-cmdline_parse_token_string_t cmd_gro_mode =
-   TOKEN_STRING_INITIALIZER(struct cmd_gro_result,
-   mode, "on#off");
-cmdline_parse_token_num_t cmd_gro_pid =
-   TOKEN_NUM_INITIALIZER(struct cmd_gro_result,
-   port_id, UINT8);
+cmdline_parse_token_string

94 matches

Mail list logo