Re: [dpdk-dev] [PATCH v4] net/kni: add KNI PMD

Ferruh Yigit Thu, 15 Dec 2016 07:56:55 -0800

On 12/14/2016 7:25 PM, Yong Wang wrote:
>> -----Original Message-----
>> From: Ferruh Yigit [mailto:ferruh.yi...@intel.com]
>> Sent: Wednesday, December 14, 2016 8:00 AM
>> To: Yong Wang <yongw...@vmware.com>; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v4] net/kni: add KNI PMD
>>
>> On 12/12/2016 9:59 PM, Yong Wang wrote:
>>>> -----Original Message-----
>>>> From: Ferruh Yigit [mailto:ferruh.yi...@intel.com]
>>>> Sent: Wednesday, November 30, 2016 10:12 AM
>>>> To: dev@dpdk.org
>>>> Cc: Ferruh Yigit <ferruh.yi...@intel.com>; Yong Wang
>>>> <yongw...@vmware.com>
>>>> Subject: [PATCH v4] net/kni: add KNI PMD
>>>>
>>>> Add KNI PMD which wraps librte_kni for ease of use.
>>>>
>>>> KNI PMD can be used as any regular PMD to send / receive packets to the
>>>> Linux networking stack.
>>>>
>>>> Signed-off-by: Ferruh Yigit <ferruh.yi...@intel.com>
>>>> ---
>>>>
>>>> v4:
>>>> * allow only single queue
>>>> * use driver.name as name
>>>>
>>>> v3:
>>>> * rebase on top of latest master
>>>>
>>>> v2:
>>>> * updated driver name eth_kni -> net_kni
>>>> ---
>>>>  config/common_base                      |   1 +
>>>>  config/common_linuxapp                  |   1 +
>>>>  drivers/net/Makefile                    |   1 +
>>>>  drivers/net/kni/Makefile                |  63 +++++
>>>>  drivers/net/kni/rte_eth_kni.c           | 462
>>>> ++++++++++++++++++++++++++++++++
>>>>  drivers/net/kni/rte_pmd_kni_version.map |   4 +
>>>>  mk/rte.app.mk                           |  10 +-
>>>>  7 files changed, 537 insertions(+), 5 deletions(-)
>>>>  create mode 100644 drivers/net/kni/Makefile
>>>>  create mode 100644 drivers/net/kni/rte_eth_kni.c
>>>>  create mode 100644 drivers/net/kni/rte_pmd_kni_version.map
>>>>
>>>> diff --git a/config/common_base b/config/common_base
>>>> index 4bff83a..3385879 100644
>>>> --- a/config/common_base
>>>> +++ b/config/common_base
>>>> @@ -543,6 +543,7 @@ CONFIG_RTE_PIPELINE_STATS_COLLECT=n
>>>>  # Compile librte_kni
>>>>  #
>>>>  CONFIG_RTE_LIBRTE_KNI=n
>>>> +CONFIG_RTE_LIBRTE_PMD_KNI=n
>>>>  CONFIG_RTE_KNI_KMOD=n
>>>>  CONFIG_RTE_KNI_PREEMPT_DEFAULT=y
>>>>  CONFIG_RTE_KNI_VHOST=n
>>>> diff --git a/config/common_linuxapp b/config/common_linuxapp
>>>> index 2483dfa..2ecd510 100644
>>>> --- a/config/common_linuxapp
>>>> +++ b/config/common_linuxapp
>>>> @@ -39,6 +39,7 @@ CONFIG_RTE_EAL_IGB_UIO=y
>>>>  CONFIG_RTE_EAL_VFIO=y
>>>>  CONFIG_RTE_KNI_KMOD=y
>>>>  CONFIG_RTE_LIBRTE_KNI=y
>>>> +CONFIG_RTE_LIBRTE_PMD_KNI=y
>>>>  CONFIG_RTE_LIBRTE_VHOST=y
>>>>  CONFIG_RTE_LIBRTE_PMD_VHOST=y
>>>>  CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
>>>> diff --git a/drivers/net/Makefile b/drivers/net/Makefile
>>>> index bc93230..c4771cd 100644
>>>> --- a/drivers/net/Makefile
>>>> +++ b/drivers/net/Makefile
>>>> @@ -41,6 +41,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic
>>>>  DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k
>>>>  DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += i40e
>>>>  DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe
>>>> +DIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += kni
>>>>  DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4
>>>>  DIRS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5
>>>>  DIRS-$(CONFIG_RTE_LIBRTE_MPIPE_PMD) += mpipe
>>>> diff --git a/drivers/net/kni/Makefile b/drivers/net/kni/Makefile
>>>> new file mode 100644
>>>> index 0000000..0b7cf91
>>>> --- /dev/null
>>>> +++ b/drivers/net/kni/Makefile
>>>> @@ -0,0 +1,63 @@
>>>> +#   BSD LICENSE
>>>> +#
>>>> +#   Copyright(c) 2016 Intel Corporation. All rights reserved.
>>>> +#
>>>> +#   Redistribution and use in source and binary forms, with or without
>>>> +#   modification, are permitted provided that the following conditions
>>>> +#   are met:
>>>> +#
>>>> +#     * Redistributions of source code must retain the above copyright
>>>> +#       notice, this list of conditions and the following disclaimer.
>>>> +#     * Redistributions in binary form must reproduce the above copyright
>>>> +#       notice, this list of conditions and the following disclaimer in
>>>> +#       the documentation and/or other materials provided with the
>>>> +#       distribution.
>>>> +#     * Neither the name of Intel Corporation nor the names of its
>>>> +#       contributors may be used to endorse or promote products derived
>>>> +#       from this software without specific prior written permission.
>>>> +#
>>>> +#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
>>>> CONTRIBUTORS
>>>> +#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
>> BUT
>>>> NOT
>>>> +#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
>>>> FITNESS FOR
>>>> +#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
>>>> COPYRIGHT
>>>> +#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
>>>> INCIDENTAL,
>>>> +#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
>> BUT
>>>> NOT
>>>> +#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
>> LOSS
>>>> OF USE,
>>>> +#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
>>>> AND ON ANY
>>>> +#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
>>>> TORT
>>>> +#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
>> OF
>>>> THE USE
>>>> +#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
>>>> DAMAGE.
>>>> +
>>>> +include $(RTE_SDK)/mk/rte.vars.mk
>>>> +
>>>> +#
>>>> +# library name
>>>> +#
>>>> +LIB = librte_pmd_kni.a
>>>> +
>>>> +CFLAGS += -O3
>>>> +CFLAGS += $(WERROR_FLAGS)
>>>> +LDLIBS += -lpthread
>>>> +
>>>> +EXPORT_MAP := rte_pmd_kni_version.map
>>>> +
>>>> +LIBABIVER := 1
>>>> +
>>>> +#
>>>> +# all source are stored in SRCS-y
>>>> +#
>>>> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += rte_eth_kni.c
>>>> +
>>>> +#
>>>> +# Export include files
>>>> +#
>>>> +SYMLINK-y-include +=
>>>> +
>>>> +# this lib depends upon:
>>>> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_eal
>>>> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_ether
>>>> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_kni
>>>> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_mbuf
>>>> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_mempool
>>>> +
>>>> +include $(RTE_SDK)/mk/rte.lib.mk
>>>> diff --git a/drivers/net/kni/rte_eth_kni.c b/drivers/net/kni/rte_eth_kni.c
>>>> new file mode 100644
>>>> index 0000000..6c4df96
>>>> --- /dev/null
>>>> +++ b/drivers/net/kni/rte_eth_kni.c
>>>> @@ -0,0 +1,462 @@
>>>> +/*-
>>>> + *   BSD LICENSE
>>>> + *
>>>> + *   Copyright(c) 2016 Intel Corporation. All rights reserved.
>>>> + *   All rights reserved.
>>>> + *
>>>> + *   Redistribution and use in source and binary forms, with or without
>>>> + *   modification, are permitted provided that the following conditions
>>>> + *   are met:
>>>> + *
>>>> + *     * Redistributions of source code must retain the above copyright
>>>> + *       notice, this list of conditions and the following disclaimer.
>>>> + *     * Redistributions in binary form must reproduce the above copyright
>>>> + *       notice, this list of conditions and the following disclaimer in
>>>> + *       the documentation and/or other materials provided with the
>>>> + *       distribution.
>>>> + *     * Neither the name of Intel Corporation nor the names of its
>>>> + *       contributors may be used to endorse or promote products derived
>>>> + *       from this software without specific prior written permission.
>>>> + *
>>>> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
>>>> CONTRIBUTORS
>>>> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
>> BUT
>>>> NOT
>>>> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
>>>> FITNESS FOR
>>>> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
>>>> COPYRIGHT
>>>> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
>>>> INCIDENTAL,
>>>> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
>> BUT
>>>> NOT
>>>> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
>> LOSS
>>>> OF USE,
>>>> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
>>>> AND ON ANY
>>>> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
>>>> TORT
>>>> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
>> OF
>>>> THE USE
>>>> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
>>>> DAMAGE.
>>>> + */
>>>> +
>>>> +#include <fcntl.h>
>>>> +#include <pthread.h>
>>>> +#include <unistd.h>
>>>> +
>>>> +#include <rte_ethdev.h>
>>>> +#include <rte_kni.h>
>>>> +#include <rte_malloc.h>
>>>> +#include <rte_vdev.h>
>>>> +
>>>> +/* Only single queue supported */
>>>> +#define KNI_MAX_QUEUE_PER_PORT 1
>>>> +
>>>> +#define MAX_PACKET_SZ 2048
>>>> +#define MAX_KNI_PORTS 8
>>>> +
>>>> +struct pmd_queue_stats {
>>>> +  uint64_t pkts;
>>>> +  uint64_t bytes;
>>>> +  uint64_t err_pkts;
>>>> +};
>>>> +
>>>> +struct pmd_queue {
>>>> +  struct pmd_internals *internals;
>>>> +  struct rte_mempool *mb_pool;
>>>> +
>>>> +  struct pmd_queue_stats rx;
>>>> +  struct pmd_queue_stats tx;
>>>> +};
>>>> +
>>>> +struct pmd_internals {
>>>> +  struct rte_kni *kni;
>>>> +  int is_kni_started;
>>>> +
>>>> +  pthread_t thread;
>>>> +  int stop_thread;
>>>> +
>>>> +  struct pmd_queue rx_queues[KNI_MAX_QUEUE_PER_PORT];
>>>> +  struct pmd_queue tx_queues[KNI_MAX_QUEUE_PER_PORT];
>>>> +};
>>>> +
>>>> +static struct ether_addr eth_addr;
>>>> +static struct rte_eth_link pmd_link = {
>>>> +          .link_speed = ETH_SPEED_NUM_10G,
>>>> +          .link_duplex = ETH_LINK_FULL_DUPLEX,
>>>> +          .link_status = 0
>>>> +};
>>>> +static int is_kni_initialized;
>>>> +
>>>> +static uint16_t
>>>> +eth_kni_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
>>>> +{
>>>> +  struct pmd_queue *kni_q = q;
>>>> +  struct rte_kni *kni = kni_q->internals->kni;
>>>> +  uint16_t nb_pkts;
>>>> +
>>>> +  nb_pkts = rte_kni_rx_burst(kni, bufs, nb_bufs);
>>>> +
>>>> +  kni_q->rx.pkts += nb_pkts;
>>>> +  kni_q->rx.err_pkts += nb_bufs - nb_pkts;
>>>> +
>>>> +  return nb_pkts;
>>>> +}
>>>> +
>>>> +static uint16_t
>>>> +eth_kni_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
>>>> +{
>>>> +  struct pmd_queue *kni_q = q;
>>>> +  struct rte_kni *kni = kni_q->internals->kni;
>>>> +  uint16_t nb_pkts;
>>>> +
>>>> +  nb_pkts =  rte_kni_tx_burst(kni, bufs, nb_bufs);
>>>> +
>>>> +  kni_q->tx.pkts += nb_pkts;
>>>> +  kni_q->tx.err_pkts += nb_bufs - nb_pkts;
>>>> +
>>>> +  return nb_pkts;
>>>> +}
>>>> +
>>>> +static void *
>>>> +kni_handle_request(void *param)
>>>> +{
>>>> +  struct pmd_internals *internals = param;
>>>> +#define MS 1000
>>>> +
>>>> +  while (!internals->stop_thread) {
>>>> +          rte_kni_handle_request(internals->kni);
>>>> +          usleep(500 * MS);
>>>> +  }
>>>> +
>>>> +  return param;
>>>> +}
>>>> +
>>>
>>> Do we really need a thread to handle request by default? I know there are
>> apps that handle request their own way and having a separate thread could
>> add synchronization problems.  Can we at least add an option to disable this?
>>
>> I didn't think about there can be a use case that requires own request
>> handling.
>>
>> But, kni requests should be handled to make kni interface run properly,
>> and to handle interface "kni" handler (internals->kni) required, which
>> this PMD doesn't expose.
>>
>> So, just disabling this thread won't work on its own.
> 
> I understand that and what I am asking is a way to at least disable this 
> without having to make code changes for applications that have their own way 
> of handling KNI request and the callback mentioned below sounds good to me.  
> I am fine with adding this capability with this commit or in a separate 
> commit after you have this commit checked in.


I don't mind adding in new version, only I am trying to understand it.

Normally what it does is calling KNI library rte_kni_handle_request()
API periodically on KNI handler. What an app may be doing own its way,
other than tweaking the period?

>  
>> A solution can be found, like callback registraion, or get_handler API,
>> but if an application has custom request handling, perhaps it may prefer
>> to use kni library directly instead of this wrapper, since wrapper
>> already doesn't expose all kni features.
> 
> I think one of the motivation of having KNI pmd is that it's abstracted the 
> same way as other physical or virtual devices.  I think it makes sense to 
> achieve  feature parity with the KNI library as much as possible.  What's 
> currently supported in KNI library but missing in KNI PMD and any specific 
> reason they are not supported?

Mainly what missing is rte_kni_conf and some APIs has default values,
instead of being variable.
And ethtool (kni control path) is not supported with PMD.

Default values used (instead of configurable devargs) , to make PMD simple.
And ethtool support is a) hard to add, b) doesn't quite fit to KNI PMD
logic.

> 
>>>
>>>> +static int
>>>> +eth_kni_start(struct rte_eth_dev *dev)
>>>> +{
>>>> +  struct pmd_internals *internals = dev->data->dev_private;
>>>> +  uint16_t port_id = dev->data->port_id;
>>>> +  struct rte_mempool *mb_pool;
>>>> +  struct rte_kni_conf conf;
>>>> +  const char *name = dev->data->name + 4; /* remove net_ */
>>>> +
>>>> +  snprintf(conf.name, RTE_KNI_NAMESIZE, "%s", name);
>>>> +  conf.force_bind = 0;
>>>> +  conf.group_id = port_id;
>>>> +  conf.mbuf_size = MAX_PACKET_SZ;
>>>> +  mb_pool = internals->rx_queues[0].mb_pool;
>>>> +
>>>> +  internals->kni = rte_kni_alloc(mb_pool, &conf, NULL);
>>>> +  if (internals->kni == NULL) {
>>>> +          RTE_LOG(ERR, PMD,
>>>> +                  "Fail to create kni for port: %d\n", port_id);
>>>> +          return -1;
>>>> +  }
>>>> +
>>>> +  return 0;
>>>> +}
>>>> +
>>>> +static int
>>>> +eth_kni_dev_start(struct rte_eth_dev *dev)
>>>> +{
>>>> +  struct pmd_internals *internals = dev->data->dev_private;
>>>> +  int ret;
>>>> +
>>>> +  if (internals->is_kni_started == 0) {
>>>> +          ret = eth_kni_start(dev);
>>>> +          if (ret)
>>>> +                  return -1;
>>>> +          internals->is_kni_started = 1;
>>>> +  }
>>>> +
>>>
>>> In case is_kni_started is 1 already,  shouldn't we return directly instead 
>>> of
>> proceeding?
>>
>> "is_kni_started" is just to protect "eth_kni_start()", as you can see it
>> doesn't have a counterpart in eth_kni_dev_stop(). This flag is to be
>> sure "eth_kni_start()" called only once during PMD life cycle.
>>
>> The check you mentioned already done, start() / stop() functions already
>> balanced by APIs calling these functions.
> 
> What about KNI request handing thread then?  Is it safe to have multiple 
> threads calling into rte_kni_handle_request()? My understanding is that this 
> is not safe as kni_fifo is not multi-thread safe.  It's also a bit wasteful 
> to create multiple threads here.

That thread created within start() and canceled in stop().
And it is not possible to have start() call twice, the API that calls
start(), rte_eth_dev_start(), prevents multiple calls already. Same for
stop().

> 
>>>
>>>> +  ret = pthread_create(&internals->thread, NULL, kni_handle_request,
>>>> +                  internals);
>>>> +  if (ret) {
>>>> +          RTE_LOG(ERR, PMD, "Fail to create kni request thread\n");
>>>> +          return -1;
>>>> +  }
>>>> +
>>>> +  dev->data->dev_link.link_status = 1;
>>>> +
>>>> +  return 0;
>>>> +}
>>>> +
>>>> +static void
>>>> +eth_kni_dev_stop(struct rte_eth_dev *dev)
>>>> +{
>>>> +  struct pmd_internals *internals = dev->data->dev_private;
>>>> +  int ret;
>>>> +
>>>> +  internals->stop_thread = 1;
>>>> +
>>>> +  ret = pthread_cancel(internals->thread);
>>>> +  if (ret)
>>>> +          RTE_LOG(ERR, PMD, "Can't cancel the thread\n");
>>>> +
>>>> +  ret = pthread_join(internals->thread, NULL);
>>>> +  if (ret)
>>>> +          RTE_LOG(ERR, PMD, "Can't join the thread\n");
>>>> +
>>>> +  internals->stop_thread = 0;
>>>> +
>>>> +  dev->data->dev_link.link_status = 0;
>>>> +}
>>>> +
>>>> +static int
>>>> +eth_kni_dev_configure(struct rte_eth_dev *dev __rte_unused)
>>>> +{
>>>> +  return 0;
>>>> +}
>>>> +
>>>> +static void
>>>> +eth_kni_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info
>>>> *dev_info)
>>>> +{
>>>> +  struct rte_eth_dev_data *data = dev->data;
>>>> +
>>>> +  dev_info->driver_name = data->drv_name;
>>>> +  dev_info->max_mac_addrs = 1;
>>>> +  dev_info->max_rx_pktlen = (uint32_t)-1;
>>>> +  dev_info->max_rx_queues = KNI_MAX_QUEUE_PER_PORT;
>>>> +  dev_info->max_tx_queues = KNI_MAX_QUEUE_PER_PORT;
>>>> +  dev_info->min_rx_bufsize = 0;
>>>> +  dev_info->pci_dev = NULL;
>>>> +}
>>>> +
>>>> +static int
>>>> +eth_kni_rx_queue_setup(struct rte_eth_dev *dev,
>>>> +          uint16_t rx_queue_id,
>>>> +          uint16_t nb_rx_desc __rte_unused,
>>>> +          unsigned int socket_id __rte_unused,
>>>> +          const struct rte_eth_rxconf *rx_conf __rte_unused,
>>>> +          struct rte_mempool *mb_pool)
>>>> +{
>>>> +  struct pmd_internals *internals = dev->data->dev_private;
>>>> +  struct pmd_queue *q;
>>>> +
>>>> +  q = &internals->rx_queues[rx_queue_id];
>>>> +  q->internals = internals;
>>>> +  q->mb_pool = mb_pool;
>>>> +
>>>> +  dev->data->rx_queues[rx_queue_id] = q;
>>>> +
>>>> +  return 0;
>>>> +}
>>>> +
>>>> +static int
>>>> +eth_kni_tx_queue_setup(struct rte_eth_dev *dev,
>>>> +          uint16_t tx_queue_id,
>>>> +          uint16_t nb_tx_desc __rte_unused,
>>>> +          unsigned int socket_id __rte_unused,
>>>> +          const struct rte_eth_txconf *tx_conf __rte_unused)
>>>> +{
>>>> +  struct pmd_internals *internals = dev->data->dev_private;
>>>> +  struct pmd_queue *q;
>>>> +
>>>> +  q = &internals->tx_queues[tx_queue_id];
>>>> +  q->internals = internals;
>>>> +
>>>> +  dev->data->tx_queues[tx_queue_id] = q;
>>>> +
>>>> +  return 0;
>>>> +}
>>>> +
>>>> +static void
>>>> +eth_kni_queue_release(void *q __rte_unused)
>>>> +{
>>>> +}
>>>> +
>>>> +static int
>>>> +eth_kni_link_update(struct rte_eth_dev *dev __rte_unused,
>>>> +          int wait_to_complete __rte_unused)
>>>> +{
>>>> +  return 0;
>>>> +}
>>>> +
>>>> +static void
>>>> +eth_kni_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
>>>> +{
>>>> +  unsigned long rx_packets_total = 0, rx_bytes_total = 0;
>>>> +  unsigned long tx_packets_total = 0, tx_bytes_total = 0;
>>>> +  struct rte_eth_dev_data *data = dev->data;
>>>> +  unsigned long tx_packets_err_total = 0;
>>>> +  unsigned int i, num_stats;
>>>> +  struct pmd_queue *q;
>>>> +
>>>> +  num_stats = RTE_MIN((unsigned
>>>> int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
>>>> +                  data->nb_rx_queues);
>>>> +  for (i = 0; i < num_stats; i++) {
>>>> +          q = data->rx_queues[i];
>>>> +          stats->q_ipackets[i] = q->rx.pkts;
>>>> +          stats->q_ibytes[i] = q->rx.bytes;
>>>> +          rx_packets_total += stats->q_ipackets[i];
>>>> +          rx_bytes_total += stats->q_ibytes[i];
>>>> +  }
>>>> +
>>>> +  num_stats = RTE_MIN((unsigned
>>>> int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
>>>> +                  data->nb_tx_queues);
>>>> +  for (i = 0; i < num_stats; i++) {
>>>> +          q = data->tx_queues[i];
>>>> +          stats->q_opackets[i] = q->tx.pkts;
>>>> +          stats->q_obytes[i] = q->tx.bytes;
>>>> +          stats->q_errors[i] = q->tx.err_pkts;
>>>> +          tx_packets_total += stats->q_opackets[i];
>>>> +          tx_bytes_total += stats->q_obytes[i];
>>>> +          tx_packets_err_total += stats->q_errors[i];
>>>> +  }
>>>> +
>>>> +  stats->ipackets = rx_packets_total;
>>>> +  stats->ibytes = rx_bytes_total;
>>>> +  stats->opackets = tx_packets_total;
>>>> +  stats->obytes = tx_bytes_total;
>>>> +  stats->oerrors = tx_packets_err_total;
>>>> +}
>>>> +
>>>> +static void
>>>> +eth_kni_stats_reset(struct rte_eth_dev *dev)
>>>> +{
>>>> +  struct rte_eth_dev_data *data = dev->data;
>>>> +  struct pmd_queue *q;
>>>> +  unsigned int i;
>>>> +
>>>> +  for (i = 0; i < data->nb_rx_queues; i++) {
>>>> +          q = data->rx_queues[i];
>>>> +          q->rx.pkts = 0;
>>>> +          q->rx.bytes = 0;
>>>> +  }
>>>> +  for (i = 0; i < data->nb_tx_queues; i++) {
>>>> +          q = data->tx_queues[i];
>>>> +          q->tx.pkts = 0;
>>>> +          q->tx.bytes = 0;
>>>> +          q->tx.err_pkts = 0;
>>>> +  }
>>>> +}
>>>> +
>>>> +static const struct eth_dev_ops eth_kni_ops = {
>>>> +  .dev_start = eth_kni_dev_start,
>>>> +  .dev_stop = eth_kni_dev_stop,
>>>> +  .dev_configure = eth_kni_dev_configure,
>>>> +  .dev_infos_get = eth_kni_dev_info,
>>>> +  .rx_queue_setup = eth_kni_rx_queue_setup,
>>>> +  .tx_queue_setup = eth_kni_tx_queue_setup,
>>>> +  .rx_queue_release = eth_kni_queue_release,
>>>> +  .tx_queue_release = eth_kni_queue_release,
>>>> +  .link_update = eth_kni_link_update,
>>>> +  .stats_get = eth_kni_stats_get,
>>>> +  .stats_reset = eth_kni_stats_reset,
>>>> +};
>>>> +
>>>> +static struct rte_vdev_driver eth_kni_drv;
>>>> +
>>>> +static struct rte_eth_dev *
>>>> +eth_kni_create(const char *name, unsigned int numa_node)
>>>> +{
>>>> +  struct pmd_internals *internals = NULL;
>>>> +  struct rte_eth_dev_data *data;
>>>> +  struct rte_eth_dev *eth_dev;
>>>> +
>>>> +  RTE_LOG(INFO, PMD, "Creating kni ethdev on numa socket %u\n",
>>>> +                  numa_node);
>>>> +
>>>> +  data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
>>>> +  if (data == NULL)
>>>> +          goto error;
>>>> +
>>>> +  internals = rte_zmalloc_socket(name, sizeof(*internals), 0,
>>>> numa_node);
>>>> +  if (internals == NULL)
>>>> +          goto error;
>>>> +
>>>> +  /* reserve an ethdev entry */
>>>> +  eth_dev = rte_eth_dev_allocate(name);
>>>> +  if (eth_dev == NULL)
>>>> +          goto error;
>>>> +
>>>> +  data->dev_private = internals;
>>>> +  data->port_id = eth_dev->data->port_id;
>>>> +  memmove(data->name, eth_dev->data->name, sizeof(data-
>>>>> name));
>>>> +  data->nb_rx_queues = 1;
>>>> +  data->nb_tx_queues = 1;
>>>> +  data->dev_link = pmd_link;
>>>> +  data->mac_addrs = &eth_addr;
>>>> +
>>>> +  eth_dev->data = data;
>>>> +  eth_dev->dev_ops = &eth_kni_ops;
>>>> +  eth_dev->driver = NULL;
>>>> +
>>>> +  data->dev_flags = RTE_ETH_DEV_DETACHABLE;
>>>> +  data->kdrv = RTE_KDRV_NONE;
>>>> +  data->drv_name = eth_kni_drv.driver.name;
>>>> +  data->numa_node = numa_node;
>>>> +
>>>> +  return eth_dev;
>>>> +
>>>> +error:
>>>> +  rte_free(data);
>>>> +  rte_free(internals);
>>>> +
>>>> +  return NULL;
>>>> +}
>>>> +
>>>> +static int
>>>> +kni_init(void)
>>>> +{
>>>> +  if (is_kni_initialized == 0)
>>>> +          rte_kni_init(MAX_KNI_PORTS);
>>>> +
>>>> +  is_kni_initialized += 1;
>>>> +
>>>> +  return 0;
>>>> +}
>>>> +
>>>> +static int
>>>> +eth_kni_probe(const char *name, const char *params __rte_unused)
>>>> +{
>>>> +  struct rte_eth_dev *eth_dev;
>>>> +  int ret;
>>>> +
>>>> +  RTE_LOG(INFO, PMD, "Initializing eth_kni for %s\n", name);
>>>> +
>>>> +  ret = kni_init();
>>>> +  if (ret < 0)
>>>> +          /* Not return error to prevent panic in rte_eal_init() */
>>>> +          return 0;
>>>
>>> If we don't return error here, the application that needs to add KNI ports
>> eventually will fail.  If it's a fail-stop situation, isn't it better to 
>> return error
>> where the it happened?
>>
>> I am not sure this is fail-stop situation, but instead this gives a
>> chance to applicaton for a graceful exit.
>>
>> If an error value returned here, it will lead to a rte_panic() and
>> application terminated abnormally!
>>
>> But if we return a success at this point, since no ethernet device
>> created, there is no handler in application to use, which also means no
>> KNI interface created.
>> Application can check number of ports and recognize KNI port is missing,
>> app may chose to terminate or not, also it prefers to terminate, can do
>> it properly.
> 
> I might be wrong but as far as I know,  other virtual or physical PMDS do not 
> have this behavior.  What you proposed makes sense but it also means that the 
> application needs extra logic (checking if all ports are successfully 
> initialized) to handle such failures (depending on the application, it might 
> be able to proceed or it might need to fail-stop).  Personally I would prefer 
> consistency across all PMDs here no matter what behavior we choose here as 
> that's the "contract" the application needs to know.

Right, other PMDs don't have this behavior, I will update this to be
consistent with others.

>  
>>>
>>>> +  eth_dev = eth_kni_create(name, rte_socket_id());
>>>> +  if (eth_dev == NULL)
>>>> +          return -1;
>>>> +
>>>> +  eth_dev->rx_pkt_burst = eth_kni_rx;
>>>> +  eth_dev->tx_pkt_burst = eth_kni_tx;
>>>> +
>>>> +  return 0;
>>>> +}
>>>> +
>>>> +static int
>>>> +eth_kni_remove(const char *name)
>>>> +{
>>>> +  struct rte_eth_dev *eth_dev;
>>>> +  struct pmd_internals *internals;
>>>> +
>>>> +  RTE_LOG(INFO, PMD, "Un-Initializing eth_kni for %s\n", name);
>>>> +
>>>> +  /* find the ethdev entry */
>>>> +  eth_dev = rte_eth_dev_allocated(name);
>>>> +  if (eth_dev == NULL)
>>>> +          return -1;
>>>> +
>>>> +  eth_kni_dev_stop(eth_dev);
>>>> +
>>>> +  if (eth_dev->data) {
>>>> +          internals = eth_dev->data->dev_private;
>>>> +          rte_kni_release(internals->kni);
>>>> +
>>>> +          rte_free(internals);
>>>> +  }
>>>> +  rte_free(eth_dev->data);
>>>> +
>>>> +  rte_eth_dev_release_port(eth_dev);
>>>> +
>>>> +  is_kni_initialized -= 1;
>>>> +  if (is_kni_initialized == 0)
>>>> +          rte_kni_close();
>>>> +
>>>> +  return 0;
>>>> +}
>>>> +
>>>> +static struct rte_vdev_driver eth_kni_drv = {
>>>> +  .probe = eth_kni_probe,
>>>> +  .remove = eth_kni_remove,
>>>> +};
>>>> +
>>>> +RTE_PMD_REGISTER_VDEV(net_kni, eth_kni_drv);
>>>> diff --git a/drivers/net/kni/rte_pmd_kni_version.map
>>>> b/drivers/net/kni/rte_pmd_kni_version.map
>>>> new file mode 100644
>>>> index 0000000..31eca32
>>>> --- /dev/null
>>>> +++ b/drivers/net/kni/rte_pmd_kni_version.map
>>>> @@ -0,0 +1,4 @@
>>>> +DPDK_17.02 {
>>>> +
>>>> +  local: *;
>>>> +};
>>>> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
>>>> index f75f0e2..af02816 100644
>>>> --- a/mk/rte.app.mk
>>>> +++ b/mk/rte.app.mk
>>>> @@ -59,11 +59,6 @@ _LDLIBS-y += -L$(RTE_SDK_BIN)/lib
>>>>  #
>>>>  # Order is important: from higher level to lower level
>>>>  #
>>>> -
>>>> -ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
>>>> -_LDLIBS-$(CONFIG_RTE_LIBRTE_KNI)            += -lrte_kni
>>>> -endif
>>>> -
>>>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PIPELINE)       += -lrte_pipeline
>>>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_TABLE)          += -lrte_table
>>>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PORT)           += -lrte_port
>>>> @@ -84,6 +79,10 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_POWER)          += -
>>>> lrte_power
>>>>
>>>>  _LDLIBS-y += --whole-archive
>>>>
>>>> +ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
>>>> +_LDLIBS-$(CONFIG_RTE_LIBRTE_KNI)            += -lrte_kni
>>>> +endif
>>>> +
>>>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_TIMER)          += -lrte_timer
>>>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_HASH)           += -lrte_hash
>>>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_VHOST)          += -lrte_vhost
>>>> @@ -115,6 +114,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_ENIC_PMD)       +=
>> -
>>>> lrte_pmd_enic
>>>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_FM10K_PMD)      += -lrte_pmd_fm10k
>>>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_I40E_PMD)       += -lrte_pmd_i40e
>>>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD)      += -lrte_pmd_ixgbe
>>>> +_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_KNI)        += -lrte_pmd_kni
>>>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD)       += -lrte_pmd_mlx4 -
>>>> libverbs
>>>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD)       += -lrte_pmd_mlx5 -
>>>> libverbs
>>>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_MPIPE_PMD)      += -lrte_pmd_mpipe -
>> lgxio
>>>> --
>>>> 2.9.3
>>>
>

Re: [dpdk-dev] [PATCH v4] net/kni: add KNI PMD

Reply via email to