Hi Matan,

A few comments from me below.
BTW, do you plan to add a mandatory ownership check to the control-path
functions that change port configuration?
Konstantin

> -----Original Message-----
> From: Matan Azrad [mailto:ma...@mellanox.com]
> Sent: Sunday, January 7, 2018 9:46 AM
> To: Thomas Monjalon <tho...@monjalon.net>; Gaetan Rivet 
> <gaetan.ri...@6wind.com>; Wu, Jingjing <jingjing...@intel.com>
> Cc: dev@dpdk.org; Neil Horman <nhor...@tuxdriver.com>; Richardson, Bruce 
> <bruce.richard...@intel.com>; Ananyev, Konstantin
> <konstantin.anan...@intel.com>
> Subject: [PATCH v2 2/6] ethdev: add port ownership
> 
> The ownership of a port is implicit in DPDK.
> Making it explicit is better from the next reasons:
> 1. It will define well who is in charge of the port usage synchronization.
> 2. A library could work on top of a port.
> 3. A port can work on top of another port.
> 
> Also in the fail-safe case, an issue has been met in testpmd.
> We need to check that the application is not trying to use a port which
> is already managed by fail-safe.
> 
> A port owner is built from owner id(number) and owner name(string) while
> the owner id must be unique to distinguish between two identical entity
> instances and the owner name can be any name.
> The name helps to logically recognize the owner by different DPDK
> entities and allows easy debug.
> Each DPDK entity can allocate an owner unique identifier and can use it
> and its preferred name to owns valid ethdev ports.
> Each DPDK entity can get any port owner status to decide if it can
> manage the port or not.
> 
> The mechanism is synchronized for both the primary process threads and
> the secondary processes threads to allow secondary process entity to be
> a port owner.
> 
> Add a sinchronized ownership mechanism to DPDK Ethernet devices to
> avoid multiple management of a device by different DPDK entities.
> 
> The current ethdev internal port management is not affected by this
> feature.
> 
> Signed-off-by: Matan Azrad <ma...@mellanox.com>
> ---
>  doc/guides/prog_guide/poll_mode_drv.rst |  14 ++-
>  lib/librte_ether/rte_ethdev.c           | 206 
> ++++++++++++++++++++++++++++++--
>  lib/librte_ether/rte_ethdev.h           |  89 ++++++++++++++
>  lib/librte_ether/rte_ethdev_version.map |  12 ++
>  4 files changed, 311 insertions(+), 10 deletions(-)


> 
> 
> diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
> index 684e3e8..0e12452 100644
> --- a/lib/librte_ether/rte_ethdev.c
> +++ b/lib/librte_ether/rte_ethdev.c
> @@ -70,7 +70,10 @@
> 
>  static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data";
>  struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS];
> +/* ports data array stored in shared memory */
>  static struct rte_eth_dev_data *rte_eth_dev_data;
> +/* next owner identifier stored in shared memory */
> +static uint16_t *rte_eth_next_owner_id;
>  static uint8_t eth_dev_last_created_port;
> 
>  /* spinlock for eth device callbacks */
> @@ -82,6 +85,9 @@
>  /* spinlock for add/remove tx callbacks */
>  static rte_spinlock_t rte_eth_tx_cb_lock = RTE_SPINLOCK_INITIALIZER;
> 
> +/* spinlock for eth device ownership management stored in shared memory */
> +static rte_spinlock_t *rte_eth_dev_ownership_lock;
> +
>  /* store statistics names and its offset in stats structure  */
>  struct rte_eth_xstats_name_off {
>       char name[RTE_ETH_XSTATS_NAME_SIZE];
> @@ -153,14 +159,18 @@ enum {
>  }
> 
>  static void
> -rte_eth_dev_data_alloc(void)
> +rte_eth_dev_share_data_alloc(void)
>  {
>       const unsigned flags = 0;
>       const struct rte_memzone *mz;
> +     const unsigned int data_size = RTE_MAX_ETHPORTS *
> +                                             sizeof(*rte_eth_dev_data);
> 
>       if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> +             /* Allocate shared memory for port data and ownership */
>               mz = rte_memzone_reserve(MZ_RTE_ETH_DEV_DATA,
> -                             RTE_MAX_ETHPORTS * sizeof(*rte_eth_dev_data),
> +                             data_size + sizeof(*rte_eth_next_owner_id) +
> +                             sizeof(*rte_eth_dev_ownership_lock),
>                               rte_socket_id(), flags);
>       } else
>               mz = rte_memzone_lookup(MZ_RTE_ETH_DEV_DATA);
> @@ -168,9 +178,17 @@ enum {
>               rte_panic("Cannot allocate memzone for ethernet port data\n");
> 
>       rte_eth_dev_data = mz->addr;
> -     if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> -             memset(rte_eth_dev_data, 0,
> -                             RTE_MAX_ETHPORTS * sizeof(*rte_eth_dev_data));
> +     rte_eth_next_owner_id = (uint16_t *)((uintptr_t)mz->addr +
> +                                          data_size);
> +     rte_eth_dev_ownership_lock = (rte_spinlock_t *)
> +             ((uintptr_t)rte_eth_next_owner_id +
> +              sizeof(*rte_eth_next_owner_id));


I think that might leave rte_eth_dev_ownership_lock at an address that is not 
4B aligned...
Why not just put all the data that you are trying to allocate as one chunk 
into the same struct:
static struct {
        uint16_t next_owner_id;
        /* spinlock for eth device ownership management stored in shared memory */
        rte_spinlock_t dev_ownership_lock;
        rte_eth_dev_data *data;
} rte_eth_dev_data;
and allocate/use it everywhere?
That would simplify allocation/management stuff. 

It is good to see that scanning/updating rte_eth_dev_data[] is now lock 
protected,
but it might not be a good idea to protect both data[] and next_owner_id 
with the same lock.
In fact, for next_owner_id you don't need a lock at all - just rte_atomic_t 
should be enough.
Another alternative would be to use 2 locks - one for next_owner_id and a 
second one for the actual data[] protection.

Another thing - you'll probably need to grab/release the lock inside 
rte_eth_dev_allocated() too.
It is a public function used by drivers, so it needs to be protected as well.

> +
> +     if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> +             memset(rte_eth_dev_data, 0, data_size);
> +             *rte_eth_next_owner_id = RTE_ETH_DEV_NO_OWNER + 1;
> +             rte_spinlock_init(rte_eth_dev_ownership_lock);
> +     }
>  }
> 
>  struct rte_eth_dev *
> @@ -225,7 +243,7 @@ struct rte_eth_dev *
>       }
> 
>       if (rte_eth_dev_data == NULL)
> -             rte_eth_dev_data_alloc();
> +             rte_eth_dev_share_data_alloc();
> 
>       if (rte_eth_dev_allocated(name) != NULL) {
>               RTE_PMD_DEBUG_TRACE("Ethernet Device with name %s already 
> allocated!\n",
> @@ -253,7 +271,7 @@ struct rte_eth_dev *
>       struct rte_eth_dev *eth_dev;
> 
>       if (rte_eth_dev_data == NULL)
> -             rte_eth_dev_data_alloc();
> +             rte_eth_dev_share_data_alloc();
> 
>       for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
>               if (strcmp(rte_eth_dev_data[i].name, name) == 0)
> @@ -278,8 +296,12 @@ struct rte_eth_dev *
>       if (eth_dev == NULL)
>               return -EINVAL;
> 
> -     memset(eth_dev->data, 0, sizeof(struct rte_eth_dev_data));
> +     rte_spinlock_lock(rte_eth_dev_ownership_lock);
> +
>       eth_dev->state = RTE_ETH_DEV_UNUSED;
> +     memset(eth_dev->data, 0, sizeof(struct rte_eth_dev_data));
> +
> +     rte_spinlock_unlock(rte_eth_dev_ownership_lock);
>       return 0;
>  }
> 
> @@ -294,6 +316,174 @@ struct rte_eth_dev *
>               return 1;
>  }
> 
> +static int
> +rte_eth_is_valid_owner_id(uint16_t owner_id)
> +{
> +     if (owner_id == RTE_ETH_DEV_NO_OWNER ||
> +         (*rte_eth_next_owner_id > RTE_ETH_DEV_NO_OWNER &&
> +          *rte_eth_next_owner_id <= owner_id)) {
> +             RTE_LOG(ERR, EAL, "Invalid owner_id=%d.\n", owner_id);
> +             return 0;
> +     }
> +     return 1;
> +}
> +
> +uint16_t
> +rte_eth_find_next_owned_by(uint16_t port_id, const uint16_t owner_id)
> +{
> +     while (port_id < RTE_MAX_ETHPORTS &&
> +            (rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED ||
> +            rte_eth_devices[port_id].data->owner.id != owner_id))
> +             port_id++;
> +
> +     if (port_id >= RTE_MAX_ETHPORTS)
> +             return RTE_MAX_ETHPORTS;
> +
> +     return port_id;
> +}
> +
> +int
> +rte_eth_dev_owner_new(uint16_t *owner_id)
> +{
> +     int ret = 0;
> +
> +     rte_spinlock_lock(rte_eth_dev_ownership_lock);
> +
> +     if (*rte_eth_next_owner_id == RTE_ETH_DEV_NO_OWNER) {
> +             /* Counter wrap around. */
> +             RTE_PMD_DEBUG_TRACE("Reached maximum number of Ethernet port 
> owners.\n");
> +             ret = -EUSERS;
> +     } else {
> +             *owner_id = (*rte_eth_next_owner_id)++;
> +     }
> +
> +     rte_spinlock_unlock(rte_eth_dev_ownership_lock);
> +     return ret;
> +}
> +
> +int
> +rte_eth_dev_owner_set(const uint16_t port_id,
> +                   const struct rte_eth_dev_owner *owner)

As a nit - if you had rte_eth_dev_owner_set(port_id, old_owner, new_owner), 
that might be more convenient for the user, and it would greatly simplify the 
unset() part:
just set(port_id, cur_owner, zero_owner);

> +{
> +     struct rte_eth_dev_owner *port_owner;
> +     int ret = 0;
> +     int sret;
> +
> +     rte_spinlock_lock(rte_eth_dev_ownership_lock);
> +
> +     if (!rte_eth_dev_is_valid_port(port_id)) {
> +             RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
> +             ret = -ENODEV;
> +             goto unlock;
> +     }
> +
> +     if (!rte_eth_is_valid_owner_id(owner->id)) {
> +             ret = -EINVAL;
> +             goto unlock;
> +     }
> +
> +     port_owner = &rte_eth_devices[port_id].data->owner;
> +     if (port_owner->id != RTE_ETH_DEV_NO_OWNER &&
> +         port_owner->id != owner->id) {
> +             RTE_LOG(ERR, EAL,
> +                     "Cannot set owner to port %d already owned by 
> %s_%05d.\n",
> +                     port_id, port_owner->name, port_owner->id);
> +             ret = -EPERM;
> +             goto unlock;
> +     }
> +
> +     sret = snprintf(port_owner->name, RTE_ETH_MAX_OWNER_NAME_LEN, "%s",
> +                     owner->name);
> +     if (sret < 0 || sret >= RTE_ETH_MAX_OWNER_NAME_LEN) {

Personally, I don't see any reason to fail if the description was truncated...
Another alternative - just use rte_malloc() here to allocate a buffer big 
enough to hold the description.

> +             memset(port_owner->name, 0, RTE_ETH_MAX_OWNER_NAME_LEN);
> +             RTE_LOG(ERR, EAL, "Invalid owner name.\n");
> +             ret = -EINVAL;
> +             goto unlock;
> +     }
> +
> +     port_owner->id = owner->id;
> +     RTE_PMD_DEBUG_TRACE("Port %d owner is %s_%05d.\n", port_id,
> +                         owner->name, owner->id);
> +

As another nit - you can avoid all these gotos by restructuring the code a bit:

rte_eth_dev_owner_set(const uint16_t port_id,
                      const struct rte_eth_dev_owner *owner)
{
    rte_spinlock_lock(...);
    ret = _eth_dev_owner_set_unlocked(port_id, owner);
    rte_spinlock_unlock(...);
    return ret;
}


> +unlock:
> +     rte_spinlock_unlock(rte_eth_dev_ownership_lock);
> +     return ret;
> +}
> +
> +int
> +rte_eth_dev_owner_unset(const uint16_t port_id, const uint16_t owner_id)
> +{
> +     struct rte_eth_dev_owner *port_owner;
> +     int ret = 0;
> +
> +     rte_spinlock_lock(rte_eth_dev_ownership_lock);
> +
> +     if (!rte_eth_dev_is_valid_port(port_id)) {
> +             RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
> +             ret = -ENODEV;
> +             goto unlock;
> +     }
> +
> +     if (!rte_eth_is_valid_owner_id(owner_id)) {
> +             ret = -EINVAL;
> +             goto unlock;
> +     }
> +
> +     port_owner = &rte_eth_devices[port_id].data->owner;
> +     if (port_owner->id != owner_id) {
> +             RTE_LOG(ERR, EAL, "Cannot unset port %d owner (%s_%05d) by"
> +                     " a different owner with id %5d.\n", port_id,
> +                     port_owner->name, port_owner->id, owner_id);
> +             ret = -EPERM;
> +             goto unlock;
> +     }
> +     RTE_PMD_DEBUG_TRACE("Port %d owner %s_%05d has removed.\n", port_id,
> +                         port_owner->name, port_owner->id);
> +
> +     memset(port_owner, 0, sizeof(struct rte_eth_dev_owner));
> +
> +unlock:
> +     rte_spinlock_unlock(rte_eth_dev_ownership_lock);
> +     return ret;
> +}
> +
> +void
> +rte_eth_dev_owner_delete(const uint16_t owner_id)
> +{
> +     uint16_t port_id;
> +
> +     rte_spinlock_lock(rte_eth_dev_ownership_lock);
> +
> +     if (rte_eth_is_valid_owner_id(owner_id)) {
> +             RTE_ETH_FOREACH_DEV_OWNED_BY(port_id, owner_id)
> +                     memset(&rte_eth_devices[port_id].data->owner, 0,
> +                            sizeof(struct rte_eth_dev_owner));
> +             RTE_PMD_DEBUG_TRACE("All port owners owned by %05d identifier"
> +                                 " have removed.\n", owner_id);
> +     }
> +
> +     rte_spinlock_unlock(rte_eth_dev_ownership_lock);
> +}
> +
> +int
> +rte_eth_dev_owner_get(const uint16_t port_id, struct rte_eth_dev_owner 
> *owner)
> +{
> +     int ret = 0;
> +
> +     rte_spinlock_lock(rte_eth_dev_ownership_lock);
> +
> +     if (!rte_eth_dev_is_valid_port(port_id)) {
> +             RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
> +             ret = -ENODEV;
> +     } else {
> +             rte_memcpy(owner, &rte_eth_devices[port_id].data->owner,
> +                        sizeof(*owner));
> +     }
> +
> +     rte_spinlock_unlock(rte_eth_dev_ownership_lock);
> +     return ret;
> +}
> +
>  int
>  rte_eth_dev_socket_id(uint16_t port_id)
>  {
> diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
> index 57b61ed..88ad765 100644
> --- a/lib/librte_ether/rte_ethdev.h
> +++ b/lib/librte_ether/rte_ethdev.h
> @@ -1760,6 +1760,15 @@ struct rte_eth_dev_sriov {
> 
>  #define RTE_ETH_NAME_MAX_LEN RTE_DEV_NAME_MAX_LEN
> 
> +#define RTE_ETH_DEV_NO_OWNER 0
> +
> +#define RTE_ETH_MAX_OWNER_NAME_LEN 64
> +
> +struct rte_eth_dev_owner {
> +     uint16_t id; /**< The owner unique identifier. */

Why limit yourself to 16 bits here?
Why not uint32_t/uint64_t - or even uuid_t, and let a system library generate 
it for you?
You wouldn't need to worry about overflows then.

> +     char name[RTE_ETH_MAX_OWNER_NAME_LEN]; /**< The owner name. */
> +};
> +
>  /**
>   * @internal
>   * The data part, with no function pointers, associated with each ethernet 
> device.
> @@ -1810,6 +1819,7 @@ struct rte_eth_dev_data {
>       int numa_node;  /**< NUMA node connection */
>       struct rte_vlan_filter_conf vlan_filter_conf;
>       /**< VLAN filter configuration. */
> +     struct rte_eth_dev_owner owner; /**< The port owner. */
>  };
> 
>  /** Device supports link state interrupt */
> @@ -1846,6 +1856,85 @@ struct rte_eth_dev_data {
> 
> 
>  /**
> + * Iterates over valid ethdev ports owned by a specific owner.
> + *
> + * @param port_id
> + *   The id of the next possible valid owned port.
> + * @param    owner_id
> + *  The owner identifier.
> + *  RTE_ETH_DEV_NO_OWNER means iterate over all valid ownerless ports.
> + * @return
> + *   Next valid port id owned by owner_id, RTE_MAX_ETHPORTS if there is none.
> + */
> +uint16_t rte_eth_find_next_owned_by(uint16_t port_id, const uint16_t 
> owner_id);
> +
> +/**
> + * Macro to iterate over all enabled ethdev ports owned by a specific owner.
> + */
> +#define RTE_ETH_FOREACH_DEV_OWNED_BY(p, o) \
> +     for (p = rte_eth_find_next_owned_by(0, o); \
> +          (unsigned int)p < (unsigned int)RTE_MAX_ETHPORTS; \
> +          p = rte_eth_find_next_owned_by(p + 1, o))
> +
> +/**
> + * Get a new unique owner identifier.
> + * An owner identifier is used to owns Ethernet devices by only one DPDK 
> entity
> + * to avoid multiple management of device by different entities.
> + *
> + * @param    owner_id
> + *   Owner identifier pointer.
> + * @return
> + *   Negative errno value on error, 0 on success.
> + */
> +int rte_eth_dev_owner_new(uint16_t *owner_id);
> +
> +/**
> + * Set an Ethernet device owner.
> + *
> + * @param    port_id
> + *  The identifier of the port to own.
> + * @param    owner
> + *  The owner pointer.
> + * @return
> + *  Negative errno value on error, 0 on success.
> + */
> +int rte_eth_dev_owner_set(const uint16_t port_id,
> +                       const struct rte_eth_dev_owner *owner);
> +
> +/**
> + * Unset Ethernet device owner to make the device ownerless.
> + *
> + * @param    port_id
> + *  The identifier of port to make ownerless.
> + * @param    owner
> + *  The owner identifier.
> + * @return
> + *  0 on success, negative errno value on error.
> + */
> +int rte_eth_dev_owner_unset(const uint16_t port_id, const uint16_t owner_id);
> +
> +/**
> + * Remove owner from all Ethernet devices owned by a specific owner.
> + *
> + * @param    owner
> + *  The owner identifier.
> + */
> +void rte_eth_dev_owner_delete(const uint16_t owner_id);
> +
> +/**
> + * Get the owner of an Ethernet device.
> + *
> + * @param    port_id
> + *  The port identifier.
> + * @param    owner
> + *  The owner structure pointer to fill.
> + * @return
> + *  0 on success, negative errno value on error..
> + */
> +int rte_eth_dev_owner_get(const uint16_t port_id,
> +                       struct rte_eth_dev_owner *owner);
> +
> +/**
>   * Get the total number of Ethernet devices that have been successfully
>   * initialized by the matching Ethernet driver during the PCI probing phase
>   * and that are available for applications to use. These devices must be
> diff --git a/lib/librte_ether/rte_ethdev_version.map 
> b/lib/librte_ether/rte_ethdev_version.map
> index e9681ac..5d20b5f 100644
> --- a/lib/librte_ether/rte_ethdev_version.map
> +++ b/lib/librte_ether/rte_ethdev_version.map
> @@ -198,6 +198,18 @@ DPDK_17.11 {
> 
>  } DPDK_17.08;
> 
> +DPDK_18.02 {
> +     global:
> +
> +     rte_eth_dev_owner_delete;
> +     rte_eth_dev_owner_get;
> +     rte_eth_dev_owner_new;
> +     rte_eth_dev_owner_set;
> +     rte_eth_dev_owner_unset;
> +     rte_eth_find_next_owned_by;
> +
> +} DPDK_17.11;
> +
>  EXPERIMENTAL {
>       global:
> 
> --
> 1.8.3.1

Reply via email to