On Tue, Jun 18, 2019 at 08:15:38PM +0300, Leon Romanovsky wrote:
> From: Yishai Hadas <yish...@mellanox.com>
> 
> Enable subscription for device events over DEVX.
> 
> Each subscription is added to the two-level XA data structure according
> to its event number and, in case it was given, the DEVX object
> information, together with the given target fd.
> 
> Those events will be reported over the given fd once they occur.
> Downstream patches will manage the dispatching to any subscription.
> 
> Signed-off-by: Yishai Hadas <yish...@mellanox.com>
> Signed-off-by: Leon Romanovsky <leo...@mellanox.com>
>  drivers/infiniband/hw/mlx5/devx.c        | 564 ++++++++++++++++++++++-
>  include/uapi/rdma/mlx5_user_ioctl_cmds.h |   9 +
>  2 files changed, 566 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/mlx5/devx.c 
> b/drivers/infiniband/hw/mlx5/devx.c
> index e9b9ba5a3e9a..304b13e7a265 100644
> +++ b/drivers/infiniband/hw/mlx5/devx.c
> @@ -14,6 +14,7 @@
>  #include <linux/mlx5/driver.h>
>  #include <linux/mlx5/fs.h>
>  #include "mlx5_ib.h"
> +#include <linux/xarray.h>
>  
>  #define UVERBS_MODULE_NAME mlx5_ib
>  #include <rdma/uverbs_named_ioctl.h>
> @@ -33,6 +34,37 @@ struct devx_async_data {
>       struct mlx5_ib_uapi_devx_async_cmd_hdr hdr;
>  };
>  
> +/* first level XA value data structure */
> +struct devx_event {
> +     struct xarray object_ids; /* second XA level, Key = object id */
> +     struct list_head unaffiliated_list;
> +};
> +
> +/* second level XA value data structure */
> +struct devx_obj_event {
> +     struct rcu_head rcu;
> +     struct list_head obj_sub_list;
> +};
> +
> +struct devx_event_subscription {
> +     struct list_head file_list; /* headed in private_data->
> +                                  * subscribed_events_list
> +                                  */
> +     struct list_head xa_list; /* headed in devx_event->unaffiliated_list or
> +                                * devx_obj_event->obj_sub_list
> +                                */
> +     struct list_head obj_list; /* headed in devx_object */
> +
> +     u32 xa_key_level1;
> +     u32 xa_key_level2;
> +     struct rcu_head rcu;
> +     u64 cookie;
> +     bool is_obj_related;
> +     struct ib_uobject *fd_uobj;
> +     void *object;   /* May need direct access upon hot unplug */

This should be a 'struct file *' and have a better name.

And I'm unclear why we need to store both the ib_uobject and the
struct file for the same thing? And why are we storing the uobj here
instead of the struct devx_async_event_file *?

Since uobj->object == filp && filp->private_data == uobj, I have a
hard time understanding why we need both things; it seems to me that
if we get the fget on the filp then we can rely on the
filp->private_data to get back to the devx_async_event_file.

> +     struct eventfd_ctx *eventfd;
> +};
> +

>  /*
>   * As the obj_id in the firmware is not globally unique the object type
>   * must be considered upon checking for a valid object id.
> @@ -1143,14 +1275,53 @@ static void devx_cleanup_mkey(struct devx_obj *obj)
>       write_unlock_irqrestore(&table->lock, flags);
>  }
>  
> +static void devx_cleanup_subscription(struct mlx5_ib_dev *dev,
> +                                   struct devx_event_subscription *sub)
> +{
> +     list_del_rcu(&sub->file_list);
> +     list_del_rcu(&sub->xa_list);
> +
> +     if (sub->is_obj_related) {

is_obj_related looks like it is just list_empty(obj_list)??

Success oriented flow

> @@ -1523,6 +1700,350 @@ static int 
> UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)(
>       return err;
>  }
>  
> +static void
> +subscribe_event_xa_dealloc(struct mlx5_devx_event_table *devx_event_table,
> +                        u32 key_level1,
> +                        u32 key_level2,
> +                        struct devx_obj_event *alloc_obj_event)
> +{
> +     struct devx_event *event;
> +
> +     /* Level 1 is valid for future use - no need to free */
> +     if (!alloc_obj_event)
> +             return;
> +
> +     event = xa_load(&devx_event_table->event_xa, key_level1);
> +     WARN_ON(!event);
> +
> +     xa_erase(&event->object_ids, key_level2);

Shouldn't this only erase if the value stored is NULL?

> +     kfree(alloc_obj_event);
> +}
> +
> +static int
> +subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
> +                      u32 key_level1,
> +                      bool is_level2,
> +                      u32 key_level2,
> +                      struct devx_obj_event **alloc_obj_event)
> +{
> +     struct devx_obj_event *obj_event;
> +     struct devx_event *event;
> +     bool new_entry_level1 = false;
> +     int err;
> +
> +     event = xa_load(&devx_event_table->event_xa, key_level1);
> +     if (!event) {
> +             event = kzalloc(sizeof(*event), GFP_KERNEL);
> +             if (!event)
> +                     return -ENOMEM;
> +
> +             new_entry_level1 = true;
> +             INIT_LIST_HEAD(&event->unaffiliated_list);
> +             xa_init(&event->object_ids);
> +
> +             err = xa_insert(&devx_event_table->event_xa,
> +                             key_level1,
> +                             event,
> +                             GFP_KERNEL);
> +             if (err)
> +                     goto end;
> +     }
> +
> +     if (!is_level2)
> +             return 0;
> +
> +     obj_event = xa_load(&event->object_ids, key_level2);
> +     if (!obj_event) {
> +             err = xa_reserve(&event->object_ids, key_level2, GFP_KERNEL);
> +             if (err)
> +                     goto err_level1;
> +
> +             obj_event = kzalloc(sizeof(*obj_event), GFP_KERNEL);
> +             if (!obj_event) {
> +                     err = -ENOMEM;
> +                     goto err_level2;
> +             }
> +
> +             INIT_LIST_HEAD(&obj_event->obj_sub_list);
> +             *alloc_obj_event = obj_event;

This is goofy, just store the empty obj_event in the xa instead of
using xa_reserve, and when you go to do the error unwind just delete
any level2 'devx_obj_event' that has a list_empty(obj_sub_list), and
get rid of the wonky alloc_obj_event stuff.

The best configuration would be to use devx_cleanup_subscription to
undo the partially ready subscription.

> +     }
> +
> +     return 0;
> +
> +err_level2:
> +     xa_erase(&event->object_ids, key_level2);
> +
> +err_level1:
> +     if (new_entry_level1)
> +             xa_erase(&devx_event_table->event_xa, key_level1);
> +end:
> +     if (new_entry_level1)
> +             kfree(event);

Can't do this, once the level1 is put in the tree it could be referenced by
the irqs. At least it needs a kfree_rcu, most likely it is simpler to
just leave it.

> +#define MAX_NUM_EVENTS 16
> +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
> +     struct uverbs_attr_bundle *attrs)
> +{
> +     struct ib_uobject *devx_uobj = uverbs_attr_get_uobject(
> +                             attrs,
> +                             MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE);
> +     struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
> +             &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
> +     struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
> +     struct ib_uobject *fd_uobj;
> +     struct devx_obj *obj = NULL;
> +     struct devx_async_event_file *ev_file;
> +     struct mlx5_devx_event_table *devx_event_table = &dev->devx_event_table;
> +     u16 *event_type_num_list;
> +     struct devx_event_subscription **event_sub_arr;
> +     struct devx_obj_event  **event_obj_array_alloc;
> +     int redirect_fd;
> +     bool use_eventfd = false;
> +     int num_events;
> +     int num_alloc_xa_entries = 0;
> +     u16 obj_type = 0;
> +     u64 cookie = 0;
> +     u32 obj_id = 0;
> +     int err;
> +     int i;
> +
> +     if (!c->devx_uid)
> +             return -EINVAL;
> +
> +     if (!IS_ERR(devx_uobj)) {
> +             obj = (struct devx_obj *)devx_uobj->object;
> +             if (obj)
> +                     obj_id = get_dec_obj_id(obj->obj_id);
> +     }
> +
> +     fd_uobj = uverbs_attr_get_uobject(attrs,
> +                             MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE);
> +     if (IS_ERR(fd_uobj))
> +             return PTR_ERR(fd_uobj);
> +
> +     ev_file = container_of(fd_uobj, struct devx_async_event_file,
> +                            uobj);
> +
> +     if (uverbs_attr_is_valid(attrs,
> +                              MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM)) {
> +             err = uverbs_copy_from(&redirect_fd, attrs,
> +                            MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM);
> +             if (err)
> +                     return err;
> +
> +             use_eventfd = true;
> +     }
> +
> +     if (uverbs_attr_is_valid(attrs,
> +                              MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE)) {
> +             if (use_eventfd)
> +                     return -EINVAL;
> +
> +             err = uverbs_copy_from(&cookie, attrs,
> +                             MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE);
> +             if (err)
> +                     return err;
> +     }
> +
> +     num_events = uverbs_attr_ptr_get_array_size(
> +             attrs, MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
> +             sizeof(u16));
> +
> +     if (num_events < 0)
> +             return num_events;
> +
> +     if (num_events > MAX_NUM_EVENTS)
> +             return -EINVAL;
> +
> +     event_type_num_list = uverbs_attr_get_alloced_ptr(attrs,
> +                     MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST);
> +
> +     if (!is_valid_events(dev->mdev, num_events, event_type_num_list, obj))
> +             return -EINVAL;
> +
> +     event_sub_arr = uverbs_zalloc(attrs,
> +             MAX_NUM_EVENTS * sizeof(struct devx_event_subscription *));
> +     event_obj_array_alloc = uverbs_zalloc(attrs,
> +             MAX_NUM_EVENTS * sizeof(struct devx_obj_event *));

There are so many list_heads in the devx_event_subscription, why not
use just one of them to store the allocated events instead of this
temp array? ie event_list looks good for this purpose.

> +
> +     if (!event_sub_arr || !event_obj_array_alloc)
> +             return -ENOMEM;
> +
> +     /* Protect from concurrent subscriptions to same XA entries to allow
> +      * both to succeed
> +      */
> +     mutex_lock(&devx_event_table->event_xa_lock);
> +     for (i = 0; i < num_events; i++) {
> +             u32 key_level1;
> +
> +             if (obj)
> +                     obj_type = get_dec_obj_type(obj,
> +                                                 event_type_num_list[i]);
> +             key_level1 = event_type_num_list[i] | obj_type << 16;
> +
> +             err = subscribe_event_xa_alloc(devx_event_table,
> +                                            key_level1,
> +                                            obj ? true : false,
> +                                            obj_id,
> +                                            &event_obj_array_alloc[i]);

Useless ?:

> +             if (err)
> +                     goto err;
> +
> +             num_alloc_xa_entries++;
> +             event_sub_arr[i] = kzalloc(sizeof(*event_sub_arr[i]),
> +                                        GFP_KERNEL);
> +             if (!event_sub_arr[i])
> +                     goto err;
> +
> +             if (use_eventfd) {
> +                     event_sub_arr[i]->eventfd =
> +                             eventfd_ctx_fdget(redirect_fd);
> +
> +                     if (IS_ERR(event_sub_arr[i]->eventfd)) {
> +                             err = PTR_ERR(event_sub_arr[i]->eventfd);
> +                             event_sub_arr[i]->eventfd = NULL;
> +                             goto err;
> +                     }
> +             }
> +
> +             event_sub_arr[i]->cookie = cookie;
> +             event_sub_arr[i]->fd_uobj = fd_uobj;
> +             event_sub_arr[i]->object = fd_uobj->object;
> +             /* May be needed upon cleanup the devx object/subscription */
> +             event_sub_arr[i]->xa_key_level1 = key_level1;
> +             event_sub_arr[i]->xa_key_level2 = obj_id;
> +             event_sub_arr[i]->is_obj_related = obj ? true : false;

Unneeded ?:


Jason

Reply via email to