On 5/8/25 2:34 PM, Daniel Borkmann wrote:
> Extend inhibit=on setting with the option to specify a pinned XSK map
> path along with a starting index (default 0) to push the created XSK
> sockets into. Example usage:
> 
>   # ./build/qemu-system-x86_64 [...] \
>     -netdev 
> af-xdp,ifname=eth0,id=net0,mode=native,queues=2,inhibit=on,map-path=/sys/fs/bpf/foo,map-start-index=2
>     -device virtio-net-pci,netdev=net0 [...]
> 
> This is useful for the case where an existing XDP program with XSK map
> is present on the AF_XDP supported phys device and the XSK map not yet
> populated. Qemu will then push the XSK sockets into the specified map.

Hi, Daniel.

Thanks for the patch!

Could you, please, explain the use case a little more?  Is this patch
aiming to improve usability?  Do you have a specific use case in mind?

The main idea behind 'inhibit' is that the qemu doesn't need to have a lot
of privileges to use the pre-loaded program and the pre-created sockets.
But creating the sockets and setting them into a map doesn't allow us to
run without privileges, IIUC.  May be worth mentioning at least in the
commit message.

Also, isn't map-start-index the same thing as start-queue ?  Do we need
both of them?

I didn't read much into the code yet, but this patch is missing a few
bits of documentation, e.g. it's missing an update for qemu-options.hx.

See a few other quick comment below.

Best regards, Ilya Maximets.

> 
> Signed-off-by: Daniel Borkmann <dan...@iogearbox.net>
> Cc: Ilya Maximets <i.maxim...@ovn.org>
> Cc: Jason Wang <jasow...@redhat.com>
> Cc: Anton Protopopov <as...@isovalent.com>
> ---
>  net/af-xdp.c  | 63 +++++++++++++++++++++++++++++++++++++++++++++------
>  qapi/net.json | 24 +++++++++++++-------
>  2 files changed, 72 insertions(+), 15 deletions(-)
> 
> diff --git a/net/af-xdp.c b/net/af-xdp.c
> index 01c5fb914e..ddc52f1307 100644
> --- a/net/af-xdp.c
> +++ b/net/af-xdp.c
> @@ -51,6 +51,8 @@ typedef struct AFXDPState {
>  
>      uint32_t             n_queues;
>      uint32_t             xdp_flags;
> +    char                 *map_path;
> +    uint32_t             map_start_index;
>      bool                 inhibit;
>  } AFXDPState;
>  
> @@ -261,6 +263,7 @@ static void af_xdp_send(void *opaque)
>  static void af_xdp_cleanup(NetClientState *nc)
>  {
>      AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc);
> +    int pin_fd, idx;
>  
>      qemu_purge_queued_packets(nc);
>  
> @@ -282,6 +285,22 @@ static void af_xdp_cleanup(NetClientState *nc)
>                  "af-xdp: unable to remove XDP program from '%s', ifindex: 
> %d\n",
>                  s->ifname, s->ifindex);
>      }
> +    if (s->map_path) {
> +        pin_fd = bpf_obj_get(s->map_path);
> +        if (pin_fd < 0) {
> +            fprintf(stderr,
> +                "af-xdp: unable to remove %s's XSK sockets from '%s', 
> ifindex: %d\n",
> +                s->ifname, s->map_path, s->ifindex);
> +        } else {
> +            idx = nc->queue_index;
> +            if (s->map_start_index > 0) {
> +                idx += s->map_start_index;
> +            }
> +            bpf_map_delete_elem(pin_fd, &idx);
> +            close(pin_fd);
> +        }
> +    }
> +    g_free(s->map_path);
>  }
>  
>  static int af_xdp_umem_create(AFXDPState *s, int sock_fd, Error **errp)
> @@ -343,7 +362,7 @@ static int af_xdp_socket_create(AFXDPState *s,
>          .bind_flags = XDP_USE_NEED_WAKEUP,
>          .xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST,
>      };
> -    int queue_id, error = 0;
> +    int queue_id, pin_fd, xsk_fd, idx, error = 0;
>  
>      s->inhibit = opts->has_inhibit && opts->inhibit;
>      if (s->inhibit) {
> @@ -384,6 +403,23 @@ static int af_xdp_socket_create(AFXDPState *s,
>          }
>      }
>  
> +    if (!error && s->map_path) {
> +        pin_fd = bpf_obj_get(s->map_path);
> +        if (pin_fd < 0) {
> +            error = errno;
> +        } else {
> +         xsk_fd = xsk_socket__fd(s->xsk);

Indentation is off.  Tbas mixed with spaces here and in some other
places in the patch.

> +            idx = s->nc.queue_index;
> +            if (s->map_start_index) {
> +                idx += s->map_start_index;
> +            }
> +            if (bpf_map_update_elem(pin_fd, &idx, &xsk_fd, 0)) {
> +                error = errno;
> +            }
> +            close(pin_fd);
> +        }
> +    }
> +
>      if (error) {
>          error_setg_errno(errp, error,
>                           "failed to create AF_XDP socket for %s queue_id: 
> %d",
> @@ -465,8 +501,8 @@ int net_init_af_xdp(const Netdev *netdev,
>          return -1;
>      }
>  
> -    if ((opts->has_inhibit && opts->inhibit) != !!opts->sock_fds) {
> -        error_setg(errp, "'inhibit=on' requires 'sock-fds' and vice versa");
> +    if ((opts->has_inhibit && opts->inhibit) != !!(opts->sock_fds || 
> opts->map_path)) {
> +        error_setg(errp, "'inhibit=on' requires 'sock-fds' or 'map-path' and 
> vice versa");

This may need some re-wording as 'A requires B or C or vice versa' is
a little hard to understand.

>          return -1;
>      }
>  
> @@ -491,6 +527,12 @@ int net_init_af_xdp(const Netdev *netdev,
>          pstrcpy(s->ifname, sizeof(s->ifname), opts->ifname);
>          s->ifindex = ifindex;
>          s->n_queues = queues;
> +        if (opts->map_path) {
> +            s->map_path = g_strdup(opts->map_path);
> +            if (opts->has_map_start_index && opts->map_start_index > 0) {
> +                s->map_start_index = opts->map_start_index;
> +            }
> +        }
>  
>          if (af_xdp_umem_create(s, sock_fds ? sock_fds[i] : -1, errp)
>              || af_xdp_socket_create(s, opts, errp)) {
> @@ -504,10 +546,17 @@ int net_init_af_xdp(const Netdev *netdev,
>      if (nc0) {
>          s = DO_UPCAST(AFXDPState, nc, nc0);
>          if (bpf_xdp_query_id(s->ifindex, s->xdp_flags, &prog_id) || 
> !prog_id) {
> -            error_setg_errno(errp, errno,
> -                             "no XDP program loaded on '%s', ifindex: %d",
> -                             s->ifname, s->ifindex);
> -            goto err;
> +            if (!s->map_path) {
> +                error_setg_errno(errp, errno,
> +                                 "no XDP program loaded on '%s', ifindex: 
> %d",
> +                                 s->ifname, s->ifindex);
> +                goto err;
> +         } else {
> +                warn_report("no XDP program loaded on '%s', ifindex: %d, "
> +                         "only %"PRIi64" XSK socket%s loaded into map %s at 
> this point",
> +                         s->ifname, s->ifindex, queues, queues > 1 ? "s" : 
> "",
> +                         s->map_path);

How a missing program is not an error?  Seems strange.

> +            }
>          }
>      }
>  
> diff --git a/qapi/net.json b/qapi/net.json
> index 310cc4fd19..c750b805e8 100644
> --- a/qapi/net.json
> +++ b/qapi/net.json
> @@ -454,7 +454,7 @@
>  #     (default: 0).
>  #
>  # @inhibit: Don't load a default XDP program, use one already loaded
> -#     to the interface (default: false).  Requires @sock-fds.
> +#     to the interface (default: false).  Requires @sock-fds or @map-path.
>  #
>  # @sock-fds: A colon (:) separated list of file descriptors for
>  #     already open but not bound AF_XDP sockets in the queue order.
> @@ -462,17 +462,25 @@
>  #     into XDP socket map for corresponding queues.  Requires
>  #     @inhibit.
>  #
> +# @map-path: The path to a pinned xsk map to push file descriptors
> +#     for bound AF_XDP sockets into. Requires @inhibit.
> +#
> +# @map-start-index: Use @map-path to insert xsk sockets starting from
> +#     this index number (default: 0). Requires @map-path.

These are new fields that do not exist in the older versions, so
they will need their own 'since' qualifiers.

> +#
>  # Since: 8.2
>  ##
>  { 'struct': 'NetdevAFXDPOptions',
>    'data': {
> -    'ifname':       'str',
> -    '*mode':        'AFXDPMode',
> -    '*force-copy':  'bool',
> -    '*queues':      'int',
> -    '*start-queue': 'int',
> -    '*inhibit':     'bool',
> -    '*sock-fds':    'str' },
> +    'ifname':           'str',
> +    '*mode':            'AFXDPMode',
> +    '*force-copy':      'bool',
> +    '*queues':          'int',
> +    '*start-queue':     'int',
> +    '*inhibit':         'bool',
> +    '*sock-fds':        'str',
> +    '*map-path':        'str',
> +    '*map-start-index': 'int' },
>    'if': 'CONFIG_AF_XDP' }
>  
>  ##


Reply via email to