On Thu, Feb 02, 2023 at 02:49:52PM +0000, Zhang, Qi Z wrote: > > > > -----Original Message----- > > From: Koikkara Reeny, Shibin <shibin.koikkara.re...@intel.com> > > Sent: Thursday, January 19, 2023 11:10 PM > > To: Zhang, Qi Z <qi.z.zh...@intel.com>; dev@dpdk.org; Burakov, Anatoly > > <anatoly.bura...@intel.com>; Richardson, Bruce > > <bruce.richard...@intel.com> > > Cc: Loftus, Ciara <ciara.lof...@intel.com> > > Subject: RE: [PATCH v2] net/af_xdp: AF_XDP PMD CNI Integration > > > > > > > -----Original Message----- > > > From: Zhang, Qi Z <qi.z.zh...@intel.com> > > > Sent: Wednesday, January 18, 2023 12:10 PM > > > To: Koikkara Reeny, Shibin <shibin.koikkara.re...@intel.com>; > > > dev@dpdk.org; Burakov, Anatoly <anatoly.bura...@intel.com>; > > > Richardson, Bruce <bruce.richard...@intel.com> > > > Cc: Loftus, Ciara <ciara.lof...@intel.com> > > > Subject: RE: [PATCH v2] net/af_xdp: AF_XDP PMD CNI Integration > > > > > > > > > > > > > -----Original Message----- > > > > From: Koikkara Reeny, Shibin <shibin.koikkara.re...@intel.com> > > > > Sent: Wednesday, December 14, 2022 11:41 PM > > > > To: dev@dpdk.org; Burakov, Anatoly <anatoly.bura...@intel.com>; > > > > Richardson, Bruce <bruce.richard...@intel.com> > > > > Cc: Loftus, Ciara <ciara.lof...@intel.com>; Zhang, Qi Z > > > > <qi.z.zh...@intel.com>; Koikkara Reeny, Shibin > > > > <shibin.koikkara.re...@intel.com> > > > > Subject: [PATCH v2] net/af_xdp: AF_XDP PMD CNI Integration > > > > > > > > Integrate support for the AF_XDP CNI and device plugin [1] so that > > > > the DPDK AF_XDP PMD can work in an unprivileged container > > environment. > > > > Part of the AF_XDP PMD initialization process involves loading an > > > > eBPF program onto the given netdev. This operation requires > > > > privileges, which prevents the PMD from being able to work in an > > > > unprivileged container (without root access). The plugin CNI handles > > > > the program loading. CNI open Unix Domain Socket (UDS) and waits > > > > listening for a client to make requests over that UDS. The > > > > client(DPDK) connects and a "handshake" occurs, then the File > > > > Descriptor which points to the XSKMAP associated with the loaded > > > > eBPF program is handed over to the client. The client can then > > > > proceed with creating an AF_XDP socket and inserting the socket into > > > > the XSKMAP pointed to by the FD received on the > > > UDS. > > > > > > > > A new vdev arg "use_cni" is created to indicate user wishes to run > > > > the PMD in unprivileged mode and to receive the XSKMAP FD from the > > CNI. > > > > When this flag is set, the XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD > > > > libbpf flag should be used when creating the socket, which tells > > > > libbpf not to load the default libbpf program on the netdev. We tell > > > > libbpf not to do this because the loading is handled by the CNI in this > > scenario. > > > > > > > > [1]: https://github.com/intel/afxdp-plugins-for-kubernetes > > > > > > > > Signed-off-by: Shibin Koikkara Reeny > > > > <shibin.koikkara.re...@intel.com> > > > > --- > > > > drivers/net/af_xdp/rte_eth_af_xdp.c | 337 > > > > +++++++++++++++++++++++++++- > > > > 1 file changed, 325 insertions(+), 12 deletions(-) > > > > > > > > diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c > > > > b/drivers/net/af_xdp/rte_eth_af_xdp.c > > > > index b6ec9bf490..196d98ad97 100644 > > > > --- a/drivers/net/af_xdp/rte_eth_af_xdp.c > > > > +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c > > > > @@ -7,6 +7,7 @@ > > > > #include <string.h> > > > > #include <netinet/in.h> > > > > #include <net/if.h> > > > > +#include <sys/un.h> > > > > #include <sys/socket.h> > > > > #include <sys/ioctl.h> > > > > #include <linux/if_ether.h> > > > > @@ -81,6 +82,24 @@ RTE_LOG_REGISTER_DEFAULT(af_xdp_logtype, > > > > NOTICE); > > > > > > > > #define ETH_AF_XDP_MP_KEY "afxdp_mp_send_fds" > > > > > > > > +#define MAX_LONG_OPT_SZ 64 > > > > +#define UDS_MAX_FD_NUM 2 > > > > +#define UDS_MAX_CMD_LEN 64 > > > > +#define UDS_MAX_CMD_RESP 128 > > > > +#define UDS_XSK_MAP_FD_MSG "/xsk_map_fd" > > > > +#define UDS_SOCK "/tmp/afxdp.sock" > > > > +#define UDS_CONNECT_MSG "/connect" > > > > +#define UDS_HOST_OK_MSG "/host_ok" > > > > +#define UDS_HOST_NAK_MSG "/host_nak" > > > > +#define UDS_VERSION_MSG "/version" > > > > +#define UDS_XSK_MAP_FD_MSG "/xsk_map_fd" > > > > +#define UDS_XSK_SOCKET_MSG "/xsk_socket" > > > > +#define UDS_FD_ACK_MSG "/fd_ack" > > > > +#define UDS_FD_NAK_MSG "/fd_nak" > > > > +#define UDS_FIN_MSG "/fin" > > > > +#define UDS_FIN_ACK_MSG "/fin_ack" > > > > + > > > > + > > > > static int afxdp_dev_count; > > > > > > > > /* Message header to synchronize fds via IPC */ @@ -151,6 +170,7 @@ > > > > struct pmd_internals { > > > > char prog_path[PATH_MAX]; > > > > bool custom_prog_configured; > > > > bool force_copy; > > > > + bool use_cni; > > > > struct bpf_map *map; > > > > > > > > struct rte_ether_addr eth_addr; > > > > @@ -170,6 +190,7 @@ struct pmd_process_private { > > > > #define ETH_AF_XDP_PROG_ARG "xdp_prog" > > > > #define ETH_AF_XDP_BUDGET_ARG > > > "busy_budget" > > > > #define ETH_AF_XDP_FORCE_COPY_ARG "force_copy" > > > > +#define ETH_AF_XDP_USE_CNI_ARG "use_cni" > > > > > > > > static const char * const valid_arguments[] = { > > > > ETH_AF_XDP_IFACE_ARG, > > > > @@ -179,8 +200,8 @@ static const char * const valid_arguments[] = { > > > > ETH_AF_XDP_PROG_ARG, > > > > ETH_AF_XDP_BUDGET_ARG, > > > > ETH_AF_XDP_FORCE_COPY_ARG, > > > > - NULL > > > > -}; > > > > + ETH_AF_XDP_USE_CNI_ARG, > > > > + NULL}; > > > > > > > > static const struct rte_eth_link pmd_link = { > > > > .link_speed = RTE_ETH_SPEED_NUM_10G, @@ -1129,7 +1150,8 > > @@ > > > > xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, > > > > ret = xsk_umem__create(&umem->umem, base_addr, > > > umem_size, > > > > &rxq->fq, &rxq->cq, &usr_config); > > > > if (ret) { > > > > - AF_XDP_LOG(ERR, "Failed to create umem\n"); > > > > + AF_XDP_LOG(ERR, "Failed to create umem [%d]: > > > > [%s]\n", > > > > + errno, strerror(errno)); > > > > goto err; > > > > } > > > > umem->buffer = base_addr; > > > > @@ -1314,6 +1336,245 @@ configure_preferred_busy_poll(struct > > > > pkt_rx_queue *rxq) > > > > return 0; > > > > } > > > > > > > > +static int > > > > +init_uds_sock(struct sockaddr_un *server) { > > > > + int sock; > > > > + > > > > + sock = socket(AF_UNIX, SOCK_SEQPACKET, 0); > > > > + if (sock < 0) { > > > > + AF_XDP_LOG(ERR, "Failed to opening stream socket\n"); > > > > + return -1; > > > > + } > > > > + > > > > + server->sun_family = AF_UNIX; > > > > + strlcpy(server->sun_path, UDS_SOCK, sizeof(server->sun_path)); > > > > + > > > > + if (connect(sock, (struct sockaddr *)server, sizeof(struct > > > > sockaddr_un)) < 0) { > > > > > > seems the server address is hard coded as "/tmp/afxdp.sock", is any > > > spec we follows, or should we parse this as a devargs? > > > better add some comment or external link that help to explain this > > > > It was already hardcoded in the afxdp-plugins > > https://github.com/intel/afxdp-plugins-for- > > kubernetes/blob/main/constants/constants.go . > > OK, I saw this has been explained in your new doc patch. > > Reviewed-by: Qi Zhang <qi.z.zh...@intel.com> > I would suggest the doc patch should be merged into this patch - code and doc should go together. Please do a v3 with both patches together, thanks.
Assuming Qi is ok with it, you can probably keep his ack on the new version. /Bruce