Add libbpf and libelf dependency in Makefile.
During initialization, bpf file "xdpsock_kern.o" will be loaded.
Then the driver will always try to link XDP fd with DRV mode first,
then SKB mode if the previous attempt failed.
Link will be released during dev_close.

Note: this is a workaround solution; af_xdp may remove the BPF dependency
in the future.

Signed-off-by: Qi Zhang <qi.z.zh...@intel.com>
---
 drivers/net/af_xdp/Makefile         |   6 +-
 drivers/net/af_xdp/bpf_load.c       | 798 ++++++++++++++++++++++++++++++++++++
 drivers/net/af_xdp/bpf_load.h       |  65 +++
 drivers/net/af_xdp/libbpf.h         | 199 +++++++++
 drivers/net/af_xdp/rte_eth_af_xdp.c |  31 +-
 mk/rte.app.mk                       |   2 +-
 6 files changed, 1097 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/af_xdp/bpf_load.c
 create mode 100644 drivers/net/af_xdp/bpf_load.h
 create mode 100644 drivers/net/af_xdp/libbpf.h

diff --git a/drivers/net/af_xdp/Makefile b/drivers/net/af_xdp/Makefile
index ac38e20bf..a642786de 100644
--- a/drivers/net/af_xdp/Makefile
+++ b/drivers/net/af_xdp/Makefile
@@ -42,7 +42,10 @@ EXPORT_MAP := rte_pmd_af_xdp_version.map
 
 LIBABIVER := 1
 
-CFLAGS += -O3 -I/opt/af_xdp/linux_headers/include
+LINUX_HEADER_DIR := /opt/af_xdp/linux_headers/include
+TOOLS_DIR := /root/af_xdp/npg_dna-dna-linux/tools
+
+CFLAGS += -O3 -I$(LINUX_HEADER_DIR) -I$(TOOLS_DIR)/perf -I$(TOOLS_DIR)/include -Wno-error=sign-compare -Wno-error=cast-qual
 CFLAGS += $(WERROR_FLAGS)
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
 LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
@@ -52,5 +55,6 @@ LDLIBS += -lrte_bus_vdev
 # all source are stored in SRCS-y
 #
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_AF_XDP) += rte_eth_af_xdp.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_AF_XDP) += bpf_load.c
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/af_xdp/bpf_load.c b/drivers/net/af_xdp/bpf_load.c
new file mode 100644
index 000000000..aa632207f
--- /dev/null
+++ b/drivers/net/af_xdp/bpf_load.c
@@ -0,0 +1,798 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/perf_event.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/types.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <poll.h>
+#include <ctype.h>
+#include <assert.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+#include "perf-sys.h"
+
+#define DEBUGFS "/sys/kernel/debug/tracing/"
+
+static char license[128];
+static int kern_version;
+static bool processed_sec[128];
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+int map_fd[MAX_MAPS];
+int prog_fd[MAX_PROGS];
+int event_fd[MAX_PROGS];
+int prog_cnt;
+int prog_array_fd = -1;
+
+struct bpf_map_data map_data[MAX_MAPS];
+int map_data_count = 0;
+
+static int populate_prog_array(const char *event, int prog_fd)
+{
+       int ind = atoi(event), err;
+
+       err = bpf_map_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY);
+       if (err < 0) {
+               printf("failed to store prog_fd in prog_array\n");
+               return -1;
+       }
+       return 0;
+}
+
+static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
+{
+       bool is_socket = strncmp(event, "socket", 6) == 0;
+       bool is_kprobe = strncmp(event, "kprobe/", 7) == 0;
+       bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
+       bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0;
+       bool is_xdp = strncmp(event, "xdp", 3) == 0;
+       bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
+       bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
+       bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
+       bool is_sockops = strncmp(event, "sockops", 7) == 0;
+       bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0;
+       size_t insns_cnt = size / sizeof(struct bpf_insn);
+       enum bpf_prog_type prog_type;
+       char buf[256];
+       int fd, efd, err, id;
+       struct perf_event_attr attr = {};
+
+       attr.type = PERF_TYPE_TRACEPOINT;
+       attr.sample_type = PERF_SAMPLE_RAW;
+       attr.sample_period = 1;
+       attr.wakeup_events = 1;
+
+       if (is_socket) {
+               prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+       } else if (is_kprobe || is_kretprobe) {
+               prog_type = BPF_PROG_TYPE_KPROBE;
+       } else if (is_tracepoint) {
+               prog_type = BPF_PROG_TYPE_TRACEPOINT;
+       } else if (is_xdp) {
+               prog_type = BPF_PROG_TYPE_XDP;
+       } else if (is_perf_event) {
+               prog_type = BPF_PROG_TYPE_PERF_EVENT;
+       } else if (is_cgroup_skb) {
+               prog_type = BPF_PROG_TYPE_CGROUP_SKB;
+       } else if (is_cgroup_sk) {
+               prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
+       } else if (is_sockops) {
+               prog_type = BPF_PROG_TYPE_SOCK_OPS;
+       } else if (is_sk_skb) {
+               prog_type = BPF_PROG_TYPE_SK_SKB;
+       } else {
+               printf("Unknown event '%s'\n", event);
+               return -1;
+       }
+
+       fd = bpf_load_program(prog_type, prog, insns_cnt, license, kern_version,
+                             bpf_log_buf, BPF_LOG_BUF_SIZE);
+       if (fd < 0) {
+               printf("bpf_load_program() err=%d\n%s", errno, bpf_log_buf);
+               return -1;
+       }
+
+       prog_fd[prog_cnt++] = fd;
+
+       if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
+               return 0;
+
+       if (is_socket || is_sockops || is_sk_skb) {
+               if (is_socket)
+                       event += 6;
+               else
+                       event += 7;
+               if (*event != '/')
+                       return 0;
+               event++;
+               if (!isdigit(*event)) {
+                       printf("invalid prog number\n");
+                       return -1;
+               }
+               return populate_prog_array(event, fd);
+       }
+
+       if (is_kprobe || is_kretprobe) {
+               if (is_kprobe)
+                       event += 7;
+               else
+                       event += 10;
+
+               if (*event == 0) {
+                       printf("event name cannot be empty\n");
+                       return -1;
+               }
+
+               if (isdigit(*event))
+                       return populate_prog_array(event, fd);
+
+               snprintf(buf, sizeof(buf),
+                        "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
+                        is_kprobe ? 'p' : 'r', event, event);
+               err = system(buf);
+               if (err < 0) {
+                       printf("failed to create kprobe '%s' error '%s'\n",
+                              event, strerror(errno));
+                       return -1;
+               }
+
+               strcpy(buf, DEBUGFS);
+               strcat(buf, "events/kprobes/");
+               strcat(buf, event);
+               strcat(buf, "/id");
+       } else if (is_tracepoint) {
+               event += 11;
+
+               if (*event == 0) {
+                       printf("event name cannot be empty\n");
+                       return -1;
+               }
+               strcpy(buf, DEBUGFS);
+               strcat(buf, "events/");
+               strcat(buf, event);
+               strcat(buf, "/id");
+       }
+
+       efd = open(buf, O_RDONLY, 0);
+       if (efd < 0) {
+               printf("failed to open event %s\n", event);
+               return -1;
+       }
+
+       err = read(efd, buf, sizeof(buf));
+       if (err < 0 || err >= sizeof(buf)) {
+               printf("read from '%s' failed '%s'\n", event, strerror(errno));
+               return -1;
+       }
+
+       close(efd);
+
+       buf[err] = 0;
+       id = atoi(buf);
+       attr.config = id;
+
+       efd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
+       if (efd < 0) {
+               printf("event %d fd %d err %s\n", id, efd, strerror(errno));
+               return -1;
+       }
+       event_fd[prog_cnt - 1] = efd;
+       err = ioctl(efd, PERF_EVENT_IOC_ENABLE, 0);
+       if (err < 0) {
+               printf("ioctl PERF_EVENT_IOC_ENABLE failed err %s\n",
+                      strerror(errno));
+               return -1;
+       }
+       err = ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd);
+       if (err < 0) {
+               printf("ioctl PERF_EVENT_IOC_SET_BPF failed err %s\n",
+                      strerror(errno));
+               return -1;
+       }
+
+       return 0;
+}
+
+static int load_maps(struct bpf_map_data *maps, int nr_maps,
+                    fixup_map_cb fixup_map)
+{
+       int i, numa_node;
+
+       for (i = 0; i < nr_maps; i++) {
+               if (fixup_map) {
+                       fixup_map(&maps[i], i);
+                       /* Allow userspace to assign map FD prior to creation */
+                       if (maps[i].fd != -1) {
+                               map_fd[i] = maps[i].fd;
+                               continue;
+                       }
+               }
+
+               numa_node = maps[i].def.map_flags & BPF_F_NUMA_NODE ?
+                       maps[i].def.numa_node : -1;
+
+               if (maps[i].def.type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
+                   maps[i].def.type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+                       int inner_map_fd = map_fd[maps[i].def.inner_map_idx];
+
+                       map_fd[i] = bpf_create_map_in_map_node(maps[i].def.type,
+                                                       maps[i].name,
+                                                       maps[i].def.key_size,
+                                                       inner_map_fd,
+                                                       maps[i].def.max_entries,
+                                                       maps[i].def.map_flags,
+                                                       numa_node);
+               } else {
+                       map_fd[i] = bpf_create_map_node(maps[i].def.type,
+                                                       maps[i].name,
+                                                       maps[i].def.key_size,
+                                                       maps[i].def.value_size,
+                                                       maps[i].def.max_entries,
+                                                       maps[i].def.map_flags,
+                                                       numa_node);
+               }
+               if (map_fd[i] < 0) {
+                       printf("failed to create a map: %d %s\n",
+                              errno, strerror(errno));
+                       return 1;
+               }
+               maps[i].fd = map_fd[i];
+
+               if (maps[i].def.type == BPF_MAP_TYPE_PROG_ARRAY)
+                       prog_array_fd = map_fd[i];
+       }
+       return 0;
+}
+
+static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname,
+                  GElf_Shdr *shdr, Elf_Data **data)
+{
+       Elf_Scn *scn;
+
+       scn = elf_getscn(elf, i);
+       if (!scn)
+               return 1;
+
+       if (gelf_getshdr(scn, shdr) != shdr)
+               return 2;
+
+       *shname = elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name);
+       if (!*shname || !shdr->sh_size)
+               return 3;
+
+       *data = elf_getdata(scn, 0);
+       if (!*data || elf_getdata(scn, *data) != NULL)
+               return 4;
+
+       return 0;
+}
+
+static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols,
+                               GElf_Shdr *shdr, struct bpf_insn *insn,
+                               struct bpf_map_data *maps, int nr_maps)
+{
+       int i, nrels;
+
+       nrels = shdr->sh_size / shdr->sh_entsize;
+
+       for (i = 0; i < nrels; i++) {
+               GElf_Sym sym;
+               GElf_Rel rel;
+               unsigned int insn_idx;
+               bool match = false;
+               int map_idx;
+
+               gelf_getrel(data, i, &rel);
+
+               insn_idx = rel.r_offset / sizeof(struct bpf_insn);
+
+               gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym);
+
+               if (insn[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
+                       printf("invalid relo for insn[%d].code 0x%x\n",
+                              insn_idx, insn[insn_idx].code);
+                       return 1;
+               }
+               insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
+
+               /* Match FD relocation against recorded map_data[] offset */
+               for (map_idx = 0; map_idx < nr_maps; map_idx++) {
+                       if (maps[map_idx].elf_offset == sym.st_value) {
+                               match = true;
+                               break;
+                       }
+               }
+               if (match) {
+                       insn[insn_idx].imm = maps[map_idx].fd;
+               } else {
+                       printf("invalid relo for insn[%d] no map_data match\n",
+                              insn_idx);
+                       return 1;
+               }
+       }
+
+       return 0;
+}
+
+static int cmp_symbols(const void *l, const void *r)
+{
+       const GElf_Sym *lsym = (const GElf_Sym *)l;
+       const GElf_Sym *rsym = (const GElf_Sym *)r;
+
+       if (lsym->st_value < rsym->st_value)
+               return -1;
+       else if (lsym->st_value > rsym->st_value)
+               return 1;
+       else
+               return 0;
+}
+
+static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx,
+                                Elf *elf, Elf_Data *symbols, int strtabidx)
+{
+       int map_sz_elf, map_sz_copy;
+       bool validate_zero = false;
+       Elf_Data *data_maps;
+       int i, nr_maps;
+       GElf_Sym *sym;
+       Elf_Scn *scn;
+
+       if (maps_shndx < 0)
+               return -EINVAL;
+       if (!symbols)
+               return -EINVAL;
+
+       /* Get data for maps section via elf index */
+       scn = elf_getscn(elf, maps_shndx);
+       if (scn)
+               data_maps = elf_getdata(scn, NULL);
+       if (!scn || !data_maps) {
+               printf("Failed to get Elf_Data from maps section %d\n",
+                      maps_shndx);
+               return -EINVAL;
+       }
+
+       /* For each map get corresponding symbol table entry */
+       sym = calloc(MAX_MAPS+1, sizeof(GElf_Sym));
+       for (i = 0, nr_maps = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
+               assert(nr_maps < MAX_MAPS+1);
+               if (!gelf_getsym(symbols, i, &sym[nr_maps]))
+                       continue;
+               if (sym[nr_maps].st_shndx != maps_shndx)
+                       continue;
+               /* Only increment if in the maps section */
+               nr_maps++;
+       }
+
+       /* Align to map_fd[] order, via sort on offset in sym.st_value */
+       qsort(sym, nr_maps, sizeof(GElf_Sym), cmp_symbols);
+
+       /* Keeping compatible with ELF maps section changes
+        * ------------------------------------------------
+        * The program size of struct bpf_map_def is known by loader
+        * code, but struct stored in ELF file can be different.
+        *
+        * Unfortunately sym[i].st_size is zero.  To calculate the
+        * struct size stored in the ELF file, assume all struct have
+        * the same size, and simply divide with number of map
+        * symbols.
+        */
+       map_sz_elf = data_maps->d_size / nr_maps;
+       map_sz_copy = sizeof(struct bpf_map_def);
+       if (map_sz_elf < map_sz_copy) {
+               /*
+                * Backward compat, loading older ELF file with
+                * smaller struct, keeping remaining bytes zero.
+                */
+               map_sz_copy = map_sz_elf;
+       } else if (map_sz_elf > map_sz_copy) {
+               /*
+                * Forward compat, loading newer ELF file with larger
+                * struct with unknown features. Assume zero means
+                * feature not used.  Thus, validate rest of struct
+                * data is zero.
+                */
+               validate_zero = true;
+       }
+
+       /* Memcpy relevant part of ELF maps data to loader maps */
+       for (i = 0; i < nr_maps; i++) {
+               unsigned char *addr, *end;
+               struct bpf_map_def *def;
+               const char *map_name;
+               size_t offset;
+
+               map_name = elf_strptr(elf, strtabidx, sym[i].st_name);
+               maps[i].name = strdup(map_name);
+               if (!maps[i].name) {
+                       printf("strdup(%s): %s(%d)\n", map_name,
+                              strerror(errno), errno);
+                       free(sym);
+                       return -errno;
+               }
+
+               /* Symbol value is offset into ELF maps section data area */
+               offset = sym[i].st_value;
+               def = (struct bpf_map_def *)((uint8_t *)data_maps->d_buf + offset);
+               maps[i].elf_offset = offset;
+               memset(&maps[i].def, 0, sizeof(struct bpf_map_def));
+               memcpy(&maps[i].def, def, map_sz_copy);
+
+               /* Verify no newer features were requested */
+               if (validate_zero) {
+                       addr = (unsigned char*) def + map_sz_copy;
+                       end  = (unsigned char*) def + map_sz_elf;
+                       for (; addr < end; addr++) {
+                               if (*addr != 0) {
+                                       free(sym);
+                                       return -EFBIG;
+                               }
+                       }
+               }
+       }
+
+       free(sym);
+       return nr_maps;
+}
+
+static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
+{
+       int fd, i, ret, maps_shndx = -1, strtabidx = -1;
+       Elf *elf;
+       GElf_Ehdr ehdr;
+       GElf_Shdr shdr, shdr_prog;
+       Elf_Data *data, *data_prog, *data_maps = NULL, *symbols = NULL;
+       char *shname, *shname_prog;
+       int nr_maps = 0;
+
+       /* reset global variables */
+       kern_version = 0;
+       memset(license, 0, sizeof(license));
+       memset(processed_sec, 0, sizeof(processed_sec));
+
+       if (elf_version(EV_CURRENT) == EV_NONE)
+               return 1;
+
+       fd = open(path, O_RDONLY, 0);
+       if (fd < 0)
+               return 1;
+
+       elf = elf_begin(fd, ELF_C_READ, NULL);
+
+       if (!elf)
+               return 1;
+
+       if (gelf_getehdr(elf, &ehdr) != &ehdr)
+               return 1;
+
+       /* clear all kprobes */
+       i = system("echo \"\" > /sys/kernel/debug/tracing/kprobe_events");
+
+       /* scan over all elf sections to get license and map info */
+       for (i = 1; i < ehdr.e_shnum; i++) {
+
+               if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
+                       continue;
+
+               if (0) /* helpful for llvm debugging */
+                       printf("section %d:%s data %p size %zd link %d flags %d\n",
+                              i, shname, data->d_buf, data->d_size,
+                              shdr.sh_link, (int) shdr.sh_flags);
+
+               if (strcmp(shname, "license") == 0) {
+                       processed_sec[i] = true;
+                       memcpy(license, data->d_buf, data->d_size);
+               } else if (strcmp(shname, "version") == 0) {
+                       processed_sec[i] = true;
+                       if (data->d_size != sizeof(int)) {
+                               printf("invalid size of version section %zd\n",
+                                      data->d_size);
+                               return 1;
+                       }
+                       memcpy(&kern_version, data->d_buf, sizeof(int));
+               } else if (strcmp(shname, "maps") == 0) {
+                       int j;
+
+                       maps_shndx = i;
+                       data_maps = data;
+                       for (j = 0; j < MAX_MAPS; j++)
+                               map_data[j].fd = -1;
+               } else if (shdr.sh_type == SHT_SYMTAB) {
+                       strtabidx = shdr.sh_link;
+                       symbols = data;
+               }
+       }
+
+       ret = 1;
+
+       if (!symbols) {
+               printf("missing SHT_SYMTAB section\n");
+               goto done;
+       }
+
+       if (data_maps) {
+               nr_maps = load_elf_maps_section(map_data, maps_shndx,
+                                               elf, symbols, strtabidx);
+               if (nr_maps < 0) {
+                       printf("Error: Failed loading ELF maps (errno:%d):%s\n",
+                              nr_maps, strerror(-nr_maps));
+                       ret = 1;
+                       goto done;
+               }
+               if (load_maps(map_data, nr_maps, fixup_map))
+                       goto done;
+               map_data_count = nr_maps;
+
+               processed_sec[maps_shndx] = true;
+       }
+
+       /* process all relo sections, and rewrite bpf insns for maps */
+       for (i = 1; i < ehdr.e_shnum; i++) {
+               if (processed_sec[i])
+                       continue;
+
+               if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
+                       continue;
+
+               if (shdr.sh_type == SHT_REL) {
+                       struct bpf_insn *insns;
+
+                       /* locate prog sec that need map fixup (relocations) */
+                       if (get_sec(elf, shdr.sh_info, &ehdr, &shname_prog,
+                                   &shdr_prog, &data_prog))
+                               continue;
+
+                       if (shdr_prog.sh_type != SHT_PROGBITS ||
+                           !(shdr_prog.sh_flags & SHF_EXECINSTR))
+                               continue;
+
+                       insns = (struct bpf_insn *) data_prog->d_buf;
+                       processed_sec[i] = true; /* relo section */
+
+                       if (parse_relo_and_apply(data, symbols, &shdr, insns,
+                                                map_data, nr_maps))
+                               continue;
+               }
+       }
+
+       /* load programs */
+       for (i = 1; i < ehdr.e_shnum; i++) {
+
+               if (processed_sec[i])
+                       continue;
+
+               if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
+                       continue;
+
+               if (memcmp(shname, "kprobe/", 7) == 0 ||
+                   memcmp(shname, "kretprobe/", 10) == 0 ||
+                   memcmp(shname, "tracepoint/", 11) == 0 ||
+                   memcmp(shname, "xdp", 3) == 0 ||
+                   memcmp(shname, "perf_event", 10) == 0 ||
+                   memcmp(shname, "socket", 6) == 0 ||
+                   memcmp(shname, "cgroup/", 7) == 0 ||
+                   memcmp(shname, "sockops", 7) == 0 ||
+                   memcmp(shname, "sk_skb", 6) == 0) {
+                       ret = load_and_attach(shname, data->d_buf,
+                                             data->d_size);
+                       if (ret != 0)
+                               goto done;
+               }
+       }
+
+       ret = 0;
+done:
+       close(fd);
+       return ret;
+}
+
+int load_bpf_file(const char *path)
+{
+       return do_load_bpf_file(path, NULL);
+}
+
+int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map)
+{
+       return do_load_bpf_file(path, fixup_map);
+}
+
+void read_trace_pipe(void)
+{
+       int trace_fd;
+
+       trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
+       if (trace_fd < 0)
+               return;
+
+       while (1) {
+               static char buf[4096];
+               ssize_t sz;
+
+               sz = read(trace_fd, buf, sizeof(buf));
+               if (sz > 0) {
+                       buf[sz] = 0;
+                       puts(buf);
+               }
+       }
+}
+
+#define MAX_SYMS 300000
+static struct ksym syms[MAX_SYMS];
+static int sym_cnt;
+
+static int ksym_cmp(const void *p1, const void *p2)
+{
+       return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
+}
+
+int load_kallsyms(void)
+{
+       FILE *f = fopen("/proc/kallsyms", "r");
+       char func[256], buf[256];
+       char symbol;
+       void *addr;
+       int i = 0;
+
+       if (!f)
+               return -ENOENT;
+
+       while (!feof(f)) {
+               if (!fgets(buf, sizeof(buf), f))
+                       break;
+               if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
+                       break;
+               if (!addr)
+                       continue;
+               syms[i].addr = (long) addr;
+               syms[i].name = strdup(func);
+               i++;
+       }
+       sym_cnt = i;
+       qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
+       return 0;
+}
+
+struct ksym *ksym_search(long key)
+{
+       int start = 0, end = sym_cnt;
+       int result;
+
+       while (start < end) {
+               size_t mid = start + (end - start) / 2;
+
+               result = key - syms[mid].addr;
+               if (result < 0)
+                       end = mid;
+               else if (result > 0)
+                       start = mid + 1;
+               else
+                       return &syms[mid];
+       }
+
+       if (start >= 1 && syms[start - 1].addr < key &&
+           key < syms[start].addr)
+               /* valid ksym */
+               return &syms[start - 1];
+
+       /* out of range. return _stext */
+       return &syms[0];
+}
+
+int set_link_xdp_fd(int ifindex, int fd, __u32 flags)
+{
+       struct sockaddr_nl sa;
+       int sock, seq = 0, len, ret = -1;
+       char buf[4096];
+       struct nlattr *nla, *nla_xdp;
+       struct {
+               struct nlmsghdr  nh;
+               struct ifinfomsg ifinfo;
+               char             attrbuf[64];
+       } req;
+       struct nlmsghdr *nh;
+       struct nlmsgerr *err;
+
+       memset(&sa, 0, sizeof(sa));
+       sa.nl_family = AF_NETLINK;
+
+       sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+       if (sock < 0) {
+               printf("open netlink socket: %s\n", strerror(errno));
+               return -1;
+       }
+
+       if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+               printf("bind to netlink: %s\n", strerror(errno));
+               goto cleanup;
+       }
+
+       memset(&req, 0, sizeof(req));
+       req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+       req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+       req.nh.nlmsg_type = RTM_SETLINK;
+       req.nh.nlmsg_pid = 0;
+       req.nh.nlmsg_seq = ++seq;
+       req.ifinfo.ifi_family = AF_UNSPEC;
+       req.ifinfo.ifi_index = ifindex;
+
+       /* started nested attribute for XDP */
+       nla = (struct nlattr *)(((char *)&req)
+                               + NLMSG_ALIGN(req.nh.nlmsg_len));
+       nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
+       nla->nla_len = NLA_HDRLEN;
+
+       /* add XDP fd */
+       nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+       nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
+       nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
+       memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
+       nla->nla_len += nla_xdp->nla_len;
+
+       /* if user passed in any flags, add those too */
+       if (flags) {
+               nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+               nla_xdp->nla_type = 3/*IFLA_XDP_FLAGS*/;
+               nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
+               memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
+               nla->nla_len += nla_xdp->nla_len;
+       }
+
+       req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
+
+       if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+               printf("send to netlink: %s\n", strerror(errno));
+               goto cleanup;
+       }
+
+       len = recv(sock, buf, sizeof(buf), 0);
+       if (len < 0) {
+               printf("recv from netlink: %s\n", strerror(errno));
+               goto cleanup;
+       }
+
+       for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+            nh = NLMSG_NEXT(nh, len)) {
+               if (nh->nlmsg_pid != getpid()) {
+                       printf("Wrong pid %d, expected %d\n",
+                              nh->nlmsg_pid, getpid());
+                       goto cleanup;
+               }
+               if (nh->nlmsg_seq != seq) {
+                       printf("Wrong seq %d, expected %d\n",
+                              nh->nlmsg_seq, seq);
+                       goto cleanup;
+               }
+               switch (nh->nlmsg_type) {
+               case NLMSG_ERROR:
+                       err = (struct nlmsgerr *)NLMSG_DATA(nh);
+                       if (!err->error)
+                               continue;
+                       printf("nlmsg error %s\n", strerror(-err->error));
+                       goto cleanup;
+               case NLMSG_DONE:
+                       break;
+               }
+       }
+
+       ret = 0;
+
+cleanup:
+       close(sock);
+       return ret;
+}
diff --git a/drivers/net/af_xdp/bpf_load.h b/drivers/net/af_xdp/bpf_load.h
new file mode 100644
index 000000000..5450e8b19
--- /dev/null
+++ b/drivers/net/af_xdp/bpf_load.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_LOAD_H
+#define __BPF_LOAD_H
+
+#include "libbpf.h"
+
+#define MAX_MAPS 32
+#define MAX_PROGS 32
+
+struct bpf_map_def {
+       unsigned int type;
+       unsigned int key_size;
+       unsigned int value_size;
+       unsigned int max_entries;
+       unsigned int map_flags;
+       unsigned int inner_map_idx;
+       unsigned int numa_node;
+};
+
+struct bpf_map_data {
+       int fd;
+       char *name;
+       size_t elf_offset;
+       struct bpf_map_def def;
+};
+
+typedef void (*fixup_map_cb)(struct bpf_map_data *map, int idx);
+
+extern int prog_fd[MAX_PROGS];
+extern int event_fd[MAX_PROGS];
+extern char bpf_log_buf[BPF_LOG_BUF_SIZE];
+extern int prog_cnt;
+
+/* There is a one-to-one mapping between map_fd[] and map_data[].
+ * The map_data[] just contains more rich info on the given map.
+ */
+extern int map_fd[MAX_MAPS];
+extern struct bpf_map_data map_data[MAX_MAPS];
+extern int map_data_count;
+
+/* parses elf file compiled by llvm .c->.o
+ * . parses 'maps' section and creates maps via BPF syscall
+ * . parses 'license' section and passes it to syscall
+ * . parses elf relocations for BPF maps and adjusts BPF_LD_IMM64 insns by
+ *   storing map_fd into insn->imm and marking such insns as BPF_PSEUDO_MAP_FD
+ * . loads eBPF programs via BPF syscall
+ *
+ * One ELF file can contain multiple BPF programs which will be loaded
+ * and their FDs stored in the prog_fd array
+ *
+ * returns zero on success
+ */
+int load_bpf_file(const char *path);
+int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map);
+
+void read_trace_pipe(void);
+struct ksym {
+       long addr;
+       char *name;
+};
+
+int load_kallsyms(void);
+struct ksym *ksym_search(long key);
+int set_link_xdp_fd(int ifindex, int fd, __u32 flags);
+#endif
diff --git a/drivers/net/af_xdp/libbpf.h b/drivers/net/af_xdp/libbpf.h
new file mode 100644
index 000000000..18bfee5aa
--- /dev/null
+++ b/drivers/net/af_xdp/libbpf.h
@@ -0,0 +1,199 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* eBPF mini library */
+#ifndef __LIBBPF_H
+#define __LIBBPF_H
+
+#include <bpf/bpf.h>
+
+struct bpf_insn;
+
+/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
+
+#define BPF_ALU64_REG(OP, DST, SRC)                            \
+       ((struct bpf_insn) {                                    \
+               .code  = BPF_ALU64 | BPF_OP(OP) | BPF_X,        \
+               .dst_reg = DST,                                 \
+               .src_reg = SRC,                                 \
+               .off   = 0,                                     \
+               .imm   = 0 })
+
+#define BPF_ALU32_REG(OP, DST, SRC)                            \
+       ((struct bpf_insn) {                                    \
+               .code  = BPF_ALU | BPF_OP(OP) | BPF_X,          \
+               .dst_reg = DST,                                 \
+               .src_reg = SRC,                                 \
+               .off   = 0,                                     \
+               .imm   = 0 })
+
+/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
+
+#define BPF_ALU64_IMM(OP, DST, IMM)                            \
+       ((struct bpf_insn) {                                    \
+               .code  = BPF_ALU64 | BPF_OP(OP) | BPF_K,        \
+               .dst_reg = DST,                                 \
+               .src_reg = 0,                                   \
+               .off   = 0,                                     \
+               .imm   = IMM })
+
+#define BPF_ALU32_IMM(OP, DST, IMM)                            \
+       ((struct bpf_insn) {                                    \
+               .code  = BPF_ALU | BPF_OP(OP) | BPF_K,          \
+               .dst_reg = DST,                                 \
+               .src_reg = 0,                                   \
+               .off   = 0,                                     \
+               .imm   = IMM })
+
+/* Short form of mov, dst_reg = src_reg */
+
+#define BPF_MOV64_REG(DST, SRC)                                        \
+       ((struct bpf_insn) {                                    \
+               .code  = BPF_ALU64 | BPF_MOV | BPF_X,           \
+               .dst_reg = DST,                                 \
+               .src_reg = SRC,                                 \
+               .off   = 0,                                     \
+               .imm   = 0 })
+
+#define BPF_MOV32_REG(DST, SRC)                                        \
+       ((struct bpf_insn) {                                    \
+               .code  = BPF_ALU | BPF_MOV | BPF_X,             \
+               .dst_reg = DST,                                 \
+               .src_reg = SRC,                                 \
+               .off   = 0,                                     \
+               .imm   = 0 })
+
+/* Short form of mov, dst_reg = imm32 */
+
+#define BPF_MOV64_IMM(DST, IMM)                                        \
+       ((struct bpf_insn) {                                    \
+               .code  = BPF_ALU64 | BPF_MOV | BPF_K,           \
+               .dst_reg = DST,                                 \
+               .src_reg = 0,                                   \
+               .off   = 0,                                     \
+               .imm   = IMM })
+
+#define BPF_MOV32_IMM(DST, IMM)                                        \
+       ((struct bpf_insn) {                                    \
+               .code  = BPF_ALU | BPF_MOV | BPF_K,             \
+               .dst_reg = DST,                                 \
+               .src_reg = 0,                                   \
+               .off   = 0,                                     \
+               .imm   = IMM })
+
+/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
+#define BPF_LD_IMM64(DST, IMM)                                 \
+       BPF_LD_IMM64_RAW(DST, 0, IMM)
+
+#define BPF_LD_IMM64_RAW(DST, SRC, IMM)                                \
+       ((struct bpf_insn) {                                    \
+               .code  = BPF_LD | BPF_DW | BPF_IMM,             \
+               .dst_reg = DST,                                 \
+               .src_reg = SRC,                                 \
+               .off   = 0,                                     \
+               .imm   = (__u32) (IMM) }),                      \
+       ((struct bpf_insn) {                                    \
+               .code  = 0, /* zero is reserved opcode */       \
+               .dst_reg = 0,                                   \
+               .src_reg = 0,                                   \
+               .off   = 0,                                     \
+               .imm   = ((__u64) (IMM)) >> 32 })
+
+#ifndef BPF_PSEUDO_MAP_FD
+# define BPF_PSEUDO_MAP_FD     1
+#endif
+
+/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
+#define BPF_LD_MAP_FD(DST, MAP_FD)                             \
+       BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
+
+
+/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
+
+#define BPF_LD_ABS(SIZE, IMM)                                  \
+       ((struct bpf_insn) {                                    \
+               .code  = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS,     \
+               .dst_reg = 0,                                   \
+               .src_reg = 0,                                   \
+               .off   = 0,                                     \
+               .imm   = IMM })
+
+/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
+
+#define BPF_LDX_MEM(SIZE, DST, SRC, OFF)                       \
+       ((struct bpf_insn) {                                    \
+               .code  = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM,    \
+               .dst_reg = DST,                                 \
+               .src_reg = SRC,                                 \
+               .off   = OFF,                                   \
+               .imm   = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
+
+#define BPF_STX_MEM(SIZE, DST, SRC, OFF)                       \
+       ((struct bpf_insn) {                                    \
+               .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM,    \
+               .dst_reg = DST,                                 \
+               .src_reg = SRC,                                 \
+               .off   = OFF,                                   \
+               .imm   = 0 })
+
+/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */
+
+#define BPF_STX_XADD(SIZE, DST, SRC, OFF)                      \
+       ((struct bpf_insn) {                                    \
+               .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD,   \
+               .dst_reg = DST,                                 \
+               .src_reg = SRC,                                 \
+               .off   = OFF,                                   \
+               .imm   = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
+
+#define BPF_ST_MEM(SIZE, DST, OFF, IMM)                                \
+       ((struct bpf_insn) {                                    \
+               .code  = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM,     \
+               .dst_reg = DST,                                 \
+               .src_reg = 0,                                   \
+               .off   = OFF,                                   \
+               .imm   = IMM })
+
+/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
+
+#define BPF_JMP_REG(OP, DST, SRC, OFF)                         \
+       ((struct bpf_insn) {                                    \
+               .code  = BPF_JMP | BPF_OP(OP) | BPF_X,          \
+               .dst_reg = DST,                                 \
+               .src_reg = SRC,                                 \
+               .off   = OFF,                                   \
+               .imm   = 0 })
+
+/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
+
+#define BPF_JMP_IMM(OP, DST, IMM, OFF)                         \
+       ((struct bpf_insn) {                                    \
+               .code  = BPF_JMP | BPF_OP(OP) | BPF_K,          \
+               .dst_reg = DST,                                 \
+               .src_reg = 0,                                   \
+               .off   = OFF,                                   \
+               .imm   = IMM })
+
+/* Raw code statement block */
+
+#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM)                 \
+       ((struct bpf_insn) {                                    \
+               .code  = CODE,                                  \
+               .dst_reg = DST,                                 \
+               .src_reg = SRC,                                 \
+               .off   = OFF,                                   \
+               .imm   = IMM })
+
+/* Program exit */
+
+#define BPF_EXIT_INSN()                                                \
+       ((struct bpf_insn) {                                    \
+               .code  = BPF_JMP | BPF_EXIT,                    \
+               .dst_reg = 0,                                   \
+               .src_reg = 0,                                   \
+               .off   = 0,                                     \
+               .imm   = 0 })
+
+#endif
diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index d0939022b..903ca0d01 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -15,6 +15,7 @@
 
 #include <linux/if_ether.h>
 #include <linux/if_xdp.h>
+#include <linux/if_link.h>
 #include <arpa/inet.h>
 #include <net/if.h>
 #include <sys/types.h>
@@ -24,6 +25,7 @@
 #include <unistd.h>
 #include <poll.h>
 #include "xdpsock_queue.h"
+#include "bpf_load.h"
 
 #ifndef SOL_XDP
 #define SOL_XDP 283
@@ -85,6 +87,8 @@ struct pmd_internals {
        uint16_t port_id;
        uint16_t queue_idx;
        int ring_size;
+
+       uint32_t xdp_flags;
 };
 
 static const char * const valid_arguments[] = {
@@ -382,8 +386,12 @@ eth_stats_reset(struct rte_eth_dev *dev)
 }
 
 static void
-eth_dev_close(struct rte_eth_dev *dev __rte_unused)
+eth_dev_close(struct rte_eth_dev *dev)
 {
+       struct pmd_internals *internals = dev->data->dev_private;
+
+       if (internals->xdp_flags)
+               set_link_xdp_fd(internals->if_index, -1, internals->xdp_flags);
 }
 
 static void
@@ -745,9 +753,25 @@ init_internals(struct rte_vdev_device *dev,
        if (ret)
                goto error_3;
 
+       /* need fix: hard coded bpf file */
+       if (load_bpf_file("xdpsock_kern.o")) {
+               printf("load bpf file failed\n");
+               goto error_3;
+       }
+       RTE_ASSERT(prog_fd[0]);
+
+       if (!set_link_xdp_fd(internals->if_index, prog_fd[0],
+                            XDP_FLAGS_DRV_MODE))
+               internals->xdp_flags = XDP_FLAGS_DRV_MODE;
+       else if (!set_link_xdp_fd(internals->if_index, prog_fd[0],
+                                 XDP_FLAGS_SKB_MODE))
+               internals->xdp_flags = XDP_FLAGS_SKB_MODE;
+       else
+               goto error_3;
+
        eth_dev = rte_eth_vdev_allocate(dev, 0);
        if (!eth_dev)
-               goto error_3;
+               goto error_4;
 
        rte_memcpy(data, eth_dev->data, sizeof(*data));
        internals->port_id = eth_dev->data->port_id;
@@ -765,6 +789,9 @@ init_internals(struct rte_vdev_device *dev,
 
        return 0;
 
+error_4:
+       set_link_xdp_fd(internals->if_index, -1, internals->xdp_flags);
+
 error_3:
        close(internals->sfd);
 
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index bc26e1457..d05e6c0e4 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -120,7 +120,7 @@ ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),n)
 _LDLIBS-$(CONFIG_RTE_DRIVER_MEMPOOL_STACK)  += -lrte_mempool_stack
 
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AF_PACKET)  += -lrte_pmd_af_packet
-_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AF_XDP)     += -lrte_pmd_af_xdp
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AF_XDP)     += -lrte_pmd_af_xdp -lelf -lbpf
 _LDLIBS-$(CONFIG_RTE_LIBRTE_ARK_PMD)        += -lrte_pmd_ark
 _LDLIBS-$(CONFIG_RTE_LIBRTE_AVF_PMD)        += -lrte_pmd_avf
 _LDLIBS-$(CONFIG_RTE_LIBRTE_AVP_PMD)        += -lrte_pmd_avp
-- 
2.13.6

Reply via email to