On 6/24/20 5:33 PM, Lorenzo Bianconi wrote:
Introduce XDP_REDIRECT support for eBPF programs attached to cpumap
entries.
This patch has been tested on Marvell ESPRESSObin using a modified
version of the xdp_redirect_cpu sample in order to attach an XDP program
to CPUMAP entries to perform a redirect on the mvneta interface.
In particular the following scenario has been tested:

rq (cpu0) --> mvneta - XDP_REDIRECT (cpu0) --> CPUMAP - XDP_REDIRECT (cpu1) --> 
mvneta

$./xdp_redirect_cpu -p xdp_cpu_map0 -d eth0 -c 1 -e xdp_redirect \
        -f xdp_redirect_kern.o -m tx_port -r eth0

tx: 285.2 Kpps rx: 285.2 Kpps

Attaching a simple XDP program on eth0 to perform XDP_TX gives
comparable results:

tx: 288.4 Kpps rx: 288.4 Kpps

Acked-by: Jesper Dangaard Brouer <bro...@redhat.com>
Co-developed-by: Jesper Dangaard Brouer <bro...@redhat.com>
Signed-off-by: Jesper Dangaard Brouer <bro...@redhat.com>
Signed-off-by: Lorenzo Bianconi <lore...@kernel.org>
---
  include/net/xdp.h          |  1 +
  include/trace/events/xdp.h |  6 ++++--
  kernel/bpf/cpumap.c        | 17 +++++++++++++++--
  3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/include/net/xdp.h b/include/net/xdp.h
index 83b9e0142b52..5be0d4d65b94 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -99,6 +99,7 @@ struct xdp_frame {
  };
struct xdp_cpumap_stats {
+       unsigned int redirect;
        unsigned int pass;
        unsigned int drop;
  };
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index e2c99f5bee39..cd24e8a59529 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -190,6 +190,7 @@ TRACE_EVENT(xdp_cpumap_kthread,
                __field(int, sched)
                __field(unsigned int, xdp_pass)
                __field(unsigned int, xdp_drop)
+               __field(unsigned int, xdp_redirect)
        ),
TP_fast_assign(
@@ -201,18 +202,19 @@ TRACE_EVENT(xdp_cpumap_kthread,
                __entry->sched       = sched;
                __entry->xdp_pass    = xdp_stats->pass;
                __entry->xdp_drop    = xdp_stats->drop;
+               __entry->xdp_redirect        = xdp_stats->redirect;
        ),
TP_printk("kthread"
                  " cpu=%d map_id=%d action=%s"
                  " processed=%u drops=%u"
                  " sched=%d"
-                 " xdp_pass=%u xdp_drop=%u",
+                 " xdp_pass=%u xdp_drop=%u xdp_redirect=%u",
                  __entry->cpu, __entry->map_id,
                  __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
                  __entry->processed, __entry->drops,
                  __entry->sched,
-                 __entry->xdp_pass, __entry->xdp_drop)
+                 __entry->xdp_pass, __entry->xdp_drop, __entry->xdp_redirect)
  );
TRACE_EVENT(xdp_cpumap_enqueue,
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 4e4cd240f07b..c0b2f265ccb2 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -240,7 +240,7 @@ static int cpu_map_bpf_prog_run_xdp(struct 
bpf_cpu_map_entry *rcpu,
        xdp_set_return_frame_no_direct();
        xdp.rxq = &rxq;
- rcu_read_lock();
+       rcu_read_lock_bh();
prog = READ_ONCE(rcpu->prog);
        for (i = 0; i < n; i++) {
@@ -266,6 +266,16 @@ static int cpu_map_bpf_prog_run_xdp(struct 
bpf_cpu_map_entry *rcpu,
                                stats->pass++;
                        }
                        break;
+               case XDP_REDIRECT:
+                       err = xdp_do_redirect(xdpf->dev_rx, &xdp,
+                                             prog);
+                       if (unlikely(err)) {
+                               xdp_return_frame(xdpf);
+                               stats->drop++;
+                       } else {
+                               stats->redirect++;
+                       }

Could we do better with all the accounting and do this from /inside/ a BPF
tracing prog instead? Otherwise it's unfortunate that we need to have the
accounting here even when the tracepoint is disabled.

+                       break;
                default:
                        bpf_warn_invalid_xdp_action(act);
                        /* fallthrough */
@@ -276,7 +286,10 @@ static int cpu_map_bpf_prog_run_xdp(struct 
bpf_cpu_map_entry *rcpu,
                }
        }
- rcu_read_unlock();
+       if (stats->redirect)
+               xdp_do_flush_map();
+
+       rcu_read_unlock_bh(); /* resched point, may call do_softirq() */
        xdp_clear_return_frame_no_direct();

Hm, this looks incorrect. Why do you call xdp_clear_return_frame_no_direct()
/after/ the point where a reschedule (and thus do_softirq()) may occur, i.e.
after rcu_read_unlock_bh()?

        return nframes;


Reply via email to