On 02/13, Peter Oskolkov wrote: > This patch adds a bpf self-test to cover BPF_LWT_ENCAP_IP mode > in bpf_lwt_push_encap. > > Covered: > - encapping in LWT_IN and LWT_XMIT > - IPv4 and IPv6 > > A follow-up patch will add GSO and VRF-enabled tests. > > Signed-off-by: Peter Oskolkov <p...@google.com> > --- > tools/testing/selftests/bpf/Makefile | 3 +- > .../selftests/bpf/progs/test_lwt_ip_encap.c | 85 +++++ > .../selftests/bpf/test_lwt_ip_encap.sh | 311 ++++++++++++++++++ > 3 files changed, 398 insertions(+), 1 deletion(-) > create mode 100644 tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c > create mode 100755 tools/testing/selftests/bpf/test_lwt_ip_encap.sh > > diff --git a/tools/testing/selftests/bpf/Makefile > b/tools/testing/selftests/bpf/Makefile > index c3edf47da05d..ccffaa0a0787 100644 > --- a/tools/testing/selftests/bpf/Makefile > +++ b/tools/testing/selftests/bpf/Makefile > @@ -50,7 +50,8 @@ TEST_PROGS := test_kmod.sh \ > test_lirc_mode2.sh \ > test_skb_cgroup_id.sh \ > test_flow_dissector.sh \ > - test_xdp_vlan.sh > + test_xdp_vlan.sh \ > + test_lwt_ip_encap.sh > > TEST_PROGS_EXTENDED := with_addr.sh \ > with_tunnels.sh \ > diff --git a/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c > b/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c > new file mode 100644 > index 000000000000..c957d6dfe6d7 > --- /dev/null > +++ b/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c > @@ -0,0 +1,85 @@ > +// SPDX-License-Identifier: GPL-2.0 > +#include <stddef.h> > +#include <string.h> > +#include <linux/bpf.h> > +#include <linux/ip.h> > +#include <linux/ipv6.h> > +#include "bpf_helpers.h" > +#include "bpf_endian.h" > + > +struct grehdr { > + __be16 flags; > + __be16 protocol; > +}; > + > +SEC("encap_gre") > +int bpf_lwt_encap_gre(struct __sk_buff *skb) > +{ > + struct encap_hdr { > + struct iphdr iph; > + struct grehdr greh; > + } hdr; > + int err; > + > + memset(&hdr, 0, sizeof(struct encap_hdr)); > + > + hdr.iph.ihl = 5; > + hdr.iph.version = 4; > + 
hdr.iph.ttl = 0x40; > + hdr.iph.protocol = 47; /* IPPROTO_GRE */
[...] > +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ > + hdr.iph.saddr = 0x640110ac; /* 172.16.1.100 */ > + hdr.iph.daddr = 0x641010ac; /* 172.16.16.100 */ > +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ > + hdr.iph.saddr = 0xac100164; /* 172.16.1.100 */ > + hdr.iph.daddr = 0xac101064; /* 172.16.16.100 */ > +#else > +#error "Fix your compiler's __BYTE_ORDER__?!" > +#endif Nit, why not just: hdr.iph.saddr = bpf_htonl(0xac100164); /* 172.16.1.100 */ hdr.iph.daddr = bpf_htonl(0xac101064); /* 172.16.16.100 */ ? > + hdr.iph.tot_len = bpf_htons(skb->len + sizeof(struct encap_hdr)); > + > + hdr.greh.protocol = skb->protocol; > + > + err = bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &hdr, > + sizeof(struct encap_hdr)); > + if (err) > + return BPF_DROP; > + > + return BPF_LWT_REROUTE; > +} > + > +SEC("encap_gre6") > +int bpf_lwt_encap_gre6(struct __sk_buff *skb) > +{ > + struct encap_hdr { > + struct ipv6hdr ip6hdr; > + struct grehdr greh; > + } hdr; > + int err; > + > + memset(&hdr, 0, sizeof(struct encap_hdr)); > + > + hdr.ip6hdr.version = 6; > + hdr.ip6hdr.payload_len = bpf_htons(skb->len + sizeof(struct grehdr)); > + hdr.ip6hdr.nexthdr = 47; /* IPPROTO_GRE */ > + hdr.ip6hdr.hop_limit = 0x40; > + /* fb01::1 */ > + hdr.ip6hdr.saddr.s6_addr[0] = 0xfb; > + hdr.ip6hdr.saddr.s6_addr[1] = 1; > + hdr.ip6hdr.saddr.s6_addr[15] = 1; > + /* fb10::1 */ > + hdr.ip6hdr.daddr.s6_addr[0] = 0xfb; > + hdr.ip6hdr.daddr.s6_addr[1] = 0x10; > + hdr.ip6hdr.daddr.s6_addr[15] = 1; > + > + hdr.greh.protocol = skb->protocol; > + > + err = bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &hdr, > + sizeof(struct encap_hdr)); > + if (err) > + return BPF_DROP; > + > + return BPF_LWT_REROUTE; > +} > + > +char _license[] SEC("license") = "GPL"; > diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh > b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh > new file mode 100755 > index 000000000000..4ca714e23ab0 > --- /dev/null > +++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh > @@ -0,0 +1,311 @@ > 
+#!/bin/bash > +# SPDX-License-Identifier: GPL-2.0 > +# > +# Setup/topology: > +# > +# NS1 NS2 NS3 > +# veth1 <---> veth2 veth3 <---> veth4 (the top route) > +# veth5 <---> veth6 veth7 <---> veth8 (the bottom route) > +# > +# each vethN gets IPv[4|6]_N address > +# > +# IPv*_SRC = IPv*_1 > +# IPv*_DST = IPv*_4 > +# > +# all tests test pings from IPv*_SRC to IPv*_DST > +# > +# by default, routes are configured to allow packets to go > +# IP*_1 <=> IP*_2 <=> IP*_3 <=> IP*_4 (the top route) > +# > +# a GRE device is installed in NS3 with IPv*_GRE, and > +# NS1/NS2 are configured to route packets to IPv*_GRE via IP*_8 > +# (the bottom route) > +# > +# Tests: > +# > +# 1. routes NS2->IPv*_DST are brought down, so the only way a ping > +# from IP*_SRC to IP*_DST can work is via IPv*_GRE > +# > +# 2a. in an egress test, a bpf LWT_XMIT program is installed on veth1 > +# that encaps the packets with an IP/GRE header to route to IPv*_GRE > +# > +# ping: SRC->[encap at veth1:egress]->GRE:decap->DST > +# ping replies go DST->SRC directly > +# > +# 2b. 
in an ingress test, a bpf LWT_IN program is installed on veth2 > +# that encaps the packets with an IP/GRE header to route to IPv*_GRE > +# > +# ping: SRC->[encap at veth2:ingress]->GRE:decap->DST > +# ping replies go DST->SRC directly > + > +set -e # exit on error > + > +if [[ $EUID -ne 0 ]]; then > + echo "This script must be run as root" > + echo "FAIL" > + exit 1 > +fi > + > +readonly NS1="ns1-$(mktemp -u XXXXXX)" > +readonly NS2="ns2-$(mktemp -u XXXXXX)" > +readonly NS3="ns3-$(mktemp -u XXXXXX)" > + > +readonly IPv4_1="172.16.1.100" > +readonly IPv4_2="172.16.2.100" > +readonly IPv4_3="172.16.3.100" > +readonly IPv4_4="172.16.4.100" > +readonly IPv4_5="172.16.5.100" > +readonly IPv4_6="172.16.6.100" > +readonly IPv4_7="172.16.7.100" > +readonly IPv4_8="172.16.8.100" > +readonly IPv4_GRE="172.16.16.100" > + > +readonly IPv4_SRC=$IPv4_1 > +readonly IPv4_DST=$IPv4_4 > + > +readonly IPv6_1="fb01::1" > +readonly IPv6_2="fb02::1" > +readonly IPv6_3="fb03::1" > +readonly IPv6_4="fb04::1" > +readonly IPv6_5="fb05::1" > +readonly IPv6_6="fb06::1" > +readonly IPv6_7="fb07::1" > +readonly IPv6_8="fb08::1" > +readonly IPv6_GRE="fb10::1" > + > +readonly IPv6_SRC=$IPv6_1 > +readonly IPv6_DST=$IPv6_4 > + > +setup() { > +set -e # exit on error > + # create devices and namespaces > + ip netns add "${NS1}" > + ip netns add "${NS2}" > + ip netns add "${NS3}" > + > + ip link add veth1 type veth peer name veth2 > + ip link add veth3 type veth peer name veth4 > + ip link add veth5 type veth peer name veth6 > + ip link add veth7 type veth peer name veth8 > + > + ip netns exec ${NS2} sysctl -wq net.ipv4.ip_forward=1 > + ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.forwarding=1 > + > + ip link set veth1 netns ${NS1} > + ip link set veth2 netns ${NS2} > + ip link set veth3 netns ${NS2} > + ip link set veth4 netns ${NS3} > + ip link set veth5 netns ${NS1} > + ip link set veth6 netns ${NS2} > + ip link set veth7 netns ${NS2} > + ip link set veth8 netns ${NS3} > + > + # configure 
addresses: the top route (1-2-3-4) > + ip -netns ${NS1} addr add ${IPv4_1}/24 dev veth1 > + ip -netns ${NS2} addr add ${IPv4_2}/24 dev veth2 > + ip -netns ${NS2} addr add ${IPv4_3}/24 dev veth3 > + ip -netns ${NS3} addr add ${IPv4_4}/24 dev veth4 > + ip -netns ${NS1} -6 addr add ${IPv6_1}/128 nodad dev veth1 > + ip -netns ${NS2} -6 addr add ${IPv6_2}/128 nodad dev veth2 > + ip -netns ${NS2} -6 addr add ${IPv6_3}/128 nodad dev veth3 > + ip -netns ${NS3} -6 addr add ${IPv6_4}/128 nodad dev veth4 > + > + # configure addresses: the bottom route (5-6-7-8) > + ip -netns ${NS1} addr add ${IPv4_5}/24 dev veth5 > + ip -netns ${NS2} addr add ${IPv4_6}/24 dev veth6 > + ip -netns ${NS2} addr add ${IPv4_7}/24 dev veth7 > + ip -netns ${NS3} addr add ${IPv4_8}/24 dev veth8 > + ip -netns ${NS1} -6 addr add ${IPv6_5}/128 nodad dev veth5 > + ip -netns ${NS2} -6 addr add ${IPv6_6}/128 nodad dev veth6 > + ip -netns ${NS2} -6 addr add ${IPv6_7}/128 nodad dev veth7 > + ip -netns ${NS3} -6 addr add ${IPv6_8}/128 nodad dev veth8 > + > + > + ip -netns ${NS1} link set dev veth1 up > + ip -netns ${NS2} link set dev veth2 up > + ip -netns ${NS2} link set dev veth3 up > + ip -netns ${NS3} link set dev veth4 up > + ip -netns ${NS1} link set dev veth5 up > + ip -netns ${NS2} link set dev veth6 up > + ip -netns ${NS2} link set dev veth7 up > + ip -netns ${NS3} link set dev veth8 up > + > + # configure routes: IP*_SRC -> veth1/IP*_2 (= top route) default; > + # the bottom route to specific bottom addresses > + > + # NS1 > + # top route > + ip -netns ${NS1} route add ${IPv4_2}/32 dev veth1 > + ip -netns ${NS1} route add default dev veth1 via ${IPv4_2} # go top > by default > + ip -netns ${NS1} -6 route add ${IPv6_2}/128 dev veth1 > + ip -netns ${NS1} -6 route add default dev veth1 via ${IPv6_2} # go top > by default > + # bottom route > + ip -netns ${NS1} route add ${IPv4_6}/32 dev veth5 > + ip -netns ${NS1} route add ${IPv4_7}/32 dev veth5 via ${IPv4_6} > + ip -netns ${NS1} route add ${IPv4_8}/32 
dev veth5 via ${IPv4_6} > + ip -netns ${NS1} -6 route add ${IPv6_6}/128 dev veth5 > + ip -netns ${NS1} -6 route add ${IPv6_7}/128 dev veth5 via ${IPv6_6} > + ip -netns ${NS1} -6 route add ${IPv6_8}/128 dev veth5 via ${IPv6_6} > + > + # NS2 > + # top route > + ip -netns ${NS2} route add ${IPv4_1}/32 dev veth2 > + ip -netns ${NS2} route add ${IPv4_4}/32 dev veth3 > + ip -netns ${NS2} -6 route add ${IPv6_1}/128 dev veth2 > + ip -netns ${NS2} -6 route add ${IPv6_4}/128 dev veth3 > + # bottom route > + ip -netns ${NS2} route add ${IPv4_5}/32 dev veth6 > + ip -netns ${NS2} route add ${IPv4_8}/32 dev veth7 > + ip -netns ${NS2} -6 route add ${IPv6_5}/128 dev veth6 > + ip -netns ${NS2} -6 route add ${IPv6_8}/128 dev veth7 > + > + # NS3 > + # top route > + ip -netns ${NS3} route add ${IPv4_3}/32 dev veth4 > + ip -netns ${NS3} route add ${IPv4_1}/32 dev veth4 via ${IPv4_3} > + ip -netns ${NS3} route add ${IPv4_2}/32 dev veth4 via ${IPv4_3} > + ip -netns ${NS3} -6 route add ${IPv6_3}/128 dev veth4 > + ip -netns ${NS3} -6 route add ${IPv6_1}/128 dev veth4 via ${IPv6_3} > + ip -netns ${NS3} -6 route add ${IPv6_2}/128 dev veth4 via ${IPv6_3} > + # bottom route > + ip -netns ${NS3} route add ${IPv4_7}/32 dev veth8 > + ip -netns ${NS3} route add ${IPv4_5}/32 dev veth8 via ${IPv4_7} > + ip -netns ${NS3} route add ${IPv4_6}/32 dev veth8 via ${IPv4_7} > + ip -netns ${NS3} -6 route add ${IPv6_7}/128 dev veth8 > + ip -netns ${NS3} -6 route add ${IPv6_5}/128 dev veth8 via ${IPv6_7} > + ip -netns ${NS3} -6 route add ${IPv6_6}/128 dev veth8 via ${IPv6_7} > + > + # configure IPv4 GRE device in NS3, and a route to it via the "bottom" > route > + ip -netns ${NS3} tunnel add gre_dev mode gre remote ${IPv4_1} local > ${IPv4_GRE} ttl 255 > + ip -netns ${NS3} link set gre_dev up > + ip -netns ${NS3} addr add ${IPv4_GRE} dev gre_dev > + ip -netns ${NS1} route add ${IPv4_GRE}/32 dev veth5 via ${IPv4_6} > + ip -netns ${NS2} route add ${IPv4_GRE}/32 dev veth7 via ${IPv4_8} > + > + > + # configure 
IPv6 GRE device in NS3, and a route to it via the "bottom" > route > + ip -netns ${NS3} -6 tunnel add name gre6_dev mode ip6gre remote > ${IPv6_1} local ${IPv6_GRE} ttl 255 > + ip -netns ${NS3} link set gre6_dev up > + ip -netns ${NS3} -6 addr add ${IPv6_GRE} nodad dev gre6_dev > + ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} > + ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} > + > + # rp_filter gets confused by what these tests are doing, so disable it > + ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0 > + ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0 > + ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0 > +} > + > +cleanup() { > + ip netns del ${NS1} 2> /dev/null > + ip netns del ${NS2} 2> /dev/null > + ip netns del ${NS3} 2> /dev/null > +} > + > +trap cleanup EXIT > + > +test_ping() { > + local readonly PROTO=$1 > + local readonly EXPECTED=$2 > + local RET=0 > + > + set +e > + if [ "${PROTO}" == "IPv4" ] ; then > + ip netns exec ${NS1} ping -c 1 -W 1 -I ${IPv4_SRC} ${IPv4_DST} > 2>&1 > /dev/null > + RET=$? > + elif [ "${PROTO}" == "IPv6" ] ; then > + ip netns exec ${NS1} ping6 -c 1 -W 6 -I ${IPv6_SRC} ${IPv6_DST} > 2>&1 > /dev/null > + RET=$? 
> + else > + echo "test_ping: unknown PROTO: ${PROTO}" > + exit 1 > + fi > + set -e > + > + if [ "0" != "${RET}" ]; then > + RET=1 > + fi > + > + if [ "${EXPECTED}" != "${RET}" ] ; then > + echo "FAIL: test_ping: ${RET}" > + exit 1 > + fi > +} > + > +test_egress() { > + local readonly ENCAP=$1 > + echo "starting egress ${ENCAP} encap test" > + setup > + > + # need to wait a bit for IPv6 to autoconf, otherwise > + # ping6 sometimes fails with "unable to bind to address" > + > + # by default, pings work > + test_ping IPv4 0 > + test_ping IPv6 0 > + > + # remove NS2->DST routes, ping fails > + ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 > + ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 > + test_ping IPv4 1 > + test_ping IPv6 1 > + > + # install replacement routes (LWT/eBPF), pings succeed > + if [ "${ENCAP}" == "IPv4" ] ; then > + ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj > test_lwt_ip_encap.o sec encap_gre dev veth1 > + ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj > test_lwt_ip_encap.o sec encap_gre dev veth1 > + elif [ "${ENCAP}" == "IPv6" ] ; then > + ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj > test_lwt_ip_encap.o sec encap_gre6 dev veth1 > + ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj > test_lwt_ip_encap.o sec encap_gre6 dev veth1 > + else > + echo "FAIL: unknown encap ${ENCAP}" > + fi > + test_ping IPv4 0 > + test_ping IPv6 0 > + > + cleanup > + echo "PASS" > +} > + > +test_ingress() { > + local readonly ENCAP=$1 > + echo "starting ingress ${ENCAP} encap test" > + setup > + > + # need to wait a bit for IPv6 to autoconf, otherwise > + # ping6 sometimes fails with "unable to bind to address" > + > + # by default, pings work > + test_ping IPv4 0 > + test_ping IPv6 0 > + > + # remove NS2->DST routes, pings fail > + ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 > + ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 > + test_ping IPv4 1 > + test_ping IPv6 1 > + > + # install 
replacement routes (LWT/eBPF), pings succeed > + if [ "${ENCAP}" == "IPv4" ] ; then > + ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj > test_lwt_ip_encap.o sec encap_gre dev veth2 > + ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj > test_lwt_ip_encap.o sec encap_gre dev veth2 > + elif [ "${ENCAP}" == "IPv6" ] ; then > + ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj > test_lwt_ip_encap.o sec encap_gre6 dev veth2 > + ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj > test_lwt_ip_encap.o sec encap_gre6 dev veth2 > + else > + echo "FAIL: unknown encap ${ENCAP}" > + fi > + test_ping IPv4 0 > + test_ping IPv6 0 > + > + cleanup > + echo "PASS" > +} > + > +test_egress IPv4 > +test_egress IPv6 > + > +test_ingress IPv4 > +test_ingress IPv6 > + > +echo "all tests passed" > -- > 2.20.1.791.gb4d0f1c61a-goog >