Add a netlink KRT dump filter on Linux to avoid receiving PMTU cache records from the FNHE table along with the KRT dump.
The Linux kernel added FNHE table dumps to the netlink API (see https://patchwork.ozlabs.org/project/netdev/patch/8d3b68cd37fb5fddc470904cdd6793fcf480c6c1.1561131177.git.sbri...@redhat.com/). The filter mitigates the risk of receiving an unknown and potentially large number of FNHE records that would block BIRD I/O in each sync. There is a known issue with GRE tunnels on Linux, which seem to create one FNHE record for each destination IP address routed through the tunnel, even when the PMTU equals the GRE interface MTU (tested with kernels 5.5 through 5.16-rc7). --- sysdep/linux/netlink.c | 44 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c index f85bcf35..79414122 100644 --- a/sysdep/linux/netlink.c +++ b/sysdep/linux/netlink.c @@ -128,7 +128,7 @@ struct nl_sock uint last_size; }; -#define NL_RX_SIZE 8192 +#define NL_RX_SIZE 32768 #define NL_OP_DELETE 0 #define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL) @@ -143,11 +143,18 @@ static struct nl_sock nl_req = {.fd = -1}; /* Netlink socket for requests */ static void nl_open_sock(struct nl_sock *nl) { + int sndbuf = 32768; + int rcvbuf = 1024*1024; + if (nl->fd < 0) { - nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + nl->fd = socket(PF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE); if (nl->fd < 0) die("Unable to open rtnetlink socket: %m"); + + setsockopt(nl->fd, SOL_SOCKET, SO_SNDBUF, &sndbuf, sizeof(sndbuf)); + setsockopt(nl->fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)); + nl->seq = (u32) (current_time() TO_S); /* Or perhaps random_u32() ? 
*/ nl->rx_buffer = xmalloc(NL_RX_SIZE); nl->last_hdr = NULL; @@ -155,6 +162,12 @@ nl_open_sock(struct nl_sock *nl) } } +static void +nl_set_strict_dump(struct nl_sock *nl, int strict) +{ + setsockopt(nl->fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &strict, sizeof(strict)); +} + static void nl_open(void) { @@ -192,6 +205,29 @@ nl_request_dump(int af, int cmd) nl_send(&nl_scan, &req.nh); } +static void +nl_request_dump_rt(int af, int cmd) +{ + struct { + struct nlmsghdr nh; + struct rtmsg rtm; + char buf[128]; + } req = { + .nh.nlmsg_type = cmd, + .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)), + .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP, + .nh.nlmsg_seq = ++(nl_scan.seq), + .nh.nlmsg_pid = 0, + .rtm.rtm_protocol = RTPROT_UNSPEC, + .rtm.rtm_family = af + /* .rtm.rtm_flags is defaults to zero, hence RTM_F_CLONED is not set */ + }; + + send(nl_scan.fd, &req, sizeof(req), 0); + nl_scan.last_hdr = NULL; +} + + static struct nlmsghdr * nl_get_reply(struct nl_sock *nl) { @@ -1864,13 +1900,15 @@ krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NUL struct nl_parse_state s; nl_parse_begin(&s, 1); - nl_request_dump(AF_UNSPEC, RTM_GETROUTE); + nl_set_strict_dump(&nl_scan, 1); + nl_request_dump_rt(AF_UNSPEC, RTM_GETROUTE); while (h = nl_get_scan()) if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE) nl_parse_route(&s, h); else log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type); nl_parse_end(&s); + nl_set_strict_dump(&nl_scan, 0); } /* -- 2.25.1