Author: glebius Date: Thu Mar 14 22:52:16 2019 New Revision: 345166 URL: https://svnweb.freebsd.org/changeset/base/345166
Log: PFIL_MEMPTR for ipfw link level hook With the new pfil(9) KPI it is possible to pass a void pointer with length instead of an mbuf pointer to a packet filter. Until this commit no filters supported that, so pfil ran such packets through a shim function pfil_fake_mbuf(). Now the ipfw(4) hook named "default-link", which is instantiated when the net.link.ether.ipfw sysctl is on, supports processing pointer/length packets natively. - ip_fw_args now has a union for either mbuf or void *, and if flags have non-zero length, then we use the void *. - throughout ipfw_chk() we handle mem/mbuf cases differently. - ether_header goes away from args. It is ipfw_chk()'s responsibility to parse the Ethernet header. - ipfw_log() now uses different bpf APIs to log packets. Although ipfw_chk() is now capable of processing pointer/length packets, this commit adds support for the link level hook only, see ipfw_check_frame(). Potentially the IP processing hook ipfw_check_packet() can be improved too, but that requires more changes since the hook supports more complex actions: NAT, divert, etc. Reviewed by: ae Differential Revision: https://reviews.freebsd.org/D19357 Modified: head/sys/netpfil/ipfw/ip_fw2.c head/sys/netpfil/ipfw/ip_fw_bpf.c head/sys/netpfil/ipfw/ip_fw_log.c head/sys/netpfil/ipfw/ip_fw_pfil.c head/sys/netpfil/ipfw/ip_fw_private.h Modified: head/sys/netpfil/ipfw/ip_fw2.c ============================================================================== --- head/sys/netpfil/ipfw/ip_fw2.c Thu Mar 14 22:32:50 2019 (r345165) +++ head/sys/netpfil/ipfw/ip_fw2.c Thu Mar 14 22:52:16 2019 (r345166) @@ -1258,7 +1258,6 @@ jump_linear(struct ip_fw_chain *chain, struct ip_fw *f * * args->m (in/out) The packet; we set to NULL when/if we nuke it. * Starts with the IP header. - * args->eh (in) Mac header if present, NULL for layer3 packet. * args->L3offset Number of bytes bypassed if we came from L2. * e.g. often sizeof(eh) ** NOTYET ** * args->ifp Incoming or outgoing interface. 
@@ -1297,23 +1296,19 @@ ipfw_chk(struct ip_fw_args *args) * the implementation of the various instructions to make sure * that they still work. * - * args->eh The MAC header. It is non-null for a layer2 - * packet, it is NULL for a layer-3 packet. - * **notyet** - * args->L3offset Offset in the packet to the L3 (IP or equiv.) header. - * * m | args->m Pointer to the mbuf, as received from the caller. * It may change if ipfw_chk() does an m_pullup, or if it * consumes the packet because it calls send_reject(). * XXX This has to change, so that ipfw_chk() never modifies * or consumes the buffer. - * ip is the beginning of the ip(4 or 6) header. - * Calculated by adding the L3offset to the start of data. - * (Until we start using L3offset, the packet is - * supposed to start with the ip header). + * OR + * args->mem Pointer to contigous memory chunk. + * ip Is the beginning of the ip(4 or 6) header. + * eh Ethernet header in case if input is Layer2. */ - struct mbuf *m = args->m; - struct ip *ip = mtod(m, struct ip *); + struct mbuf *m; + struct ip *ip; + struct ether_header *eh; /* * For rules which contain uid/gid or jail constraints, cache @@ -1370,7 +1365,6 @@ ipfw_chk(struct ip_fw_args *args) struct in_addr src_ip, dst_ip; /* NOTE: network format */ int iplen = 0; int pktlen; - uint16_t etype; /* Host order stored ether type */ struct ipfw_dyn_info dyn_info; struct ip_fw *q = NULL; @@ -1394,14 +1388,45 @@ ipfw_chk(struct ip_fw_args *args) int done = 0; /* flag to exit the outer loop */ IPFW_RLOCK_TRACKER; + bool mem; - if (m->m_flags & M_SKIP_FIREWALL || (! 
V_ipfw_vnet_ready)) - return (IP_FW_PASS); /* accept */ + if ((mem = (args->flags & IPFW_ARGS_LENMASK))) { + if (args->flags & IPFW_ARGS_ETHER) { + eh = (struct ether_header *)args->mem; + if (eh->ether_type == htons(ETHERTYPE_VLAN)) + ip = (struct ip *) + ((struct ether_vlan_header *)eh + 1); + else + ip = (struct ip *)(eh + 1); + } else { + eh = NULL; + ip = (struct ip *)args->mem; + } + pktlen = IPFW_ARGS_LENGTH(args->flags); + args->f_id.fib = args->ifp->if_fib; /* best guess */ + } else { + m = args->m; + if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready)) + return (IP_FW_PASS); /* accept */ + if (args->flags & IPFW_ARGS_ETHER) { + /* We need some amount of data to be contiguous. */ + if (m->m_len < min(m->m_pkthdr.len, max_protohdr) && + (args->m = m = m_pullup(m, min(m->m_pkthdr.len, + max_protohdr))) == NULL) + goto pullup_failed; + eh = mtod(m, struct ether_header *); + ip = (struct ip *)(eh + 1); + } else { + eh = NULL; + ip = mtod(m, struct ip *); + } + pktlen = m->m_pkthdr.len; + args->f_id.fib = M_GETFIB(m); /* mbuf not altered */ + } dst_ip.s_addr = 0; /* make sure it is initialized */ src_ip.s_addr = 0; /* make sure it is initialized */ src_port = dst_port = 0; - pktlen = m->m_pkthdr.len; DYN_INFO_INIT(&dyn_info); /* @@ -1411,28 +1436,41 @@ ipfw_chk(struct ip_fw_args *args) * this way). */ #define PULLUP_TO(_len, p, T) PULLUP_LEN(_len, p, sizeof(T)) +#define EHLEN (eh != NULL ? 
((char *)ip - (char *)eh) : 0) #define PULLUP_LEN(_len, p, T) \ do { \ - int x = (_len) + T; \ - if ((m)->m_len < x) { \ - args->m = m = m_pullup(m, x); \ - if (m == NULL) \ - goto pullup_failed; \ + int x = (_len) + T + EHLEN; \ + if (mem) { \ + MPASS(pktlen >= x); \ + p = (char *)args->mem + (_len) + EHLEN; \ + } else { \ + if (__predict_false((m)->m_len < x)) { \ + args->m = m = m_pullup(m, x); \ + if (m == NULL) \ + goto pullup_failed; \ + } \ + p = mtod(m, char *) + (_len) + EHLEN; \ } \ - p = (mtod(m, char *) + (_len)); \ } while (0) +/* + * In case pointers got stale after pullups, update them. + */ +#define UPDATE_POINTERS() \ +do { \ + if (!mem) { \ + if (eh != NULL) { \ + eh = mtod(m, struct ether_header *); \ + ip = (struct ip *)(eh + 1); \ + } else \ + ip = mtod(m, struct ip *); \ + args->m = m; \ + } \ +} while (0) - /* - * if we have an ether header, - */ - if (args->flags & IPFW_ARGS_ETHER) - etype = ntohs(args->eh->ether_type); - else - etype = 0; - /* Identify IP packets and fill up variables. 
*/ if (pktlen >= sizeof(struct ip6_hdr) && - (etype == 0 || etype == ETHERTYPE_IPV6) && ip->ip_v == 6) { + (eh == NULL || eh->ether_type == htons(ETHERTYPE_IPV6)) && + ip->ip_v == 6) { struct ip6_hdr *ip6 = (struct ip6_hdr *)ip; is_ipv6 = 1; @@ -1609,7 +1647,7 @@ do { \ break; } /*switch */ } - ip = mtod(m, struct ip *); + UPDATE_POINTERS(); ip6 = (struct ip6_hdr *)ip; args->f_id.addr_type = 6; args->f_id.src_ip6 = ip6->ip6_src; @@ -1617,7 +1655,8 @@ do { \ args->f_id.flow_id6 = ntohl(ip6->ip6_flow); iplen = ntohs(ip6->ip6_plen) + sizeof(*ip6); } else if (pktlen >= sizeof(struct ip) && - (etype == 0 || etype == ETHERTYPE_IP) && ip->ip_v == 4) { + (eh == NULL || eh->ether_type == htons(ETHERTYPE_IP)) && + ip->ip_v == 4) { is_ipv4 = 1; args->flags |= IPFW_ARGS_IP4; hlen = ip->ip_hl << 2; @@ -1675,7 +1714,7 @@ do { \ } } - ip = mtod(m, struct ip *); + UPDATE_POINTERS(); args->f_id.addr_type = 4; args->f_id.src_ip = ntohl(src_ip.s_addr); args->f_id.dst_ip = ntohl(dst_ip.s_addr); @@ -1692,7 +1731,6 @@ do { \ args->f_id.proto = proto; args->f_id.src_port = src_port = ntohs(src_port); args->f_id.dst_port = dst_port = ntohs(dst_port); - args->f_id.fib = M_GETFIB(m); IPFW_PF_RLOCK(chain); if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */ @@ -1720,7 +1758,7 @@ do { \ oif = NULL; } else { MPASS(args->flags & IPFW_ARGS_OUT); - iif = m->m_pkthdr.rcvif; + iif = mem ? 
NULL : m->m_pkthdr.rcvif; oif = args->ifp; } @@ -1840,7 +1878,7 @@ do { \ ((ipfw_insn_mac *)cmd)->addr; u_int32_t *mask = (u_int32_t *) ((ipfw_insn_mac *)cmd)->mask; - u_int32_t *hdr = (u_int32_t *)args->eh; + u_int32_t *hdr = (u_int32_t *)eh; match = ( want[0] == (hdr[0] & mask[0]) && @@ -1857,8 +1895,11 @@ do { \ for (i = cmdlen - 1; !match && i>0; i--, p += 2) - match = (etype >= p[0] && - etype <= p[1]); + match = + (ntohs(eh->ether_type) >= + p[0] && + ntohs(eh->ether_type) <= + p[1]); } break; @@ -2332,7 +2373,7 @@ do { \ } case O_LOG: - ipfw_log(chain, f, hlen, args, m, + ipfw_log(chain, f, hlen, args, offset | ip6f_mf, tablearg, ip); match = 1; break; Modified: head/sys/netpfil/ipfw/ip_fw_bpf.c ============================================================================== --- head/sys/netpfil/ipfw/ip_fw_bpf.c Thu Mar 14 22:32:50 2019 (r345165) +++ head/sys/netpfil/ipfw/ip_fw_bpf.c Thu Mar 14 22:52:16 2019 (r345166) @@ -161,6 +161,28 @@ ipfwlog_clone_create(struct if_clone *ifc, int unit, c } void +ipfw_bpf_tap(u_char *pkt, u_int pktlen) +{ + LOGIF_RLOCK_TRACKER; + + LOGIF_RLOCK(); + if (V_log_if != NULL) + BPF_TAP(V_log_if, pkt, pktlen); + LOGIF_RUNLOCK(); +} + +void +ipfw_bpf_mtap(struct mbuf *m) +{ + LOGIF_RLOCK_TRACKER; + + LOGIF_RLOCK(); + if (V_log_if != NULL) + BPF_MTAP(V_log_if, m); + LOGIF_RUNLOCK(); +} + +void ipfw_bpf_mtap2(void *data, u_int dlen, struct mbuf *m) { struct ifnet *logif; Modified: head/sys/netpfil/ipfw/ip_fw_log.c ============================================================================== --- head/sys/netpfil/ipfw/ip_fw_log.c Thu Mar 14 22:32:50 2019 (r345165) +++ head/sys/netpfil/ipfw/ip_fw_log.c Thu Mar 14 22:52:16 2019 (r345166) @@ -99,30 +99,32 @@ __FBSDID("$FreeBSD$"); */ void ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen, - struct ip_fw_args *args, struct mbuf *m, - u_short offset, uint32_t tablearg, struct ip *ip) + struct ip_fw_args *args, u_short offset, uint32_t tablearg, struct ip *ip) { char *action; 
int limit_reached = 0; char action2[92], proto[128], fragment[32]; if (V_fw_verbose == 0) { - if (args->flags & IPFW_ARGS_ETHER) /* layer2, use orig hdr */ - ipfw_bpf_mtap2(args->eh, ETHER_HDR_LEN, m); + if (args->flags & IPFW_ARGS_LENMASK) + ipfw_bpf_tap(args->mem, IPFW_ARGS_LENGTH(args->flags)); + else if (args->flags & IPFW_ARGS_ETHER) + /* layer2, use orig hdr */ + ipfw_bpf_mtap(args->m); else { /* Add fake header. Later we will store * more info in the header. */ if (ip->ip_v == 4) ipfw_bpf_mtap2("DDDDDDSSSSSS\x08\x00", - ETHER_HDR_LEN, m); + ETHER_HDR_LEN, args->m); else if (ip->ip_v == 6) ipfw_bpf_mtap2("DDDDDDSSSSSS\x86\xdd", - ETHER_HDR_LEN, m); + ETHER_HDR_LEN, args->m); else /* Obviously bogus EtherType. */ ipfw_bpf_mtap2("DDDDDDSSSSSS\xff\xff", - ETHER_HDR_LEN, m); + ETHER_HDR_LEN, args->m); } return; } Modified: head/sys/netpfil/ipfw/ip_fw_pfil.c ============================================================================== --- head/sys/netpfil/ipfw/ip_fw_pfil.c Thu Mar 14 22:32:50 2019 (r345165) +++ head/sys/netpfil/ipfw/ip_fw_pfil.c Thu Mar 14 22:52:16 2019 (r345166) @@ -328,69 +328,50 @@ again: * ipfw processing for ethernet packets (in and out). */ static pfil_return_t -ipfw_check_frame(struct mbuf **m0, struct ifnet *ifp, int flags, +ipfw_check_frame(pfil_packet_t p, struct ifnet *ifp, int flags, void *ruleset __unused, struct inpcb *inp) { struct ip_fw_args args; - struct ether_header save_eh; - struct ether_header *eh; - struct m_tag *mtag; - struct mbuf *m; pfil_return_t ret; - int i; + bool mem, realloc; + int ipfw; - args.flags = IPFW_ARGS_ETHER; - args.flags |= (flags & PFIL_IN) ? IPFW_ARGS_IN : IPFW_ARGS_OUT; -again: - /* fetch start point from rule, if any. remove the tag if present. 
*/ - mtag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL); - if (mtag != NULL) { - args.rule = *((struct ipfw_rule_ref *)(mtag+1)); - m_tag_delete(*m0, mtag); - if (args.rule.info & IPFW_ONEPASS) - return (0); - args.flags |= IPFW_ARGS_REF; + if (flags & PFIL_MEMPTR) { + mem = true; + realloc = false; + args.flags = PFIL_LENGTH(flags) | IPFW_ARGS_ETHER; + args.mem = p.mem; + } else { + mem = realloc = false; + args.flags = IPFW_ARGS_ETHER; } - - /* I need some amt of data to be contiguous */ - m = *m0; - i = min(m->m_pkthdr.len, max_protohdr); - if (m->m_len < i) { - m = m_pullup(m, i); - if (m == NULL) { - *m0 = m; - return (0); - } - } - eh = mtod(m, struct ether_header *); - save_eh = *eh; /* save copy for restore below */ - m_adj(m, ETHER_HDR_LEN); /* strip ethernet header */ - - args.m = m; /* the packet we are looking at */ + args.flags |= (flags & PFIL_IN) ? IPFW_ARGS_IN : IPFW_ARGS_OUT; args.ifp = ifp; - args.eh = &save_eh; /* MAC header for bridged/MAC packets */ - args.inp = inp; /* used by ipfw uid/gid/jail rules */ - i = ipfw_chk(&args); - m = args.m; - if (m != NULL) { + args.inp = inp; + +again: + if (!mem) { /* - * Restore Ethernet header, as needed, in case the - * mbuf chain was replaced by ipfw. + * Fetch start point from rule, if any. + * Remove the tag if present. 
*/ - M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT); - if (m == NULL) { - *m0 = NULL; - return (0); + struct m_tag *mtag; + + mtag = m_tag_locate(*p.m, MTAG_IPFW_RULE, 0, NULL); + if (mtag != NULL) { + args.rule = *((struct ipfw_rule_ref *)(mtag+1)); + m_tag_delete(*p.m, mtag); + if (args.rule.info & IPFW_ONEPASS) + return (PFIL_PASS); + args.flags |= IPFW_ARGS_REF; } - if (eh != mtod(m, struct ether_header *)) - bcopy(&save_eh, mtod(m, struct ether_header *), - ETHER_HDR_LEN); + args.m = *p.m; } - *m0 = m; + ipfw = ipfw_chk(&args); + ret = PFIL_PASS; - /* Check result of ipfw_chk() */ - switch (i) { + switch (ipfw) { case IP_FW_PASS: break; @@ -403,9 +384,16 @@ again: ret = PFIL_DROPPED; break; } - *m0 = NULL; + if (mem) { + if (pfil_realloc(&p, flags, ifp) != 0) { + ret = PFIL_DROPPED; + break; + } + mem = false; + realloc = true; + } MPASS(args.flags & IPFW_ARGS_REF); - ip_dn_io_ptr(&m, &args); + ip_dn_io_ptr(p.m, &args); return (PFIL_CONSUMED); case IP_FW_NGTEE: @@ -414,9 +402,17 @@ again: ret = PFIL_DROPPED; break; } + if (mem) { + if (pfil_realloc(&p, flags, ifp) != 0) { + ret = PFIL_DROPPED; + break; + } + mem = false; + realloc = true; + } MPASS(args.flags & IPFW_ARGS_REF); - (void )ng_ipfw_input_p(m0, &args, i == IP_FW_NGTEE); - if (i == IP_FW_NGTEE) /* ignore errors for NGTEE */ + (void )ng_ipfw_input_p(p.m, &args, ipfw == IP_FW_NGTEE); + if (ipfw == IP_FW_NGTEE) /* ignore errors for NGTEE */ goto again; /* continue with packet */ ret = PFIL_CONSUMED; break; @@ -425,12 +421,15 @@ again: KASSERT(0, ("%s: unknown retval", __func__)); } - if (ret != PFIL_PASS) { - if (*m0) - FREE_PKT(*m0); - *m0 = NULL; + if (!mem && ret != PFIL_PASS) { + if (*p.m) + FREE_PKT(*p.m); + *p.m = NULL; } + if (realloc && ret == PFIL_PASS) + ret = PFIL_REALLOCED; + return (ret); } @@ -545,7 +544,7 @@ ipfw_hook(int onoff, int pf) pfil_hook_t *h; pha.pa_version = PFIL_VERSION; - pha.pa_flags = PFIL_IN | PFIL_OUT; + pha.pa_flags = PFIL_IN | PFIL_OUT | PFIL_MEMPTR; pha.pa_modname = "ipfw"; 
pha.pa_ruleset = NULL; Modified: head/sys/netpfil/ipfw/ip_fw_private.h ============================================================================== --- head/sys/netpfil/ipfw/ip_fw_private.h Thu Mar 14 22:32:50 2019 (r345165) +++ head/sys/netpfil/ipfw/ip_fw_private.h Thu Mar 14 22:52:16 2019 (r345166) @@ -111,14 +111,11 @@ struct ip_fw_args { struct inpcb *inp; union { /* - * We don't support forwarding on layer2, thus we can - * keep eh pointer in this union. * next_hop[6] pointers can be used to point to next hop * stored in rule's opcode to avoid copying into hopstore. * Also, it is expected that all 0x1-0x10 flags are mutually * exclusive. */ - struct ether_header *eh; /* for bridged packets */ struct sockaddr_in *next_hop; struct sockaddr_in6 *next_hop6; /* ipfw next hop storage */ @@ -129,8 +126,10 @@ struct ip_fw_args { uint16_t sin6_port; } hopstore6; }; - - struct mbuf *m; /* the mbuf chain */ + union { + struct mbuf *m; /* the mbuf chain */ + void *mem; /* or memory pointer */ + }; struct ipfw_flow_id f_id; /* grabbed from IP header */ }; @@ -164,10 +163,11 @@ struct ip_fw_chain; void ipfw_bpf_init(int); void ipfw_bpf_uninit(int); +void ipfw_bpf_tap(u_char *, u_int); +void ipfw_bpf_mtap(struct mbuf *); void ipfw_bpf_mtap2(void *, u_int, struct mbuf *); void ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen, - struct ip_fw_args *args, struct mbuf *m, - u_short offset, uint32_t tablearg, struct ip *ip); + struct ip_fw_args *args, u_short offset, uint32_t tablearg, struct ip *ip); VNET_DECLARE(u_int64_t, norule_counter); #define V_norule_counter VNET(norule_counter) VNET_DECLARE(int, verbose_limit); _______________________________________________ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"