This patch adds compat support for AF_PACKET mmap. I confirmed that both 32-bit and 64-bit tcpdump binaries using AF_PACKET mmap'ed pcap can dump packets properly on a 64-bit kernel.
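The incompatibility comes from tp_status being declared as an unsigned long in struct tpacket_hdr: on a 64-bit kernel it is 8 bytes wide, so every later field sits at a different offset than a 32-bit reader expects (a 32-bit process looks for tp_len at offset 4, which is actually the upper half of the tp_status the kernel wrote). The program below is only an illustration, not part of the patch; building it with -m32 and -m64 and comparing the output shows the layout mismatch that tpacket_hdr_compat papers over:

/* layout_check.c: compare struct tpacket_hdr layout between ABIs.
 * Build twice: "gcc -m32 layout_check.c" and "gcc -m64 layout_check.c". */
#include <stdio.h>
#include <stddef.h>
#include <linux/if_packet.h>

int main(void)
{
	printf("tp_status: %zu bytes\n",
	       sizeof(((struct tpacket_hdr *)0)->tp_status));
	printf("tp_len at offset %zu, tp_sec at offset %zu\n",
	       offsetof(struct tpacket_hdr, tp_len),
	       offsetof(struct tpacket_hdr, tp_sec));
	printf("sizeof(struct tpacket_hdr) = %zu\n",
	       sizeof(struct tpacket_hdr));
	return 0;
}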
Thanks to David for all your help.

Signed-off-by: FUJITA Tomonori <[EMAIL PROTECTED]>

---

 net/packet/af_packet.c |  156 +++++++++++++++++++++++++++++++++++++++---------
 1 files changed, 128 insertions(+), 28 deletions(-)
a4fd7acdecf12871d91976610cd3d1442807b206

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9db7dbd..71ab97a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -78,6 +78,7 @@
 #include <linux/poll.h>
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/compat.h>
 
 #ifdef CONFIG_INET
 #include <net/inet_common.h>
@@ -182,6 +183,31 @@ struct packet_mreq_max
 #endif
 #ifdef CONFIG_PACKET_MMAP
 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
+
+#ifdef CONFIG_COMPAT
+struct tpacket_hdr_compat {
+	compat_ulong_t tp_status;
+	compat_uint_t tp_len;
+	compat_uint_t tp_snaplen;
+	unsigned short tp_mac;
+	unsigned short tp_net;
+	compat_uint_t tp_sec;
+	compat_uint_t tp_usec;
+};
+
+#ifdef CONFIG_X86_64
+# define COMPAT_TEST is_compat_task()
+#elif defined(CONFIG_IA64)
+# define COMPAT_TEST IS_IA32_PROCESS(task_pt_regs(current))
+#elif defined(CONFIG_S390)
+# define COMPAT_TEST test_thread_flag(TIF_31BIT)
+#elif defined(CONFIG_MIPS)
+# define COMPAT_TEST (current->thread.mflags & MF_32BIT_ADDR)
+#else
+# define COMPAT_TEST test_thread_flag(TIF_32BIT)
+#endif
+#endif
+
 #endif
 
 static void packet_flush_mclist(struct sock *sk);
@@ -211,6 +237,9 @@ struct packet_sock {
 	unsigned int		pg_vec_order;
 	unsigned int		pg_vec_pages;
 	unsigned int		pg_vec_len;
+#ifdef CONFIG_COMPAT
+	int			compat;
+#endif
 #endif
 };
 
@@ -562,15 +591,93 @@ drop:
 }
 
 #ifdef CONFIG_PACKET_MMAP
+#define __tpacket_hdr_update(h, skb, snaplen, mac, net, status)	\
+do {									\
+	h->tp_len = (skb)->len;						\
+	h->tp_snaplen = snaplen;					\
+	h->tp_mac = mac;						\
+	h->tp_net = net;						\
+	h->tp_sec = (skb)->tstamp.off_sec;				\
+	h->tp_usec = (skb)->tstamp.off_usec;				\
+	h->tp_status = status;						\
+} while (0)
+
+#ifdef CONFIG_COMPAT
+static void tpacket_hdr_update(struct packet_sock *po, unsigned int idx,
+			       struct sk_buff *skb,
+			       unsigned int snaplen, unsigned short mac,
+			       unsigned short net, unsigned long status)
+{
+	if (po->compat) {
+		struct tpacket_hdr_compat *h;
+		h = (struct tpacket_hdr_compat *)
+			packet_lookup_frame(po, idx);
+		__tpacket_hdr_update(h, skb, snaplen, mac, net, status);
+	} else {
+		struct tpacket_hdr *h;
+		h = (struct tpacket_hdr *) packet_lookup_frame(po, idx);
+		__tpacket_hdr_update(h, skb, snaplen, mac, net, status);
+	}
+	mb();
+}
+
+static unsigned int tpacket_hdr_status(struct packet_sock *po, unsigned int idx)
+{
+	if (po->compat) {
+		struct tpacket_hdr_compat *h;
+		h = (struct tpacket_hdr_compat *)
+			packet_lookup_frame(po, idx);
+		return h->tp_status;
+	} else {
+		struct tpacket_hdr *h;
+		h = (struct tpacket_hdr *) packet_lookup_frame(po, idx);
+		return h->tp_status;
+	}
+}
+
+static size_t tpacket_hdr_size(struct packet_sock *po)
+{
+	return po->compat ? sizeof(struct tpacket_hdr_compat) :
+		sizeof(struct tpacket_hdr);
+}
+#else
+
+static void tpacket_hdr_update(struct packet_sock *po, unsigned int idx,
+			       struct sk_buff *skb,
+			       unsigned int snaplen, unsigned short mac,
+			       unsigned short net, unsigned long status)
+{
+	struct tpacket_hdr *h;
+	h = (struct tpacket_hdr *) packet_lookup_frame(po, idx);
+	__tpacket_hdr_update(h, skb, snaplen, mac, net, status);
+}
+
+static unsigned long tpacket_hdr_status(struct packet_sock *po, unsigned int idx)
+{
+	struct tpacket_hdr *h;
+	h = (struct tpacket_hdr *) packet_lookup_frame(po, idx);
+	return h->tp_status;
+}
+
+static size_t tpacket_hdr_size(struct packet_sock *po)
+{
+	return sizeof(struct tpacket_hdr);
+}
+#endif
+
+static int tpacket_hdrlen(struct packet_sock *po)
+{
+	return TPACKET_ALIGN(tpacket_hdr_size(po)) + sizeof(struct sockaddr_ll);
+}
+
 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
 {
 	struct sock *sk;
 	struct packet_sock *po;
 	struct sockaddr_ll *sll;
-	struct tpacket_hdr *h;
-	u8 * skb_head = skb->data;
+	u8 * skb_head = skb->data, *cur_frame;
 	int skb_len = skb->len;
-	unsigned snaplen;
+	unsigned snaplen, idx;
 	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
 	unsigned short macoff, netoff;
 	struct sk_buff *copy_skb = NULL;
@@ -603,10 +710,10 @@ static int tpacket_rcv(struct sk_buff *s
 	}
 
 	if (sk->sk_type == SOCK_DGRAM) {
-		macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
+		macoff = netoff = TPACKET_ALIGN(tpacket_hdrlen(po)) + 16;
 	} else {
 		unsigned maclen = skb->nh.raw - skb->data;
-		netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
+		netoff = TPACKET_ALIGN(tpacket_hdrlen(po) + (maclen < 16 ? 16 : maclen));
 		macoff = netoff - maclen;
 	}
 
@@ -631,9 +738,9 @@ static int tpacket_rcv(struct sk_buff *s
 		snaplen = skb->len-skb->data_len;
 
 	spin_lock(&sk->sk_receive_queue.lock);
-	h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);
-
-	if (h->tp_status)
+	idx = po->head;
+	cur_frame = packet_lookup_frame(po, idx);
+	if (tpacket_hdr_status(po, idx))
 		goto ring_is_full;
 	po->head = po->head != po->frame_max ? po->head+1 : 0;
 	po->stats.tp_packets++;
@@ -645,20 +752,14 @@ static int tpacket_rcv(struct sk_buff *s
 	status &= ~TP_STATUS_LOSING;
 	spin_unlock(&sk->sk_receive_queue.lock);
 
-	memcpy((u8*)h + macoff, skb->data, snaplen);
+	memcpy(cur_frame + macoff, skb->data, snaplen);
 
-	h->tp_len = skb->len;
-	h->tp_snaplen = snaplen;
-	h->tp_mac = macoff;
-	h->tp_net = netoff;
-	if (skb->tstamp.off_sec == 0) {
+	if (skb->tstamp.off_sec == 0) {
 		__net_timestamp(skb);
 		sock_enable_timestamp(sk);
 	}
-	h->tp_sec = skb->tstamp.off_sec;
-	h->tp_usec = skb->tstamp.off_usec;
 
-	sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
+	sll = (struct sockaddr_ll*)(cur_frame + TPACKET_ALIGN(tpacket_hdr_size(po)));
 	sll->sll_halen = 0;
 	if (dev->hard_header_parse)
 		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
@@ -668,14 +769,12 @@ static int tpacket_rcv(struct sk_buff *s
 	sll->sll_pkttype = skb->pkt_type;
 	sll->sll_ifindex = dev->ifindex;
 
-	h->tp_status = status;
-	mb();
-
+	tpacket_hdr_update(po, idx, skb, snaplen, macoff, netoff, status);
 	{
 		struct page *p_start, *p_end;
-		u8 *h_end = (u8 *)h + macoff + snaplen - 1;
+		u8 *h_end = cur_frame + macoff + snaplen - 1;
 
-		p_start = virt_to_page(h);
+		p_start = virt_to_page(cur_frame);
 		p_end = virt_to_page(h_end);
 		while (p_start <= p_end) {
 			flush_dcache_page(p_start);
@@ -1542,11 +1641,8 @@ static unsigned int packet_poll(struct f
 	spin_lock_bh(&sk->sk_receive_queue.lock);
 	if (po->pg_vec) {
 		unsigned last = po->head ? po->head-1 : po->frame_max;
-		struct tpacket_hdr *h;
-
-		h = (struct tpacket_hdr *)packet_lookup_frame(po, last);
-		if (h->tp_status)
+		if (tpacket_hdr_status(po, last))
 			mask |= POLLIN | POLLRDNORM;
 	}
 	spin_unlock_bh(&sk->sk_receive_queue.lock);
@@ -1636,7 +1732,11 @@ static int packet_set_ring(struct sock *
 	struct packet_sock *po = pkt_sk(sk);
 	int was_running, num, order = 0;
 	int err = 0;
-
+
+#ifdef CONFIG_COMPAT
+	po->compat = COMPAT_TEST;
+#endif
+
 	if (req->tp_block_nr) {
 		int i, l;
 
@@ -1649,7 +1749,7 @@ static int packet_set_ring(struct sock *
 			return -EINVAL;
 		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
 			return -EINVAL;
-		if (unlikely(req->tp_frame_size < TPACKET_HDRLEN))
+		if (unlikely(req->tp_frame_size < tpacket_hdrlen(po)))
 			return -EINVAL;
 		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
 			return -EINVAL;
--
1.1.3
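P.S. For anyone who hasn't used the interface: below is a minimal, untested sketch of the userspace consumer side of PACKET_RX_RING, only to show the tp_status handshake that tpacket_hdr_status()/tpacket_hdr_update() implement on the kernel side. The block/frame sizes are arbitrary example values and error handling is omitted.

/* rx_ring.c: illustrative PACKET_RX_RING consumer (needs CAP_NET_RAW). */
#include <stdio.h>
#include <poll.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <arpa/inet.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>

int main(void)
{
	struct tpacket_req req = {
		.tp_block_size = 4096,	/* multiple of PAGE_SIZE */
		.tp_block_nr   = 2,
		.tp_frame_size = 2048,	/* multiple of TPACKET_ALIGNMENT */
		.tp_frame_nr   = 4,	/* blocks * frames per block */
	};
	int fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	unsigned int i = 0;
	char *ring;

	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
	ring = mmap(NULL, req.tp_block_size * req.tp_block_nr,
		    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

	for (;;) {
		volatile struct tpacket_hdr *hdr = (volatile struct tpacket_hdr *)
			(ring + i * req.tp_frame_size);
		struct pollfd pfd = { .fd = fd, .events = POLLIN };

		/* wait until the kernel hands this frame to userspace */
		while (!(hdr->tp_status & TP_STATUS_USER))
			poll(&pfd, 1, -1);

		printf("frame %u: %u of %u bytes, data at +%u\n",
		       i, hdr->tp_snaplen, hdr->tp_len, hdr->tp_mac);

		hdr->tp_status = TP_STATUS_KERNEL;	/* hand the frame back */
		i = (i + 1) % req.tp_frame_nr;
	}
	return 0;
}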