Hello, developers.

This cruft now works much better. Unfortunately I had to add some scary PTE tricks - you can find them in update_address(). One big nitpick is that the module cannot be unloaded if the application does not close its socket: the socket is removed only after the mapping is destroyed, so I have to grab an MM reference but cannot drop it. It also uses flush_tlb() all over the place, since that is the only such macro that can be used from modules - tlb_flush_page() and tlb_flush_one() are not exported. There is also a race on startup, when only one page (the control page) is mapped but userspace (which is very simple) may already want to access the data pages.

The control page contains a set of control structures, one per mapped page, i.e. per mapped skb; each control structure holds the offset of skb->mac.raw within the page and a flags field.
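To illustrate that layout, here is a minimal userspace sketch of walking the control page. It assumes the af_tlb module is loaded in place of the in-kernel af_packet; the DATA_PAGES constant and the printed output are made up for the example - tlb_test.c below is the real test program.

#include <sys/socket.h>
#include <sys/mman.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <linux/if_ether.h>

#include "af_tlb.h"

#define PAGE_SIZE	4096
#define DATA_PAGES	16		/* arbitrary number of data pages for this sketch */

int main(void)
{
	struct packet_shared *ps;
	void *map;
	int s, i;

	s = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (s == -1)
		return 1;

	/* Page 0 is the control page, pages 1..DATA_PAGES hold the mapped skbs. */
	map = mmap(NULL, (DATA_PAGES + 1) * PAGE_SIZE, PROT_READ, MAP_SHARED, s, 0);
	if (map == MAP_FAILED) {
		close(s);
		return 1;
	}

	for (i = 0; i < DATA_PAGES; ++i) {
		ps = &((struct packet_shared *)map)[i];

		/*
		 * PACKET_MAPPED is the guard against the startup race mentioned
		 * above: until the kernel sets it, data page i+1 is not mapped
		 * yet and must not be touched.
		 */
		if (!(ps->flags & (1 << PACKET_MAPPED)))
			continue;

		/* ps->offset is where skb->mac.raw starts inside the data page. */
		printf("data page %d: frame starts at offset %u\n",
				i + 1, (unsigned int)ps->offset);
	}

	munmap(map, (DATA_PAGES + 1) * PAGE_SIZE);
	close(s);
	return 0;
}

A real consumer would also remember the previously seen offsets (as tlb_test.c does with its old_ps copy) to detect when a data page has been refilled with a new skb.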
I would be glad to hear your comments. Thanks.

Included files:
  af_tlb.[ch] - zero-copy sniffer implementation.
  tlb_test.c  - simple userspace sniffer.

af_tlb.c

/*
 * 	af_tlb.c
 *
 * 2005 Copyright (c) Evgeniy Polyakov <[EMAIL PROTECTED]>
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kmod.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/workqueue.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/fs.h>
#include <linux/shm.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/hugetlb.h>
#include <linux/mman.h>
#include <linux/slab.h>
#include <linux/swapops.h>

#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>

#include "af_tlb.h"

static unsigned int free_timeout = 10;
module_param(free_timeout, uint, 0);

static void test_timer_func(void *data);
static DECLARE_WORK(w, test_timer_func, NULL);
static void packet_free_skbs(struct packet_sock *po, int clear_last);

static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	return (struct packet_sock *)sk;
}

static void packet_sock_destruct(struct sock *sk)
{
	BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));

	if (!sock_flag(sk, SOCK_DEAD)) {
		printk("Attempt to release alive packet socket: %p\n", sk);
		return;
	}
}

static struct proto_ops packet_ops_spkt;

static void dump_skb(struct sk_buff *skb)
{
	struct ethhdr *eth;
	int i;

	printk("shared=%d, cloned=%d, len=%4d: ",
			skb_shared(skb), skb_cloned(skb), skb->len);

	eth = eth_hdr(skb);

	printk("MAC: proto=%04x, src=", eth->h_proto);
	for (i=0; i<ETH_ALEN-1; ++i)
		printk("%02x:", eth->h_source[i]);
	printk("%02x, dst=", eth->h_source[ETH_ALEN-1]);
	for (i=0; i<ETH_ALEN-1; ++i)
		printk("%02x:", eth->h_dest[i]);
	printk("%02x.\n", eth->h_dest[ETH_ALEN-1]);
}

static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
{
	struct sock *sk;
	struct sockaddr_pkt *spkt;
	struct packet_sock *po;
	int err;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	po->total++;

	/*
	 * Yank back the headers [hope the device set this
	 * right or kerboom...]
	 *
	 * Incoming packets have ll header pulled,
	 * push it back.
	 *
	 * For outgoing ones skb->data == skb->mac.raw
	 * so that this procedure is noop.
	 */

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto out;

	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
		goto oom;

	/* drop any routing info */
	dst_release(skb->dst);
	skb->dst = NULL;

	spkt = (struct sockaddr_pkt *)skb->cb;

	skb_push(skb, skb->data - skb->mac.raw);

	/*
	 * The SOCK_PACKET socket receives _all_ frames.
	 */
	spkt->spkt_family = dev->type;
	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
	spkt->spkt_protocol = skb->protocol;

	err = sock_queue_rcv_skb(sk, skb);
	if (!err)
		po->queued++;
	else
		po->dropped++;

	if (test_bit(PACKET_SOCKET_MAPPED, &po->flags))
		schedule_work(&w);

	if (!err)
		return 0;

out:
	kfree_skb(skb);
oom:
	return 0;
}

/*
 * Close a PACKET socket. This is fairly simple. We immediately go
 * to 'closed' state and remove our protocol entry in the device list.
 */
static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;

	if (!sk)
		return 0;

	po = pkt_sk(sk);

	sk_del_node_init(sk);

	if (test_bit(PACKET_SOCKET_RUNNING, &po->flags)) {
		dev_remove_pack(&po->prot_hook);
		clear_bit(PACKET_SOCKET_RUNNING, &po->flags);
		__sock_put(sk);
	}

	sock_orphan(sk);
	sock->sk = NULL;

	printk("%s: Waiting to workqueue.\n", __func__);

	clear_bit(PACKET_SOCKET_RUNNING, &po->flags);
	cancel_delayed_work(&w);
	flush_scheduled_work();

	skb_queue_purge(&sk->sk_receive_queue);
	skb_queue_purge(&po->sk_free_queue);

	printk("%s: releasing page.\n", __func__);

	free_page(po->page);
	sock_put(sk);
	mmput(po->tsk->mm);

	return 0;
}

/*
 *	Attach a packet hook.
 */
static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
{
	struct packet_sock *po = pkt_sk(sk);

	lock_sock(sk);

	spin_lock(&po->bind_lock);
	if (test_bit(PACKET_SOCKET_RUNNING, &po->flags)) {
		__sock_put(sk);
		clear_bit(PACKET_SOCKET_RUNNING, &po->flags);
		po->num = 0;
		spin_unlock(&po->bind_lock);
		dev_remove_pack(&po->prot_hook);
		spin_lock(&po->bind_lock);
	}

	po->num = protocol;
	po->prot_hook.type = protocol;
	po->prot_hook.dev = dev;
	po->ifindex = dev ? dev->ifindex : 0;

	if (protocol == 0)
		goto out_unlock;

	if (dev) {
		if (dev->flags & IFF_UP) {
			dev_add_pack(&po->prot_hook);
			sock_hold(sk);
			set_bit(PACKET_SOCKET_RUNNING, &po->flags);
		} else {
			sk->sk_err = ENETDOWN;
			if (!sock_flag(sk, SOCK_DEAD))
				sk->sk_error_report(sk);
		}
	} else {
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		set_bit(PACKET_SOCKET_RUNNING, &po->flags);
	}

out_unlock:
	spin_unlock(&po->bind_lock);
	release_sock(sk);
	return 0;
}

static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	char name[15];
	struct net_device *dev;
	int err = -ENODEV;

	strlcpy(name, uaddr->sa_data, sizeof(name));

	printk("%s: name=%s.\n", __func__, name);

	if (addr_len != sizeof(struct sockaddr))
		return -EINVAL;

	dev = dev_get_by_name(name);
	if (dev) {
		err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
		dev_put(dev);
	}
	return err;
}

static int packet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
		default:
			return dev_ioctl(cmd, (void __user *)arg);
	}
	return 0;
}

static struct proto packet_proto = {
	.name	  = "PACKET",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct packet_sock),
};

static int packet_sock_init(struct packet_sock *po, int protocol, struct sock *sk)
{
	skb_queue_head_init(&po->sk_free_queue);

	po->last = 0;
	po->total = 0;
	po->dropped = 0;
	po->queued = 0;
	po->flags = 0;
	po->budget = 1;
	po->next_free = jiffies + msecs_to_jiffies(free_timeout);

	spin_lock_init(&po->bind_lock);

	po->tsk = current;

	po->page = __get_free_page(GFP_KERNEL);
	if (!po->page)
		return -ENOMEM;
	memset((void *)po->page, 0, PAGE_SIZE);

	po->num = protocol;
	po->prot_hook.func = packet_rcv_spkt;
	po->prot_hook.af_packet_priv = sk;

	get_task_mm(po->tsk);

	return 0;
}

static int packet_create(struct socket *sock, int protocol)
{
	struct sock *sk;
	struct packet_sock *po;
	int err;

	if (!capable(CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW && sock->type != SOCK_PACKET)
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;

	err = -ENOBUFS;
	sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1);
	if (sk == NULL)
		goto err_out_exit;

	sock->ops = &packet_ops_spkt;
	sock_init_data(sock, sk);

	po = pkt_sk(sk);
	sk->sk_family = PF_PACKET;
	sk->sk_destruct = packet_sock_destruct;

	err = packet_sock_init(po, protocol, sk);
	if (err)
		goto err_out_sock_free;

	if (protocol) {
		po->prot_hook.type = protocol;
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		set_bit(PACKET_SOCKET_RUNNING, &po->flags);
	}

	return 0;

err_out_sock_free:
	sk_free(sk);
err_out_exit:
	return err;
}

static struct packet_shared *packet_find_shared_lazy(struct packet_sock *po, struct sk_buff *skb)
{
	u16 offset = offset_in_page(skb->mac.raw);
	struct packet_shared *ps = (struct packet_shared *)po->page;
	int i;

	for (i=0; i<po->budget; ++i) {
		if (ps->offset == offset)
			break;
		ps++;
	}

	if (i == po->budget)
		return NULL;

	return ps;
}

static void packet_free_skbs(struct packet_sock *po, int clear_last)
{
	struct sk_buff *skb;
	int num = 0;
	//struct sock *sk = po->prot_hook.af_packet_priv;
	struct packet_shared *ps;
	struct page *page;

	while ((!skb_queue_empty(&po->sk_free_queue) && po->free_queued > po->budget) ||
			clear_last > 0) {
		spin_lock_bh(&po->sk_free_queue.lock);
		skb = __skb_dequeue(&po->sk_free_queue);
		if (skb)
			po->free_queued--;
		spin_unlock_bh(&po->sk_free_queue.lock);

		if (!skb)
			break;

		ps = packet_find_shared_lazy(po, skb);
		if (ps) {
			if (!test_bit(PACKET_MAPPED, &ps->flags))
				printk("%s: pos=%d, offset=%04x, flags=%08lx.\n",
						__func__, ps->pos, ps->offset, ps->flags);
			clear_bit(PACKET_MAPPED, &ps->flags);
		}

		page = virt_to_page(skb->mac.raw);
		put_page(page);
		if (!page_count(page)) {
			ClearPageReserved(page);
		}

		kfree_skb(skb);
		num++;
		clear_last--;
	}
#if 0
	printk("%s: freed=%d, free_queued=%d, qeued=%d [rmem=%d, max=%d], budget=%d, queued=%lu, dropped=%lu, total=%lu.\n",
			__func__, num, po->free_queued, skb_queue_len(&sk->sk_receive_queue),
			atomic_read(&sk->sk_rmem_alloc), sk->sk_rcvbuf,
			po->budget, po->queued, po->dropped, po->total);
#endif
}

static inline pte_t *get_pte(struct vm_area_struct *vma, unsigned long addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = pgd_offset(vma->vm_mm, addr);
	pud = pud_offset(pgd, addr);
	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		vma->vm_mm->nr_ptes--;
	pte = pte_offset_map(pmd, addr);

	printk("%s: addr=%08lx, pte=%p, %08lx, pmd=%p, pud=%p, pgd=%p, nr_pte=%ld.\n",
			__func__, addr, pte, pte_val(*pte), pmd, pud, pgd, vma->vm_mm->nr_ptes);

	return pte;
}

static inline void update_address(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn)
{
	pte_t *pte;
	struct page *page;

	pte = get_pte(vma, addr);
	page = pfn_to_page(pfn);

	printk("%s: pfn=%08lx, valid=%d, page=%p, res=%d, mapcount=%d.\n",
			__func__, pfn, pfn_valid(pfn), page, PageReserved(page), page_mapcount(page));

	pte_clear(vma->vm_mm, addr, pte);
	pte_unmap(pte);
}

static void test_timer_func(void *data)
{
	struct sock *sk = (struct sock *)data;
	struct packet_sock *po;
	struct packet_shared *ps;
	struct sk_buff *skb;
	unsigned long virt, start;
	int num = 0;

	if (!sk)
		return;

	po = pkt_sk(sk);

	if (!po || !po->tsk || !po->tsk->mm ||
			!test_bit(PACKET_SOCKET_RUNNING, &po->flags) ||
			!test_bit(PACKET_SOCKET_MAPPED, &po->flags))
		return;

	down_write(&po->tsk->mm->mmap_sem);
#if 1
	printk("%s: free_queued=%d, qeued=%d [rmem=%d, max=%d], budget=%d, queued=%lu, dropped=%lu, total=%lu.\n",
			__func__, po->free_queued, skb_queue_len(&sk->sk_receive_queue),
			atomic_read(&sk->sk_rmem_alloc), sk->sk_rcvbuf,
			po->budget, po->queued, po->dropped, po->total);
#endif

	while (++num <= po->budget && (skb = skb_dequeue(&sk->sk_receive_queue))) {
		virt = (unsigned long)skb->mac.raw;
		if (!virt)
			goto out;

		start = po->vma->vm_start + PAGE_SIZE*(1+po->last);
		ps = &((struct packet_shared *)po->page)[po->last];

		printk("s=%08lx, p=%p, pos=%d, offset=%04x, flags=%08lx.\n",
				start, virt_to_page(virt), ps->pos, ps->offset, ps->flags);

		if (0) {
			//int i;
			printk("offset=%4lx, num=%2d, last=%2d, users=%1d, dataref=%1d: ",
					offset_in_page(virt), num, po->last,
					atomic_read(&skb->users), atomic_read(&skb_shinfo(skb)->dataref));
			dump_skb(skb);
#if 0
			for (i=0; i<32; ++i)
				printk("%02x ", ((unsigned char *)virt)[i]);
			printk("\n");
#endif
		}

		/*
		 * This actually should not be flush_tlb(),
		 * but it is the only one call that can be used in modules.
		 * --zbr
		 */
		update_address(po->vma, start, __pa(virt) >> PAGE_SHIFT);
		__flush_tlb();
		SetPageReserved(virt_to_page(virt));
		get_page(virt_to_page(virt));
		if (remap_pfn_range(po->vma, start, __pa(virt) >> PAGE_SHIFT,
					PAGE_SIZE, po->vma->vm_page_prot)) {
			printk("Remapping error.\n");
			ClearPageReserved(virt_to_page(virt));
			goto out;
		}
		flush_dcache_page(virt_to_page(virt));

		if (test_bit(PACKET_MAPPED, &ps->flags))
			packet_free_skbs(po, 1);

		ps->offset = offset_in_page(virt);
		set_bit(PACKET_MAPPED, &ps->flags);

		if (++po->last == po->budget)
			po->last = 0;

		{
			start = po->vma->vm_start;

			while (start < po->vma->vm_end) {
				pte_t *pte = get_pte(po->vma, start);

				if (pte_present(*pte)) {
					struct page *page = NULL;
					unsigned long pfn = pte_pfn(*pte);

					if (pfn_valid(pfn)) {
						page = pfn_to_page(pfn);
						printk("s=%08lx, p=%p, r=%d, m=%d, pfn=%08lx.\n",
								start, page, PageReserved(page),
								page_mapcount(page), pfn);
					} else
						printk("p=NULL, pfn=%08lx.\n", pfn);
				} else {
					printk("pte=%p is not present.\n", pte);
				}

				start += PAGE_SIZE;
			}
		}
out:
		/*
		 * Actually here should be some smart algo, which will defer skb freeing
		 * until userspace has "read" it, so userspace should provide some kind of callback,
		 * which will require write permissions to the area, so it should be split.
		 * Or better just free it after some timeout, say 100 msec should be enough.
		 * --zbr
		 *
		 * Tricky algo is to place skbs into a new list, which will be traversed
		 * at some interval and skbs will be unlinked and freed.
		 * Actually, there is no need to lock this queue against freeing, since it happens
		 * synchronously, but if someday freeing becomes separate nothing will be changed.
		 * --zbr
		 */
		spin_lock_bh(&po->sk_free_queue.lock);
		po->free_queued++;
		__skb_queue_tail(&po->sk_free_queue, skb);
		spin_unlock_bh(&po->sk_free_queue.lock);
	}

#if 0
	if (time_after(jiffies, po->next_free)) {
		po->next_free = jiffies + msecs_to_jiffies(free_timeout);
		packet_free_skbs(po, 0);
	}
#endif
	printk("%s: UP: po->tsk->mm=%p.\n", __func__, po->tsk->mm);
	up_write(&po->tsk->mm->mmap_sem);
	printk("%s finished.\n", __func__);
}

static void packet_mm_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct inode *inode = file->f_dentry->d_inode;
	struct socket *sock = SOCKET_I(inode);
	struct sock *sk = sock->sk;

	printk("%s, sk=%p.\n", __func__, sk);
}

static void packet_mm_close(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct inode *inode = file->f_dentry->d_inode;
	struct socket *sock = SOCKET_I(inode);
	struct sock *sk = sock->sk;

	printk("%s, sk=%p.\n", __func__, sk);

	if (sk) {
		struct packet_sock *po = pkt_sk(sk);

		if (po) {
			down_write(&vma->vm_mm->mmap_sem);
			clear_bit(PACKET_SOCKET_MAPPED, &po->flags);
			up_write(&vma->vm_mm->mmap_sem);
		}
	}
}

static struct vm_operations_struct packet_mmap_ops = {
	.open	= packet_mm_open,
	.close	= packet_mm_close,
};

static int packet_mmap_test(struct socket *sock, struct vm_area_struct *vma)
{
	int i;
	struct timeval tv1, tv2;
	unsigned long start = vma->vm_start;
	u8 *data1, *data2;

	do_gettimeofday(&tv1);
	for (i=0; i<1000; i++) {
		update_address(vma, start, __pa(PAGE_OFFSET) >> PAGE_SHIFT);
		__flush_tlb();
		if (remap_pfn_range(vma, start, __pa(PAGE_OFFSET) >> PAGE_SHIFT,
					PAGE_SIZE, vma->vm_page_prot))
			break;
		start += PAGE_SIZE;
	}
	do_gettimeofday(&tv2);

	printk("%s: 1000 remaps took %lu usec.\n", __func__,
			(tv2.tv_sec - tv1.tv_sec)*1000000 + tv2.tv_usec - tv1.tv_usec);

	data1 = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!data1)
		return -ENOMEM;
	data2 = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!data2) {
		kfree(data1);
		return -ENOMEM;
	}
	do_gettimeofday(&tv1);
	for (i=0; i<1000; i++) {
		memcpy(data1, data2, 1500);
	}
	do_gettimeofday(&tv2);

	printk("%s: 1000 copies took %lu usec.\n", __func__,
			(tv2.tv_sec - tv1.tv_sec)*1000000 + tv2.tv_usec - tv1.tv_usec);

	kfree(data1);
	kfree(data2);

	return 0;
}

static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size = vma->vm_end - vma->vm_start;
	int err = 0;

	vma->vm_ops = &packet_mmap_ops;

	//err = packet_mmap_test(sock, vma);
	if (err)
		return err;

	lock_sock(sk);

	po->budget = (size - PAGE_SIZE) / PAGE_SIZE;

	update_address(vma, vma->vm_start, __pa(po->page) >> PAGE_SHIFT);
	__flush_tlb();
	SetPageReserved(virt_to_page(po->page));
	if (remap_pfn_range(vma, vma->vm_start, __pa(po->page) >> PAGE_SHIFT,
				PAGE_SIZE, vma->vm_page_prot)) {
		ClearPageReserved(virt_to_page(po->page));
		err = -EIO;
		goto err_out_unlock;
	}

	po->vma = vma;

	release_sock(sk);

	INIT_WORK(&w, test_timer_func, sk);

	set_bit(PACKET_SOCKET_MAPPED, &po->flags);

	return 0;

err_out_unlock:
	release_sock(sk);
	return err;
}

static unsigned int packet_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned int mask = datagram_poll(file, sock, wait);

	spin_lock_bh(&sk->sk_receive_queue.lock);
	if (po->free_queued < po->total)
		mask |= POLLIN | POLLRDNORM;
	spin_unlock_bh(&sk->sk_receive_queue.lock);

	return mask;
}

static struct proto_ops packet_ops_spkt = {
	.family		= PF_PACKET,
	.owner		= THIS_MODULE,
	.release	= packet_release,
	.bind		= packet_bind,
	.connect	= sock_no_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= sock_no_accept,
	.getname	= sock_no_getname,
	.poll		= packet_poll,
	.ioctl		= packet_ioctl,
	.listen		= sock_no_listen,
	.shutdown	= sock_no_shutdown,
	.setsockopt	= sock_no_setsockopt,
	.getsockopt	= sock_no_getsockopt,
	.sendmsg	= sock_no_sendmsg,
	.recvmsg	= sock_no_recvmsg,
	.mmap		= packet_mmap,
	.sendpage	= sock_no_sendpage,
};

static struct net_proto_family packet_family_ops = {
	.family	= PF_PACKET,
	.create	= packet_create,
	.owner	= THIS_MODULE,
};

static void __exit packet_exit(void)
{
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
}

static int __init packet_init(void)
{
	int rc = proto_register(&packet_proto, 0);

	if (rc != 0)
		goto out;

	sock_register(&packet_family_ops);

	printk("%s: initialized at %lu.\n", __func__, jiffies);
out:
	return rc;
}

module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);

af_tlb.h

/*
 * 	af_tlb.h
 *
 * 2005 Copyright (c) Evgeniy Polyakov <[EMAIL PROTECTED]>
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#ifndef __AF_TLB_H
#define __AF_TLB_H

enum packet_shared_flags {
	PACKET_MAPPED = 0,
};

struct packet_shared {
	__u16			offset;
	__u16			reserved;
	int			pos;
	long			flags;
} __attribute__ ((packed));

#ifdef __KERNEL__

enum packet_flags {
	PACKET_SOCKET_RUNNING = 0,
	PACKET_SOCKET_MAPPED,
};

struct packet_sock {
	struct sock		sk;
	struct packet_type	prot_hook;
	spinlock_t		bind_lock;
	long			flags;
	int			ifindex;
	unsigned short		num;
	struct vm_area_struct	*vma;
	struct task_struct	*tsk;
	int			budget, last;
	unsigned long		page;
	struct sk_buff_head	sk_free_queue;
	int			free_queued;
	unsigned long		next_free;
	unsigned long		queued;
	unsigned long		dropped;
	unsigned long		total;
};

#endif /* __KERNEL__ */
#endif /* __AF_TLB_H */

tlb_test.c

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <sys/poll.h>

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <net/ethernet.h>

#include <linux/if_ether.h>
#include <linux/types.h>

#include "af_tlb.h"

#define PAGE_SIZE	4096
static size_t mmap_size = 17*PAGE_SIZE;

#define ulog(f, a...) do { fprintf(stderr, f, ##a); fflush(stderr); } while (0)

#define NIPQUAD(addr) \
	((unsigned char *)&addr)[0], \
	((unsigned char *)&addr)[1], \
	((unsigned char *)&addr)[2], \
	((unsigned char *)&addr)[3]

static __inline__ void set_bit(int bit, uint32_t *f)
{
	*f |= (1<<bit);
}

static __inline__ void clear_bit(int bit, uint32_t *f)
{
	*f &= ~(1<<bit);
}

static __inline__ int test_bit(int bit, uint32_t *f)
{
	return ((*f >> bit) & 1);
}

static void dump_data(void *ptr, __u16 offset, int size)
{
	int i;
	unsigned char *data = ptr + offset;

	ulog("%p: ", ptr);
	for (i=0; i<size; ++i)
		ulog("%02x ", data[i]);
	ulog("\n");
}

static int dump_network(void *ptr, __u16 offset)
{
	struct ether_header *eth = ptr + offset;
	struct iphdr *ip;
	char *proto;
	int i;
	unsigned short ether_type;

	//ulog("offset=%x: ", offset);

	ether_type = ntohs(eth->ether_type);

	if (ether_type != ETH_P_IP && ether_type != ETH_P_ARP) {
		//ulog("\n");
		return -1;
	}

	ulog("MAC: proto=%04x, src=", eth->ether_type);
	for (i=0; i<ETH_ALEN-1; ++i)
		ulog("%02x:", eth->ether_shost[i]);
	ulog("%02x, dst=", eth->ether_shost[ETH_ALEN-1]);
	for (i=0; i<ETH_ALEN-1; ++i)
		ulog("%02x:", eth->ether_dhost[i]);
", eth->ether_dhost[ETH_ALEN-1]); if (ether_type != ETH_P_IP) { dump_data(ptr, offset + sizeof(*eth), 16); return 0; } ip = (struct iphdr *)(ptr + offset + sizeof(*eth)); switch (ip->protocol) { case IPPROTO_TCP: proto = "TCP "; break; case IPPROTO_UDP: proto = "UDP "; break; case IPPROTO_ICMP: proto = "ICMP"; break; default: proto = "UNKN"; dump_data(ptr, offset + sizeof(*eth), 16); return 0; } ulog("%s: ", proto); ulog("%u.%u.%u.%u -> %u.%u.%u.%u.\n", NIPQUAD(ip->saddr), NIPQUAD(ip->daddr)); return 0; } int main(int argc, char *argv[]) { struct sockaddr sa; int s, err, num, i, j; socklen_t len = sizeof(sa); void *mmap_ptr; struct packet_shared *ps, *ops; void *old_ps; struct pollfd pfd; if (argc > 1) memcpy(sa.sa_data, argv[1], sizeof(sa.sa_data)); else memcpy(sa.sa_data, "eth0", sizeof(sa.sa_data)); old_ps = malloc(PAGE_SIZE); if (!old_ps) { ulog("Failed to allocate backup packet shared page.\n"); return -ENOMEM; } memset(old_ps, 0, PAGE_SIZE); s = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); if (s == -1) { ulog("Failed to create PF_PACKET socket: %s [%d].\n", strerror(errno), errno); err = -errno; goto err_out_free_old_ps; } mmap_ptr = mmap(NULL, mmap_size, PROT_READ, MAP_SHARED, s, 0); if (mmap_ptr == MAP_FAILED) { ulog("Failed to map socket %d: %s [%d].\n", s, strerror(errno), errno); err = -errno; goto err_out_close; } err = bind(s, &sa, len); if (err == -1) { ulog("Failed to bind socket %d to device %s: %s [%d].\n", s, sa.sa_data, strerror(errno), errno); goto err_out_unmap; } pfd.fd = s; pfd.events = POLLIN; pfd.revents = 0; num = (mmap_size - PAGE_SIZE) / PAGE_SIZE; j = 0; while (1) { /*err = poll(&pfd, 1, -1); if ((err == 0 || err == -1) && (errno != EINTR)) { err = -errno; break; }*/ ps = (struct packet_shared *)mmap_ptr; ops = (struct packet_shared *)old_ps; for (i=0; i<num; ++i) { void *ptr = mmap_ptr + PAGE_SIZE*(i+1); if (test_bit(PACKET_MAPPED, &ps->flags) && ps->offset != ops->offset) { err = dump_network(ptr, ps->offset); if (++j > 1000) goto err_out_unmap; } #if 0 if (err && ps->offset) dump_data(ptr, ps->offset, 32); #endif *ops++ = *ps++; } pfd.events = POLLIN; pfd.revents = 0; } err = 0; err_out_unmap: munmap(mmap_ptr, mmap_size); err_out_close: close(s); err_out_free_old_ps: free(old_ps); return err; } Makefile. obj-m := af_tlb.o KDIR := /lib/modules/`uname -r`/build #KDIR := /usr/local/src/linux-2.6 PWD := $(shell pwd) UCFLAGS := -W -Wall default: $(MAKE) -C $(KDIR) SUBDIRS=$(PWD) modules test: gcc $(UCFLAGS) tlb_test.c -o tlb_test clean: $(MAKE) -C $(KDIR) SUBDIRS=$(PWD) clean @rm -f *~ -- Evgeniy Polyakov - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html