Hello developers. For those of you who are unsure whether it is worth switching to/from netlink, I've created a simple set of benchmarks that shows the performance of _blocking_ data transfer from kernelspace to userspace using unicast and broadcast. There are several tests for different numbers of listening users (1, 2, 4 and 10) and different message sizes (128, 512, 1024 and 4096 bytes). 10k messages are transferred from process context (keventd) in one loop. For unicasting this number of messages is multiplied by the number of listening users, so that each user receives 10k messages.
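For reference, a listening user in these tests is just a netlink socket bound with a pid (the unicast destination) and a multicast group (for broadcast). The sketch below is illustrative only and is not the actual userspace tool from [1]; the NETLINK_TEST protocol number and group 1 are assumptions:

/* Minimal sketch of one listening user (not the tool from [1]).
 * NETLINK_TEST = 17 is an assumed protocol number for illustration. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>

#define NETLINK_TEST	17

int main(void)
{
	struct sockaddr_nl addr;
	char buf[4096];
	int s, count = 0;

	s = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_TEST);
	if (s < 0)
		return 1;

	memset(&addr, 0, sizeof(addr));
	addr.nl_family = AF_NETLINK;
	addr.nl_pid = getpid();		/* id the kernel uses as unicast destination */
	addr.nl_groups = 1;		/* bitmask: join multicast group 1 for the broadcast test */

	if (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return 1;

	/* Drain the 10k messages pushed by the kernel module. */
	while (count < 10000 && recv(s, buf, sizeof(buf), 0) > 0)
		count++;

	printf("received %d messages\n", count);
	return 0;
}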
Messages are sent using a blocking technique, since otherwise the socket queue (200k by default) fills up before the first rescheduling, i.e. it is not possible to push a large number of events without rescheduling through the nonblocking methods (netlink_broadcast(), for example). While unicast easily allows blocking sends, broadcast does not, so I created a simple patch [2] to implement this functionality. It is only useful for testing, since the netlink table stays locked for the duration of the test. The performance testing module and the userspace tool are available from [1]. The test was run on a 2.4 GHz Xeon (HT enabled) with 1 GB of RAM, running a 2.6.15-rc6 kernel. The performance graph is attached. (A rough sketch of how the test module drives the blocking send path, with assumed names, follows after the patch.)

From this test one can see that unicasting is faster for 1 and maybe 2 users, while broadcast definitely wins as the number of users increases.

1. http://tservice.net.ru/~s0mbre/old/?section=projects&item=netlink_perf

2. Blocking broadcast implementation:

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 96020d7..49423fa 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -805,7 +805,7 @@ retry:
 	return netlink_sendskb(sk, skb, ssk->sk_protocol);
 }
 
-static __inline__ int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
+static __inline__ int netlink_broadcast_deliver_nowait(struct sock *sk, struct sk_buff *skb)
 {
 	struct netlink_sock *nlk = nlk_sk(sk);
 
@@ -819,6 +819,41 @@ static __inline__ int netlink_broadcast_
 	return -1;
 }
 
+static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb, long timeout)
+{
+	int err, attempts = 0;
+
+	if (!timeout)
+		return netlink_broadcast_deliver_nowait(sk, skb);
+
+	/*
+	 * These references will be released in netlink_attachskb() in case of any errors
+	 * or if the timeout expires.
+	 */
+	skb = skb_get(skb);
+retry:
+	sock_hold(sk);
+
+	err = netlink_attachskb(sk, skb, 1, timeout);
+	if (err == 1) {
+		if (++attempts > 5) {
+			printk(KERN_ERR "%s: sk=%p, skb=%p, timeout=%ld.\n",
+					__func__, sk, skb, timeout);
+			kfree_skb(skb);
+			return -1;
+		}
+		goto retry;
+	} else if (err)
+		return err;
+	kfree_skb(skb);
+	sock_put(sk);
+
+	skb_queue_tail(&sk->sk_receive_queue, skb);
+	sk->sk_data_ready(sk, skb->len);
+
+	return atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf;
+}
+
 struct netlink_broadcast_data {
 	struct sock *exclude_sk;
 	u32 pid;
@@ -827,6 +862,7 @@ struct netlink_broadcast_data {
 	int congested;
 	int delivered;
 	gfp_t allocation;
+	long timeout;
 	struct sk_buff *skb, *skb2;
 };
 
@@ -865,7 +901,7 @@ static inline int do_one_broadcast(struc
 		netlink_overrun(sk);
 		/* Clone failed. Notify ALL listeners. */
 		p->failure = 1;
-	} else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
+	} else if ((val = netlink_broadcast_deliver(sk, p->skb2, p->timeout)) < 0) {
 		netlink_overrun(sk);
 	} else {
 		p->congested |= val;
@@ -878,8 +914,8 @@ out:
 	return 0;
 }
 
-int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
-		      u32 group, gfp_t allocation)
+int do_netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
+		      u32 group, gfp_t allocation, long timeout)
 {
 	struct netlink_broadcast_data info;
 	struct hlist_node *node;
@@ -896,6 +932,7 @@ int netlink_broadcast(struct sock *ssk,
 	info.allocation = allocation;
 	info.skb = skb;
 	info.skb2 = NULL;
+	info.timeout = timeout;
 
 	/* While we sleep in clone, do not allow to change socket list */
 
@@ -921,6 +958,18 @@ int netlink_broadcast(struct sock *ssk,
 	return -ESRCH;
 }
 
+int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
+		      u32 group, gfp_t allocation)
+{
+	return do_netlink_broadcast(ssk, skb, pid, group, allocation, 0);
+}
+
+int netlink_broadcast_wait(struct sock *ssk, struct sk_buff *skb, u32 pid,
+		      u32 group, gfp_t allocation, long timeout)
+{
+	return do_netlink_broadcast(ssk, skb, pid, group, allocation, timeout);
+}
+
 struct netlink_set_err_data {
 	struct sock *exclude_sk;
 	u32 pid;
@@ -1751,6 +1800,7 @@ EXPORT_SYMBOL(netlink_ack);
 EXPORT_SYMBOL(netlink_run_queue);
 EXPORT_SYMBOL(netlink_queue_skip);
 EXPORT_SYMBOL(netlink_broadcast);
+EXPORT_SYMBOL(netlink_broadcast_wait);
 EXPORT_SYMBOL(netlink_dump_start);
 EXPORT_SYMBOL(netlink_kernel_create);
 EXPORT_SYMBOL(netlink_register_notifier);

--
	Evgeniy Polyakov
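P.S. To make the setup concrete, here is a rough sketch (assumed nlperf_* names, parameters and values; this is not the code of the module from [1]) of how a keventd work handler can drive the two blocking send paths: netlink_unicast() with nonblock=0, and the netlink_broadcast_wait() added by the patch above:

/* Sketch only: nlperf_* identifiers, msg_size, use_broadcast and user_pid
 * are made up for illustration; the real module from [1] may differ. */
#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <linux/workqueue.h>
#include <net/sock.h>

/* Exported by the patch above; there is no header declaration yet. */
extern int netlink_broadcast_wait(struct sock *ssk, struct sk_buff *skb,
		u32 pid, u32 group, gfp_t allocation, long timeout);

static struct sock *nlperf_sock;	/* from netlink_kernel_create() */
static int msg_size = 128;		/* 128/512/1024/4096 in the tests */
static int use_broadcast;
static u32 user_pid;			/* pid of one bound listener (unicast case) */

static void nlperf_send(void *data)
{
	int i;

	/* 10k messages per loop; with several users the unicast loop
	 * is repeated once per user, so each user gets 10k messages. */
	for (i = 0; i < 10000; i++) {
		struct sk_buff *skb = alloc_skb(msg_size, GFP_KERNEL);

		if (!skb)
			break;
		memset(skb_put(skb, msg_size), 0, msg_size);

		if (use_broadcast)
			/* Blocking broadcast added by the patch above:
			 * sleeps up to HZ jiffies per congested receiver. */
			netlink_broadcast_wait(nlperf_sock, skb, 0, 1,
					GFP_KERNEL, HZ);
		else
			/* Blocking unicast: nonblock=0, so the sender sleeps
			 * when the receiver's queue is full. */
			netlink_unicast(nlperf_sock, skb, user_pid, 0);
	}
}

/* 2.6.15-style work item; schedule_work(&nlperf_work) runs this from keventd. */
static DECLARE_WORK(nlperf_work, nlperf_send, NULL);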
[Attachment: netlink_perf.png (PNG image), the performance graph referenced above]