Hi Dave! The attached patch adds support for refcounting of modules implementing netlink protocols. The idea is that you prevent the module from disappearing as long as someone in userspace has still a socket talking to you.
I think the current behaviour can be quite confusing, since modules like ipt_ULOG or nfnetlink can have a use count of '0' even though a deamon is listening for events [and will not get notified of module unlaoding unless it actively sends a message]. Apart from that, it also adds a way for netlink protocols to support autoloading. This means as soon as any user opens a socket for a particular protocol (suchas NETLINK_NETFILTER), the kernel would try to open net-pf-16-proto-xx where 16 is AF_NETLINK and xx is the protocol number. [I'll probably also try to extend that mechanism to ipv4-sctp in a separate patch] The rest of the patch is some cleanup: - nl_table statically allocated - add missing MODULE_AUTHOR to nfnetlink.c - port all existing netlink users to new three-argument netlink_kernel_create() function - remove all code for modularized netlink, since it is always linked statically into the kernel If you agree with those changes, please apply to your net-2.6.14 tree. Thanks, Harald -- - Harald Welte <[EMAIL PROTECTED]> http://netfilter.org/ ============================================================================ "Fragmentation is like classful addressing -- an interesting early architectural error that shows how much experimentation was going on while IP was being designed." -- Paul Vixie
- Remove bogus code for compiling netlink as module - Add module refcounting support for modules implementing a netlink protocol - Statically allocate nl_table instead of kmalloc() at startup - Add support for autoloading modules that implement a netlink protocol as soon as someone opens a socket for that protocol Signed-off-by: Harald Welte <[EMAIL PROTECTED]> --- commit 09dd41ae8aa26ee297e2291b71641ffee8d98e83 tree 90132d31bdc6e189c02c7b4da412b3a61a4ad7f1 parent b4a566c332048b642506eff7de825fce710ff42c author laforge <[EMAIL PROTECTED]> Sa, 23 Jul 2005 16:04:18 -0400 committer laforge <[EMAIL PROTECTED]> Sa, 23 Jul 2005 16:04:18 -0400 drivers/w1/w1_int.c | 4 + include/linux/net.h | 3 + include/linux/netlink.h | 2 - kernel/audit.c | 3 + lib/kobject_uevent.c | 3 + net/bridge/netfilter/ebt_ulog.c | 2 - net/core/rtnetlink.c | 2 - net/decnet/netfilter/dn_rtmsg.c | 4 + net/ipv4/fib_frontend.c | 2 - net/ipv4/netfilter/ip_queue.c | 3 + net/ipv4/netfilter/ipt_ULOG.c | 3 + net/ipv4/tcp_diag.c | 3 + net/ipv6/netfilter/ip6_queue.c | 2 - net/netfilter/nfnetlink.c | 5 +- net/netlink/af_netlink.c | 128 ++++++++++++++++++++++++++++----------- net/xfrm/xfrm_user.c | 4 + security/selinux/netlink.c | 2 - 17 files changed, 124 insertions(+), 51 deletions(-) diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c @@ -88,7 +88,7 @@ static struct w1_master * w1_alloc_dev(u dev->groups = 23; dev->seq = 1; - dev->nls = netlink_kernel_create(NETLINK_W1, NULL); + dev->nls = netlink_kernel_create(NETLINK_W1, NULL, THIS_MODULE); if (!dev->nls) { printk(KERN_ERR "Failed to create new netlink socket(%u) for w1 master %s.\n", NETLINK_W1, dev->dev.bus_id); @@ -225,3 +225,5 @@ void w1_remove_master_device(struct w1_b EXPORT_SYMBOL(w1_add_master_device); EXPORT_SYMBOL(w1_remove_master_device); + +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_W1); diff --git a/include/linux/net.h b/include/linux/net.h --- a/include/linux/net.h +++ b/include/linux/net.h @@ -282,5 +282,8 @@ static struct proto_ops name##_ops = { #define MODULE_ALIAS_NETPROTO(proto) \ MODULE_ALIAS("net-pf-" __stringify(proto)) +#define MODULE_ALIAS_NET_PF_PROTO(pf, proto) \ + MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto)) + #endif /* __KERNEL__ */ #endif /* _LINUX_NET_H */ diff --git a/include/linux/netlink.h b/include/linux/netlink.h --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -119,7 +119,7 @@ struct netlink_skb_parms #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) -extern struct sock *netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len)); +extern struct sock *netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct module *module); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid, diff --git a/kernel/audit.c b/kernel/audit.c --- a/kernel/audit.c +++ b/kernel/audit.c @@ -514,7 +514,8 @@ static int __init audit_init(void) { printk(KERN_INFO "audit: initializing netlink socket (%s)\n", audit_default ? "enabled" : "disabled"); - audit_sock = netlink_kernel_create(NETLINK_AUDIT, audit_receive); + audit_sock = netlink_kernel_create(NETLINK_AUDIT, audit_receive, + THIS_MODULE); if (!audit_sock) audit_panic("cannot initialize netlink socket"); diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -153,7 +153,8 @@ EXPORT_SYMBOL_GPL(kobject_uevent_atomic) static int __init kobject_uevent_init(void) { - uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, NULL); + uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, NULL, + THIS_MODULE); if (!uevent_sock) { printk(KERN_ERR diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -258,7 +258,7 @@ static int __init init(void) spin_lock_init(&ulog_buffers[i].lock); } - ebtulognl = netlink_kernel_create(NETLINK_NFLOG, NULL); + ebtulognl = netlink_kernel_create(NETLINK_NFLOG, NULL, THIS_MODULE); if (!ebtulognl) ret = -ENOMEM; else if ((ret = ebt_register_watcher(&ulog))) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -708,7 +708,7 @@ void __init rtnetlink_init(void) if (!rta_buf) panic("rtnetlink_init: cannot allocate rta_buf\n"); - rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv); + rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv, THIS_MODULE); if (rtnl == NULL) panic("rtnetlink_init: cannot initialize rtnetlink\n"); netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -138,7 +138,8 @@ static int __init init(void) { int rv = 0; - dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, dnrmg_receive_user_sk); + dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, dnrmg_receive_user_sk, + THIS_MODULE); if (dnrmg == NULL) { printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket"); return -ENOMEM; @@ -162,6 +163,7 @@ static void __exit fini(void) MODULE_DESCRIPTION("DECnet Routing Message Grabulator"); MODULE_AUTHOR("Steven Whitehouse <[EMAIL PROTECTED]>"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_DNRTMSG); module_init(init); module_exit(fini); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -567,7 +567,7 @@ static void nl_fib_input(struct sock *sk static void nl_fib_lookup_init(void) { - netlink_kernel_create(NETLINK_FIB_LOOKUP, nl_fib_input); + netlink_kernel_create(NETLINK_FIB_LOOKUP, nl_fib_input, THIS_MODULE); } static void fib_disable_ip(struct net_device *dev, int force) diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -685,7 +685,8 @@ init_or_cleanup(int init) goto cleanup; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_FIREWALL, ipq_rcv_sk); + ipqnl = netlink_kernel_create(NETLINK_FIREWALL, ipq_rcv_sk, + THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -62,6 +62,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <[EMAIL PROTECTED]>"); MODULE_DESCRIPTION("iptables userspace logging module"); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG); #define ULOG_NL_EVENT 111 /* Harald's favorite number */ #define ULOG_MAXNLGROUPS 32 /* numer of nlgroups */ @@ -372,7 +373,7 @@ static int __init init(void) ulog_buffers[i].timer.data = i; } - nflognl = netlink_kernel_create(NETLINK_NFLOG, NULL); + nflognl = netlink_kernel_create(NETLINK_NFLOG, NULL, THIS_MODULE); if (!nflognl) return -ENOMEM; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -774,7 +774,8 @@ static void tcpdiag_rcv(struct sock *sk, static int __init tcpdiag_init(void) { - tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv); + tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv, + THIS_MODULE); if (tcpnl == NULL) return -ENOMEM; return 0; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -676,7 +676,7 @@ init_or_cleanup(int init) goto cleanup; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk); + ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk, THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -38,6 +38,8 @@ #include <linux/netfilter/nfnetlink.h> MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte <[EMAIL PROTECTED]>"); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); static char __initdata nfversion[] = "0.30"; @@ -323,7 +325,8 @@ int __init nfnetlink_init(void) { printk("Netfilter messages via NETLINK v%s.\n", nfversion); - nfnl = netlink_kernel_create(NETLINK_NETFILTER, nfnetlink_rcv); + nfnl = netlink_kernel_create(NETLINK_NETFILTER, nfnetlink_rcv, + THIS_MODULE); if (!nfnl) { printk(KERN_ERR "cannot initialize nfnetlink!\n"); return -1; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -13,6 +13,13 @@ * added netlink_proto_exit * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <[EMAIL PROTECTED]> * use nlk_sk, as sk->protinfo is on a diet 8) + * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <[EMAIL PROTECTED]> + * - inc module use count of module that owns + * the kernel socket in case userspace opens + * socket of same protocol + * - statically allocate nl_table + * - remove all module support, since netlink is + * mandatory if CONFIG_NET=y these days * */ @@ -92,9 +99,10 @@ struct netlink_table { struct nl_pid_hash hash; struct hlist_head mc_list; unsigned int nl_nonroot; + struct proto_ops *p_ops; }; -static struct netlink_table *nl_table; +static struct netlink_table nl_table[MAX_LINKS]; static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); @@ -341,7 +349,20 @@ static int netlink_create(struct socket if (protocol<0 || protocol >= MAX_LINKS) return -EPROTONOSUPPORT; - sock->ops = &netlink_ops; + netlink_table_grab(); + if (!nl_table[protocol].hash.entries) { + netlink_table_ungrab(); +#ifdef CONFIG_KMOD + /* We do 'best effort'. If we find a matching module, + * it is loaded. If not, we don't return an error to + * allow pure userspace<->userspace communication. -HW */ + request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol); + netlink_table_grab(); +#endif + } + netlink_table_ungrab(); + + sock->ops = nl_table[protocol].p_ops; sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); if (!sk) @@ -394,6 +415,20 @@ static int netlink_release(struct socket }; notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n); } + + /* when this is a kernel socket, we need to remove the owner pointer, + * since we don't know whether the module will be dying at any given + * point - HW */ + if (!nlk->pid) { + struct proto_ops *p_tmp; + netlink_table_grab(); + p_tmp = nl_table[sk->sk_protocol].p_ops; + if (p_tmp != &netlink_ops) { + nl_table[sk->sk_protocol].p_ops = &netlink_ops; + kfree(p_tmp); + } + netlink_table_ungrab(); + } sock_put(sk); return 0; @@ -1023,33 +1058,70 @@ static void netlink_data_ready(struct so */ struct sock * -netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len)) +netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), + struct module *module) { + struct proto_ops *p_ops; struct socket *sock; struct sock *sk; - if (!nl_table) + if (unit<0 || unit>=MAX_LINKS) return NULL; - if (unit<0 || unit>=MAX_LINKS) + /* do a quick check, to make us not go down to netlink_insert() + * if protocol already has kernel socket */ + if (netlink_lookup(unit, 0)) return NULL; if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) return NULL; + if (module) { + /* every registering protocol implemented in a module needs + * it's own p_ops, since the socket code cannot deal with + * module refcounting otherwise + * -HW */ + p_ops = kmalloc(sizeof(*p_ops), GFP_KERNEL); + if (!p_ops) { + sk = NULL; + goto out_sock_release; + } + + memcpy(p_ops, &netlink_ops, sizeof(*p_ops)); + p_ops->owner = module; + } else + p_ops = &netlink_ops; + + netlink_table_grab(); + nl_table[unit].p_ops = p_ops; + netlink_table_ungrab(); + if (netlink_create(sock, unit) < 0) { - sock_release(sock); - return NULL; + sk = NULL; + goto out_kfree_p_ops; } + sk = sock->sk; sk->sk_data_ready = netlink_data_ready; if (input) nlk_sk(sk)->data_ready = input; if (netlink_insert(sk, 0)) { - sock_release(sock); - return NULL; + sk = NULL; + goto out_kfree_p_ops; } + + return sk; + +out_kfree_p_ops: + netlink_table_grab(); + if (nl_table[unit].p_ops != &netlink_ops) { + kfree(nl_table[unit].p_ops); + nl_table[unit].p_ops = &netlink_ops; + } + netlink_table_ungrab(); +out_sock_release: + sock_release(sock); return sk; } @@ -1386,21 +1458,18 @@ static int __init netlink_proto_init(voi unsigned int order; int err = proto_register(&netlink_proto, 0); - if (err != 0) - goto out; + if (err != 0) { + panic("NET: cannot register netlink protocol"); + return err; + } + + /* fill in default proto_ops for all protocols */ + for (i = 0; i < MAX_LINKS; i++) + nl_table[i].p_ops = &netlink_ops; if (sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb)) netlink_skb_parms_too_large(); - nl_table = kmalloc(sizeof(*nl_table) * MAX_LINKS, GFP_KERNEL); - if (!nl_table) { -enomem: - printk(KERN_CRIT "netlink_init: Cannot allocate nl_table\n"); - return -ENOMEM; - } - - memset(nl_table, 0, sizeof(*nl_table) * MAX_LINKS); - if (num_physpages >= (128 * 1024)) max = num_physpages >> (21 - PAGE_SHIFT); else @@ -1418,7 +1487,6 @@ enomem: while (i-- > 0) nl_pid_hash_free(nl_table[i].hash.table, 1 * sizeof(*hash->table)); - kfree(nl_table); goto enomem; } memset(hash->table, 0, 1 * sizeof(*hash->table)); @@ -1434,25 +1502,13 @@ enomem: #endif /* The netlink device handler may be needed early. */ rtnetlink_init(); -out: - return err; -} -static void __exit netlink_proto_exit(void) -{ - sock_unregister(PF_NETLINK); - proc_net_remove("netlink"); - kfree(nl_table); - nl_table = NULL; - proto_unregister(&netlink_proto); + return 0; +enomem: + panic("NET: cannot allocate netlink hash table"); } core_initcall(netlink_proto_init); -module_exit(netlink_proto_exit); - -MODULE_LICENSE("GPL"); - -MODULE_ALIAS_NETPROTO(PF_NETLINK); EXPORT_SYMBOL(netlink_ack); EXPORT_SYMBOL(netlink_broadcast); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1516,7 +1516,8 @@ static int __init xfrm_user_init(void) { printk(KERN_INFO "Initializing IPsec netlink socket\n"); - xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv); + xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv, + THIS_MODULE); if (xfrm_nl == NULL) return -ENOMEM; @@ -1534,3 +1535,4 @@ static void __exit xfrm_user_exit(void) module_init(xfrm_user_init); module_exit(xfrm_user_exit); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_XFRM); diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c --- a/security/selinux/netlink.c +++ b/security/selinux/netlink.c @@ -103,7 +103,7 @@ void selnl_notify_policyload(u32 seqno) static int __init selnl_init(void) { - selnl = netlink_kernel_create(NETLINK_SELINUX, NULL); + selnl = netlink_kernel_create(NETLINK_SELINUX, NULL, THIS_MODULE); if (selnl == NULL) panic("SELinux: Cannot create netlink socket."); netlink_set_nonroot(NETLINK_SELINUX, NL_NONROOT_RECV);
pgpEYzSLVB0Uc.pgp
Description: PGP signature