Hi Dave!

The attached patch adds support for refcounting of modules implementing
netlink protocols.  The idea is that you prevent the module from
disappearing as long as someone in userspace still has a socket talking
to you.

I think the current behaviour can be quite confusing, since modules like
ipt_ULOG or nfnetlink can have a use count of '0' even though a daemon
is listening for events [and will not get notified of module unloading
unless it actively sends a message].

Apart from that, it also adds a way for netlink protocols to support
autoloading.  This means as soon as any user opens a socket for a
particular protocol (such as NETLINK_NETFILTER), the kernel would try to
load net-pf-16-proto-xx where 16 is AF_NETLINK and xx is the protocol
number. [I'll probably also try to extend that mechanism to ipv4-sctp in
a separate patch]

The rest of the patch is some cleanup:
- nl_table statically allocated
- add missing MODULE_AUTHOR to nfnetlink.c
- port all existing netlink users to new three-argument
  netlink_kernel_create() function
- remove all code for modularized netlink, since it is always linked
  statically into the kernel

If you agree with those changes, please apply to your net-2.6.14 tree.

Thanks,
        Harald

-- 
- Harald Welte <[EMAIL PROTECTED]>                 http://netfilter.org/
============================================================================
  "Fragmentation is like classful addressing -- an interesting early
   architectural error that shows how much experimentation was going
   on while IP was being designed."                    -- Paul Vixie
- Remove bogus code for compiling netlink as module
- Add module refcounting support for modules implementing a netlink
  protocol
- Statically allocate nl_table instead of kmalloc() at startup
- Add support for autoloading modules that implement a netlink protocol
  as soon as someone opens a socket for that protocol

Signed-off-by: Harald Welte <[EMAIL PROTECTED]>

---
commit 09dd41ae8aa26ee297e2291b71641ffee8d98e83
tree 90132d31bdc6e189c02c7b4da412b3a61a4ad7f1
parent b4a566c332048b642506eff7de825fce710ff42c
author laforge <[EMAIL PROTECTED]> Sa, 23 Jul 2005 16:04:18 -0400
committer laforge <[EMAIL PROTECTED]> Sa, 23 Jul 2005 16:04:18 -0400

 drivers/w1/w1_int.c             |    4 +
 include/linux/net.h             |    3 +
 include/linux/netlink.h         |    2 -
 kernel/audit.c                  |    3 +
 lib/kobject_uevent.c            |    3 +
 net/bridge/netfilter/ebt_ulog.c |    2 -
 net/core/rtnetlink.c            |    2 -
 net/decnet/netfilter/dn_rtmsg.c |    4 +
 net/ipv4/fib_frontend.c         |    2 -
 net/ipv4/netfilter/ip_queue.c   |    3 +
 net/ipv4/netfilter/ipt_ULOG.c   |    3 +
 net/ipv4/tcp_diag.c             |    3 +
 net/ipv6/netfilter/ip6_queue.c  |    2 -
 net/netfilter/nfnetlink.c       |    5 +-
 net/netlink/af_netlink.c        |  128 ++++++++++++++++++++++++++++-----------
 net/xfrm/xfrm_user.c            |    4 +
 security/selinux/netlink.c      |    2 -
 17 files changed, 124 insertions(+), 51 deletions(-)

diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c
--- a/drivers/w1/w1_int.c
+++ b/drivers/w1/w1_int.c
@@ -88,7 +88,7 @@ static struct w1_master * w1_alloc_dev(u
 
        dev->groups = 23;
        dev->seq = 1;
-       dev->nls = netlink_kernel_create(NETLINK_W1, NULL);
+       dev->nls = netlink_kernel_create(NETLINK_W1, NULL, THIS_MODULE);
        if (!dev->nls) {
                printk(KERN_ERR "Failed to create new netlink socket(%u) for w1 master %s.\n",
                        NETLINK_W1, dev->dev.bus_id);
@@ -225,3 +225,5 @@ void w1_remove_master_device(struct w1_b
 
 EXPORT_SYMBOL(w1_add_master_device);
 EXPORT_SYMBOL(w1_remove_master_device);
+
+MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_W1);
diff --git a/include/linux/net.h b/include/linux/net.h
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -282,5 +282,8 @@ static struct proto_ops name##_ops = {              
 #define MODULE_ALIAS_NETPROTO(proto) \
        MODULE_ALIAS("net-pf-" __stringify(proto))
 
+#define MODULE_ALIAS_NET_PF_PROTO(pf, proto) \
+       MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto))
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_NET_H */
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -119,7 +119,7 @@ struct netlink_skb_parms
 #define NETLINK_CREDS(skb)     (&NETLINK_CB((skb)).creds)
 
 
-extern struct sock *netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len));
+extern struct sock *netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct module *module);
 extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
 extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock);
 extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid,
diff --git a/kernel/audit.c b/kernel/audit.c
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -514,7 +514,8 @@ static int __init audit_init(void)
 {
        printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
               audit_default ? "enabled" : "disabled");
-       audit_sock = netlink_kernel_create(NETLINK_AUDIT, audit_receive);
+       audit_sock = netlink_kernel_create(NETLINK_AUDIT, audit_receive,
+                                          THIS_MODULE);
        if (!audit_sock)
                audit_panic("cannot initialize netlink socket");
 
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -153,7 +153,8 @@ EXPORT_SYMBOL_GPL(kobject_uevent_atomic)
 
 static int __init kobject_uevent_init(void)
 {
-       uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, NULL);
+       uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, NULL,
+                                           THIS_MODULE);
 
        if (!uevent_sock) {
                printk(KERN_ERR
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -258,7 +258,7 @@ static int __init init(void)
                spin_lock_init(&ulog_buffers[i].lock);
        }
 
-       ebtulognl = netlink_kernel_create(NETLINK_NFLOG, NULL);
+       ebtulognl = netlink_kernel_create(NETLINK_NFLOG, NULL, THIS_MODULE);
        if (!ebtulognl)
                ret = -ENOMEM;
        else if ((ret = ebt_register_watcher(&ulog)))
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -708,7 +708,7 @@ void __init rtnetlink_init(void)
        if (!rta_buf)
                panic("rtnetlink_init: cannot allocate rta_buf\n");
 
-       rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv);
+       rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv, THIS_MODULE);
        if (rtnl == NULL)
                panic("rtnetlink_init: cannot initialize rtnetlink\n");
        netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -138,7 +138,8 @@ static int __init init(void)
 {
        int rv = 0;
 
-       dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, dnrmg_receive_user_sk);
+       dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, dnrmg_receive_user_sk,
+                                     THIS_MODULE);
        if (dnrmg == NULL) {
                printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket");
                return -ENOMEM;
@@ -162,6 +163,7 @@ static void __exit fini(void)
 MODULE_DESCRIPTION("DECnet Routing Message Grabulator");
 MODULE_AUTHOR("Steven Whitehouse <[EMAIL PROTECTED]>");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_DNRTMSG);
 
 module_init(init);
 module_exit(fini);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -567,7 +567,7 @@ static void nl_fib_input(struct sock *sk
 
 static void nl_fib_lookup_init(void)
 {
-      netlink_kernel_create(NETLINK_FIB_LOOKUP, nl_fib_input);
+      netlink_kernel_create(NETLINK_FIB_LOOKUP, nl_fib_input, THIS_MODULE);
 }
 
 static void fib_disable_ip(struct net_device *dev, int force)
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -685,7 +685,8 @@ init_or_cleanup(int init)
                goto cleanup;
 
        netlink_register_notifier(&ipq_nl_notifier);
-       ipqnl = netlink_kernel_create(NETLINK_FIREWALL, ipq_rcv_sk);
+       ipqnl = netlink_kernel_create(NETLINK_FIREWALL, ipq_rcv_sk,
+                                     THIS_MODULE);
        if (ipqnl == NULL) {
                printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
                goto cleanup_netlink_notifier;
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -62,6 +62,7 @@
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <[EMAIL PROTECTED]>");
 MODULE_DESCRIPTION("iptables userspace logging module");
+MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG);
 
 #define ULOG_NL_EVENT          111             /* Harald's favorite number */
 #define ULOG_MAXNLGROUPS       32              /* numer of nlgroups */
@@ -372,7 +373,7 @@ static int __init init(void)
                ulog_buffers[i].timer.data = i;
        }
 
-       nflognl = netlink_kernel_create(NETLINK_NFLOG, NULL);
+       nflognl = netlink_kernel_create(NETLINK_NFLOG, NULL, THIS_MODULE);
        if (!nflognl)
                return -ENOMEM;
 
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -774,7 +774,8 @@ static void tcpdiag_rcv(struct sock *sk,
 
 static int __init tcpdiag_init(void)
 {
-       tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv);
+       tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv,
+                                     THIS_MODULE);
        if (tcpnl == NULL)
                return -ENOMEM;
        return 0;
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -676,7 +676,7 @@ init_or_cleanup(int init)
                goto cleanup;
 
        netlink_register_notifier(&ipq_nl_notifier);
-       ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk);
+       ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk, THIS_MODULE);
        if (ipqnl == NULL) {
                printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
                goto cleanup_netlink_notifier;
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -38,6 +38,8 @@
 #include <linux/netfilter/nfnetlink.h>
 
 MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <[EMAIL PROTECTED]>");
+MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
 
 static char __initdata nfversion[] = "0.30";
 
@@ -323,7 +325,8 @@ int __init nfnetlink_init(void)
 {
        printk("Netfilter messages via NETLINK v%s.\n", nfversion);
 
-       nfnl = netlink_kernel_create(NETLINK_NETFILTER, nfnetlink_rcv);
+       nfnl = netlink_kernel_create(NETLINK_NETFILTER, nfnetlink_rcv,
+                                    THIS_MODULE);
        if (!nfnl) {
                printk(KERN_ERR "cannot initialize nfnetlink!\n");
                return -1;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -13,6 +13,13 @@
  *                               added netlink_proto_exit
  * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <[EMAIL PROTECTED]>
  *                              use nlk_sk, as sk->protinfo is on a diet 8)
+ * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <[EMAIL PROTECTED]>
+ *                              - inc module use count of module that owns
+ *                                the kernel socket in case userspace opens
+ *                                socket of same protocol
+ *                              - statically allocate nl_table
+ *                              - remove all module support, since netlink is
+ *                                mandatory if CONFIG_NET=y these days
  *
  */
 
@@ -92,9 +99,10 @@ struct netlink_table {
        struct nl_pid_hash hash;
        struct hlist_head mc_list;
        unsigned int nl_nonroot;
+       struct proto_ops *p_ops;
 };
 
-static struct netlink_table *nl_table;
+static struct netlink_table nl_table[MAX_LINKS];
 
 static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
 
@@ -341,7 +349,20 @@ static int netlink_create(struct socket 
        if (protocol<0 || protocol >= MAX_LINKS)
                return -EPROTONOSUPPORT;
 
-       sock->ops = &netlink_ops;
+       netlink_table_grab();
+       if (!nl_table[protocol].hash.entries) {
+               netlink_table_ungrab();
+#ifdef CONFIG_KMOD
+               /* We do 'best effort'.  If we find a matching module,
+                * it is loaded.  If not, we don't return an error to
+                * allow pure userspace<->userspace communication. -HW */
+               request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
+               netlink_table_grab();
+#endif
+       }
+       netlink_table_ungrab();
+
+       sock->ops = nl_table[protocol].p_ops;
 
        sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1);
        if (!sk)
@@ -394,6 +415,20 @@ static int netlink_release(struct socket
                                          };
                notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n);
        }       
+
+       /* when this is a kernel socket, we need to remove the owner pointer,
+        * since we don't know whether the module will be dying at any given
+        * point - HW */
+       if (!nlk->pid) {
+               struct proto_ops *p_tmp;
+               netlink_table_grab();
+               p_tmp = nl_table[sk->sk_protocol].p_ops;
+               if (p_tmp != &netlink_ops) {
+                       nl_table[sk->sk_protocol].p_ops = &netlink_ops;
+                       kfree(p_tmp);
+               }
+               netlink_table_ungrab();
+       }
        
        sock_put(sk);
        return 0;
@@ -1023,33 +1058,70 @@ static void netlink_data_ready(struct so
  */
 
 struct sock *
-netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len))
+netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len),
+                     struct module *module)
 {
+       struct proto_ops *p_ops;
        struct socket *sock;
        struct sock *sk;
 
-       if (!nl_table)
+       if (unit<0 || unit>=MAX_LINKS)
                return NULL;
 
-       if (unit<0 || unit>=MAX_LINKS)
+       /* do a quick check, to make us not go down to netlink_insert()
+        * if protocol already has kernel socket */
+       if (netlink_lookup(unit, 0))
                return NULL;
 
        if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
                return NULL;
 
+       if (module) {
+               /* every registering protocol implemented in a module needs
+                * its own p_ops, since the socket code cannot deal with
+                * module refcounting otherwise
+                * -HW */
+               p_ops = kmalloc(sizeof(*p_ops), GFP_KERNEL);
+               if (!p_ops) {
+                       sk = NULL;
+                       goto out_sock_release;
+               }
+
+               memcpy(p_ops, &netlink_ops, sizeof(*p_ops));
+               p_ops->owner = module;
+       } else
+               p_ops = &netlink_ops;
+
+       netlink_table_grab();
+       nl_table[unit].p_ops = p_ops;
+       netlink_table_ungrab();
+
        if (netlink_create(sock, unit) < 0) {
-               sock_release(sock);
-               return NULL;
+               sk = NULL;
+               goto out_kfree_p_ops;
        }
+
        sk = sock->sk;
        sk->sk_data_ready = netlink_data_ready;
        if (input)
                nlk_sk(sk)->data_ready = input;
 
        if (netlink_insert(sk, 0)) {
-               sock_release(sock);
-               return NULL;
+               sk = NULL;
+               goto out_kfree_p_ops;
        }
+
+       return sk;
+
+out_kfree_p_ops:
+       netlink_table_grab();
+       if (nl_table[unit].p_ops != &netlink_ops) {
+               kfree(nl_table[unit].p_ops);
+               nl_table[unit].p_ops = &netlink_ops;
+       }
+       netlink_table_ungrab();
+out_sock_release:
+       sock_release(sock);
        return sk;
 }
 
@@ -1386,21 +1458,18 @@ static int __init netlink_proto_init(voi
        unsigned int order;
        int err = proto_register(&netlink_proto, 0);
 
-       if (err != 0)
-               goto out;
+       if (err != 0) {
+               panic("NET: cannot register netlink protocol");
+               return err;
+       }
+
+       /* fill in default proto_ops for all protocols */
+       for (i = 0; i < MAX_LINKS; i++)
+               nl_table[i].p_ops = &netlink_ops;
 
        if (sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb))
                netlink_skb_parms_too_large();
 
-       nl_table = kmalloc(sizeof(*nl_table) * MAX_LINKS, GFP_KERNEL);
-       if (!nl_table) {
-enomem:
-               printk(KERN_CRIT "netlink_init: Cannot allocate nl_table\n");
-               return -ENOMEM;
-       }
-
-       memset(nl_table, 0, sizeof(*nl_table) * MAX_LINKS);
-
        if (num_physpages >= (128 * 1024))
                max = num_physpages >> (21 - PAGE_SHIFT);
        else
@@ -1418,7 +1487,6 @@ enomem:
                        while (i-- > 0)
                                nl_pid_hash_free(nl_table[i].hash.table,
                                                 1 * sizeof(*hash->table));
-                       kfree(nl_table);
                        goto enomem;
                }
                memset(hash->table, 0, 1 * sizeof(*hash->table));
@@ -1434,25 +1502,13 @@ enomem:
 #endif
        /* The netlink device handler may be needed early. */ 
        rtnetlink_init();
-out:
-       return err;
-}
 
-static void __exit netlink_proto_exit(void)
-{
-       sock_unregister(PF_NETLINK);
-       proc_net_remove("netlink");
-       kfree(nl_table);
-       nl_table = NULL;
-       proto_unregister(&netlink_proto);
+       return 0;
+enomem:
+       panic("NET: cannot allocate netlink hash table");
 }
 
 core_initcall(netlink_proto_init);
-module_exit(netlink_proto_exit);
-
-MODULE_LICENSE("GPL");
-
-MODULE_ALIAS_NETPROTO(PF_NETLINK);
 
 EXPORT_SYMBOL(netlink_ack);
 EXPORT_SYMBOL(netlink_broadcast);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1516,7 +1516,8 @@ static int __init xfrm_user_init(void)
 {
        printk(KERN_INFO "Initializing IPsec netlink socket\n");
 
-       xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv);
+       xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv,
+                                       THIS_MODULE);
        if (xfrm_nl == NULL)
                return -ENOMEM;
 
@@ -1534,3 +1535,4 @@ static void __exit xfrm_user_exit(void)
 module_init(xfrm_user_init);
 module_exit(xfrm_user_exit);
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_XFRM);
diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c
--- a/security/selinux/netlink.c
+++ b/security/selinux/netlink.c
@@ -103,7 +103,7 @@ void selnl_notify_policyload(u32 seqno)
 
 static int __init selnl_init(void)
 {
-       selnl = netlink_kernel_create(NETLINK_SELINUX, NULL);
+       selnl = netlink_kernel_create(NETLINK_SELINUX, NULL, THIS_MODULE);
        if (selnl == NULL)
                panic("SELinux:  Cannot create netlink socket.");
        netlink_set_nonroot(NETLINK_SELINUX, NL_NONROOT_RECV);  

Attachment: pgpEYzSLVB0Uc.pgp
Description: PGP signature

Reply via email to