Patrick McHardy wrote:
> Mattia Dongili wrote:
> 
>>On Mon, Aug 01, 2005 at 04:27:53PM +0200, Patrick McHardy wrote:
>>
>>
>>>>--- include/linux/netfilter_ipv4/ip_conntrack.h.clean       2005-08-01 
>>>>15:09:49.000000000 +0200
>>>>+++ include/linux/netfilter_ipv4/ip_conntrack.h     2005-08-01 
>>>>15:08:52.000000000 +0200
>>>>@@ -298,6 +298,7 @@ static inline struct ip_conntrack *
>>>>ip_conntrack_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
>>>>{
>>>>    *ctinfo = skb->nfctinfo;
>>>>+   nf_conntrack_get(skb->nfct);
>>>>    return (struct ip_conntrack *)skb->nfct;
>>>>}
>>>
>>>This creates lots of refcnt leaks, which is probably why it makes the
>>>underflow go away :) Please try this patch instead.
>>
>>
>>this doesn't fix it actually, see dmesg below:
> 
> 
> It looks like ip_ct_iterate_cleanup and ip_conntrack_event_cache_init
> race against each other with assigning pointers and grabbing/putting the
> refcounts if called from different contexts.

This should be a first step towards fixing it. It's probably incomplete
(I'm too tired to check it now), but it should fix the problem you're
seeing. Could you give it a spin?

BTW, ip_ct_iterate_cleanup can only be called from ipt_MASQUERADE when
a device goes down. It seems a bit odd that this is happening on boot;
is there anything special about your setup?

Regards
Patrick
diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h 
b/include/linux/netfilter_ipv4/ip_conntrack.h
--- a/include/linux/netfilter_ipv4/ip_conntrack.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack.h
@@ -411,6 +411,7 @@ struct ip_conntrack_stat
 
 #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
 #include <linux/notifier.h>
+#include <linux/interrupt.h>
 
 struct ip_conntrack_ecache {
        struct ip_conntrack *ct;
@@ -445,26 +446,25 @@ ip_conntrack_expect_unregister_notifier(
        return notifier_chain_unregister(&ip_conntrack_expect_chain, nb);
 }
 
+extern void ip_conntrack_event_cache_init(struct ip_conntrack *ct);
+extern void 
+ip_conntrack_deliver_cached_events_for(const struct ip_conntrack *ct);
+
 static inline void 
 ip_conntrack_event_cache(enum ip_conntrack_events event,
                         const struct sk_buff *skb)
 {
-       struct ip_conntrack_ecache *ecache = 
-                                       &__get_cpu_var(ip_conntrack_ecache);
-
-       if (unlikely((struct ip_conntrack *) skb->nfct != ecache->ct)) {
-               if (net_ratelimit()) {
-                       printk(KERN_ERR "ctevent: skb->ct != ecache->ct !!!\n");
-                       dump_stack();
-               }
-       }
+       struct ip_conntrack *ct = (struct ip_conntrack *)skb->nfct;
+       struct ip_conntrack_ecache *ecache;
+       
+       local_bh_disable();
+       ecache = &__get_cpu_var(ip_conntrack_ecache);
+       if (unlikely(ct != ecache->ct))
+               ip_conntrack_event_cache_init(ct);
        ecache->events |= event;
+       local_bh_enable();
 }
 
-extern void 
-ip_conntrack_deliver_cached_events_for(const struct ip_conntrack *ct);
-extern void ip_conntrack_event_cache_init(const struct sk_buff *skb);
-
 static inline void ip_conntrack_event(enum ip_conntrack_events event,
                                      struct ip_conntrack *ct)
 {
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c 
b/net/ipv4/netfilter/ip_conntrack_core.c
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -100,56 +100,45 @@ void __ip_ct_deliver_cached_events(struc
 
 /* Deliver all cached events for a particular conntrack. This is called
  * by code prior to async packet handling or freeing the skb */
-void 
-ip_conntrack_deliver_cached_events_for(const struct ip_conntrack *ct)
+void ip_conntrack_deliver_cached_events_for(const struct ip_conntrack *ct)
 {
-       struct ip_conntrack_ecache *ecache = 
-                                       &__get_cpu_var(ip_conntrack_ecache);
-
-       if (!ct)
-               return;
-
+       struct ip_conntrack_ecache *ecache;
+       
+       local_bh_disable();
+       ecache = &__get_cpu_var(ip_conntrack_ecache);
        if (ecache->ct == ct) {
                DEBUGP("ecache: delivering event for %p\n", ct);
                __deliver_cached_events(ecache);
        } else {
-               if (net_ratelimit())
-                       printk(KERN_WARNING "ecache: want to deliver for %p, "
-                               "but cache has %p\n", ct, ecache->ct);
+               DEBUGP("ecache: already delivered for %p, cache %p",
+                      ct, ecache->ct);
        }
-
        /* signalize that events have already been delivered */
        ecache->ct = NULL;
+       local_bh_enable();
 }
 
 /* Deliver cached events for old pending events, if current conntrack != old */
-void ip_conntrack_event_cache_init(const struct sk_buff *skb)
+void ip_conntrack_event_cache_init(struct ip_conntrack *ct)
 {
-       struct ip_conntrack *ct = (struct ip_conntrack *) skb->nfct;
-       struct ip_conntrack_ecache *ecache = 
-                                       &__get_cpu_var(ip_conntrack_ecache);
+       struct ip_conntrack_ecache *ecache;
 
        /* take care of delivering potentially old events */
-       if (ecache->ct != ct) {
-               enum ip_conntrack_info ctinfo;
-               /* we have to check, since at startup the cache is NULL */
-               if (likely(ecache->ct)) {
-                       DEBUGP("ecache: entered for different conntrack: "
-                              "ecache->ct=%p, skb->nfct=%p. delivering "
-                              "events\n", ecache->ct, ct);
-                       __deliver_cached_events(ecache);
-                       ip_conntrack_put(ecache->ct);
-               } else {
-                       DEBUGP("ecache: entered for conntrack %p, "
-                               "cache was clean before\n", ct);
-               }
-
-               /* initialize for this conntrack/packet */
-               ecache->ct = ip_conntrack_get(skb, &ctinfo);
-               /* ecache->events cleared by __deliver_cached_devents() */
+       ecache = &__get_cpu_var(ip_conntrack_ecache);
+       BUG_ON(ecache->ct == ct);
+       if (ecache->ct) {
+               DEBUGP("ecache: entered for different conntrack: "
+                      "ecache->ct=%p, skb->nfct=%p. delivering "
+                      "events\n", ecache->ct, ct);
+               __deliver_cached_events(ecache);
+               ip_conntrack_put(ecache->ct);
        } else {
-               DEBUGP("ecache: re-entered for conntrack %p.\n", ct);
+               DEBUGP("ecache: entered for conntrack %p, "
+                       "cache was clean before\n", ct);
        }
+       /* initialize for this conntrack/packet */
+       ecache->ct = ct;
+       nf_conntrack_get(&ct->ct_general);
 }
 
 #endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
@@ -873,8 +862,6 @@ unsigned int ip_conntrack_in(unsigned in
 
        IP_NF_ASSERT((*pskb)->nfct);
 
-       ip_conntrack_event_cache_init(*pskb);
-
        ret = proto->packet(ct, *pskb, ctinfo);
        if (ret < 0) {
                /* Invalid: inverse of the return code tells
@@ -1279,15 +1266,18 @@ ip_ct_iterate_cleanup(int (*iter)(struct
                /* we need to deliver all cached events in order to drop
                 * the reference counts */
                int cpu;
+
+               local_bh_disable();
                for_each_cpu(cpu) {
                        struct ip_conntrack_ecache *ecache = 
                                        &per_cpu(ip_conntrack_ecache, cpu);
                        if (ecache->ct) {
-                               __ip_ct_deliver_cached_events(ecache);
+                               __deliver_cached_events(ecache);
                                ip_conntrack_put(ecache->ct);
                                ecache->ct = NULL;
                        }
                }
+               local_bh_enable();
        }
 #endif
 }
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c 
b/net/ipv4/netfilter/ip_conntrack_standalone.c
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -401,9 +401,13 @@ static unsigned int ip_confirm(unsigned 
                               const struct net_device *out,
                               int (*okfn)(struct sk_buff *))
 {
-       ip_conntrack_event_cache_init(*pskb);
        /* We've seen it coming out the other side: confirm it */
-       return ip_conntrack_confirm(pskb);
+       struct ip_conntrack *ct = (struct ip_conntrack *)(*pskb)->nfct;
+       unsigned int ret;
+       
+       ret = ip_conntrack_confirm(pskb);
+       ip_conntrack_deliver_cached_events_for(ct);
+       return ret;
 }
 
 static unsigned int ip_conntrack_help(unsigned int hooknum,
@@ -419,7 +423,7 @@ static unsigned int ip_conntrack_help(un
        ct = ip_conntrack_get(*pskb, &ctinfo);
        if (ct && ct->helper) {
                unsigned int ret;
-               ip_conntrack_event_cache_init(*pskb);
+               ip_conntrack_event_cache_init(ct);
                ret = ct->helper->help(pskb, ct, ctinfo);
                if (ret != NF_ACCEPT)
                        return ret;

Reply via email to