sepherosa_gmail.com created this revision.
sepherosa_gmail.com added reviewers: network, adrian, delphij, rrs, glebius, 
kmacy, decui_microsoft.com, honzhan_microsoft.com, howard0su_gmail.com.
sepherosa_gmail.com added a subscriber: freebsd-net-list.

REVISION SUMMARY
  Unlike buf_ring_peek, it only supports single consumer mode, and it clears 
the cons_head if DEBUG_BUFRING/INVARIANTS is defined.
  
  The normal use case of drbr_peek for network drivers is:
  
    m = drbr_peek(br);
    err = hw_spec_encap(&m); /* could m_defrag/m_collapse */
    (*)
    if (err) {
        if (m == NULL)
            drbr_advance(br);
        else
            drbr_putback(br, m);
        /* break the loop */
    }
    drbr_advance(br);
  
  The race is:
  If hw_spec_encap() m_defrag or m_collapse the mbuf, i.e. the old mbuf was 
freed, or like the Hyper-V's network driver, that transmission-done does not 
even require the TX lock; then on the other CPU at the (*) time, the freed mbuf 
could be recycled and being drbr_enqueue even before the current CPU had the 
chance to call drbr_{advance,putback}.  This triggers a panic in drbr_enqueue 
duplicated element check, if DEBUG_BUFRING/INVARIANTS is defined.
  
  Use buf_ring_peek_clear_sc() in drbr_peek() to fix the above race.
  
  This change is a NO-OP, if neither DEBUG_BUFRING nor INVARIANTS are defined.

REVISION DETAIL
  https://reviews.freebsd.org/D5416

AFFECTED FILES
  sys/net/ifq.h
  sys/sys/buf_ring.h

CHANGE DETAILS
  diff --git a/sys/sys/buf_ring.h b/sys/sys/buf_ring.h
  --- a/sys/sys/buf_ring.h
  +++ b/sys/sys/buf_ring.h
  @@ -268,6 +268,37 @@
        return (br->br_ring[br->br_cons_head]);
   }
   
  +static __inline void *
  +buf_ring_peek_clear_sc(struct buf_ring *br)
  +{
  +#ifdef DEBUG_BUFRING
  +     void *ret;
  +
  +     if (!mtx_owned(br->br_lock))
  +             panic("lock not held on single consumer dequeue");
  +#endif       
  +     /*
  +      * I believe it is safe to not have a memory barrier
  +      * here because we control cons and tail is worst case
  +      * a lagging indicator so we worst case we might
  +      * return NULL immediately after a buffer has been enqueued
  +      */
  +     if (br->br_cons_head == br->br_prod_tail)
  +             return (NULL);
  +
  +#ifdef DEBUG_BUFRING
  +     /*
  +      * Single consumer, i.e. cons_head will not move while we are
  +      * running, so atomic_swap_ptr() is not necessary here.
  +      */
  +     ret = br->br_ring[br->br_cons_head];
  +     br->br_ring[br->br_cons_head] = NULL;
  +     return (ret);
  +#else
  +     return (br->br_ring[br->br_cons_head]);
  +#endif
  +}
  +
   static __inline int
   buf_ring_full(struct buf_ring *br)
   {
  diff --git a/sys/net/ifq.h b/sys/net/ifq.h
  --- a/sys/net/ifq.h
  +++ b/sys/net/ifq.h
  @@ -369,7 +369,7 @@
                return (m);
        }
   #endif
  -     return(buf_ring_peek(br));
  +     return(buf_ring_peek_clear_sc(br));
   }
   
   static __inline void

EMAIL PREFERENCES
  https://reviews.freebsd.org/settings/panel/emailpreferences/

To: sepherosa_gmail.com, network, adrian, delphij, rrs, glebius, kmacy, 
decui_microsoft.com, honzhan_microsoft.com, howard0su_gmail.com
Cc: freebsd-net-list
diff --git a/sys/sys/buf_ring.h b/sys/sys/buf_ring.h
--- a/sys/sys/buf_ring.h
+++ b/sys/sys/buf_ring.h
@@ -268,6 +268,37 @@
 	return (br->br_ring[br->br_cons_head]);
 }
 
+static __inline void *
+buf_ring_peek_clear_sc(struct buf_ring *br)
+{
+#ifdef DEBUG_BUFRING
+	void *ret;
+
+	if (!mtx_owned(br->br_lock))
+		panic("lock not held on single consumer dequeue");
+#endif	
+	/*
+	 * I believe it is safe to not have a memory barrier
+	 * here because we control cons and tail is worst case
+	 * a lagging indicator so we worst case we might
+	 * return NULL immediately after a buffer has been enqueued
+	 */
+	if (br->br_cons_head == br->br_prod_tail)
+		return (NULL);
+
+#ifdef DEBUG_BUFRING
+	/*
+	 * Single consumer, i.e. cons_head will not move while we are
+	 * running, so atomic_swap_ptr() is not necessary here.
+	 */
+	ret = br->br_ring[br->br_cons_head];
+	br->br_ring[br->br_cons_head] = NULL;
+	return (ret);
+#else
+	return (br->br_ring[br->br_cons_head]);
+#endif
+}
+
 static __inline int
 buf_ring_full(struct buf_ring *br)
 {
diff --git a/sys/net/ifq.h b/sys/net/ifq.h
--- a/sys/net/ifq.h
+++ b/sys/net/ifq.h
@@ -369,7 +369,7 @@
 		return (m);
 	}
 #endif
-	return(buf_ring_peek(br));
+	return(buf_ring_peek_clear_sc(br));
 }
 
 static __inline void

_______________________________________________
freebsd-net@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-net
To unsubscribe, send any mail to "freebsd-net-unsubscr...@freebsd.org"

Reply via email to