Module Name:    src
Committed By:   martin
Date:           Sun Oct 22 06:25:32 UTC 2023

Modified Files:
        src/sys/dev/pci [netbsd-10]: if_rge.c

Log Message:
Pull up following revision(s) (requested by mrg in ticket #434):

        sys/dev/pci/if_rge.c: revision 1.26
        sys/dev/pci/if_rge.c: revision 1.28

rge(4): check for all errors in rx buffer allocation
should fix a crash seen by Chavdar Ivanov reported on current-users.
move the rx and tx list clean up into their own functions, and call the
rx clean up function from the init function if something fails.  this
should fix a potential leak in this case, and generally frees up memory
that won't be used without a successful init phase again.
slight application of 'static', much more could be done.

rge: properly handle mbuf allocation failures in rx interrupts
several changes that should fix crashes seen after an mbuf
allocation failure:
- create RX ring dma maps with BUS_DMA_ALLOCNOW, so that any
  future bus_dmamap_load*() call will succeed.  this avoids one
  error case in rge_newbuf(), that similar cases in eg wm(4)
  actually call panic for now (i think this idea can be copied
  into wm(4) as well.)
- extract the RX descriptor set into a common function that
  both rge_newbuf() and rge_rxeof() can use.  it's almost
  identical to the old rge_discard_rxbuf() except it also sets
  the rge_addr (this is needed for the newbuf case.)
- move the bus_dmamap_unload() into rge_newbuf(), so that the
  existing mbuf will remain mapped until a new mbuf is allocated.
  (this part is what should fix crashes seen by wiz and Chavdar,
  as the unload followed by sync is what triggers the assert in
  x86 bus_dma.  without the assert, it would have shortly
  triggered a page fault.)  remove the assignment to NULL for
  the rxq mbuf pointer, it is required for reload.
- add a couple of missing if_statinc() calls.

tested on amd64 and arm64.


To generate a diff of this commit:
cvs rdiff -u -r1.24.4.2 -r1.24.4.3 src/sys/dev/pci/if_rge.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/dev/pci/if_rge.c
diff -u src/sys/dev/pci/if_rge.c:1.24.4.2 src/sys/dev/pci/if_rge.c:1.24.4.3
--- src/sys/dev/pci/if_rge.c:1.24.4.2	Sat Oct 14 06:59:43 2023
+++ src/sys/dev/pci/if_rge.c	Sun Oct 22 06:25:32 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: if_rge.c,v 1.24.4.2 2023/10/14 06:59:43 martin Exp $	*/
+/*	$NetBSD: if_rge.c,v 1.24.4.3 2023/10/22 06:25:32 martin Exp $	*/
 /*	$OpenBSD: if_rge.c,v 1.9 2020/12/12 11:48:53 jan Exp $	*/
 
 /*
@@ -18,7 +18,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if_rge.c,v 1.24.4.2 2023/10/14 06:59:43 martin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if_rge.c,v 1.24.4.3 2023/10/22 06:25:32 martin Exp $");
 
 #include <sys/types.h>
 
@@ -106,9 +106,10 @@ int		rge_ifmedia_upd(struct ifnet *);
 void		rge_ifmedia_sts(struct ifnet *, struct ifmediareq *);
 int		rge_allocmem(struct rge_softc *);
 int		rge_newbuf(struct rge_softc *, int);
-void		rge_discard_rxbuf(struct rge_softc *, int);
-int		rge_rx_list_init(struct rge_softc *);
-void		rge_tx_list_init(struct rge_softc *);
+static int	rge_rx_list_init(struct rge_softc *);
+static void	rge_rx_list_fini(struct rge_softc *);
+static void	rge_tx_list_init(struct rge_softc *);
+static void	rge_tx_list_fini(struct rge_softc *);
 int		rge_rxeof(struct rge_softc *);
 int		rge_txeof(struct rge_softc *);
 void		rge_reset(struct rge_softc *);
@@ -650,7 +651,7 @@ rge_init(struct ifnet *ifp)
 {
 	struct rge_softc *sc = ifp->if_softc;
 	uint32_t val;
-	int i;
+	unsigned i;
 
 	rge_stop(ifp, 0);
 
@@ -661,11 +662,12 @@ rge_init(struct ifnet *ifp)
 	RGE_WRITE_2(sc, RGE_RXMAXSIZE, RGE_JUMBO_FRAMELEN);
 
 	/* Initialize RX descriptors list. */
-	if (rge_rx_list_init(sc) == ENOBUFS) {
+	int error = rge_rx_list_init(sc);
+	if (error != 0) {
 		device_printf(sc->sc_dev,
 		    "init failed: no memory for RX buffers\n");
 		rge_stop(ifp, 1);
-		return (ENOBUFS);
+		return error;
 	}
 
 	/* Initialize TX descriptors. */
@@ -836,7 +838,6 @@ void
 rge_stop(struct ifnet *ifp, int disable)
 {
 	struct rge_softc *sc = ifp->if_softc;
-	int i;
 
 	callout_halt(&sc->sc_timeout, NULL);
 
@@ -867,25 +868,8 @@ rge_stop(struct ifnet *ifp, int disable)
 		sc->rge_head = sc->rge_tail = NULL;
 	}
 
-	/* Free the TX list buffers. */
-	for (i = 0; i < RGE_TX_LIST_CNT; i++) {
-		if (sc->rge_ldata.rge_txq[i].txq_mbuf != NULL) {
-			bus_dmamap_unload(sc->sc_dmat,
-			    sc->rge_ldata.rge_txq[i].txq_dmamap);
-			m_freem(sc->rge_ldata.rge_txq[i].txq_mbuf);
-			sc->rge_ldata.rge_txq[i].txq_mbuf = NULL;
-		}
-	}
-
-	/* Free the RX list buffers. */
-	for (i = 0; i < RGE_RX_LIST_CNT; i++) {
-		if (sc->rge_ldata.rge_rxq[i].rxq_mbuf != NULL) {
-			bus_dmamap_unload(sc->sc_dmat,
-			    sc->rge_ldata.rge_rxq[i].rxq_dmamap);
-			m_freem(sc->rge_ldata.rge_rxq[i].rxq_mbuf);
-			sc->rge_ldata.rge_rxq[i].rxq_mbuf = NULL;
-		}
-	}
+	rge_tx_list_fini(sc);
+	rge_rx_list_fini(sc);
 }
 
 /*
@@ -991,6 +975,9 @@ rge_ifmedia_sts(struct ifnet *ifp, struc
 
 /*
  * Allocate memory for RX/TX rings.
+ *
+ * XXX There is no tear-down for this if it any part fails, so everything
+ * remains allocated.
  */
 int
 rge_allocmem(struct rge_softc *sc)
@@ -1086,10 +1073,13 @@ rge_allocmem(struct rge_softc *sc)
 		return (error);
 	}
 
-	/* Create DMA maps for RX buffers. */
+	/*
+	 * Create DMA maps for RX buffers.  Use BUS_DMA_ALLOCNOW to avoid any
+	 * potential failure in bus_dmamap_load_mbuf() in the RX path.
+	 */
 	for (i = 0; i < RGE_RX_LIST_CNT; i++) {
 		error = bus_dmamap_create(sc->sc_dmat, RGE_JUMBO_FRAMELEN, 1,
-		    RGE_JUMBO_FRAMELEN, 0, 0,
+		    RGE_JUMBO_FRAMELEN, 0, BUS_DMA_ALLOCNOW,
 		    &sc->rge_ldata.rge_rxq[i].rxq_dmamap);
 		if (error) {
 			aprint_error_dev(sc->sc_dev, "can't create DMA map for RX\n");
@@ -1101,15 +1091,39 @@ rge_allocmem(struct rge_softc *sc)
 }
 
 /*
+ * Set an RX descriptor and sync it.
+ */
+static void
+rge_load_rxbuf(struct rge_softc *sc, int idx)
+{
+	struct rge_rx_desc *r = &sc->rge_ldata.rge_rx_list[idx];
+	struct rge_rxq *rxq = &sc->rge_ldata.rge_rxq[idx];
+	bus_dmamap_t rxmap = rxq->rxq_dmamap;
+	uint32_t cmdsts;
+
+	cmdsts = rxmap->dm_segs[0].ds_len | RGE_RDCMDSTS_OWN;
+	if (idx == RGE_RX_LIST_CNT - 1)
+		cmdsts |= RGE_RDCMDSTS_EOR;
+
+	r->hi_qword0.rge_addr = htole64(rxmap->dm_segs[0].ds_addr);
+	r->hi_qword1.rx_qword4.rge_extsts = 0;
+	r->hi_qword1.rx_qword4.rge_cmdsts = htole32(cmdsts);
+
+	bus_dmamap_sync(sc->sc_dmat, sc->rge_ldata.rge_rx_list_map,
+	    idx * sizeof(struct rge_rx_desc), sizeof(struct rge_rx_desc),
+	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+}
+
+/*
  * Initialize the RX descriptor and attach an mbuf cluster.
  */
 int
 rge_newbuf(struct rge_softc *sc, int idx)
 {
 	struct mbuf *m;
-	struct rge_rx_desc *r;
 	struct rge_rxq *rxq;
 	bus_dmamap_t rxmap;
+	int error __diagused;
 
 	m = MCLGETL(NULL, M_DONTWAIT, RGE_JUMBO_FRAMELEN);
 	if (m == NULL)
@@ -1120,66 +1134,37 @@ rge_newbuf(struct rge_softc *sc, int idx
 	rxq = &sc->rge_ldata.rge_rxq[idx];
 	rxmap = rxq->rxq_dmamap;
 
-	if (bus_dmamap_load_mbuf(sc->sc_dmat, rxmap, m, BUS_DMA_NOWAIT))
-		goto out;
+	if (rxq->rxq_mbuf != NULL)
+		bus_dmamap_unload(sc->sc_dmat, rxq->rxq_dmamap);
+
+	/* This map was created with BUS_DMA_ALLOCNOW so should never fail. */
+	error = bus_dmamap_load_mbuf(sc->sc_dmat, rxmap, m, BUS_DMA_NOWAIT);
+	KASSERTMSG(error == 0, "error=%d", error);
 
 	bus_dmamap_sync(sc->sc_dmat, rxmap, 0, rxmap->dm_mapsize,
 	    BUS_DMASYNC_PREREAD);
 
 	/* Map the segments into RX descriptors. */
-	r = &sc->rge_ldata.rge_rx_list[idx];
 
 	rxq->rxq_mbuf = m;
+	rge_load_rxbuf(sc, idx);
 
-	r->hi_qword1.rx_qword4.rge_extsts = 0;
-	r->hi_qword0.rge_addr = htole64(rxmap->dm_segs[0].ds_addr);
-
-	r->hi_qword1.rx_qword4.rge_cmdsts = htole32(rxmap->dm_segs[0].ds_len);
-	if (idx == RGE_RX_LIST_CNT - 1)
-		r->hi_qword1.rx_qword4.rge_cmdsts |= htole32(RGE_RDCMDSTS_EOR);
-
-	r->hi_qword1.rx_qword4.rge_cmdsts |= htole32(RGE_RDCMDSTS_OWN);
-
-	bus_dmamap_sync(sc->sc_dmat, sc->rge_ldata.rge_rx_list_map,
-	    idx * sizeof(struct rge_rx_desc), sizeof(struct rge_rx_desc),
-	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
-
-	return (0);
-out:
-	if (m != NULL)
-		m_freem(m);
-	return (ENOMEM);
+	return 0;
 }
 
-void
-rge_discard_rxbuf(struct rge_softc *sc, int idx)
-{
-	struct rge_rx_desc *r;
-
-	r = &sc->rge_ldata.rge_rx_list[idx];
-
-	r->hi_qword1.rx_qword4.rge_cmdsts = htole32(RGE_JUMBO_FRAMELEN);
-	r->hi_qword1.rx_qword4.rge_extsts = 0;
-	if (idx == RGE_RX_LIST_CNT - 1)
-		r->hi_qword1.rx_qword4.rge_cmdsts |= htole32(RGE_RDCMDSTS_EOR);
-	r->hi_qword1.rx_qword4.rge_cmdsts |= htole32(RGE_RDCMDSTS_OWN);
-
-	bus_dmamap_sync(sc->sc_dmat, sc->rge_ldata.rge_rx_list_map,
-	    idx * sizeof(struct rge_rx_desc), sizeof(struct rge_rx_desc),
-	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
-}
-
-int
+static int
 rge_rx_list_init(struct rge_softc *sc)
 {
-	int i;
+	unsigned i;
 
 	memset(sc->rge_ldata.rge_rx_list, 0, RGE_RX_LIST_SZ);
 
 	for (i = 0; i < RGE_RX_LIST_CNT; i++) {
 		sc->rge_ldata.rge_rxq[i].rxq_mbuf = NULL;
-		if (rge_newbuf(sc, i) == ENOBUFS)
+		if (rge_newbuf(sc, i) != 0) {
+			rge_rx_list_fini(sc);
 			return (ENOBUFS);
+		}
 	}
 
 	sc->rge_ldata.rge_rxq_prodidx = sc->rge_ldata.rge_rxq_considx = 0;
@@ -1188,10 +1173,26 @@ rge_rx_list_init(struct rge_softc *sc)
 	return (0);
 }
 
-void
+static void
+rge_rx_list_fini(struct rge_softc *sc)
+{
+	unsigned i;
+
+	/* Free the RX list buffers. */
+	for (i = 0; i < RGE_RX_LIST_CNT; i++) {
+		if (sc->rge_ldata.rge_rxq[i].rxq_mbuf != NULL) {
+			bus_dmamap_unload(sc->sc_dmat,
+			    sc->rge_ldata.rge_rxq[i].rxq_dmamap);
+			m_freem(sc->rge_ldata.rge_rxq[i].rxq_mbuf);
+			sc->rge_ldata.rge_rxq[i].rxq_mbuf = NULL;
+		}
+	}
+}
+
+static void
 rge_tx_list_init(struct rge_softc *sc)
 {
-	int i;
+	unsigned i;
 
 	memset(sc->rge_ldata.rge_tx_list, 0, RGE_TX_LIST_SZ);
 
@@ -1205,6 +1206,22 @@ rge_tx_list_init(struct rge_softc *sc)
 	sc->rge_ldata.rge_txq_prodidx = sc->rge_ldata.rge_txq_considx = 0;
 }
 
+static void
+rge_tx_list_fini(struct rge_softc *sc)
+{
+	unsigned i;
+
+	/* Free the TX list buffers. */
+	for (i = 0; i < RGE_TX_LIST_CNT; i++) {
+		if (sc->rge_ldata.rge_txq[i].txq_mbuf != NULL) {
+			bus_dmamap_unload(sc->sc_dmat,
+			    sc->rge_ldata.rge_txq[i].txq_dmamap);
+			m_freem(sc->rge_ldata.rge_txq[i].txq_mbuf);
+			sc->rge_ldata.rge_txq[i].txq_mbuf = NULL;
+		}
+	}
+}
+
 int
 rge_rxeof(struct rge_softc *sc)
 {
@@ -1232,17 +1249,16 @@ rge_rxeof(struct rge_softc *sc)
 		total_len = RGE_RXBYTES(cur_rx);
 		rxq = &sc->rge_ldata.rge_rxq[i];
 		m = rxq->rxq_mbuf;
-		rxq->rxq_mbuf = NULL;
 		rx = 1;
 
-		/* Invalidate the RX mbuf and unload its map. */
+		/* Invalidate the RX mbuf. */
 		bus_dmamap_sync(sc->sc_dmat, rxq->rxq_dmamap, 0,
 		    rxq->rxq_dmamap->dm_mapsize, BUS_DMASYNC_POSTREAD);
-		bus_dmamap_unload(sc->sc_dmat, rxq->rxq_dmamap);
 
 		if ((rxstat & (RGE_RDCMDSTS_SOF | RGE_RDCMDSTS_EOF)) !=
 		    (RGE_RDCMDSTS_SOF | RGE_RDCMDSTS_EOF)) {
-			rge_discard_rxbuf(sc, i);
+			if_statinc(ifp, if_ierrors);
+			rge_load_rxbuf(sc, i);
 			continue;
 		}
 
@@ -1252,11 +1268,11 @@ rge_rxeof(struct rge_softc *sc)
 			 * If this is part of a multi-fragment packet,
 			 * discard all the pieces.
 			 */
-			 if (sc->rge_head != NULL) {
+			if (sc->rge_head != NULL) {
 				m_freem(sc->rge_head);
 				sc->rge_head = sc->rge_tail = NULL;
 			}
-			rge_discard_rxbuf(sc, i);
+			rge_load_rxbuf(sc, i);
 			continue;
 		}
 
@@ -1264,13 +1280,13 @@ rge_rxeof(struct rge_softc *sc)
 		 * If allocating a replacement mbuf fails,
 		 * reload the current one.
 		 */
-
-		if (rge_newbuf(sc, i) == ENOBUFS) {
+		if (rge_newbuf(sc, i) != 0) {
+			if_statinc(ifp, if_iqdrops);
 			if (sc->rge_head != NULL) {
 				m_freem(sc->rge_head);
 				sc->rge_head = sc->rge_tail = NULL;
 			}
-			rge_discard_rxbuf(sc, i);
+			rge_load_rxbuf(sc, i);
 			continue;
 		}
 

Reply via email to