"Theo de Raadt" <dera...@openbsd.org> writes: > Greg Steuck <gne...@openbsd.org> wrote: > >> Mark Kettenis <mark.kette...@xs4all.nl> writes: >> >> > Anyway, I think you're right in thinking that nvme_intr() needs some >> > belt and suspenders. In nvme_shutdown() we delete the "normal" >> > command queue, but nvme_intr() unconditionally looks at both of them. >> > Now nvme_shutdown() masks the interrupt and nvme_resume() unmasks it >> > only after it re-creates the "normal" command queue. But I think >> > there are scenarios where we can get a spurious interrupt and it would >> > check a queue that isn't there. So I think something like the diff >> > below would make sense. >> > >> > Greg, does this fix your crash? >> >> I applied this on top of -current. Sadly I'm still getting the same >> crashes in sd_buf_done. > > Can you add some printf's to track what happens to sc_q ?
I don't see anything suspicious (log at the end) after adding the prints to Mark's patch (immediately below). One observation: sometimes it takes a long stretch of I/O failures (like shell commands yielding "Input/output error") before the crash. Maybe something else is going on? Interrupts not getting re-enabled? I don't know how this could lead to the eventual crash, as I don't understand what's going on at this layer. diff --git a/sys/dev/ic/nvme.c b/sys/dev/ic/nvme.c index a37f0f74587..5da1cb875b9 100644 --- a/sys/dev/ic/nvme.c +++ b/sys/dev/ic/nvme.c @@ -50,6 +50,10 @@ struct cfdriver nvme_cd = { DV_DULL }; +int nvm_debug = 0; + +#define DUMP_SC_Q if (nvm_debug) printf("%s:%d %s %p\n", __func__, __LINE__, DEVNAME(sc), sc->sc_q) + int nvme_ready(struct nvme_softc *, u_int32_t); int nvme_enable(struct nvme_softc *); int nvme_disable(struct nvme_softc *); @@ -372,8 +376,9 @@ nvme_attach(struct nvme_softc *sc) goto free_admin_q; } nccbs = 64; - + DUMP_SC_Q; sc->sc_q = nvme_q_alloc(sc, NVME_IO_Q, 128, sc->sc_dstrd); + DUMP_SC_Q; if (sc->sc_q == NULL) { printf("%s: unable to allocate io q\n", DEVNAME(sc)); goto disable; @@ -417,13 +422,17 @@ nvme_attach(struct nvme_softc *sc) return (0); free_q: + DUMP_SC_Q; nvme_q_free(sc, sc->sc_q); + sc->sc_q = NULL; + DUMP_SC_Q; disable: nvme_disable(sc); free_ccbs: nvme_ccbs_free(sc, nccbs); free_admin_q: nvme_q_free(sc, sc->sc_admin_q); + sc->sc_admin_q = NULL; return (1); } @@ -446,7 +455,9 @@ nvme_resume(struct nvme_softc *sc) return (1); } + DUMP_SC_Q; sc->sc_q = nvme_q_alloc(sc, NVME_IO_Q, 128, sc->sc_dstrd); + DUMP_SC_Q; if (sc->sc_q == NULL) { printf("%s: unable to allocate io q\n", DEVNAME(sc)); goto disable; @@ -462,7 +473,10 @@ nvme_resume(struct nvme_softc *sc) return (0); free_q: + DUMP_SC_Q; nvme_q_free(sc, sc->sc_q); + sc->sc_q = NULL; + DUMP_SC_Q; disable: nvme_disable(sc); @@ -524,13 +538,17 @@ nvme_shutdown(struct nvme_softc *sc) { u_int32_t cc, csts; int i; - + nvm_debug = 1; + nvme_write4(sc, NVME_INTMC, 0); + 
DUMP_SC_Q; if (nvme_q_delete(sc, sc->sc_q) != 0) { printf("%s: unable to delete q, disabling\n", DEVNAME(sc)); goto disable; } + sc->sc_q = NULL; + DUMP_SC_Q; cc = nvme_read4(sc, NVME_CC); CLR(cc, NVME_CC_SHN_MASK); @@ -669,10 +687,12 @@ nvme_scsi_io(struct scsi_xfer *xs, int dir) } if (ISSET(xs->flags, SCSI_POLL)) { + DUMP_SC_Q; nvme_poll(sc, sc->sc_q, ccb, nvme_scsi_io_fill, xs->timeout); return; } + DUMP_SC_Q; nvme_q_submit(sc, sc->sc_q, ccb, nvme_scsi_io_fill); return; @@ -756,10 +776,12 @@ nvme_scsi_sync(struct scsi_xfer *xs) ccb->ccb_cookie = xs; if (ISSET(xs->flags, SCSI_POLL)) { + DUMP_SC_Q; nvme_poll(sc, sc->sc_q, ccb, nvme_scsi_sync_fill, xs->timeout); return; } + DUMP_SC_Q; nvme_q_submit(sc, sc->sc_q, ccb, nvme_scsi_sync_fill); } @@ -1551,10 +1573,10 @@ nvme_intr(void *xsc) { struct nvme_softc *sc = xsc; int rv = 0; - - if (nvme_q_complete(sc, sc->sc_q)) + DUMP_SC_Q; + if (sc->sc_q && nvme_q_complete(sc, sc->sc_q)) rv = 1; - if (nvme_q_complete(sc, sc->sc_admin_q)) + if (sc->sc_admin_q && nvme_q_complete(sc, sc->sc_admin_q)) rv = 1; return (rv); wskbd1: disconnecting from wsdisplay0 wskbd1 detached ukbd0 detached uhidev0 detached wsmouse0 detached ums0 detached uhidev1 detached wsmouse1 detached ums1 detached uhidev2 detached fido0 detached uhidev3 detached uhid0 detached uhidev4 detached uhub4 detached uhub3 detached nvme_shutdown:545 nvme0 0xffff8000001dce00 nvme_shutdown:551 nvme0 0x0 nvme_shutdown:545 nvme1 0xffff800000254580 nvme_shutdown:551 nvme1 0x0 xhci3: save state timeout nvme_resume:458 nvme0 0x0 nvme_resume:460 nvme0 0xffff8000003ae880 uhub0 at usb0 configuration 1 interface 0 "AMD xHCI root hub" rev 3.00/1.00 addr 1 uhub1 at usb1 configuration 1 interface 0 "AMD xHCI root hub" rev 3.00/1.00 addr 1 nvme_resume:458 nvme1 0x0 nvme_resume:460 nvme1 0xffff800001184a80 uhub2 at usb2 configuration 1 interface 0 "AMD xHCI root hub" rev 3.00/1.00 addr 1 uhub3 at usb3 configuration 1 interface 0 "AMD xHCI root hub" rev 3.00/1.00 addr 1 uhub4 at uhub3 
port 1 configuration 1 interface 0 "Texas Instruments product 0x8442" rev 2.10/1.00 addr 2 uhidev0 at uhub4 port 1 configuration 1 interface 0 "P.I. Engineering PC Keyboard/Mouse to USB Adapter" rev 1.10/3.10 addr 3 uhidev0: iclass 3/1 ukbd0 at uhidev0: 8 variable keys, 6 key codes nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 wakeup event: nvme_intr:1576 nvme1 0xffff800001184a80 unknown wskbd1 at ukbd0 mux 1 wskbd1: connecting to wsdisplay0 uhidev1 at uhub4 port 1 configuration 1 interface 1 "P.I. Engineering PC Keyboard/Mouse to USB Adapter" rev 1.10/3.10 addr 3 uhidev1: iclass 3/1 ums0 at uhidev1: 5 buttons, Z dir wsmouse0 at ums0 mux 0 uhidev2 at uhub4 port 2 configuration 1 interface 0 "Microsoft Microsoft 3-Button Mouse with IntelliEye(TM)" rev 1.10/3.00 addr 4 uhidev2: iclass 3/1 ums1 at uhidev2: 3 buttons, Z dir wsmouse1 at ums1 mux 0 uhidev3 at uhub4 port 3 configuration 1 interface 0 "Yubico Yubico Gnubby (gnubby1)" rev 2.00/0.97 addr 5 uhidev3: iclass 3/0 fido0 at uhidev3: input=64, output=64, feature=0 uvideo0 at uhub4 port 4 configuration 1 interface 0 "Logitech HD Pro Webcam C920" rev 2.00/0.19 addr 6 video0 at uvideo0 uaudio0 at uhub4 port 4 configuration 1 interface 3 "Logitech HD Pro Webcam C920" rev 2.00/0.19 addr 6 uaudio0: class v1, high-speed, sync, channels: 0 play, 2 rec, 2 ctls audio1 at uaudio0 uhidev4 at uhub4 port 5 configuration 1 interface 0 "Texas Instruments product 0x82ff" rev 2.01/1.00 addr 7 uhidev4: iclass 3/0 uhid0 at uhidev4: input=255, output=255, feature=2 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 
0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_scsi_io:695 nvme1 0xffff800001184a80 nvme_intr:1576 nvme1 0xffff800001184a80