"Theo de Raadt" <dera...@openbsd.org> writes:

> Greg Steuck <gne...@openbsd.org> wrote:
>
>> Mark Kettenis <mark.kette...@xs4all.nl> writes:
>> 
>> > Anyway, I think you're right in thinking that nvme_intr() needs some
>> > belt and suspenders.  In nvme_shutdown() we delete the "normal"
>> > command queue, but nvme_intr() unconditionally looks at both of them.
>> > Now nvme_shutdown() masks the interrupt and nvme_resume() unmasks it
>> > only after it re-creates the "normal" command queue.  But I think
>> > there are scenarios where we can get a spurious interrupt and it would
>> > check a queue that isn't there.  So I think something like the diff
>> > below would make sense.
>> >
>> > Greg, does this fix your crash?
>> 
>> I applied this on top of -current. Sadly I'm still getting the same
>> crashes in sd_buf_done.
>
> Can you add some printf's to track what happens to sc_q ?


I don't see anything suspicious (at the end) having added the prints
to Mark's patch (immediately below).

One observation: sometimes it takes a long period of I/O failures (like
shell commands yielding "Input/output error") before the crash. Maybe
something else is going on? Interrupts not getting re-enabled? I
don't know how this could lead to the eventual crash, as I don't understand
what's going on at this layer.

diff --git a/sys/dev/ic/nvme.c b/sys/dev/ic/nvme.c
index a37f0f74587..5da1cb875b9 100644
--- a/sys/dev/ic/nvme.c
+++ b/sys/dev/ic/nvme.c
@@ -50,6 +50,10 @@ struct cfdriver nvme_cd = {
        DV_DULL
 };
 
+int nvm_debug = 0;
+
+#define DUMP_SC_Q if (nvm_debug) printf("%s:%d %s %p\n", __func__, __LINE__, 
DEVNAME(sc), sc->sc_q)
+
 int    nvme_ready(struct nvme_softc *, u_int32_t);
 int    nvme_enable(struct nvme_softc *);
 int    nvme_disable(struct nvme_softc *);
@@ -372,8 +376,9 @@ nvme_attach(struct nvme_softc *sc)
                goto free_admin_q;
        }
        nccbs = 64;
-
+       DUMP_SC_Q;
        sc->sc_q = nvme_q_alloc(sc, NVME_IO_Q, 128, sc->sc_dstrd);
+       DUMP_SC_Q;
        if (sc->sc_q == NULL) {
                printf("%s: unable to allocate io q\n", DEVNAME(sc));
                goto disable;
@@ -417,13 +422,17 @@ nvme_attach(struct nvme_softc *sc)
        return (0);
 
 free_q:
+       DUMP_SC_Q;
        nvme_q_free(sc, sc->sc_q);
+       sc->sc_q = NULL;
+       DUMP_SC_Q;
 disable:
        nvme_disable(sc);
 free_ccbs:
        nvme_ccbs_free(sc, nccbs);
 free_admin_q:
        nvme_q_free(sc, sc->sc_admin_q);
+       sc->sc_admin_q = NULL;
 
        return (1);
 }
@@ -446,7 +455,9 @@ nvme_resume(struct nvme_softc *sc)
                return (1);
        }
 
+       DUMP_SC_Q;
        sc->sc_q = nvme_q_alloc(sc, NVME_IO_Q, 128, sc->sc_dstrd);
+       DUMP_SC_Q;
        if (sc->sc_q == NULL) {
                printf("%s: unable to allocate io q\n", DEVNAME(sc));
                goto disable;
@@ -462,7 +473,10 @@ nvme_resume(struct nvme_softc *sc)
        return (0);
 
 free_q:
+       DUMP_SC_Q;
        nvme_q_free(sc, sc->sc_q);
+       sc->sc_q = NULL;
+       DUMP_SC_Q;
 disable:
        nvme_disable(sc);
 
@@ -524,13 +538,17 @@ nvme_shutdown(struct nvme_softc *sc)
 {
        u_int32_t cc, csts;
        int i;
-
+       nvm_debug = 1;
+       
        nvme_write4(sc, NVME_INTMC, 0);
 
+       DUMP_SC_Q;
        if (nvme_q_delete(sc, sc->sc_q) != 0) {
                printf("%s: unable to delete q, disabling\n", DEVNAME(sc));
                goto disable;
        }
+       sc->sc_q = NULL;
+       DUMP_SC_Q;
 
        cc = nvme_read4(sc, NVME_CC);
        CLR(cc, NVME_CC_SHN_MASK);
@@ -669,10 +687,12 @@ nvme_scsi_io(struct scsi_xfer *xs, int dir)
        }
 
        if (ISSET(xs->flags, SCSI_POLL)) {
+               DUMP_SC_Q;
                nvme_poll(sc, sc->sc_q, ccb, nvme_scsi_io_fill, xs->timeout);
                return;
        }
 
+       DUMP_SC_Q;
        nvme_q_submit(sc, sc->sc_q, ccb, nvme_scsi_io_fill);
        return;
 
@@ -756,10 +776,12 @@ nvme_scsi_sync(struct scsi_xfer *xs)
        ccb->ccb_cookie = xs;
 
        if (ISSET(xs->flags, SCSI_POLL)) {
+               DUMP_SC_Q;
                nvme_poll(sc, sc->sc_q, ccb, nvme_scsi_sync_fill, xs->timeout);
                return;
        }
 
+       DUMP_SC_Q;
        nvme_q_submit(sc, sc->sc_q, ccb, nvme_scsi_sync_fill);
 }
 
@@ -1551,10 +1573,10 @@ nvme_intr(void *xsc)
 {
        struct nvme_softc *sc = xsc;
        int rv = 0;
-
-       if (nvme_q_complete(sc, sc->sc_q))
+       DUMP_SC_Q;
+       if (sc->sc_q && nvme_q_complete(sc, sc->sc_q))
                rv = 1;
-       if (nvme_q_complete(sc, sc->sc_admin_q))
+       if (sc->sc_admin_q && nvme_q_complete(sc, sc->sc_admin_q))
                rv = 1;
 
        return (rv);


wskbd1: disconnecting from wsdisplay0
wskbd1 detached
ukbd0 detached
uhidev0 detached
wsmouse0 detached
ums0 detached
uhidev1 detached
wsmouse1 detached
ums1 detached
uhidev2 detached
fido0 detached
uhidev3 detached
uhid0 detached
uhidev4 detached
uhub4 detached
uhub3 detached
nvme_shutdown:545 nvme0 0xffff8000001dce00
nvme_shutdown:551 nvme0 0x0
nvme_shutdown:545 nvme1 0xffff800000254580
nvme_shutdown:551 nvme1 0x0
xhci3: save state timeout
nvme_resume:458 nvme0 0x0
nvme_resume:460 nvme0 0xffff8000003ae880
uhub0 at usb0 configuration 1 interface 0 "AMD xHCI root hub" rev 3.00/1.00 
addr 1
uhub1 at usb1 configuration 1 interface 0 "AMD xHCI root hub" rev 3.00/1.00 
addr 1
nvme_resume:458 nvme1 0x0
nvme_resume:460 nvme1 0xffff800001184a80
uhub2 at usb2 configuration 1 interface 0 "AMD xHCI root hub" rev 3.00/1.00 
addr 1
uhub3 at usb3 configuration 1 interface 0 "AMD xHCI root hub" rev 3.00/1.00 
addr 1
uhub4 at uhub3 port 1 configuration 1 interface 0 "Texas Instruments product 
0x8442" rev 2.10/1.00 addr 2
uhidev0 at uhub4 port 1 configuration 1 interface 0 "P.I. Engineering PC 
Keyboard/Mouse to USB  Adapter" rev 1.10/3.10 addr 3
uhidev0: iclass 3/1
ukbd0 at uhidev0: 8 variable keys, 6 key codes
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
wakeup event: nvme_intr:1576 nvme1 0xffff800001184a80
unknown
wskbd1 at ukbd0 mux 1
wskbd1: connecting to wsdisplay0
uhidev1 at uhub4 port 1 configuration 1 interface 1 "P.I. Engineering PC 
Keyboard/Mouse to USB  Adapter" rev 1.10/3.10 addr 3
uhidev1: iclass 3/1
ums0 at uhidev1: 5 buttons, Z dir
wsmouse0 at ums0 mux 0
uhidev2 at uhub4 port 2 configuration 1 interface 0 "Microsoft Microsoft 
3-Button Mouse with IntelliEye(TM)" rev 1.10/3.00 addr 4
uhidev2: iclass 3/1
ums1 at uhidev2: 3 buttons, Z dir
wsmouse1 at ums1 mux 0
uhidev3 at uhub4 port 3 configuration 1 interface 0 "Yubico Yubico Gnubby 
(gnubby1)" rev 2.00/0.97 addr 5
uhidev3: iclass 3/0
fido0 at uhidev3: input=64, output=64, feature=0
uvideo0 at uhub4 port 4 configuration 1 interface 0 "Logitech HD Pro Webcam 
C920" rev 2.00/0.19 addr 6
video0 at uvideo0
uaudio0 at uhub4 port 4 configuration 1 interface 3 "Logitech HD Pro Webcam 
C920" rev 2.00/0.19 addr 6
uaudio0: class v1, high-speed, sync, channels: 0 play, 2 rec, 2 ctls
audio1 at uaudio0
uhidev4 at uhub4 port 5 configuration 1 interface 0 "Texas Instruments product 
0x82ff" rev 2.01/1.00 addr 7
uhidev4: iclass 3/0
uhid0 at uhidev4: input=255, output=255, feature=2
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_scsi_io:695 nvme1 0xffff800001184a80
nvme_intr:1576 nvme1 0xffff800001184a80

Reply via email to