On Tue, Sep 07, 2021 at 07:29:37PM +0200, Martin Pieuchot wrote:
> On 07/09/21(Tue) 18:03, Stefan Sperling wrote:
> > On Tue, Sep 07, 2021 at 05:16:52PM +0200, Martin Pieuchot wrote:
> > > On 07/09/21(Tue) 15:48, Stefan Sperling wrote:
> > > > This patch makes iwx(4) resume reliably for me.
> > > > 
> > > > There were missing splnet() calls which leads to an obvious race
> > > > between the interrupt handler and the code which triggers firmware
> > > > loading and then sleeps to wait for confirmation.
> > > > This patch adds the missing splnet().
> > > > 
> > > > However, even with splnet() protection added I need to add the
> > > > following two lines of code to make the patch work reliably:
> > > > 
> > > >         /* wait for the firmware to load */
> > > >         for (w = 0; !sc->sc_uc.uc_intr && w < 10; w++) {
> > > >                 err = tsleep_nsec(&sc->sc_uc, 0, "iwxuc", 
> > > > MSEC_TO_NSEC(100));
> > > > +               /* XXX This should not be needed, should it: */
> > > > +               if (err == EWOULDBLOCK && sc->sc_uc.uc_intr)
> > > > +                       err = 0;
> > > >         }
> > > >         if (err || !sc->sc_uc.uc_ok)
> > > >                 printf("%s: could not load firmware, %d\n", 
> > > > DEVNAME(sc), err);
> > > > 
> > > > Which seems odd. I would expect tsleep to return EWOULDBLOCK only when
> > > > the interrupt handler did not already set uc_intr and call wakeup().
> > > 
> > > That suggests the timeout fires before the wakeup(9).
> > 
> > Yes, it does.
> > 
> > But how could uc_intr already be set to 1 in that case?
> 
> Is it set before the timeout fires or before tsleep(9) returns?
> 
> 

As discussed off-list, the interrupt fired between endtsleep() and
sleep_finish(). This means the driver cannot expect that EWOULDBLOCK
is only returned if the interrupt did not fire.

Instead of dancing around a 100msec timeout, do this like iwn(4) does it
and wait for up to 1 second. This avoids the race between sleep_finish()
and the interrupt handler.

ok?

I will fix the splnet() issue separately later.

diff c05ef66598a004c1e173a5d0fd4cdf5403f2ad99 /usr/src (staged changes)
blob - be92ca7dabbc95aa664db41dbcc8806c766d5c9e
blob + cb1fcf19f26a512274ac04a4e42a6389a203d08a
--- sys/dev/pci/if_iwm.c
+++ sys/dev/pci/if_iwm.c
@@ -4145,9 +4145,10 @@ iwm_load_firmware_8000(struct iwm_softc *sc, enum iwm_
 int
 iwm_load_firmware(struct iwm_softc *sc, enum iwm_ucode_type ucode_type)
 {
-       int err, w;
+       int err;
 
        sc->sc_uc.uc_intr = 0;
+       sc->sc_uc.uc_ok = 0;
 
        if (sc->sc_device_family >= IWM_DEVICE_FAMILY_8000)
                err = iwm_load_firmware_8000(sc, ucode_type);
@@ -4158,9 +4159,7 @@ iwm_load_firmware(struct iwm_softc *sc, enum iwm_ucode
                return err;
 
        /* wait for the firmware to load */
-       for (w = 0; !sc->sc_uc.uc_intr && w < 10; w++) {
-               err = tsleep_nsec(&sc->sc_uc, 0, "iwmuc", MSEC_TO_NSEC(100));
-       }
+       err = tsleep_nsec(&sc->sc_uc, 0, "iwmuc", SEC_TO_NSEC(1));
        if (err || !sc->sc_uc.uc_ok)
                printf("%s: could not load firmware\n", DEVNAME(sc));
 
blob - 428a299e7a2d859aa68a934d4048d843be1835ce
blob + 39e5ecf5f6435687d6c998f65d69416ee592217d
--- sys/dev/pci/if_iwx.c
+++ sys/dev/pci/if_iwx.c
@@ -3348,9 +3348,10 @@ int
 iwx_load_firmware(struct iwx_softc *sc)
 {
        struct iwx_fw_sects *fws;
-       int err, w;
+       int err;
 
        sc->sc_uc.uc_intr = 0;
+       sc->sc_uc.uc_ok = 0;
 
        fws = &sc->sc_fw.fw_sects[IWX_UCODE_TYPE_REGULAR];
        err = iwx_ctxt_info_init(sc, fws);
@@ -3360,9 +3361,7 @@ iwx_load_firmware(struct iwx_softc *sc)
        }
 
        /* wait for the firmware to load */
-       for (w = 0; !sc->sc_uc.uc_intr && w < 10; w++) {
-               err = tsleep_nsec(&sc->sc_uc, 0, "iwxuc", MSEC_TO_NSEC(100));
-       }
+       err = tsleep_nsec(&sc->sc_uc, 0, "iwxuc", SEC_TO_NSEC(1));
        if (err || !sc->sc_uc.uc_ok)
                printf("%s: could not load firmware, %d\n", DEVNAME(sc), err);
 

Reply via email to