On 18.10.2020 11:55, Thomas Gleixner wrote:
> Jakub,
>
> On Sat, Oct 17 2020 at 16:29, Jakub Kicinski wrote:
>> On Sat, 17 Oct 2020 15:45:57 +0200 Heiner Kallweit wrote:
>>> It turned out that in certain configurations this isn't safe most of
>>> the time:
>>> - if CONFIG_PREEMPT_RT is set
>>> - if command line parameter threadirqs is set
>>>
>>> Having said that, drivers are being switched back to
>>> __napi_schedule(), see e.g. the patch in [0] and the related
>>> discussion. I thought about a __napi_schedule version that checks
>>> dynamically whether interrupts are disabled. But checking e.g. the
>>> variable force_irqthreads also comes at a cost, so we may not see a
>>> benefit compared to calling local_irq_save/local_irq_restore.
>
> This does not have to be a variable check. It's trivial enough to make
> it a static key.
>
Pretty cool. I have to admit that I wasn't aware of the jump label
mechanism.
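Just to make sure I read the mechanism correctly: a minimal sketch of
the static key API as I now understand it. All names here (demo_key,
demo_active, ...) are made up for illustration, not taken from your
patch.

#include <linux/init.h>
#include <linux/jump_label.h>

/* The key starts out false, so the branch below compiles to a NOP. */
static DEFINE_STATIC_KEY_FALSE(demo_key);

static bool demo_active(void)
{
        /*
         * No memory load and no compare on the fast path: enabling or
         * disabling the key patches the branch sites in the code at
         * runtime.
         */
        return static_branch_unlikely(&demo_key);
}

static int __init demo_setup(char *arg)
{
        static_branch_enable(&demo_key);
        return 0;
}
early_param("demo", demo_setup);

So the check really is free on the fast path compared to testing a
bool.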
>>> If more or less all users have to switch back, then the question is
>>> whether we should remove __napi_schedule_irqoff.
>>> Instead of touching all users we could make __napi_schedule_irqoff
>>> an alias for __napi_schedule for now.
>>>
>>> [0] https://lkml.org/lkml/2020/10/8/706
>>
>> We're effectively calling raise_softirq_irqoff() from IRQ handlers;
>> with force_irqthreads == true that's no longer legal.
>
> Hrmpf, indeed. When force threading was introduced, that did not exist.
>
> The forced threaded handler is always invoked with bottom halves
> disabled, and bottom half processing happens either when the handler
> returns and the thread wrapper invokes local_bh_enable(), or from
> ksoftirqd. As everything runs in thread context, CPU-local
> serialization through local_bh_disable() is sufficient.
>
>> Thomas - is the expectation that, going forward, IRQ handlers never
>> assume they have IRQs disabled? We don't have any performance numbers,
>> but if I'm reading Agner's tables right, POPF is 18 cycles on
>> Broadwell. Is PUSHF/POPF too cheap to bother?
>
> It's not only PUSHF/POPF, it's PUSHF,CLI -> POPF, but yeah, it's pretty
> cheap nowadays. Still, doing the static key change might be a good
> thing. Completely untested patch below.
>
> Quoting Eric:
>
>> I have to say I do not understand why we want to defer to a thread the
>> hard IRQ that we use in the NAPI model.
>>
>> The whole point of NAPI was to keep the hard irq handler very short.
>
> Right. If the interrupt handler does little more than schedule NAPI,
> you can request it with IRQF_NO_THREAD, which will prevent it from
> being threaded even on RT.
>
>> We should focus on transferring the NAPI work (potentially disrupting)
>> to a thread context, instead of the very minor hard irq trigger.
>
> I read about that. I only looked briefly at the patches and wondered
> why this has its own threading mechanism and is not using the irq
> thread mechanics. I'll have a closer look in the next days.
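As an aside, to check that I understand the IRQF_NO_THREAD point: for a
handler that does nothing but kick NAPI, the request would look roughly
like the sketch below. The driver structure and names (foo_priv,
foo_isr, ...) are hypothetical.

#include <linux/interrupt.h>
#include <linux/netdevice.h>

/* Hypothetical driver private data. */
struct foo_priv {
        struct napi_struct napi;
};

static irqreturn_t foo_isr(int irq, void *dev_id)
{
        struct foo_priv *priv = dev_id;

        /* Nothing but scheduling NAPI, so the handler is short enough
         * to stay in hard irq context. */
        napi_schedule(&priv->napi);
        return IRQ_HANDLED;
}

static int foo_request_irq(struct foo_priv *priv, unsigned int irq)
{
        /*
         * IRQF_NO_THREAD exempts the handler from forced threading,
         * both with threadirqs and on PREEMPT_RT.
         */
        return request_irq(irq, foo_isr, IRQF_NO_THREAD, "foo", priv);
}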
>
> Thanks,
>
>         tglx
> ---
> --- a/drivers/ide/ide-iops.c
> +++ b/drivers/ide/ide-iops.c
> @@ -109,7 +109,6 @@ int __ide_wait_stat(ide_drive_t *drive,
>  	ide_hwif_t *hwif = drive->hwif;
>  	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
>  	unsigned long flags;
> -	bool irqs_threaded = force_irqthreads;
>  	int i;
>  	u8 stat;
>
> @@ -117,7 +116,7 @@ int __ide_wait_stat(ide_drive_t *drive,
>  	stat = tp_ops->read_status(hwif);
>
>  	if (stat & ATA_BUSY) {
> -		if (!irqs_threaded) {
> +		if (!force_irqthreads_active()) {
>  			local_save_flags(flags);
>  			local_irq_enable_in_hardirq();
>  		}
> @@ -133,13 +132,13 @@ int __ide_wait_stat(ide_drive_t *drive,
>  				if ((stat & ATA_BUSY) == 0)
>  					break;
>
> -				if (!irqs_threaded)
> +				if (!force_irqthreads_active())
>  					local_irq_restore(flags);
>  				*rstat = stat;
>  				return -EBUSY;
>  			}
>  		}
> -		if (!irqs_threaded)
> +		if (!force_irqthreads_active())
>  			local_irq_restore(flags);
>  	}
>  	/*
> --- a/drivers/ide/ide-taskfile.c
> +++ b/drivers/ide/ide-taskfile.c
> @@ -406,7 +406,8 @@ static ide_startstop_t pre_task_out_intr
>  		return startstop;
>  	}
>
> -	if (!force_irqthreads && (drive->dev_flags & IDE_DFLAG_UNMASK) == 0)
> +	if (!force_irqthreads_active() &&
> +	    (drive->dev_flags & IDE_DFLAG_UNMASK) == 0)
>  		local_irq_disable();
>
>  	ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE);
> --- a/include/linux/interrupt.h
> +++ b/include/linux/interrupt.h
> @@ -489,12 +489,16 @@ extern int irq_set_irqchip_state(unsigne
>
>  #ifdef CONFIG_IRQ_FORCED_THREADING
>  # ifdef CONFIG_PREEMPT_RT
> -#  define force_irqthreads	(true)
> +static inline bool force_irqthreads_active(void) { return true; }
>  # else
> -extern bool force_irqthreads;
> +extern struct static_key_false force_irqthreads_key;
> +static inline bool force_irqthreads_active(void)
> +{
> +	return static_branch_unlikely(&force_irqthreads_key);
> +}
>  # endif
>  #else
> -#define force_irqthreads	(0)
> +static inline bool force_irqthreads_active(void) { return false; }
>  #endif
>
>  #ifndef local_softirq_pending
> --- a/kernel/irq/manage.c
> +++ b/kernel/irq/manage.c
> @@ -25,12 +25,14 @@
>  #include "internals.h"
>
>  #if defined(CONFIG_IRQ_FORCED_THREADING) && !defined(CONFIG_PREEMPT_RT)
> -__read_mostly bool force_irqthreads;
> -EXPORT_SYMBOL_GPL(force_irqthreads);
> +DEFINE_STATIC_KEY_FALSE(force_irqthreads_key);
> +#ifdef CONFIG_IDE
> +EXPORT_SYMBOL_GPL(force_irqthreads_key);
> +#endif
>
>  static int __init setup_forced_irqthreads(char *arg)
>  {
> -	force_irqthreads = true;
> +	static_branch_enable(&force_irqthreads_key);
>  	return 0;
>  }
>  early_param("threadirqs", setup_forced_irqthreads);
> @@ -1155,8 +1157,8 @@ static int irq_thread(void *data)
>  	irqreturn_t (*handler_fn)(struct irq_desc *desc,
>  				  struct irqaction *action);
>
> -	if (force_irqthreads && test_bit(IRQTF_FORCED_THREAD,
> -					 &action->thread_flags))
> +	if (force_irqthreads_active() && test_bit(IRQTF_FORCED_THREAD,
> +						  &action->thread_flags))
>  		handler_fn = irq_forced_thread_fn;
>  	else
>  		handler_fn = irq_thread_fn;
> @@ -1217,7 +1219,7 @@ EXPORT_SYMBOL_GPL(irq_wake_thread);
>
>  static int irq_setup_forced_threading(struct irqaction *new)
>  {
> -	if (!force_irqthreads)
> +	if (!force_irqthreads_active())
>  		return 0;
>  	if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT))
>  		return 0;
> --- a/kernel/softirq.c
> +++ b/kernel/softirq.c
> @@ -376,7 +376,7 @@ static inline void invoke_softirq(void)
>  	if (ksoftirqd_running(local_softirq_pending()))
>  		return;
>
> -	if (!force_irqthreads) {
> +	if (!force_irqthreads_active()) {
>  #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
>  		/*
>  		 * We can safely execute softirq on the current stack if
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -6282,9 +6282,11 @@ void __napi_schedule(struct napi_struct
>  {
>  	unsigned long flags;
>
> -	local_irq_save(flags);
> +	if (force_irqthreads_active())
> +		local_irq_save(flags);
>  	____napi_schedule(this_cpu_ptr(&softnet_data), n);
> -	local_irq_restore(flags);
> +	if (force_irqthreads_active())
> +		local_irq_restore(flags);

The question is whether we want to modify __napi_schedule() or
__napi_schedule_irqoff(). This may depend on whether there are callers
of __napi_schedule() that require local_irq_save() even when
force_irqthreads_active() is false; I'm not sure about that. At first
glance it seems better to modify __napi_schedule_irqoff() instead, as
in the sketch after the quote. The only drawback I see is that the
function name then becomes a little inconsistent.

>  }
>  EXPORT_SYMBOL(__napi_schedule);
>
>
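To make that alternative concrete, an untested sketch of what I have in
mind for __napi_schedule_irqoff(), leaving __napi_schedule() untouched:

/*
 * Untested sketch: with forced irq threading the "irqoff" callers
 * actually run in thread context with interrupts enabled, so fall
 * back to the irq-safe variant in that case.
 */
void __napi_schedule_irqoff(struct napi_struct *n)
{
        if (!force_irqthreads_active())
                ____napi_schedule(this_cpu_ptr(&softnet_data), n);
        else
                __napi_schedule(n);
}
EXPORT_SYMBOL(__napi_schedule_irqoff);

Drivers that call __napi_schedule_irqoff() from their hard irq handlers
would then keep working unmodified in both configurations.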