Re: [PATCHv10 2/4] genirq: Provide a snapshot mechanism for interrupt statistics
 any_count == 0:
         return text;

@@ -45,7 +45,7 @@ def show_irq_desc(prec, irq):
     text += "%*d: " % (prec, irq)
     for cpu in cpus.each_online_cpu():
         if desc['kstat_irqs']:
-            count = cpus.per_cpu(desc['kstat_irqs'], cpu)
+            count = cpus.per_cpu(desc['kstat_irqs'], cpu)['cnt']
         else:
             count = 0
         text += "%10u" % (count)
@@ -177,7 +177,7 @@ def arm_common_show_interrupts(prec):
         if desc == 0:
             continue
         for cpu in cpus.each_online_cpu():
-            text += "%10u" % (cpus.per_cpu(desc['kstat_irqs'], cpu))
+            text += "%10u" % (cpus.per_cpu(desc['kstat_irqs'], cpu)['cnt'])
         text += " %s" % (ipi_types[ipi].string())
         text += "\n"
     return text

Looks good. For the newly added struct irqstat, adding annotated comments to
explain each field would be beneficial.

Reviewed-by: Liu Song
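As a concrete example of what such annotations could look like, something along
the lines below would do. The cnt/ref fields and the GENERIC_IRQ_STAT_SNAPSHOT
guard follow the snapshot patch as I read it; the kerneldoc wording itself is
only a suggestion, not the final text:

/**
 * struct irqstat - interrupt statistics
 * @cnt:	real-time interrupt count
 * @ref:	snapshot of the interrupt count, taken when a snapshot of the
 *		statistics is requested
 */
struct irqstat {
	unsigned int	cnt;
#ifdef CONFIG_GENERIC_IRQ_STAT_SNAPSHOT
	unsigned int	ref;
#endif
};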
Re: [PATCHv10 3/4] genirq: Avoid summation loops for /proc/interrupts
On 2024/2/26 10:09, Bitao Hu wrote:
We could use the irq_desc::tot_count member to avoid the summation loop for
interrupts which are not marked as 'PER_CPU' interrupts in 'show_interrupts'.
This could reduce the time overhead of reading /proc/interrupts.

Originally-by: Thomas Gleixner
Signed-off-by: Bitao Hu
---
 include/linux/irqdesc.h | 2 ++
 kernel/irq/irqdesc.c    | 2 +-
 kernel/irq/proc.c       | 9 +++++++--
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 2912b1998670..1ee96d7232b4 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -121,6 +121,8 @@ static inline void irq_unlock_sparse(void) { }
 extern struct irq_desc irq_desc[NR_IRQS];
 #endif

+extern bool irq_is_nmi(struct irq_desc *desc);
+
 static inline unsigned int irq_desc_kstat_cpu(struct irq_desc *desc,
                                               unsigned int cpu)
 {
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 9cd17080b2d8..56a767957a9d 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -955,7 +955,7 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
 	return desc && desc->kstat_irqs ? per_cpu(desc->kstat_irqs->cnt, cpu) : 0;
 }

-static bool irq_is_nmi(struct irq_desc *desc)
+bool irq_is_nmi(struct irq_desc *desc)
 {
 	return desc->istate & IRQS_NMI;
 }
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 6954e0a02047..b3b1b93f0410 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -489,8 +489,13 @@ int show_interrupts(struct seq_file *p, void *v)
 		goto outsparse;

 	if (desc->kstat_irqs) {
-		for_each_online_cpu(j)
-			any_count |= data_race(per_cpu(desc->kstat_irqs->cnt, j));
+		if (!irq_settings_is_per_cpu_devid(desc) &&
+		    !irq_settings_is_per_cpu(desc) &&
+		    !irq_is_nmi(desc))
+			any_count = data_race(desc->tot_count);
+		else
+			for_each_online_cpu(j)
+				any_count |= data_race(per_cpu(desc->kstat_irqs->cnt, j));
 	}

 	if ((!desc->action || irq_desc_is_chained(desc)) && !any_count)

The modification borrows from the implementation of kstat_irqs(). Looks good.

Reviewed-by: Liu Song
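For readers who want the comparison: kstat_irqs() in kernel/irq/irqdesc.c
already takes the same shortcut, using tot_count unless the descriptor is a
per-CPU or NMI one. Roughly like this (a from-memory sketch of that helper
after the kstat_irqs->cnt conversion, not a verbatim copy of the tree):

unsigned int kstat_irqs(unsigned int irq)
{
	struct irq_desc *desc = irq_to_desc(irq);
	unsigned int sum = 0;
	int cpu;

	if (!desc || !desc->kstat_irqs)
		return 0;

	/* Fast path: the aggregated counter is valid for "normal" interrupts. */
	if (!irq_settings_is_per_cpu_devid(desc) &&
	    !irq_settings_is_per_cpu(desc) &&
	    !irq_is_nmi(desc))
		return data_race(desc->tot_count);

	/* Slow path: per-CPU and NMI interrupts do not update tot_count. */
	for_each_possible_cpu(cpu)
		sum += data_race(per_cpu(desc->kstat_irqs->cnt, cpu));

	return sum;
}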
Re: [PATCHv10 4/4] watchdog/softlockup: report the most frequent interrupts
+		stop_counting_irqs();
+	}
+}
+
 static void report_cpu_status(void)
 {
 	print_cpustat();
+	print_irq_counts();
 }

 #else
 static inline void update_cpustat(void) { }
 static inline void report_cpu_status(void) { }
+static inline bool need_counting_irqs(void) { return false; }
+static inline void start_counting_irqs(void) { }
+static inline void stop_counting_irqs(void) { }
 #endif

 /*
@@ -527,6 +621,18 @@ static int is_softlockup(unsigned long touch_ts,
 					 unsigned long now)
 {
 	if ((watchdog_enabled & WATCHDOG_SOFTOCKUP_ENABLED) && watchdog_thresh) {
+		/*
+		 * If period_ts has not been updated during a sample_period, then
+		 * in the subsequent few sample_periods, period_ts might also not
+		 * be updated, which could indicate a potential softlockup. In
+		 * this case, if we suspect the cause of the potential softlockup
+		 * might be interrupt storm, then we need to count the interrupts
+		 * to find which interrupt is storming.
+		 */
+		if (time_after_eq(now, period_ts + get_softlockup_thresh() / NUM_SAMPLE_PERIODS) &&
+		    need_counting_irqs())
+			start_counting_irqs();
+
 		/* Warn about unreasonable delays. */
 		if (time_after(now, period_ts + get_softlockup_thresh()))
 			return now - touch_ts;
@@ -549,6 +655,7 @@ static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
 static int softlockup_fn(void *data)
 {
 	update_touch_ts();
+	stop_counting_irqs();
 	complete(this_cpu_ptr(&softlockup_completion));

 	return 0;

Looks good.

Reviewed-by: Liu Song
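One remark on the timing for other readers: the check above starts counting one
sample period after period_ts stops being updated, i.e. well before the
softlockup report itself fires. A back-of-the-envelope check, assuming the
usual watchdog_thresh default of 10, NUM_SAMPLE_PERIODS of 5 and
get_softlockup_thresh() returning 2 * watchdog_thresh (values recalled from
kernel/watchdog.c; adjust if your tree differs):

#include <stdio.h>

int main(void)
{
	int watchdog_thresh = 10;                     /* assumed sysctl kernel.watchdog_thresh default, seconds */
	int softlockup_thresh = 2 * watchdog_thresh;  /* assumed return value of get_softlockup_thresh() */
	int num_sample_periods = 5;                   /* assumed value of NUM_SAMPLE_PERIODS */

	/* Interrupt counting starts one sample period after period_ts stalls ... */
	printf("start_counting_irqs() after ~%d s\n",
	       softlockup_thresh / num_sample_periods);          /* 4 s */

	/* ... while the softlockup warning needs the full threshold to elapse. */
	printf("softlockup reported after ~%d s\n", softlockup_thresh); /* 20 s */

	return 0;
}

So by the time the report is printed, the storming interrupt has already been
counted for several sample periods, which is what makes the per-IRQ counts in
the report meaningful.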