Re: [PATCHv10 2/4] genirq: Provide a snapshot mechanism for interrupt statistics

2024-02-26 Thread Liu Song
 any_count == 0:

         return text
@@ -45,7 +45,7 @@ def show_irq_desc(prec, irq):
     text += "%*d: " % (prec, irq)
     for cpu in cpus.each_online_cpu():
         if desc['kstat_irqs']:
-            count = cpus.per_cpu(desc['kstat_irqs'], cpu)
+            count = cpus.per_cpu(desc['kstat_irqs'], cpu)['cnt']
         else:
             count = 0
         text += "%10u" % (count)
@@ -177,7 +177,7 @@ def arm_common_show_interrupts(prec):
         if desc == 0:
             continue
         for cpu in cpus.each_online_cpu():
-            text += "%10u" % (cpus.per_cpu(desc['kstat_irqs'], cpu))
+            text += "%10u" % (cpus.per_cpu(desc['kstat_irqs'], cpu)['cnt'])
         text += "  %s" % (ipi_types[ipi].string())
         text += "\n"
     return text

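(For anyone retesting the script change: these helpers back the lx-interruptlist command in scripts/gdb, if I remember the command name correctly, so a quick sanity check is to run

	(gdb) lx-interruptlist

against a vmlinux built with the new struct irqstat layout and compare the output with /proc/interrupts.)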
Looks good.

For the newly added struct irqstat, adding kerneldoc comments to explain
each field would be beneficial; see the sketch below for one possibility.
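
A minimal sketch, assuming the field set from this series (cnt is visible in
the quoted hunks; the ref member and its CONFIG_GENERIC_IRQ_STAT_SNAPSHOT
guard are my recollection of the snapshot patch, and the kerneldoc wording is
only a suggestion):

/**
 * struct irqstat - interrupt statistics
 * @cnt:	real-time interrupt count
 * @ref:	snapshot of @cnt, taken when a statistics snapshot is requested
 */
struct irqstat {
	unsigned int	cnt;
#ifdef CONFIG_GENERIC_IRQ_STAT_SNAPSHOT
	unsigned int	ref;
#endif
};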


Reviewed-by: Liu Song 



Re: [PATCHv10 3/4] genirq: Avoid summation loops for /proc/interrupts

2024-02-26 Thread Liu Song



On 2024/2/26 10:09, Bitao Hu wrote:

We could use the irq_desc::tot_count member to avoid the summation
loop for interrupts which are not marked as 'PER_CPU' interrupts in
'show_interrupts'. This could reduce the time overhead of reading
/proc/interrupts.

Originally-by: Thomas Gleixner 
Signed-off-by: Bitao Hu 
---
  include/linux/irqdesc.h | 2 ++
  kernel/irq/irqdesc.c    | 2 +-
  kernel/irq/proc.c       | 9 +++--
  3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 2912b1998670..1ee96d7232b4 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -121,6 +121,8 @@ static inline void irq_unlock_sparse(void) { }
  extern struct irq_desc irq_desc[NR_IRQS];
  #endif
  
+extern bool irq_is_nmi(struct irq_desc *desc);
+
  static inline unsigned int irq_desc_kstat_cpu(struct irq_desc *desc,
  unsigned int cpu)
  {
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 9cd17080b2d8..56a767957a9d 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -955,7 +955,7 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
	return desc && desc->kstat_irqs ? per_cpu(desc->kstat_irqs->cnt, cpu) : 0;
  }
  
-static bool irq_is_nmi(struct irq_desc *desc)
+bool irq_is_nmi(struct irq_desc *desc)
  {
return desc->istate & IRQS_NMI;
  }
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 6954e0a02047..b3b1b93f0410 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -489,8 +489,13 @@ int show_interrupts(struct seq_file *p, void *v)
goto outsparse;
  
 	if (desc->kstat_irqs) {
-		for_each_online_cpu(j)
-			any_count |= data_race(per_cpu(desc->kstat_irqs->cnt, j));
+		if (!irq_settings_is_per_cpu_devid(desc) &&
+		    !irq_settings_is_per_cpu(desc) &&
+		    !irq_is_nmi(desc))
+			any_count = data_race(desc->tot_count);
+		else
+			for_each_online_cpu(j)
+				any_count |= data_race(per_cpu(desc->kstat_irqs->cnt, j));
 	}
  
  	if ((!desc->action || irq_desc_is_chained(desc)) && !any_count)


The modification borrows from the implementation of kstat_irqs(); a sketch
of that logic is included below for comparison. Looks good.

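Roughly, the tot_count fast path being borrowed (reconstructed from memory of
kernel/irq/irqdesc.c after this series; the helper name kstat_irqs_desc and
the exact shape are my assumption, not copied from the tree):

unsigned int kstat_irqs_desc(struct irq_desc *desc, const struct cpumask *cpumask)
{
	unsigned int sum = 0;
	int cpu;

	/* Non-per-CPU, non-NMI interrupts maintain an aggregate in tot_count */
	if (!irq_settings_is_per_cpu_devid(desc) &&
	    !irq_settings_is_per_cpu(desc) &&
	    !irq_is_nmi(desc))
		return data_race(desc->tot_count);

	/* Otherwise fall back to summing the per-CPU counters */
	for_each_cpu(cpu, cpumask)
		sum += data_race(per_cpu(desc->kstat_irqs->cnt, cpu));

	return sum;
}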

Reviewed-by: Liu Song



Re: [PATCHv10 4/4] watchdog/softlockup: report the most frequent interrupts

2024-02-27 Thread Liu Song
top_counting_irqs();
+   }
+}
+
  static void report_cpu_status(void)
  {
print_cpustat();
+	print_irq_counts();
  }
  #else
  static inline void update_cpustat(void) { }
  static inline void report_cpu_status(void) { }
+static inline bool need_counting_irqs(void) { return false; }
+static inline void start_counting_irqs(void) { }
+static inline void stop_counting_irqs(void) { }
  #endif
  
  /*

@@ -527,6 +621,18 @@ static int is_softlockup(unsigned long touch_ts,
 					 unsigned long now)
 {
 	if ((watchdog_enabled & WATCHDOG_SOFTOCKUP_ENABLED) && watchdog_thresh) {
+		/*
+		 * If period_ts has not been updated during a sample_period, then
+		 * in the subsequent few sample_periods, period_ts might also not
+		 * be updated, which could indicate a potential softlockup. In
+		 * this case, if we suspect the cause of the potential softlockup
+		 * might be interrupt storm, then we need to count the interrupts
+		 * to find which interrupt is storming.
+		 */
+		if (time_after_eq(now, period_ts + get_softlockup_thresh() / NUM_SAMPLE_PERIODS) &&
+		    need_counting_irqs())
+			start_counting_irqs();
+
 		/* Warn about unreasonable delays. */
 		if (time_after(now, period_ts + get_softlockup_thresh()))
 			return now - touch_ts;
@@ -549,6 +655,7 @@ static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
 static int softlockup_fn(void *data)
 {
 	update_touch_ts();
+	stop_counting_irqs();
 	complete(this_cpu_ptr(&softlockup_completion));
 
 	return 0;

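One note on the timing check in is_softlockup(), in case it helps other
readers: if I have the defaults right (watchdog_thresh = 10, so
get_softlockup_thresh() returns 20 seconds, and NUM_SAMPLE_PERIODS = 5),
interrupt counting starts once period_ts is one sample period stale, i.e.
20 / 5 = 4 seconds, well before the full 20-second softlockup warning. That
gives the storm detection several sample periods of IRQ data before the
report is printed.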

Looks good.

Reviewed-by: Liu Song