On Tue, 20 May 2025, Bjorn Helgaas wrote: > From: Jon Pan-Doh <pan...@google.com> > > Spammy devices can flood kernel logs with AER errors and slow/stall > execution. Add per-device ratelimits for AER correctable and non-fatal > uncorrectable errors that use the kernel defaults (10 per 5s). Logging of > fatal errors is not ratelimited. > > There are two AER logging entry points: > > - aer_print_error() is used by DPC and native AER > > - pci_print_aer() is used by GHES and CXL > > The native AER aer_print_error() case includes a loop that may log details > from multiple devices. This is ratelimited such that we log all the > details we find if any of the devices has not hit the ratelimit. If no > such device details are found, we still log the Error Source from the ERR_* > Message, ratelimited by the Root Port or RCEC that received it. > > The DPC aer_print_error() case is not ratelimited, since this only happens > for fatal errors. > > The CXL pci_print_aer() case is ratelimited by the Error Source device. > > The GHES pci_print_aer() case is via aer_recover_work_func(), which > searches for the Error Source device. If the device is not found, there's > no per-device ratelimit, so we use a system-wide ratelimit that covers all > error types (correctable, non-fatal, and fatal). > > Sargun at Meta reported internally that a flood of AER errors causes RCU > CPU stall warnings and CSD-lock warnings. > > Tested using aer-inject[1]. Sent 11 AER errors. Observed 10 errors logged > while AER stats (cat /sys/bus/pci/devices/<dev>/aer_dev_correctable) show > true count of 11. > > [1] https://git.kernel.org/pub/scm/linux/kernel/git/gong.chen/aer-inject.git > > [bhelgaas: commit log, factor out trace_aer_event() and aer_print_rp_info() > changes to previous patches, collect single aer_err_info.ratelimit as union > of ratelimits of all error source devices, don't ratelimit fatal errors, > "aer_report" -> "aer_info"] > Reported-by: Sargun Dhillon <sar...@meta.com> > Signed-off-by: Jon Pan-Doh <pan...@google.com> > Signed-off-by: Bjorn Helgaas <bhelg...@google.com>
Reviewed-by: Ilpo Järvinen <ilpo.jarvi...@linux.intel.com> -- i. > --- > drivers/pci/pci.h | 3 +- > drivers/pci/pcie/aer.c | 66 ++++++++++++++++++++++++++++++++++++++---- > drivers/pci/pcie/dpc.c | 1 + > 3 files changed, 64 insertions(+), 6 deletions(-) > > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h > index 705f9ef58acc..65c466279ade 100644 > --- a/drivers/pci/pci.h > +++ b/drivers/pci/pci.h > @@ -593,7 +593,8 @@ struct aer_err_info { > unsigned int id:16; > > unsigned int severity:2; /* 0:NONFATAL | 1:FATAL | 2:COR */ > - unsigned int __pad1:5; > + unsigned int ratelimit:1; /* 0=skip, 1=print */ > + unsigned int __pad1:4; > unsigned int multi_error_valid:1; > > unsigned int first_error:5; > diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c > index 4f1bff0f000f..f9e684ac7878 100644 > --- a/drivers/pci/pcie/aer.c > +++ b/drivers/pci/pcie/aer.c > @@ -28,6 +28,7 @@ > #include <linux/interrupt.h> > #include <linux/delay.h> > #include <linux/kfifo.h> > +#include <linux/ratelimit.h> > #include <linux/slab.h> > #include <acpi/apei.h> > #include <acpi/ghes.h> > @@ -88,6 +89,10 @@ struct aer_info { > u64 rootport_total_cor_errs; > u64 rootport_total_fatal_errs; > u64 rootport_total_nonfatal_errs; > + > + /* Ratelimits for errors */ > + struct ratelimit_state cor_log_ratelimit; > + struct ratelimit_state uncor_log_ratelimit; > }; > > #define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \ > @@ -379,6 +384,11 @@ void pci_aer_init(struct pci_dev *dev) > > dev->aer_info = kzalloc(sizeof(*dev->aer_info), GFP_KERNEL); > > + ratelimit_state_init(&dev->aer_info->cor_log_ratelimit, > + DEFAULT_RATELIMIT_INTERVAL, > DEFAULT_RATELIMIT_BURST); > + ratelimit_state_init(&dev->aer_info->uncor_log_ratelimit, > + DEFAULT_RATELIMIT_INTERVAL, > DEFAULT_RATELIMIT_BURST); > + > /* > * We save/restore PCI_ERR_UNCOR_MASK, PCI_ERR_UNCOR_SEVER, > * PCI_ERR_COR_MASK, and PCI_ERR_CAP. Root and Root Complex Event > @@ -672,6 +682,18 @@ static void pci_rootport_aer_stats_incr(struct pci_dev > *pdev, > } > } > > +static int aer_ratelimit(struct pci_dev *dev, unsigned int severity) > +{ > + struct ratelimit_state *ratelimit; > + > + if (severity == AER_CORRECTABLE) > + ratelimit = &dev->aer_info->cor_log_ratelimit; > + else > + ratelimit = &dev->aer_info->uncor_log_ratelimit; > + > + return __ratelimit(ratelimit); > +} > + > static void __aer_print_error(struct pci_dev *dev, struct aer_err_info *info) > { > const char **strings; > @@ -715,6 +737,9 @@ void aer_print_error(struct pci_dev *dev, struct > aer_err_info *info) > > pci_dev_aer_stats_incr(dev, info); > > + if (!info->ratelimit) > + return; > + > if (!info->status) { > pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, > (Unregistered Agent ID)\n", > aer_error_severity_string[info->severity]); > @@ -785,6 +810,9 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity, > > pci_dev_aer_stats_incr(dev, &info); > > + if (!aer_ratelimit(dev, info.severity)) > + return; > + > layer = AER_GET_LAYER_ERROR(aer_severity, status); > agent = AER_GET_AGENT(aer_severity, status); > > @@ -815,8 +843,19 @@ EXPORT_SYMBOL_NS_GPL(pci_print_aer, "CXL"); > */ > static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev) > { > + /* > + * Ratelimit AER log messages. "dev" is either the source > + * identified by the root's Error Source ID or it has an unmasked > + * error logged in its own AER Capability. If any of these devices > + * has not reached its ratelimit, log messages for all of them. > + * Messages are emitted when "e_info->ratelimit" is non-zero. > + * > + * Note that "e_info->ratelimit" was already initialized to 1 for the > + * ERR_FATAL case. > + */ > if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) { > e_info->dev[e_info->error_dev_num] = pci_dev_get(dev); > + e_info->ratelimit |= aer_ratelimit(dev, e_info->severity); > e_info->error_dev_num++; > return 0; > } > @@ -914,7 +953,7 @@ static int find_device_iter(struct pci_dev *dev, void > *data) > * e_info->error_dev_num and e_info->dev[], based on the given information. > */ > static bool find_source_device(struct pci_dev *parent, > - struct aer_err_info *e_info) > + struct aer_err_info *e_info) > { > struct pci_dev *dev = parent; > int result; > @@ -1140,9 +1179,10 @@ static void aer_recover_work_func(struct work_struct > *work) > pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus, > entry.devfn); > if (!pdev) { > - pr_err("no pci_dev for %04x:%02x:%02x.%x\n", > - entry.domain, entry.bus, > - PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn)); > + pr_err_ratelimited("%04x:%02x:%02x.%x: no pci_dev > found\n", > + entry.domain, entry.bus, > + PCI_SLOT(entry.devfn), > + PCI_FUNC(entry.devfn)); > continue; > } > pci_print_aer(pdev, entry.severity, entry.regs); > @@ -1283,7 +1323,21 @@ static void aer_isr_one_error_type(struct pci_dev > *root, > bool found; > > found = find_source_device(root, info); > - aer_print_source(root, info, found ? "" : " (no details found"); > + > + /* > + * If we're going to log error messages, we've already set > + * "info->ratelimit" to non-zero (which enables printing) because > + * this is either an ERR_FATAL or we found a device with an error > + * logged in its AER Capability. > + * > + * If we didn't find the Error Source device, at least log the > + * Requester ID from the ERR_* Message received by the Root Port or > + * RCEC, ratelimited by the RP or RCEC. > + */ > + if (info->ratelimit || > + (!found && aer_ratelimit(root, info->severity))) > + aer_print_source(root, info, found ? "" : " (no details found"); > + > if (found) > aer_process_err_devices(info); > } > @@ -1317,12 +1371,14 @@ static void aer_isr_one_error(struct pci_dev *root, > aer_isr_one_error_type(root, &e_info); > } > > + /* Note that messages for ERR_FATAL are never ratelimited */ > if (status & PCI_ERR_ROOT_UNCOR_RCV) { > int fatal = status & PCI_ERR_ROOT_FATAL_RCV; > int multi = status & PCI_ERR_ROOT_MULTI_UNCOR_RCV; > struct aer_err_info e_info = { > .id = ERR_UNCOR_ID(e_src->id), > .severity = fatal ? AER_FATAL : AER_NONFATAL, > + .ratelimit = fatal ? 1 : 0, > .level = KERN_ERR, > .multi_error_valid = multi ? 1 : 0, > }; > diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c > index 6c98fabdba57..530c5e2cf7e8 100644 > --- a/drivers/pci/pcie/dpc.c > +++ b/drivers/pci/pcie/dpc.c > @@ -271,6 +271,7 @@ void dpc_process_error(struct pci_dev *pdev) > status); > if (dpc_get_aer_uncorrect_severity(pdev, &info) && > aer_get_device_error_info(pdev, &info)) { > + info.ratelimit = 1; /* ERR_FATAL; no ratelimit */ > aer_print_error(pdev, &info); > pci_aer_clear_nonfatal_status(pdev); > pci_aer_clear_fatal_status(pdev); >