After parsing EPOW interrupts, the kernel prints warnings about the
various EPOW events for the user's information, prompting the user to
take action depending on the severity of the event.

At times, EPOW reset event warnings such as the ones below can flood
the kernel log over a period of time.

May 25 03:46:34 alp kernel: Non critical power or cooling issue cleared
May 25 03:46:52 alp kernel: Non critical power or cooling issue cleared
May 25 03:53:48 alp kernel: Non critical power or cooling issue cleared
May 25 03:55:46 alp kernel: Non critical power or cooling issue cleared
May 25 03:56:34 alp kernel: Non critical power or cooling issue cleared
May 25 03:59:04 alp kernel: Non critical power or cooling issue cleared
May 25 04:02:01 alp kernel: Non critical power or cooling issue cleared
May 25 04:04:24 alp kernel: Non critical power or cooling issue cleared
May 25 04:07:18 alp kernel: Non critical power or cooling issue cleared
May 25 04:13:04 alp kernel: Non critical power or cooling issue cleared
May 25 04:22:04 alp kernel: Non critical power or cooling issue cleared
May 25 04:22:26 alp kernel: Non critical power or cooling issue cleared
May 25 04:22:36 alp kernel: Non critical power or cooling issue cleared

This patch avoids these repeated EPOW reset warnings by using a boolean
flag. The flag is initialized to false and is set to true when an EPOW
event arrives. On EPOW_RESET the flag is checked and cleared, so the
reset warning is printed only when it follows an earlier EPOW event,
which avoids multiple warning logs.

Also, merge adjacent pr_err/pr_emerg calls into a single call to reduce
the number of lines printed per warning.

Suggested-by: Vipin K Parashar <vi...@linux.vnet.ibm.com>
[Vipin: edited the changelog]
Cc: Anshuman Khandual <khand...@linux.vnet.ibm.com>
Cc: Anton Blanchard <an...@samba.org>
Cc: Michael Ellerman <m...@ellerman.id.au>
Signed-off-by: Kamalesh Babulal <kamal...@linux.vnet.ibm.com>
---
v3 Changes:
   - Limit warning printed by EPOW RESET event, by guarding it with bool flag.
     Instead of rate limiting all the EPOW events.

v2 Changes:
   - Merged multiple adjacent pr_err/pr_emerg into single line to reduce
     multi-line warnings, based on Michael's comments.

 arch/powerpc/platforms/pseries/ras.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 02e4a17..b30396a 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -40,6 +40,9 @@ static int ras_check_exception_token;
 #define EPOW_SENSOR_TOKEN      9
 #define EPOW_SENSOR_INDEX      0
 
+/* Flag to limit EPOW RESET warning. */
+static bool epow_state;
+
 static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
 static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
 
@@ -145,21 +148,27 @@ static void rtas_parse_epow_errlog(struct rtas_error_log *log)
 
        switch (action_code) {
        case EPOW_RESET:
-               pr_err("Non critical power or cooling issue cleared");
+               if (epow_state) {
+                       pr_err("Non critical power or cooling issue cleared");
+                       epow_state = false;
+               }
                break;
 
        case EPOW_WARN_COOLING:
-               pr_err("Non critical cooling issue reported by firmware");
-               pr_err("Check RTAS error log for details");
+               pr_err("Non critical cooling issue reported by firmware, "
+                      "Check RTAS error log for details");
+               epow_state = true;
                break;
 
        case EPOW_WARN_POWER:
-               pr_err("Non critical power issue reported by firmware");
-               pr_err("Check RTAS error log for details");
+               pr_err("Non critical power issue reported by firmware, "
+                      "Check RTAS error log for details");
+               epow_state = true;
                break;
 
        case EPOW_SYSTEM_SHUTDOWN:
                handle_system_shutdown(epow_log->event_modifier);
+               epow_state = true;
                break;
 
        case EPOW_SYSTEM_HALT:
@@ -169,9 +178,8 @@ static void rtas_parse_epow_errlog(struct rtas_error_log *log)
 
        case EPOW_MAIN_ENCLOSURE:
        case EPOW_POWER_OFF:
-               pr_emerg("Critical power/cooling issue reported by firmware");
-               pr_emerg("Check RTAS error log for details");
-               pr_emerg("Immediate power off");
+               pr_emerg("Critical power/cooling issue reported by firmware, "
+                        "Check RTAS error log for details. Immediate power off.");
                emergency_sync();
                kernel_power_off();
                break;
@@ -179,6 +187,7 @@ static void rtas_parse_epow_errlog(struct rtas_error_log *log)
        default:
                pr_err("Unknown power/cooling event (action code %d)",
                        action_code);
+               epow_state = true;
        }
 }
 
-- 
2.1.2

_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Reply via email to