> On 30-Apr-2015, at 11:37, Vipin K Parashar <vi...@linux.vnet.ibm.com> wrote: > > This patch adds support for FSP EPOW (Early Power Off Warning) and > DPO (Delayed Power Off) events support for PowerNV platform. EPOW events > are generated by SPCN/FSP due to various critical system conditions that > need system shutdown. Few examples of these conditions are high ambient > temperature or system running on UPS power with low UPS battery. DPO event > is generated in response to admin initiated system shutdown request. > This patch enables host kernel on PowerNV platform to handle OPAL > notifications for these events and initiate system poweroff. Since EPOW > notifications are sent in advance of impending shutdown event and thus this > patch also adds functionality to wait for EPOW condition to return to > normal. If EPOW condition doesn't return to normal in estimated time it > proceeds with graceful system shutdown. System admin can also add host > userspace scripts to perform any specific actions like graceful guest > shutdown upon system poweroff. 
> > Signed-off-by: Vipin K Parashar <vi...@linux.vnet.ibm.com> > --- > arch/powerpc/include/asm/opal-api.h | 30 ++ > arch/powerpc/include/asm/opal.h | 3 +- > arch/powerpc/platforms/powernv/Makefile | 1 + > .../platforms/powernv/opal-poweroff-events.c | 358 +++++++++++++++++++++ > arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + > 5 files changed, 392 insertions(+), 1 deletion(-) > create mode 100644 arch/powerpc/platforms/powernv/opal-poweroff-events.c > > diff --git a/arch/powerpc/include/asm/opal-api.h > b/arch/powerpc/include/asm/opal-api.h > index 0321a90..03b3cef 100644 > --- a/arch/powerpc/include/asm/opal-api.h > +++ b/arch/powerpc/include/asm/opal-api.h > @@ -730,6 +730,36 @@ struct opal_i2c_request { > __be64 buffer_ra; /* Buffer real address */ > }; > > +/* > + * EPOW status sharing (OPAL and the host) > + * > + * The host will pass on OPAL, a buffer of length OPAL_EPOW_MAX_CLASSES > + * to fetch system wide EPOW status. Each element in the returned buffer > + * will contain bitwise EPOW status for each EPOW sub class. 
> + */ > + > +/* EPOW types */ > +enum OpalEpow { > + OPAL_EPOW_POWER = 0, /* Power EPOW */ > + OPAL_EPOW_TEMP = 1, /* Temperature EPOW */ > + OPAL_EPOW_COOLING = 2, /* Cooling EPOW */ > + OPAL_MAX_EPOW_CLASSES = 3, /* Max EPOW categories */ > +}; > + > +/* Power EPOW events */ > +enum OpalEpowPower { > + OPAL_EPOW_POWER_UPS = 0x1, /* System on UPS power */ > + OPAL_EPOW_POWER_UPS_LOW = 0x2, /* System on UPS power with low battery*/ > +}; > + > +/* Temperature EPOW events */ > +enum OpalEpowTemp { > + OPAL_EPOW_TEMP_HIGH_AMB = 0x1, /* High ambient temperature */ > + OPAL_EPOW_TEMP_CRIT_AMB = 0x2, /* Critical ambient temperature */ > + OPAL_EPOW_TEMP_HIGH_INT = 0x4, /* High internal temperature */ > + OPAL_EPOW_TEMP_CRIT_INT = 0x8, /* Critical internal temperature */ > +}; > + > #endif /* __ASSEMBLY__ */ > > #endif /* __OPAL_API_H */ > diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h > index 042af1a..0777864 100644 > --- a/arch/powerpc/include/asm/opal.h > +++ b/arch/powerpc/include/asm/opal.h > @@ -141,7 +141,6 @@ int64_t opal_pci_fence_phb(uint64_t phb_id); > int64_t opal_pci_reinit(uint64_t phb_id, uint64_t reinit_scope, uint64_t > data); > int64_t opal_pci_mask_pe_error(uint64_t phb_id, uint16_t pe_number, uint8_t > error_type, uint8_t mask_action); > int64_t opal_set_slot_led_status(uint64_t phb_id, uint64_t slot_id, uint8_t > led_type, uint8_t led_action); > -int64_t opal_get_epow_status(__be64 *status); > int64_t opal_set_system_attention_led(uint8_t led_action); > int64_t opal_pci_next_error(uint64_t phb_id, __be64 *first_frozen_pe, > __be16 *pci_error_type, __be16 *severity); > @@ -200,6 +199,8 @@ int64_t opal_flash_write(uint64_t id, uint64_t offset, > uint64_t buf, > uint64_t size, uint64_t token); > int64_t opal_flash_erase(uint64_t id, uint64_t offset, uint64_t size, > uint64_t token); > +int32_t opal_get_epow_status(__be32 *status, __be32 *num_classes); > +int32_t opal_get_dpo_status(__be32 *timeout); > > /* Internal 
functions */ > extern int early_init_dt_scan_opal(unsigned long node, const char *uname, > diff --git a/arch/powerpc/platforms/powernv/Makefile > b/arch/powerpc/platforms/powernv/Makefile > index 33e44f3..b817bdb 100644 > --- a/arch/powerpc/platforms/powernv/Makefile > +++ b/arch/powerpc/platforms/powernv/Makefile > @@ -2,6 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o > opal-async.o > obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o > obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o > opal-sensor.o > obj-y += opal-msglog.o opal-hmi.o opal-power.o > +obj-y += opal-poweroff-events.o > > obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o > obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o > diff --git a/arch/powerpc/platforms/powernv/opal-poweroff-events.c > b/arch/powerpc/platforms/powernv/opal-poweroff-events.c > new file mode 100644 > index 0000000..9b169e2 > --- /dev/null > +++ b/arch/powerpc/platforms/powernv/opal-poweroff-events.c > @@ -0,0 +1,358 @@ Instead of creating a new file can you merge this functionality with existing code in opal-power.c. It seems to be doing work similar to what you are doing in this code. > +/* > + * PowerNV poweroff events support > + * > + * Copyright 2015 IBM Corp. > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. 
> + */ > + > +#define pr_fmt(fmt) "POWEROFF_EVENT: " fmt > + > +#include <linux/kernel.h> > +#include <linux/spinlock.h> > +#include <linux/timer.h> > +#include <linux/reboot.h> > +#include <asm/opal.h> > +#include <asm/machdep.h> > + > +/* System EPOW status */ > +u32 epow_status[OPAL_MAX_EPOW_CLASSES]; > +int num_epow_classes; > + > +/* EPOW event timer and corresponding locks */ > +static struct timer_list epow_timer; > +static DEFINE_SPINLOCK(epow_timer_spinlock); > + > +/* EPOW, DPO event status values */ > +#define DPO_DETECTED 1 > +#define EPOW_DETECTED 1 Instead of macros for this can you simply return a bool from corresponding functions. > + > +/* EPOW events supported */ > +#define EPOW_POWER_UPS 0 > +#define EPOW_POWER_UPS_LOW 1 > +#define EPOW_TEMP_HIGH_AMB 2 > +#define EPOW_TEMP_CRIT_AMB 3 > +#define EPOW_TEMP_HIGH_INT 4 > +#define EPOW_TEMP_CRIT_INT 5 > +#define MAX_EPOW_EVENTS 6 > + > +/* EPOW events description */ > +static const char * const epow_events_map[] = { > + [EPOW_POWER_UPS] = "UPS", > + [EPOW_POWER_UPS_LOW] = "UPS-low", > + [EPOW_TEMP_HIGH_AMB] = "high-ambient-temp", > + [EPOW_TEMP_CRIT_AMB] = "crit-ambient-temp", > + [EPOW_TEMP_HIGH_INT] = "high-internal-temp", > + [EPOW_TEMP_CRIT_INT] = "crit-internal-temp", > +}; > + > +/* EPOW events timeout values */ > +static int epow_timeout[MAX_EPOW_EVENTS]; > + > +/* > + * TODO: Export various event timeout values via device tree. > + * Zero timeout value for any event suggests that it needs > + * immediate shutdown. > + */ > +#define TIMEOUT_EPOW_POWER_UPS 450 > +#define TIMEOUT_EPOW_TEMP_HIGH_AMB 450 > + > +/* > + * Get various EPOW event timeouts. > + * TODO: For now hardcoding timeout values but they need to be > + * obtained via firmware device-tree. 
> + */ > +void get_epow_timeouts(void) > +{ > + epow_timeout[EPOW_POWER_UPS] = TIMEOUT_EPOW_POWER_UPS; > + epow_timeout[EPOW_TEMP_HIGH_AMB] = TIMEOUT_EPOW_TEMP_HIGH_AMB; > +} > + Instead of having a separate function for initialising the array, you can use array initialisers. > +/* EPOW poweroff function. */ > +static void epow_poweroff(unsigned long event) > +{ > + pr_info("Powering off system due to %s EPOW event\n", > + epow_events_map[event]); > + orderly_poweroff(true); > +} > + > +/* Start EPOW poweroff timer */ > +static void start_epow_timer(unsigned long event, int32_t timeout) > +{ > + unsigned long flags; > + > + spin_lock_irqsave(&epow_timer_spinlock, flags); > + /* Check for already running epow poweroff timer */ > + if (timer_pending(&epow_timer)) { > + /* Timer for same event */ > + if (epow_timer.data == event) { > + spin_unlock_irqrestore(&epow_timer_spinlock, flags); > + return; > + } > + > + /* Timer with early poweroff timeout */ > + if (epow_timer.expires < (jiffies + timeout * HZ)) { > + event = epow_timer.data; > + spin_unlock_irqrestore(&epow_timer_spinlock, flags); > + pr_info("Poweroff already scheduled for %s EPOW event " > + "with earlier timeout.\n", > + epow_events_map[event]); > + return; > + } > + } > + > + /* Start a new timer/modify existing timer with new timeout value */ > + epow_timer.data = event; > + mod_timer(&epow_timer, jiffies + timeout * HZ); > + spin_unlock_irqrestore(&epow_timer_spinlock, flags); > + pr_info("Scheduled system poweroff due to %s EPOW event " > + "after %d seconds\n", epow_events_map[event], timeout); > +} > + > +/* Stop poweroff timer */ > +static void stop_epow_timer(void) > +{ > + int rc; > + unsigned long flags; > + > + spin_lock_irqsave(&epow_timer_spinlock, flags); > + rc = del_timer(&epow_timer); > + spin_unlock_irqrestore(&epow_timer_spinlock, flags); > + > + if (rc) > + pr_info("Poweroff timer deactivated\n"); > +} > + > +/* Get DPO status */ > +static int get_dpo_status(int32_t 
*dpo_timeout) > +{ > + int rc; > + __be32 opal_dpo_timeout; > + > + rc = opal_get_dpo_status(&opal_dpo_timeout); > + if (rc == OPAL_WRONG_STATE) { > + *dpo_timeout = 0; > + return 0; > + } > + > + *dpo_timeout = be32_to_cpu(opal_dpo_timeout); > + return DPO_DETECTED; > +} > + > +/* Process DPO event */ > +void process_dpo(void) > +{ > + pr_info("Powering off system due to poweroff request.\n"); > + orderly_poweroff(true); > +} This function is too small and can be combined with the function opal_dpo_event. > + > +/* Get EPOW status */ > +static int get_epow_status(void) > +{ > + int i; > + bool epow_detected = false; > + > + __be32 opal_epow_status[OPAL_MAX_EPOW_CLASSES]; > + __be32 opal_epow_classes; > + > + opal_epow_classes = cpu_to_be32(OPAL_MAX_EPOW_CLASSES); > + for (i = 0; i < OPAL_MAX_EPOW_CLASSES; i++) > + opal_epow_status[i] = cpu_to_be32(0); 0 is represented the same in BE and LE, so use a static initialiser for the array instead. > + > + /* Get EPOW events information from OPAL */ > + opal_get_epow_status(opal_epow_status, &opal_epow_classes); Since OPAL returns the number of classes it copied into the array, you may not need to initialise the array to zero in the loop above. 
> + > + /* Copy EPOW status */ > + memset(epow_status, 0, sizeof(epow_status[0] * OPAL_MAX_EPOW_CLASSES); A simple memset(epow_status, 0, sizeof(epow_status)) should have the same effect. > + num_epos_classes = be32_to_cpu(opal_epow_classes); > + for (i = 0; i < num_epow_classes; i++) { > + epow_status[i] = be32_to_cpu(opal_epow_status[i]); > + if (epow_status[i]) > + epow_detected = true; > + } > + > + pr_info("EPOW classes supported OPAL = %d, Host = %d " > + "EPOW Status = 0x%x, 0x%x, 0x%x\n", > + num_epow_classes, OPAL_MAX_EPOW_CLASSES, > + epow_status[0], epow_status[1], epow_status[2]); > + > + if (epow_detected) > + return EPOW_DETECTED; > + > + return 0; > +} > + > +/* Process EPOW information */ > +void process_epow(void) > +{ > + int i, timeout = 0, event = -1; > + bool epow_normal = false; > + > + /* Check for EPOW return to normal state */ > + for (i = 0; i < OPAL_MAX_EPOW_CLASSES; i++) { > + if (epow_status[i]) > + break; > + } This is the same check you do in the function get_epow_status. Instead, refactor the code to do this check only once. > + > + if (i == OPAL_MAX_EPOW_CLASSES) > + epow_normal = true; > + > + /* Cancel any pending shutdown timer due to EPOW normal state.*/ > + if (epow_normal) { > + stop_epow_timer(); > + return; > + } You can merge the two if conditions above to eliminate the variable epow_normal. 
> + > + /* Determine EPOW events and poweroff timeouts */ > + if (epow_status[OPAL_EPOW_POWER] & OPAL_EPOW_POWER_UPS) { > + pr_info("EPOW due to system running on UPS power\n"); > + event = EPOW_POWER_UPS; > + timeout = epow_timeout[EPOW_POWER_UPS]; > + } > + > + if (epow_status[OPAL_EPOW_POWER] & OPAL_EPOW_POWER_UPS_LOW) { > + pr_info("EPOW due to system running on UPS power " > + "with low battery\n > + event = EPOW_POWER_UPS_LOW; > + timeout = epow_timeout[EPOW_POWER_UPS_LOW]; > + } > + > + if (epow_status[OPAL_EPOW_TEMP] & OPAL_EPOW_TEMP_HIGH_AMB) { > + pr_info("EPOW due to high ambient temperature\n"); > + event = EPOW_TEMP_HIGH_AMB; > + timeout = epow_timeout[EPOW_TEMP_HIGH_AMB]; > + } > + > + if (epow_status[OPAL_EPOW_TEMP] & OPAL_EPOW_TEMP_CRIT_AMB) { > + pr_info("EPOW due to critical ambient temperature\n"); > + event = EPOW_TEMP_CRIT_AMB; > + timeout = epow_timeout[EPOW_TEMP_CRIT_AMB]; > + } > + > + if (epow_status[OPAL_EPOW_TEMP] & OPAL_EPOW_TEMP_HIGH_INT) { > + pr_info("EPOW due to high internal temperature\n"); > + event = EPOW_TEMP_HIGH_INT; > + timeout = epow_timeout[EPOW_TEMP_HIGH_INT]; > + } > + > + if (epow_status[OPAL_EPOW_TEMP] & OPAL_EPOW_TEMP_CRIT_INT) { > + pr_info("EPOW due to critical internal temperature\n"); > + event = EPOW_TEMP_CRIT_INT; > + timeout = epow_timeout[EPOW_TEMP_CRIT_INT]; > + } > + > + if (event == -1) { > + pr_err("Unknown EPOW event\n"); > + return; > + } > + > + /* Start EPOW poweroff timer */ > + start_epow_timer(event, timeout); > +} > + > +/* Check for any existing EPOW, DPO events and process them, if existing */ > +static void process_existing_poweroff_events(void) > +{ > + int rc; > + int32_t dpo_timeout; > + > + /* Check for any existing DPO event */ > + rc = get_dpo_status(&dpo_timeout); > + if (rc == DPO_DETECTED) { > + pr_info("Existing DPO event detected\n"); > + process_dpo(); > + return; > + } else > + pr_info("No existing DPO event detected\n"); > + > + /* Check for any existing EPOW event */ > + rc = 
get_epow_status(); > + if (rc == EPOW_DETECTED) { > + pr_info("Existing EPOW event detected.\n"); > + process_epow(); > + } else > + pr_info("No existing EPOW event detected\n"); > + > +} > + > +/* Platform EPOW message received */ > +static int opal_epow_event(struct notifier_block *nb, > + unsigned long msg_type, void *msg) > +{ > + pr_info("EPOW event received\n"); > + > + /* Get EPOW event details */ > + get_epow_status(); > + > + /* Process EPOW event information */ > + process_epow(); > + > + return 0; > +} > + > + > +/* Platform DPO message received */ > +static int opal_dpo_event(struct notifier_block *nb, > + unsigned long msg_type, void *msg) > +{ > + pr_info("DPO event received.\n"); > + process_dpo(); > + > + return 0; > +} > + > + > +/* OPAL EPOW event notifier block */ > +static struct notifier_block opal_epow_nb = { > + .notifier_call = opal_epow_event, > + .next = NULL, > + .priority = 0, > +}; > + > +/* OPAL DPO event notifier block */ > +static struct notifier_block opal_dpo_nb = { > + .notifier_call = opal_dpo_event, > + .next = NULL, > + .priority = 0, > +}; > + > +/* Poweroff events init */ > +static int opal_poweroff_events_init(void) > +{ > + int ret; > + > + /* Initialize poweroff timer */ > + init_timer(&epow_timer); > + epow_timer.function = epow_poweroff; > + > + /* Get EPOW event timeout values */ > + get_epow_timeouts(); > + > + /* Check for any existing EPOW or DPO events. */ > + process_existing_poweroff_events(); > + > + /* Register EPOW event notifier */ > + ret = opal_message_notifier_register(OPAL_MSG_EPOW, &opal_epow_nb); > + if (ret) { > + pr_err("EPOW event notifier registration failed\n"); > + return ret; > + } Timing issue: you may miss events that occur between processing the existing poweroff events and registering the message notifiers. Instead, register your notifiers first and then process any existing events. 
> + > + /* Register DPO event notifier */ > + ret = opal_message_notifier_register(OPAL_MSG_DPO, &opal_dpo_nb); > + if (ret) { > + pr_err("DPO event notifier registration failed\n"); > + opal_notifier_unregister(&opal_epow_nb); > + return ret; > + } > + > + > + pr_info("OPAL poweroff events support initialized\n"); > + > + return 0; > +} > + > +machine_subsys_initcall(powernv, opal_poweroff_events_init); > diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S > b/arch/powerpc/platforms/powernv/opal-wrappers.S > index a7ade94..5d3c8e3 100644 > --- a/arch/powerpc/platforms/powernv/opal-wrappers.S > +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S > @@ -249,6 +249,7 @@ OPAL_CALL(opal_pci_reinit, OPAL_PCI_REINIT); > OPAL_CALL(opal_pci_mask_pe_error, OPAL_PCI_MASK_PE_ERROR); > OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS); > OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS); > +OPAL_CALL(opal_get_dpo_status, OPAL_GET_DPO_STATUS); > OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED); > OPAL_CALL(opal_pci_next_error, OPAL_PCI_NEXT_ERROR); > OPAL_CALL(opal_pci_poll, OPAL_PCI_POLL); > -- > 1.9.3 > > _______________________________________________ > Linuxppc-dev mailing list > Linuxppc-dev@lists.ozlabs.org > https://lists.ozlabs.org/listinfo/linuxppc-dev
_______________________________________________ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev