Vaidy, > Create sysfs interface to export data from H_BEST_ENERGY hcall > that can be used by administrative tools on supported pseries > platforms for energy management optimizations. > > /sys/device/system/cpu/pseries_(de)activate_hint_list and > /sys/device/system/cpu/cpuN/pseries_(de)activate_hint will provide > hints for activation and deactivation of cpus respectively. > > Added new driver module > arch/powerpc/platforms/pseries/pseries_energy.c > under new config option CONFIG_PSERIES_ENERGY
Can you provide some documentation on how to use these hints and what format they are provided in from sysfs? Looks like two separate interfaces to the same thing (one a comma-separated list and one per cpu; why do we need both?). What is the difference between activate and deactivate, without me having to read PAPR :-) ?? Other comments below. > > Signed-off-by: Vaidyanathan Srinivasan <sva...@linux.vnet.ibm.com> > --- > arch/powerpc/include/asm/hvcall.h | 3 > arch/powerpc/platforms/pseries/Kconfig | 10 + > arch/powerpc/platforms/pseries/Makefile | 1 > arch/powerpc/platforms/pseries/pseries_energy.c | 258 +++++++++++++++++++++ ++ > 4 files changed, 271 insertions(+), 1 deletions(-) > create mode 100644 arch/powerpc/platforms/pseries/pseries_energy.c > > diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvc all.h > index 5119b7d..34b66e0 100644 > --- a/arch/powerpc/include/asm/hvcall.h > +++ b/arch/powerpc/include/asm/hvcall.h > @@ -231,7 +231,8 @@ > #define H_GET_EM_PARMS 0x2B8 > #define H_SET_MPP 0x2D0 > #define H_GET_MPP 0x2D4 > -#define MAX_HCALL_OPCODE H_GET_MPP > +#define H_BEST_ENERGY 0x2F4 > +#define MAX_HCALL_OPCODE H_BEST_ENERGY > > #ifndef __ASSEMBLY__ > > diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/ pseries/Kconfig > index c667f0f..b3dd108 100644 > --- a/arch/powerpc/platforms/pseries/Kconfig > +++ b/arch/powerpc/platforms/pseries/Kconfig > @@ -33,6 +33,16 @@ config PSERIES_MSI > depends on PCI_MSI && EEH > default y > > +config PSERIES_ENERGY Probably need a less generic name. PSERIES_ENERGY_MANAGEMENT? PSERIES_ENERGY_HOTPLUG_HINTS? > + tristate "pseries energy management capabilities driver" > + depends on PPC_PSERIES > + default y > + help > + Provides interface to platform energy management capabilities > + on supported PSERIES platforms. 
> + Provides: /sys/devices/system/cpu/pseries_(de)activation_hint_list > + and /sys/devices/system/cpu/cpuN/pseries_(de)activation_hint > + > config SCANLOG > tristate "Scanlog dump interface" > depends on RTAS_PROC && PPC_PSERIES > diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms /pseries/Makefile > index 3dbef30..32ae72e 100644 > --- a/arch/powerpc/platforms/pseries/Makefile > +++ b/arch/powerpc/platforms/pseries/Makefile > @@ -16,6 +16,7 @@ obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_e vent.o eeh_sysfs.o > obj-$(CONFIG_KEXEC) += kexec.o > obj-$(CONFIG_PCI) += pci.o pci_dlpar.o > obj-$(CONFIG_PSERIES_MSI) += msi.o > +obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o > > obj-$(CONFIG_HOTPLUG_CPU) += hotplug-cpu.o > obj-$(CONFIG_MEMORY_HOTPLUG) += hotplug-memory.o > diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/p latforms/pseries/pseries_energy.c > new file mode 100644 > index 0000000..9a936b1 > --- /dev/null > +++ b/arch/powerpc/platforms/pseries/pseries_energy.c > @@ -0,0 +1,258 @@ > +/* > + * POWER platform energy management driver > + * Copyright (C) 2010 IBM Corporation > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * version 2 as published by the Free Software Foundation. > + * > + * This pseries platform device driver provides access to > + * platform energy management capabilities. > + */ > + > +#include <linux/module.h> > +#include <linux/types.h> > +#include <linux/errno.h> > +#include <linux/init.h> > +#include <linux/seq_file.h> > +#include <linux/sysdev.h> > +#include <linux/cpu.h> > +#include <linux/of.h> > +#include <asm/cputhreads.h> > +#include <asm/page.h> > +#include <asm/hvcall.h> > + > + > +#define MODULE_VERS "1.0" Argh, I hate module versions... but this one is less of an issue since it doesn't seem to be being used anyway :-) > +#define MODULE_NAME "pseries_energy" Unused too. 
> + > +/* Helper Routines to convert between drc_index to cpu numbers */ > + > +static u32 cpu_to_drc_index(int cpu) > +{ > + struct device_node *dn = NULL; > + const int *indexes; > + int i; > + dn = of_find_node_by_path("/cpus"); > + if (dn == NULL) > + goto err; Hmm, I'm not sure this is really needed. If you don't have /cpus you are probably not going to boot. > + indexes = of_get_property(dn, "ibm,drc-indexes", NULL); > + if (indexes == NULL) > + goto err; These checks should probably be moved to module init rather than sysfs read time. If they fail, don't load the module and print a warning. These HCALLS and device-tree entries aren't going to be dynamic. > + /* Convert logical cpu number to core number */ > + i = cpu_core_of_thread(cpu); > + /* > + * The first element indexes[0] is the number of drc_indexes > + * returned in the list. Hence i+1 will get the drc_index > + * corresponding to core number i. > + */ > + WARN_ON(i > indexes[0]); > + return indexes[i + 1]; > +err: > + printk(KERN_WARNING "cpu_to_drc_index(%d) failed", cpu); > + return 0; > +} > + > +static int drc_index_to_cpu(u32 drc_index) > +{ > + struct device_node *dn = NULL; > + const int *indexes; > + int i, cpu; > + dn = of_find_node_by_path("/cpus"); > + if (dn == NULL) > + goto err; Same here. > + indexes = of_get_property(dn, "ibm,drc-indexes", NULL); > + if (indexes == NULL) > + goto err; > + /* > + * First element in the array is the number of drc_indexes > + * returned. 
Search through the list to find the matching > + * drc_index and get the core number > + */ > + for (i = 0; i < indexes[0]; i++) { > + if (indexes[i + 1] == drc_index) > + break; > + } > + /* Convert core number to logical cpu number */ > + cpu = cpu_first_thread_of_core(i); > + return cpu; > +err: > + printk(KERN_WARNING "drc_index_to_cpu(%d) failed", drc_index); > + return 0; > +} > + > +/* > + * pseries hypervisor call H_BEST_ENERGY provides hints to OS on > + * preferred logical cpus to activate or deactivate for optimized > + * energy consumption. > + */ > + > +#define FLAGS_MODE1 0x004E200000080E01 > +#define FLAGS_MODE2 0x004E200000080401 > +#define FLAGS_ACTIVATE 0x100 > + > +static ssize_t get_best_energy_list(char *page, int activate) > +{ > + int rc, cnt, i, cpu; > + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; > + unsigned long flags = 0; > + u32 *buf_page; > + char *s = page; > + > + buf_page = (u32 *) get_zeroed_page(GFP_KERNEL); > + if (!buf_page) > + return -ENOMEM; > + > + flags = FLAGS_MODE1; > + if (activate) > + flags |= FLAGS_ACTIVATE; > + > + rc = plpar_hcall9(H_BEST_ENERGY, retbuf, flags, 0, __pa(buf_page), > + 0, 0, 0, 0, 0, 0); > + if (rc != H_SUCCESS) { > + free_page((unsigned long) buf_page); > + return -EINVAL; > + } > + > + cnt = retbuf[0]; > + for (i = 0; i < cnt; i++) { > + cpu = drc_index_to_cpu(buf_page[2*i+1]); > + if ((cpu_online(cpu) && !activate) || > + (!cpu_online(cpu) && activate)) > + s += sprintf(s, "%d,", cpu); > + } > + if (s > page) { /* Something to show */ > + s--; /* Suppress last comma */ > + s += sprintf(s, "\n"); > + } > + > + free_page((unsigned long) buf_page); > + return s-page; > +} > + > +static ssize_t get_best_energy_data(struct sys_device *dev, > + char *page, int activate) > +{ > + int rc; > + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; > + unsigned long flags = 0; > + > + flags = FLAGS_MODE2; > + if (activate) > + flags |= FLAGS_ACTIVATE; > + > + rc = plpar_hcall9(H_BEST_ENERGY, retbuf, flags, > + 
cpu_to_drc_index(dev->id), > + 0, 0, 0, 0, 0, 0, 0); > + > + if (rc != H_SUCCESS) > + return -EINVAL; > + > + return sprintf(page, "%lu\n", retbuf[1] >> 32); > +} > + > +/* Wrapper functions */ > + > +static ssize_t cpu_activate_hint_list_show(struct sysdev_class *class, > + struct sysdev_class_attribute *attr, char *page) > +{ > + return get_best_energy_list(page, 1); > +} > + > +static ssize_t cpu_deactivate_hint_list_show(struct sysdev_class *class, > + struct sysdev_class_attribute *attr, char *page) > +{ > + return get_best_energy_list(page, 0); > +} > + > +static ssize_t percpu_activate_hint_show(struct sys_device *dev, > + struct sysdev_attribute *attr, char *page) > +{ > + return get_best_energy_data(dev, page, 1); > +} > + > +static ssize_t percpu_deactivate_hint_show(struct sys_device *dev, > + struct sysdev_attribute *attr, char *page) > +{ > + return get_best_energy_data(dev, page, 0); > +} > + > +/* > + * Create sysfs interface: > + * /sys/devices/system/cpu/pseries_activate_hint_list > + * /sys/devices/system/cpu/pseries_deactivate_hint_list > + * Comma separated list of cpus to activate or deactivate > + * /sys/devices/system/cpu/cpuN/pseries_activate_hint > + * /sys/devices/system/cpu/cpuN/pseries_deactivate_hint > + * Per-cpu value of the hint Do we really need both interfaces? Seems like awk could generate one from the other in userspace? 
> + */ > + > +struct sysdev_class_attribute attr_cpu_activate_hint_list = > + _SYSDEV_CLASS_ATTR(pseries_activate_hint_list, 0444, > + cpu_activate_hint_list_show, NULL); > + > +struct sysdev_class_attribute attr_cpu_deactivate_hint_list = > + _SYSDEV_CLASS_ATTR(pseries_deactivate_hint_list, 0444, > + cpu_deactivate_hint_list_show, NULL); > + > +struct sysdev_attribute attr_percpu_activate_hint = > + _SYSDEV_ATTR(pseries_activate_hint, 0444, > + percpu_activate_hint_show, NULL); > + > +struct sysdev_attribute attr_percpu_deactivate_hint = > + _SYSDEV_ATTR(pseries_deactivate_hint, 0444, > + percpu_deactivate_hint_show, NULL); > + > +static int __init pseries_energy_init(void) > +{ > + int cpu, err; > + struct sys_device *cpu_sys_dev; > + > + /* Create the sysfs files */ > + err = sysfs_create_file(&cpu_sysdev_class.kset.kobj, > + &attr_cpu_activate_hint_list.attr); > + if (!err) > + err = sysfs_create_file(&cpu_sysdev_class.kset.kobj, > + &attr_cpu_deactivate_hint_list.attr); > + > + for_each_possible_cpu(cpu) { > + cpu_sys_dev = get_cpu_sysdev(cpu); > + err = sysfs_create_file(&cpu_sys_dev->kobj, > + &attr_percpu_activate_hint.attr); > + if (err) > + break; > + err = sysfs_create_file(&cpu_sys_dev->kobj, > + &attr_percpu_deactivate_hint.attr); > + if (err) > + break; > + } > + return err; > + > +} > + > +static void __exit pseries_energy_cleanup(void) > +{ > + int cpu; > + struct sys_device *cpu_sys_dev; > + > + /* Remove the sysfs files */ > + sysfs_remove_file(&cpu_sysdev_class.kset.kobj, > + &attr_cpu_activate_hint_list.attr); > + > + sysfs_remove_file(&cpu_sysdev_class.kset.kobj, > + &attr_cpu_deactivate_hint_list.attr); > + > + for_each_possible_cpu(cpu) { > + cpu_sys_dev = get_cpu_sysdev(cpu); > + sysfs_remove_file(&cpu_sys_dev->kobj, > + &attr_percpu_activate_hint.attr); > + sysfs_remove_file(&cpu_sys_dev->kobj, > + &attr_percpu_deactivate_hint.attr); > + } > +} > + > +module_init(pseries_energy_init); > +module_exit(pseries_energy_cleanup); > 
+MODULE_DESCRIPTION("Driver for pseries platform energy management"); Needs a less generic description. > +MODULE_AUTHOR("Vaidyanathan Srinivasan"); > +MODULE_LICENSE("GPL"); > > _______________________________________________ > Linuxppc-dev mailing list > Linuxppc-dev@lists.ozlabs.org > https://lists.ozlabs.org/listinfo/linuxppc-dev > _______________________________________________ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev