[PATCH 1/4 v7] AMD64 EDAC: Add multi-domain support
Fix get_node_id to match northbridge IDs from the array of detected ones, allowing multi-server support such as with Numascale's NumaConnect, renaming to 'amd_get_node_id' for consistency. v7: Refactor patches grouping changes Signed-off-by: Daniel J Blueman --- arch/x86/include/asm/amd_nb.h | 17 + drivers/edac/amd64_edac.c |6 +++--- drivers/edac/amd64_edac.h |6 -- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index b3341e9..417eb24 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -81,6 +81,23 @@ static inline struct amd_northbridge *node_to_amd_nb(int node) return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; } +static inline u16 amd_get_node_id(struct pci_dev *pdev) +{ + struct pci_dev *misc; + int i; + + for (i = 0; i != amd_nb_num(); i++) { + misc = node_to_amd_nb(i)->misc; + + if (pci_domain_nr(misc->bus) == pci_domain_nr(pdev->bus) && + PCI_SLOT(misc->devfn) == PCI_SLOT(pdev->devfn)) + return i; + } + + WARN(1, "Unable to find AMD Northbridge identifier for %s\n", pci_name(pdev)); + return 0; +} + #else #define amd_nb_num(x) 0 diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index cc8e7c7..9ba70a5 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2546,7 +2546,7 @@ static int amd64_init_one_instance(struct pci_dev *F2) struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; int err = 0, ret; - u8 nid = get_node_id(F2); + u16 nid = amd_get_node_id(F2); ret = -ENOMEM; pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); @@ -2637,7 +2637,7 @@ err_ret: static int __devinit amd64_probe_one_instance(struct pci_dev *pdev, const struct pci_device_id *mc_type) { - u8 nid = get_node_id(pdev); + u16 nid = amd_get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s; int ret = 0; @@ -2687,7 +2687,7 @@ static void __devexit amd64_remove_one_instance(struct 
pci_dev *pdev) { struct mem_ctl_info *mci; struct amd64_pvt *pvt; - u8 nid = get_node_id(pdev); + u16 nid = amd_get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s = ecc_stngs[nid]; diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 8c41396..cecd0c4 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -290,12 +290,6 @@ /* MSRs */ #define MSR_MCGCTL_NBE BIT(4) -/* AMD sets the first MC device at device ID 0x18. */ -static inline u8 get_node_id(struct pci_dev *pdev) -{ - return PCI_SLOT(pdev->devfn) - 0x18; -} - enum amd_families { K8_CPUS = 0, F10_CPUS, -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/4 v7] AMD64 EDAC: Consistently use u16 for northbridge IDs in amd_get_nb_id
Change amd_get_nb_id to return u16 to support >255 memory controllers, and related consistency fixes. v7: Refactor patches grouping changes Signed-off-by: Daniel J Blueman --- arch/x86/include/asm/processor.h |2 +- arch/x86/kernel/cpu/amd.c|4 ++-- drivers/edac/amd64_edac.c|5 +++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index ad1fc85..eb3ba58 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -934,7 +934,7 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, extern int get_tsc_mode(unsigned long adr); extern int set_tsc_mode(unsigned int val); -extern int amd_get_nb_id(int cpu); +extern u16 amd_get_nb_id(int cpu); struct aperfmperf { u64 aperf, mperf; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 1b7d165..2e298e9 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -364,9 +364,9 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) #endif } -int amd_get_nb_id(int cpu) +u16 amd_get_nb_id(int cpu) { - int id = 0; + u16 id = 0; #ifdef CONFIG_SMP id = per_cpu(cpu_llc_id, cpu); #endif diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 9ba70a5..60e93fa 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -942,7 +942,8 @@ static u64 get_error_address(struct mce *m) struct amd64_pvt *pvt; u64 cc6_base, tmp_addr; u32 tmp; - u8 mce_nid, intlv_en; + u16 mce_nid; + u8 intlv_en; if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7) return addr; @@ -2253,7 +2254,7 @@ static int init_csrows(struct mem_ctl_info *mci) } /* get all cores on this DCT */ -static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, unsigned nid) +static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid) { int cpu; -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to 
majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 3/4 v7] AMD64 EDAC: Fix PCI function lookup
Fix locating sibling memory controller PCI functions by using the correct PCI domain. v7: Refactor patches grouping changes Signed-off-by: Daniel J Blueman --- drivers/edac/amd64_edac.c | 40 +--- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 60e93fa..62b7b17 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -983,6 +983,24 @@ static u64 get_error_address(struct mce *m) return addr; } +static struct pci_dev *pci_get_related_function(unsigned int vendor, + unsigned int device, + struct pci_dev *related) +{ + struct pci_dev *dev = NULL; + + dev = pci_get_device(vendor, device, dev); + while (dev) { + if (pci_domain_nr(dev->bus) == pci_domain_nr(related->bus) && + (dev->bus->number == related->bus->number) && + (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn))) + break; + dev = pci_get_device(vendor, device, dev); + } + + return dev; +} + static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -1002,11 +1020,12 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) /* Factor in CC6 save area by reading dst node's limit reg */ if (c->x86 == 0x15) { - struct pci_dev *f1 = NULL; + struct pci_dev *misc, *f1 = NULL; u8 nid = dram_dst_node(pvt, range); u32 llim; - f1 = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x18 + nid, 1)); + misc = node_to_amd_nb(nid)->misc; + f1 = pci_get_related_function(misc->vendor, PCI_DEVICE_ID_AMD_15H_NB_F1, misc); if (WARN_ON(!f1)) return; @@ -1713,23 +1732,6 @@ static struct amd64_family_type amd64_family_types[] = { }, }; -static struct pci_dev *pci_get_related_function(unsigned int vendor, - unsigned int device, - struct pci_dev *related) -{ - struct pci_dev *dev = NULL; - - dev = pci_get_device(vendor, device, dev); - while (dev) { - if ((dev->bus->number == related->bus->number) && - (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn))) - break; - dev = 
pci_get_device(vendor, device, dev); - } - - return dev; -} - /* * These are tables of eigenvectors (one per line) which can be used for the * construction of the syndrome tables. The modified syndrome search algorithm -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 4/4 v7] AMD64 EDAC: Fix type usage in NB IDs and memory ranges
Use appropriate types for northbridge IDs and memory ranges. v7: Refactor patches grouping changes Signed-off-by: Daniel J Blueman --- arch/x86/include/asm/amd_nb.h |2 +- drivers/edac/amd64_edac.c | 20 ++-- drivers/edac/amd64_edac.h |6 +++--- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 417eb24..d2e703b 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -76,7 +76,7 @@ static inline bool amd_nb_has_feature(unsigned feature) return ((amd_northbridges.flags & feature) == feature); } -static inline struct amd_northbridge *node_to_amd_nb(int node) +static inline struct amd_northbridge *node_to_amd_nb(u16 node) { return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; } diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 62b7b17..b27412a 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -239,7 +239,7 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci) * DRAM base/limit associated with node_id */ static bool amd64_base_limit_match(struct amd64_pvt *pvt, u64 sys_addr, - unsigned nid) + u8 nid) { u64 addr; @@ -265,7 +265,7 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci, u64 sys_addr) { struct amd64_pvt *pvt; - unsigned node_id; + u8 node_id; u32 intlv_en, bits; /* @@ -1348,7 +1348,7 @@ static u8 f1x_determine_channel(struct amd64_pvt *pvt, u64 sys_addr, } /* Convert the sys_addr to the normalized DCT address */ -static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, unsigned range, +static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range, u64 sys_addr, bool hi_rng, u32 dct_sel_base_addr) { @@ -1399,7 +1399,7 @@ static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, unsigned range, * checks if the csrow passed in is marked as SPARED, if so returns the new * spare row */ -static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow) 
+static int f10_process_possible_spare(struct amd64_pvt *pvt, u16 dct, int csrow) { int tmp_cs; @@ -1424,7 +1424,7 @@ static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow) * -EINVAL: NOT FOUND * 0..csrow = Chip-Select Row */ -static int f1x_lookup_addr_in_dct(u64 in_addr, u32 nid, u8 dct) +static int f1x_lookup_addr_in_dct(u64 in_addr, u8 nid, u8 dct) { struct mem_ctl_info *mci; struct amd64_pvt *pvt; @@ -2266,7 +2266,7 @@ static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid) } /* check MCG_CTL on all the cpus on this node */ -static bool amd64_nb_mce_bank_enabled_on_node(unsigned nid) +static bool amd64_nb_mce_bank_enabled_on_node(u16 nid) { cpumask_var_t mask; int cpu, nbe; @@ -2299,7 +2299,7 @@ out: return ret; } -static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) +static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on) { cpumask_var_t cmask; int cpu; @@ -2337,7 +2337,7 @@ static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) return 0; } -static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid, +static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid, struct pci_dev *F3) { bool ret = true; @@ -2389,7 +2389,7 @@ static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid, return ret; } -static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid, +static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid, struct pci_dev *F3) { u32 value, mask = 0x3; /* UECC/CECC enable */ @@ -2428,7 +2428,7 @@ static const char *ecc_msg = "'ecc_enable_override'.\n" " (Note that use of the override may cause unknown side effects.)\n"; -static bool ecc_enabled(struct pci_dev *F3, u8 nid) +static bool ecc_enabled(struct pci_dev *F3, u16 nid) { u32 value; u8 ecc_en = 0; diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index cecd0c4..a558084 100644 --- a/drivers/edac/amd64_edac.h +++ 
b/drivers/edac/amd64_edac.h @@ -332,7 +332,7 @@ struct amd64_pvt { /* pci_device handles which we utilize */ struct pci_dev *F1, *F2, *F3; - unsigned mc_node_id;/* MC index of this MC node */ + u16 mc_node_id; /* MC index of this MC node */ int ext_model; /* extended model value of this node */ int channel_coun
switcheroo registration vs switching race...
-codec: out of range cmd 0:0:5:707:fffc hda-codec: out of range cmd 0:0:5:707:ffbf hda-codec: out of range cmd 0:0:5:707: hda-codec: out of range cmd 0:0:5:707:fffc hda-codec: out of range cmd 0:0:5:707: hda-codec: out of range cmd 0:0:5:707:fffc hda-codec: out of range cmd 0:0:5:707:ffbf hda-codec: out of range cmd 0:0:7:707: hda-codec: out of range cmd 0:0:7:707:fffc hda-codec: out of range cmd 0:0:7:707: hda-codec: out of range cmd 0:0:7:707:fffc hda-codec: out of range cmd 0:0:7:707:ffbf hda-codec: out of range cmd 0:0:7:707: hda-codec: out of range cmd 0:0:7:707:fffc hda-codec: out of range cmd 0:0:7:707: hda-codec: out of range cmd 0:0:7:707:fffc hda-codec: out of range cmd 0:0:7:707:ffbf hda-codec: out of range cmd 0:0:4:707: hda-codec: out of range cmd 0:0:4:707:fffc hda-codec: out of range cmd 0:0:4:707: hda-codec: out of range cmd 0:0:4:707:fffc azx_single_send_cmd: 179 callbacks suppressed hda-codec: out of range cmd 0:0:4:707:ffbf hda-codec: out of range cmd 0:0:4:707: hda-codec: out of range cmd 0:0:4:707:fffc hda-codec: out of range cmd 0:0:4:707: hda-codec: out of range cmd 0:0:4:707:fffc hda-codec: out of range cmd 0:0:4:707:ffbf hda-codec: out of range cmd 0:0:4:707: hda-codec: out of range cmd 0:0:4:707:fffc hda-codec: out of range cmd 0:0:4:707: hda-codec: out of range cmd 0:0:4:707:fffc hda-codec: out of range cmd 0:0:4:707:ffbf hda-codec: out of range cmd 0:0:5:707: hda-codec: out of range cmd 0:0:5:707:fffc hda-codec: out of range cmd 0:0:5:707: hda-codec: out of range cmd 0:0:5:707:fffc hda-codec: out of range cmd 0:0:5:707:ffbf hda-codec: out of range cmd 0:0:5:707: hda-codec: out of range cmd 0:0:5:707:fffc hda-codec: out of range cmd 0:0:5:707: hda-codec: out of range cmd 0:0:5:707:fffc hda-codec: out of range cmd 0:0:5:707:ffbf hda-codec: out of range cmd 0:0:7:707: hda-codec: out of range cmd 0:0:7:707:fffc hda-codec: out of range cmd 0:0:7:707: hda-codec: out of range cmd 0:0:7:707:fffc hda-codec: out of range cmd 0:0:7:707:ffbf 
hda-codec: out of range cmd 0:0:7:707: hda-codec: out of range cmd 0:0:7:707:fffc hda-codec: out of range cmd 0:0:7:707: hda-codec: out of range cmd 0:0:7:707:fffc hda-codec: out of range cmd 0:0:7:707:ffbf -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 RESEND] Add NumaChip remote PCI support
Hi Bjorn, On 29/11/2012 07:08, Bjorn Helgaas wrote: On Wed, Nov 21, 2012 at 1:39 AM, Daniel J Blueman wrote: Add NumaChip-specific PCI access mechanism via MMCONFIG cycles, but preventing access to AMD Northbridges which shouldn't respond. v2: Use PCI_DEVFN in precomputed constant limit; drop unneeded includes Signed-off-by: Daniel J Blueman --- arch/x86/include/asm/numachip/numachip.h | 20 + arch/x86/kernel/apic/apic_numachip.c |2 + arch/x86/pci/Makefile|1 + arch/x86/pci/numachip.c | 134 ++ 4 files changed, 157 insertions(+) create mode 100644 arch/x86/include/asm/numachip/numachip.h create mode 100644 arch/x86/pci/numachip.c diff --git a/arch/x86/include/asm/numachip/numachip.h b/arch/x86/include/asm/numachip/numachip.h new file mode 100644 index 000..d35e71a --- /dev/null +++ b/arch/x86/include/asm/numachip/numachip.h @@ -0,0 +1,20 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Numascale NumaConnect-specific header file + * + * Copyright (C) 2012 Numascale AS. All rights reserved. 
+ * + * Send feedback to + * + */ + +#ifndef _ASM_X86_NUMACHIP_NUMACHIP_H +#define _ASM_X86_NUMACHIP_NUMACHIP_H + +extern int __init pci_numachip_init(void); + +#endif /* _ASM_X86_NUMACHIP_NUMACHIP_H */ + diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index a65829a..9c2aa89 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -179,6 +180,7 @@ static int __init numachip_system_init(void) return 0; x86_cpuinit.fixup_cpu_id = fixup_cpu_id; + x86_init.pci.arch_init = pci_numachip_init; map_csrs(); diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index 3af5a1e..ee0af58 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_STA2X11) += sta2x11-fixup.o obj-$(CONFIG_X86_VISWS)+= visws.o obj-$(CONFIG_X86_NUMAQ)+= numaq_32.o +obj-$(CONFIG_X86_NUMACHIP) += numachip.o It looks like this depends on CONFIG_PCI_MMCONFIG for pci_mmconfig_lookup(). Are there config constraints that force CONFIG_PCI_MMCONFIG=y when CONFIG_X86_NUMACHIP=y? I'll revise the patch with this constraint after we work out the best approach for below. obj-$(CONFIG_X86_INTEL_MID)+= mrst.o diff --git a/arch/x86/pci/numachip.c b/arch/x86/pci/numachip.c new file mode 100644 index 000..3773e05 --- /dev/null +++ b/arch/x86/pci/numachip.c @@ -0,0 +1,129 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Numascale NumaConnect-specific PCI code + * + * Copyright (C) 2012 Numascale AS. All rights reserved. 
+ * + * Send feedback to + * + * PCI accessor functions derived from mmconfig_64.c + * + */ + +#include +#include + +static u8 limit __read_mostly; + +static inline char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) +{ + struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus); + + if (cfg && cfg->virt) + return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12)); + return NULL; +} Most of this file is copied directly from mmconfig_64.c (as you mentioned above). I wonder if we could avoid the code duplication by making the pci_dev_base() implementation in mmconfig_64.c a weak definition. Then you could just supply a non-weak pci_dev_base() here that would override that default version. Your version would look something like: char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) { struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus); if (cfg && cfg->virt && devfn < limit) return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12)); return NULL; } That would be different from what you have in this patch because reads & writes to devices above "limit" would return -EINVAL rather than 0 as you do here. Would that be a problem? That would work nicely (pointer lookup and inlining etc aside) if there was the runtime ability to override pci_dev_base only if the NumaChip signature was detected. We could expose pci_dev_base via struct x86_init_pci; the extra complexity and performance tradeoff may not be worth it for a single case perhaps? Thanks, Daniel -- Daniel J Blueman Principal Software Engineer, Numascale Asia -- To unsubscribe from this list: send the line "unsubscribe linux-kernel&q
[PATCH 3/4 v8] AMD64 EDAC: Fix PCI function lookup
Fix locating sibling memory controller PCI functions by using the correct PCI domain and use Northbridge only if found. Tested on multi-socket server and multi-server, multi-socket NumaConnect setup. v7: Refactor patches grouping changes v8: Restructure searching for PCI function for clarity; use Northbridge only if found Signed-off-by: Daniel J Blueman --- drivers/edac/amd64_edac.c | 43 --- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 60e93fa..6c1005f 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -983,6 +983,22 @@ static u64 get_error_address(struct mce *m) return addr; } +static struct pci_dev *pci_get_related_function(unsigned int vendor, + unsigned int device, + struct pci_dev *related) +{ + struct pci_dev *dev = NULL; + + while ((dev = pci_get_device(vendor, device, dev))) { + if (pci_domain_nr(dev->bus) == pci_domain_nr(related->bus) && + (dev->bus->number == related->bus->number) && + (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn))) + break; + } + + return dev; +} + static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -1002,11 +1018,17 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) /* Factor in CC6 save area by reading dst node's limit reg */ if (c->x86 == 0x15) { - struct pci_dev *f1 = NULL; + struct pci_dev *misc, *f1 = NULL; u8 nid = dram_dst_node(pvt, range); + struct amd_northbridge *nb = node_to_amd_nb(nid); u32 llim; - f1 = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x18 + nid, 1)); + /* If DRAM base/limit registers point to a non-AMD device, nb won't have been found */ + if (!nb) + return; + + misc = nb->misc; + f1 = pci_get_related_function(misc->vendor, PCI_DEVICE_ID_AMD_15H_NB_F1, misc); if (WARN_ON(!f1)) return; @@ -1713,23 +1735,6 @@ static struct amd64_family_type amd64_family_types[] = { }, }; -static struct pci_dev 
*pci_get_related_function(unsigned int vendor, - unsigned int device, - struct pci_dev *related) -{ - struct pci_dev *dev = NULL; - - dev = pci_get_device(vendor, device, dev); - while (dev) { - if ((dev->bus->number == related->bus->number) && - (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn))) - break; - dev = pci_get_device(vendor, device, dev); - } - - return dev; -} - /* * These are tables of eigenvectors (one per line) which can be used for the * construction of the syndrome tables. The modified syndrome search algorithm -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 4/4 v8] AMD64 EDAC: Fix type usage in NB IDs and memory ranges
Use appropriate types for northbridge IDs and memory ranges. Mark immutable data const and keep within compilation unit on related structures. Tested on multi-socket server and multi-server, multi-socket NumaConnect setup. v7: Refactor patches grouping changes v8: Drop unneeded change; use const and static where appropriate Signed-off-by: Daniel J Blueman --- arch/x86/include/asm/amd_nb.h |2 +- drivers/edac/amd64_edac.c | 26 +- drivers/edac/amd64_edac.h |6 +++--- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 417eb24..d2e703b 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -76,7 +76,7 @@ static inline bool amd_nb_has_feature(unsigned feature) return ((amd_northbridges.flags & feature) == feature); } -static inline struct amd_northbridge *node_to_amd_nb(int node) +static inline struct amd_northbridge *node_to_amd_nb(u16 node) { return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; } diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 6c1005f..30149e4 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -31,7 +31,7 @@ static struct ecc_settings **ecc_stngs; * *FIXME: Produce a better mapping/linearisation. 
*/ -struct scrubrate { +static const struct scrubrate { u32 scrubval; /* bit pattern for scrub rate */ u32 bandwidth; /* bandwidth consumed (bytes/sec) */ } scrubrates[] = { @@ -239,7 +239,7 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci) * DRAM base/limit associated with node_id */ static bool amd64_base_limit_match(struct amd64_pvt *pvt, u64 sys_addr, - unsigned nid) + u8 nid) { u64 addr; @@ -265,7 +265,7 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci, u64 sys_addr) { struct amd64_pvt *pvt; - unsigned node_id; + u8 node_id; u32 intlv_en, bits; /* @@ -1351,7 +1351,7 @@ static u8 f1x_determine_channel(struct amd64_pvt *pvt, u64 sys_addr, } /* Convert the sys_addr to the normalized DCT address */ -static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, unsigned range, +static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range, u64 sys_addr, bool hi_rng, u32 dct_sel_base_addr) { @@ -1427,7 +1427,7 @@ static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow) * -EINVAL: NOT FOUND * 0..csrow = Chip-Select Row */ -static int f1x_lookup_addr_in_dct(u64 in_addr, u32 nid, u8 dct) +static int f1x_lookup_addr_in_dct(u64 in_addr, u8 nid, u8 dct) { struct mem_ctl_info *mci; struct amd64_pvt *pvt; @@ -1742,7 +1742,7 @@ static struct amd64_family_type amd64_family_types[] = { * * Algorithm courtesy of Ross LaFetra from AMD. 
*/ -static u16 x4_vectors[] = { +static const u16 x4_vectors[] = { 0x2f57, 0x1afe, 0x66cc, 0xdd88, 0x11eb, 0x3396, 0x7f4c, 0xeac8, 0x0001, 0x0002, 0x0004, 0x0008, @@ -1781,7 +1781,7 @@ static u16 x4_vectors[] = { 0x19a9, 0x2efe, 0xb5cc, 0x6f88, }; -static u16 x8_vectors[] = { +static const u16 x8_vectors[] = { 0x0145, 0x028a, 0x2374, 0x43c8, 0xa1f0, 0x0520, 0x0a40, 0x1480, 0x0211, 0x0422, 0x0844, 0x1088, 0x01b0, 0x44e0, 0x23c0, 0xed80, 0x1011, 0x0116, 0x022c, 0x0458, 0x08b0, 0x8c60, 0x2740, 0x4e80, @@ -1803,7 +1803,7 @@ static u16 x8_vectors[] = { 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, 0x8000, }; -static int decode_syndrome(u16 syndrome, u16 *vectors, unsigned num_vecs, +static int decode_syndrome(u16 syndrome, const u16 *vectors, unsigned num_vecs, unsigned v_dim) { unsigned int i, err_sym; @@ -2269,7 +2269,7 @@ static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid) } /* check MCG_CTL on all the cpus on this node */ -static bool amd64_nb_mce_bank_enabled_on_node(unsigned nid) +static bool amd64_nb_mce_bank_enabled_on_node(u16 nid) { cpumask_var_t mask; int cpu, nbe; @@ -2302,7 +2302,7 @@ out: return ret; } -static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) +static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on) { cpumask_var_t cmask; int cpu; @@ -2340,7 +2340,7 @@ static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) return 0; } -static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid, +static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid, struct pci_dev *F3) { bool ret = true; @@ -2392,7 +2392,7 @@
Re: [PATCH, resubmit] ax88179_178a: ASIX AX88179_178A USB 3.0/2.0 to gigabit ethernet adapter driver
Hi Freddy, > Michael, could you give me more information about how do you test this driver? > I have tried to reproduce the issue by using "ifconfig ethX mtu 1500", but I > didn't confront the same issue. > Thank you in advance for your help. I found the same by just starting with 'ifconfig eth0 1500' and testing as high as 4000; pinging another host with a large payload of size mtu-40 starts failing; after ~30s, I see the transmit time out trace [1]. Of course, a default MTU size of 1500 is essential to avoid fragmentation issues, so should be fixed too. Jumbo frames support is essential these days too. Thanks, Daniel --- [1] usb 4-1: new SuperSpeed USB device number 3 using xhci_hcd ax88179_178a 4-1:1.0 eth0: register 'ax88179_178a' at usb-:00:14.0-1, ASIX AX88179 USB 3.0 Gigibit Ethernet, 00:0a:cd:21:46:a7 ax88179_178a 4-1:1.0 eth2: ax88179 - Link status is: 1 [ cut here ] WARNING: at net/sched/sch_generic.c:254 dev_watchdog+0x26b/0x280() Hardware name: MacBookPro10,1 NETDEV WATCHDOG: eth2 (ax88179_178a): transmit queue 0 timed out Modules linked in: fuse snd_hda_codec_hdmi snd_hda_codec_cirrus joydev hid_apple bcm5974 coretemp kvm_intel kvm ghash_clmulni_intel b43 ssb ax88179_178a usbnet mii uvcvideo videobuf2_core videobuf2_vmalloc videobuf2_memops applesmc input_polldev microcode bcma bnep rfcomm lpc_ich mfd_core snd_hda_intel snd_hda_codec snd_hwdep snd_pcm nouveau apple_gmux snd_timer i915 ttm drm_kms_helper snd hwmon binfmt_misc mxm_wmi snd_page_alloc video apple_bl nls_iso8859_1 Pid: 0, comm: swapper/0 Not tainted 3.8.0-rc6-expert+ #2 Call Trace: [] ? dev_watchdog+0x250/0x280 [] warn_slowpath_common+0x7a/0xb0 [] warn_slowpath_fmt+0x41/0x50 [] dev_watchdog+0x26b/0x280 [] ? pfifo_fast_dequeue+0xe0/0xe0 [] call_timer_fn+0x74/0xf0 [] ? usleep_range+0x40/0x40 [] ? pfifo_fast_dequeue+0xe0/0xe0 [] run_timer_softirq+0x18b/0x220 [] __do_softirq+0xc2/0x180 [] ? tick_program_event+0x1f/0x30 [] ? 
read_measured_perf_ctrs+0x70/0x70 [] call_softirq+0x1c/0x30 [] do_softirq+0x7d/0xb0 [] irq_exit+0x9e/0xc0 [] smp_apic_timer_interrupt+0x69/0xa0 [] apic_timer_interrupt+0x6c/0x80 [] ? get_next_timer_interrupt+0x1c4/0x290 [] ? cpuidle_wrap_enter+0x50/0x90 [] ? cpuidle_wrap_enter+0x4c/0x90 [] cpuidle_enter_tk+0x10/0x20 [] cpuidle_idle_call+0x7c/0x110 [] cpu_idle+0x7a/0xf0 [] rest_init+0x144/0x150 [] ? csum_partial_copy_generic+0x170/0x170 [] ? efi_free_boot_services+0x53/0x58 [] start_kernel+0x359/0x366 [] ? repair_env_string+0x5e/0x5e [] x86_64_start_reservations+0x131/0x135 [] ? early_idt_handlers+0x120/0x120 [] x86_64_start_kernel+0xd3/0xd7 ---[ end trace 58c12634a365560a ]--- -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
3.8-rc6: nouveau lockdep recursive lock acquisition
[ DRM] 3: core 1000MHz shader 2000MHz memory 1080MHz voltage 630mV nouveau [ DRM] 4: core 1254MHz shader 2508MHz memory 1080MHz voltage 630mV nouveau [ DRM] c: nouveau [ DRM] MM: using COPY for buffer copies nouveau :01:00.0: No connectors reported connected with modes [drm] Cannot find any crtc or sizes - going 1024x768 nouveau [ DRM] allocated 1024x768 fb: 0x8, bo 88025b966800 nouveau :01:00.0: fb1: nouveaufb frame buffer device [drm] Initialized nouveau 1.1.0 20120801 for :01:00.0 on minor 1 snd_hda_intel :01:00.1: enabling device ( -> 0002) hda-intel :01:00.1: Handle VGA-switcheroo audio client snd_hda_intel :01:00.1: irq 49 for MSI/MSI-X input: HDA NVidia HDMI/DP,pcm=8 as /devices/pci:00/:00:01.0/:01:00.1/sound/card1/input11 input: HDA NVidia HDMI/DP,pcm=7 as /devices/pci:00/:00:01.0/:01:00.1/sound/card1/input12 input: HDA NVidia HDMI/DP,pcm=3 as /devices/pci:00/:00:01.0/:01:00.1/sound/card1/input13 hda-intel :01:00.1: Disabling via VGA-switcheroo VGA switcheroo: switched nouveau off nouveau [ DRM] suspending fbcon... nouveau [ DRM] suspending display... nouveau [ DRM] unpinning framebuffer(s)... nouveau [ DRM] evicting buffers... nouveau [ DRM] suspending client object trees... nouveau E[ I2C][:01:00.0] AUXCH(3): begin idle timeout 0x nouveau E[ I2C][:01:00.0] AUXCH(2): begin idle timeout 0x nouveau E[ I2C][:01:00.0] AUXCH(1): begin idle timeout 0x applesmc: light sensor data length set to 10 nouveau E[ I2C][:01:00.0] AUXCH(1): begin idle timeout 0x nouveau E[ I2C][:01:00.0] AUXCH(3): begin idle timeout 0x nouveau E[ I2C][:01:00.0] AUXCH(2): begin idle timeout 0x -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
3.8-rc1 patch_cirrus 4.0 regression...
Hi Takashi, The v3.8-rc kernels have regressed from v3.7 with the quad-speaker arrangement on my Macbook Pro 10,1 - only the higher-frequency speakers work despite the front and rear channels being exposed in the mixer. Reverting f37bc7 [1] restored the correct behaviour ([2] was reverted to compile, but the problem still occurs with only [2] reverted). How can I help to debug this and find the right approach? Thanks, Daniel --- [1] commit f37bc7a88d374448a1f4bba9267d308606d78bf2 Author: Takashi Iwai Date: Thu Nov 8 15:59:23 2012 +0100 ALSA: hda - Give standard "Bass Speaker" mixer for 2.1 speakers When two built-in speakers are found on the machine, we can suppose it's rather a 2.1 speaker system with a bass output instead of front/surround channels. Signed-off-by: Takashi Iwai --- [2] commit ee81abb623cb5e03c182d16871bb4fb34fdc9b4f Author: Takashi Iwai Date: Thu Nov 8 17:12:10 2012 +0100 ALSA: hda - Apply a proper chmap for built-in 2.1 speakers When 2.1 speakers are detected, use the corresponding channel map instead of the standard map with front+rear surrounds. Signed-off-by: Takashi Iwai -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 3.8-rc1 patch_cirrus 4.0 regression...
On 6 February 2013 00:16, Takashi Iwai wrote: > At Wed, 6 Feb 2013 00:10:30 +0800, > Daniel J Blueman wrote: >> >> Hi Takashi, >> >> The v3.8-rc kernels have regressed from v3.7 with the quad-speaker >> arrangement on my Macbook Pro 10,1 - only the higher-frequency >> speakers work despite the front and rear channels being exposed in the >> mixer. > > Interesting. So you have a machine with 4.0 built-in speaker instead > of 2.1? Then we need to add a device-specific flag for it. Currently > the driver assumes 2.1 system blindly because majority of machines > have that. > > FWIW, the codec parser code has been totally rewritten for 3.9, so any > patch to 3.8 won't be applied to 3.9 (and vice versa)... > > Could you give alsa-info.sh output on 3.8 kernel? Then I'll try to > cook it for 3.9 (and maybe backport to 3.8). Here's the output from the current alsa-info.sh on 3.8-rc6 with the two cited patches reverted [1]; let me know if you'd like 3.8-rc6 pure. The sound is pretty impressive for a laptop when the low-frequency speakers are enabled. Thanks, Takashi! Daniel --- [1] upload=true&script=true&cardinfo= !! !!ALSA Information Script v 0.4.61 !! !!Script ran on: Tue Feb 5 16:22:01 UTC 2013 !!Linux Distribution !!-- Ubuntu 12.10 \n \l DISTRIB_ID=Ubuntu DISTRIB_DESCRIPTION="Ubuntu 12.10" NAME="Ubuntu" ID=ubuntu ID_LIKE=debian PRETTY_NAME="Ubuntu quantal (12.10)" !!DMI Information !!--- Manufacturer: Apple Inc. Product Name: MacBookPro10,1 Product Version: 1.0 Firmware Version: MBP101.88Z.00EE.B02.1208081132 !!Kernel Information !!-- Kernel release:3.8.0-rc6-ninja+ Operating System: GNU/Linux Architecture: x86_64 Processor: x86_64 SMP Enabled: Yes !!ALSA Version !! Driver version: k3.8.0-rc6-ninja+ Library version:1.0.25 Utilities version: 1.0.25 !!Loaded ALSA modules !!--- snd_hda_intel snd_hda_intel !!Sound Servers on this system !! 
Pulseaudio: Installed - Yes (/usr/bin/pulseaudio) Running - Yes !!Soundcards recognised by ALSA !!- 0 [PCH]: HDA-Intel - HDA Intel PCH HDA Intel PCH at 0xc1c1 irq 50 1 [NVidia ]: HDA-Intel - HDA NVidia HDA NVidia at 0xc108 irq 51 !!PCI Soundcards installed in the system !!-- 00:1b.0 Audio device: Intel Corporation 7 Series/C210 Series Chipset Family High Definition Audio Controller (rev 04) 01:00.1 Audio device: NVIDIA Corporation Device 0e1b (rev ff) !!Advanced information - PCI Vendor/Device/Subsystem ID's !!--- 00:1b.0 0403: 8086:1e20 (rev 04) Subsystem: 8086:7270 -- 01:00.1 0403: 10de:0e1b (rev ff) (prog-if ff) !!! Unknown header type 7f !!Modprobe options (Sound related) !! snd-atiixp-modem: index=-2 snd-intel8x0m: index=-2 snd-via82xx-modem: index=-2 snd-usb-audio: index=-2 snd-usb-caiaq: index=-2 snd-usb-ua101: index=-2 snd-usb-us122l: index=-2 snd-usb-usx2y: index=-2 snd-cmipci: mpu_port=0x330 fm_port=0x388 snd-pcsp: index=-2 snd-usb-audio: index=-2 snd_hda_intel: enable_msi=1 power_save=1 !!Loaded sound module options !!--- !!Module: snd_hda_intel align_buffer_size : -1 bdl_pos_adj : 1,32,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 enable : Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y enable_msi : 1 id : (null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null) index : -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 jackpoll_ms : 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 model : (null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null) patch : 
(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null) position_fix : -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 power_save : 0 power_save_control
Re: 3.8-rc1 patch_cirrus 4.0 regression...
On 6 February 2013 01:40, Takashi Iwai wrote: > At Tue, 05 Feb 2013 17:34:15 +0100, > Takashi Iwai wrote: >> >> At Wed, 6 Feb 2013 00:29:54 +0800, >> Daniel J Blueman wrote: >> > >> > On 6 February 2013 00:16, Takashi Iwai wrote: >> > > At Wed, 6 Feb 2013 00:10:30 +0800, >> > > Daniel J Blueman wrote: >> > >> >> > >> Hi Takashi, >> > >> >> > >> The v3.8-rc kernels have regressed from v3.7 with the quad-speaker >> > >> arrangement on my Macbook Pro 10,1 - only the higher-frequency >> > >> speakers work despite the front and rear channels being exposed in the >> > >> mixer. >> > > >> > > Interesting. So you have a machine with 4.0 built-in speaker instead >> > > of 2.1? Then we need to add a device-specific flag for it. Currently >> > > the driver assumes 2.1 system blindly because majority of machines >> > > have that. >> > > >> > > FWIW, the codec parser code has been totally rewritten for 3.9, so any >> > > patch to 3.8 won't be applied to 3.9 (and vice versa)... >> > > >> > > Could you give alsa-info.sh output on 3.8 kernel? Then I'll try to >> > > cook it for 3.9 (and maybe backport to 3.8). >> > >> > Here's the output from the current alsa-info.sh on 3.8-rc6 with the >> > two cited patches reverted [1]; let me know if you'd like 3.8-rc6 >> > pure. >> >> Thanks. >> >> > The sound is pretty impressive for a laptop when the low-frequecy >> > speakers are enabled. >> >> Which program are you using for testing the surrounds? >> I'm interested in it because the commit you reverted is basically >> providing only an additional information for the channel map, and it >> doesn't change anything else. It implies that some applications are >> really referring to the chmap info. > > Or, it might be that the mixer value is simply not set correct. > > To be sure, could you try again 3.8-rc6 without reversing patches, > adjust "Speaker" and "Bass Speaker" volumes properly, and retest? 
> If it still doesn't work, please take alsa-info.sh snapshot at this > state for comparing with the previous result. My apologies! We do now have "Bass Speaker" which affects both bass speakers. It was always being restored to level 0 and works when set up. There is a "Subwoofer" slider in addition to "Balance" and "Fade" in the GNOME mixer UI which is greyed out; presumably this is intended as the same mixer control? Dan -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[3.8-rc7] PCI hotplug wakeup oops
With 3.8-rc7, when unplugging the Thunderbolt ethernet adapter (bus 0a [1]) on a Macbook Pro 10,1, we see the PCIe port correctly released: pciehp :06:03.0:pcie24: Card not present on Slot(3) tg3 :0a:00.0: tg3_abort_hw timed out, TX_MODE_ENABLE will not clear MAC_TX_MODE= tg3 :0a:00.0 eth0: No firmware running tg3 :0a:00.0 eth0: Link is down [sched_delayed] sched: RT throttling activated pcieport :00:01.1: System wakeup enabled by ACPI pciehp :09:00.0:pcie24: unloading service driver pciehp pci_bus :0a: busn_res: [bus 0a] is released pci_bus :09: busn_res: [bus 09-0a] is released After some activity later (eg I can reproduce this by switching to a text console and back), often we'll see an oops: Unable to handle kernel paging request at 1070 pci_pme_list_scan+0x3d/0xe0 Call Trace: process_one_work+0x193 ? process_one_work+0x131 ? pci_pme_wakeup+0x60 worker_thread+0x15d (gdb) list *(pci_pme_list_scan+0x3d) 0x8123f6dd is in pci_pme_list_scan (drivers/pci/pci.c:1556). 1551/* 1552 * If bridge is in low power state, the 1553 * configuration space of subordinate devices 1554 * may be not accessible 1555 */ 1556if (bridge && bridge->current_state != PCI_D0) 1557continue; 1558pci_pme_wakeup(pme_dev->dev, NULL); 1559} else { 1560list_del(&pme_dev->list); Since a panic in vsnprintf happens after the oops (hence I can't catch it with EFI pstore), it is almost certainly significant heap corruption; this would explain why pme_dev became null (the load has been ordered ahead). I'll see what I can find out with memory poisoning and list debugging. 
Thanks, Daniel --- [1] $ lspci 00:00.0 Host bridge: Intel Corporation 3rd Gen Core processor DRAM Controller (rev 09) 00:01.0 PCI bridge: Intel Corporation Xeon E3-1200 v2/3rd Gen Core processor PCI Express Root Port (rev 09) 00:01.1 PCI bridge: Intel Corporation Xeon E3-1200 v2/3rd Gen Core processor PCI Express Root Port (rev 09) 00:01.2 PCI bridge: Intel Corporation Xeon E3-1200 v2/3rd Gen Core processor PCI Express Root Port (rev 09) 00:02.0 VGA compatible controller: Intel Corporation 3rd Gen Core processor Graphics Controller (rev 09) 00:14.0 USB controller: Intel Corporation 7 Series/C210 Series Chipset Family USB xHCI Host Controller (rev 04) 00:16.0 Communication controller: Intel Corporation 7 Series/C210 Series Chipset Family MEI Controller #1 (rev 04) 00:1a.0 USB controller: Intel Corporation 7 Series/C210 Series Chipset Family USB Enhanced Host Controller #2 (rev 04) 00:1b.0 Audio device: Intel Corporation 7 Series/C210 Series Chipset Family High Definition Audio Controller (rev 04) 00:1c.0 PCI bridge: Intel Corporation 7 Series/C210 Series Chipset Family PCI Express Root Port 1 (rev c4) 00:1c.1 PCI bridge: Intel Corporation 7 Series/C210 Series Chipset Family PCI Express Root Port 2 (rev c4) 00:1d.0 USB controller: Intel Corporation 7 Series/C210 Series Chipset Family USB Enhanced Host Controller #1 (rev 04) 00:1f.0 ISA bridge: Intel Corporation HM77 Express Chipset LPC Controller (rev 04) 00:1f.2 SATA controller: Intel Corporation 7 Series Chipset Family 6-port SATA Controller [AHCI mode] (rev 04) 00:1f.3 SMBus: Intel Corporation 7 Series/C210 Series Chipset Family SMBus Controller (rev 04) 01:00.0 VGA compatible controller: NVIDIA Corporation Device 0fd5 (rev ff) 01:00.1 Audio device: NVIDIA Corporation Device 0e1b (rev ff) 03:00.0 Ethernet controller: Broadcom Corporation Device 16a3 (rev 10) 03:00.1 SD Host controller: Broadcom Corporation NetXtreme BCM57765 Memory Card Reader (rev 10) 04:00.0 Network controller: Broadcom Corporation BCM4331 
802.11a/b/g/n (rev 02) 05:00.0 PCI bridge: Intel Corporation DSL3510 Thunderbolt Port [Cactus Ridge] (rev 03) 06:00.0 PCI bridge: Intel Corporation DSL3510 Thunderbolt Port [Cactus Ridge] (rev 03) 06:03.0 PCI bridge: Intel Corporation DSL3510 Thunderbolt Port [Cactus Ridge] (rev 03) 06:04.0 PCI bridge: Intel Corporation DSL3510 Thunderbolt Port [Cactus Ridge] (rev 03) 06:05.0 PCI bridge: Intel Corporation DSL3510 Thunderbolt Port [Cactus Ridge] (rev 03) 06:06.0 PCI bridge: Intel Corporation DSL3510 Thunderbolt Port [Cactus Ridge] (rev 03) 07:00.0 System peripheral: Intel Corporation DSL3510 Thunderbolt Port [Cactus Ridge] (rev 03) 08:00.0 PCI bridge: Intel Corporation DSL3510 Thunderbolt Controller [Cactus Ridge] 09:00.0 PCI bridge: Intel Corporation DSL3510 Thunderbolt Controller [Cactus Ridge] 0a:00.0 Ethernet controller: Broadcom Corporation NetXtreme BCM57762 Gigabit Ethernet PCIe -- Daniel J Blueman -- To unsubscribe from this list: send the line "u
Re: [3.8-rc7] PCI hotplug wakeup oops
On 11 February 2013 21:03, Daniel J Blueman wrote: > With 3.8-rc7, when unplugging the Thunderbolt ethernet adapter (bus 0a > [1]) on a Macbook Pro 10,1, we see the PCIe port correctly released: > > pciehp :06:03.0:pcie24: Card not present on Slot(3) > tg3 :0a:00.0: tg3_abort_hw timed out, TX_MODE_ENABLE will not > clear MAC_TX_MODE= > tg3 :0a:00.0 eth0: No firmware running > tg3 :0a:00.0 eth0: Link is down > [sched_delayed] sched: RT throttling activated > pcieport :00:01.1: System wakeup enabled by ACPI > pciehp :09:00.0:pcie24: unloading service driver pciehp > pci_bus :0a: busn_res: [bus 0a] is released > pci_bus :09: busn_res: [bus 09-0a] is released > > After some activity later (eg I can reproduce this by switching to a > text console and back), often we'll see an oops: > > Unable to handle kernel paging request at 1070 > pci_pme_list_scan+0x3d/0xe0 > Call Trace: > process_one_work+0x193 > ? process_one_work+0x131 > ? pci_pme_wakeup+0x60 > worker_thread+0x15d > > (gdb) list *(pci_pme_list_scan+0x3d) > 0x8123f6dd is in pci_pme_list_scan (drivers/pci/pci.c:1556). > 1551/* > 1552 * If bridge is in low power state, > the > 1553 * configuration space of subordinate > devices > 1554 * may be not accessible > 1555 */ > 1556if (bridge && bridge->current_state > != PCI_D0) > 1557continue; > 1558pci_pme_wakeup(pme_dev->dev, NULL); > 1559} else { > 1560list_del(&pme_dev->list); > > Since a panic in vsnprintf happens after the oops (hence I can't catch > it with EFI pstore), it is almost certainly significant heap > corruption; this would explain why pme_dev became null (the load has > been ordered ahead). > > I'll see what I can find out with memory poisoning and list debugging. Enabling a bunch of related debugging, we see pme_dev is non-null and: BUG: Unable to handle NULL pointer dereference at pci_bus_read_config_word+0x6c PGD 26314c067 PUD 2633f9067 PMD 0 Oops: [#1] SMP pci_check_pme_status+0x4f pci_pme_wakeup+0x21 pci_pme_list_scan+0xd5 process_one_work+0x1ca ? 
process_one_work+0x160 ? pci_pme_wakeup+0x60 worker_thread+0x14e Anyway, it looks like the device being unplugged wasn't removed from pci_pme_list as pci_pme_active(dev, false) wasn't called. From a quick review, I wasn't able to find the right place in the call-chain, which I only see releasing the child busses and PCIe port drivers. Anyone? Thanks, Daniel -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [3.8-rc7] PCI hotplug wakeup oops
On 12 February 2013 03:49, Rafael J. Wysocki wrote: > On Monday, February 11, 2013 08:27:49 PM Rafael J. Wysocki wrote: >> On Monday, February 11, 2013 12:01:37 PM Bjorn Helgaas wrote: >> > [+cc Rafael] >> > >> > On Mon, Feb 11, 2013 at 10:08 AM, Daniel J Blueman >> > wrote: >> > > On 11 February 2013 21:03, Daniel J Blueman wrote: >> > >> With 3.8-rc7, when unplugging the Thunderbolt ethernet adapter (bus 0a >> > >> [1]) on a Macbook Pro 10,1, we see the PCIe port correctly released: >> > >> >> > >> pciehp :06:03.0:pcie24: Card not present on Slot(3) >> > >> tg3 :0a:00.0: tg3_abort_hw timed out, TX_MODE_ENABLE will not >> > >> clear MAC_TX_MODE= >> > >> tg3 :0a:00.0 eth0: No firmware running >> > >> tg3 :0a:00.0 eth0: Link is down >> > >> [sched_delayed] sched: RT throttling activated >> > >> pcieport :00:01.1: System wakeup enabled by ACPI >> > >> pciehp :09:00.0:pcie24: unloading service driver pciehp >> > >> pci_bus :0a: busn_res: [bus 0a] is released >> > >> pci_bus :09: busn_res: [bus 09-0a] is released >> > >> >> > >> After some activity later (eg I can reproduce this by switching to a >> > >> text console and back), often we'll see an oops: >> > >> >> > >> Unable to handle kernel paging request at 1070 >> > >> pci_pme_list_scan+0x3d/0xe0 >> > >> Call Trace: >> > >> process_one_work+0x193 >> > >> ? process_one_work+0x131 >> > >> ? pci_pme_wakeup+0x60 >> > >> worker_thread+0x15d >> > >> >> > >> (gdb) list *(pci_pme_list_scan+0x3d) >> > >> 0x8123f6dd is in pci_pme_list_scan (drivers/pci/pci.c:1556). 
>> > >> 1551/* >> > >> 1552 * If bridge is in low power >> > >> state, the >> > >> 1553 * configuration space of >> > >> subordinate devices >> > >> 1554 * may be not accessible >> > >> 1555 */ >> > >> 1556if (bridge && >> > >> bridge->current_state != PCI_D0) >> > >> 1557continue; >> > >> 1558pci_pme_wakeup(pme_dev->dev, >> > >> NULL); >> > >> 1559} else { >> > >> 1560list_del(&pme_dev->list); >> > >> >> > >> Since a panic in vsnprintf happens after the oops (hence I can't catch >> > >> it with EFI pstore), it is almost certainly significant heap >> > >> corruption; this would explain why pme_dev became null (the load has >> > >> been ordered ahead). >> > >> >> > >> I'll see what I can find out with memory poisoning and list debugging. >> > > >> > > Enabling a bunch of related debugging, we see pme_dev is non-null and: >> > > >> > > BUG: Unable to handle NULL pointer dereference at >> > > pci_bus_read_config_word+0x6c >> > > PGD 26314c067 PUD 2633f9067 PMD 0 >> > > Oops: [#1] SMP >> > > pci_check_pme_status+0x4f >> > > pci_pme_wakeup+0x21 >> > > pci_pme_list_scan+0xd5 >> > > process_one_work+0x1ca >> > > ? process_one_work+0x160 >> > > ? pci_pme_wakeup+0x60 >> > > worker_thread+0x14e >> > > >> > > Anyway, it looks like the device being unplugged wasn't removed from >> > > pci_pme_list as pci_pme_active(dev, false) wasn't called. >> > > >> > > From a quick review, I wasn't able to find the right place in the >> > > call-chain which I only see releases the child busses and PCIe port >> > > drivers. Anyone? >> > >> > It looks like drivers *add* devices to pci_pme_list when they use >> > pci_enable_wake() or pci_wake_from_d3(). But many drivers never >> > remove their devices, and I don't see any place where the core does it >> > either. My guess is we need to remove it in pci_stop_dev() (we >> > already do pcie_aspm_exit_link_state() there) or somewhere similar
[PATCH] x86, amd, mce: Prevent potential cpu-online oops
On platforms where all Northbridges may not be visible (due to routing, eg on NumaConnect systems), prevent oopsing due to stale pointer access when offlining cores. Signed-off-by: Steffen Persvold Signed-off-by: Daniel J Blueman --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 11 ++- 1 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 1ac581f..53a58c2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -578,8 +578,11 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (shared_bank[bank]) { nb = node_to_amd_nb(amd_get_nb_id(cpu)); + if (WARN_ON_ONCE(!nb)) + goto out; + /* threshold descriptor already initialized on this node? */ - if (nb && nb->bank4) { + if (nb->bank4) { /* yes, use it */ b = nb->bank4; err = kobject_add(b->kobj, &dev->kobj, name); @@ -613,10 +616,8 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) atomic_set(&b->cpus, 1); /* nb is already initialized, see above */ - if (nb) { - WARN_ON(nb->bank4); - nb->bank4 = b; - } + WARN_ON(nb->bank4); + nb->bank4 = b; } err = allocate_threshold_blocks(cpu, bank, 0, -- 1.7.4.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[3.8.0-rc6] xhci NULL deref
With a couple of communication devices attached [1] on 3.8.0-rc6, XHCI ran into a zero-page access [2]. A quick check of the code [3,4] suggests that struct xhci_virt_device dev was NULL. I'll see if I can have netconsole enabled for when it occurs again, to catch any related error information. Thanks, Daniel --- [1] $ lsusb Bus 001 Device 002: ID 8087:0024 Intel Corp. Integrated Rate Matching Hub Bus 002 Device 002: ID 8087:0024 Intel Corp. Integrated Rate Matching Hub Bus 003 Device 004: ID 0424:2412 Standard Microsystems Corp. Bus 003 Device 003: ID 04e8:6863 Samsung Electronics Co., Ltd Bus 001 Device 001: ID 1d6b:0002 Linux Foundation 2.0 root hub Bus 002 Device 001: ID 1d6b:0002 Linux Foundation 2.0 root hub Bus 003 Device 001: ID 1d6b:0002 Linux Foundation 2.0 root hub Bus 004 Device 001: ID 1d6b:0003 Linux Foundation 3.0 root hub Bus 001 Device 003: ID 05ac:8510 Apple, Inc. Bus 002 Device 003: ID 0424:2512 Standard Microsystems Corp. USB 2.0 Hub Bus 003 Device 005: ID 0403:6010 Future Technology Devices International, Ltd FT2232C Dual USB-UART/FIFO IC Bus 002 Device 008: ID 05ac:8286 Apple, Inc. Bus 002 Device 004: ID 0a5c:4500 Broadcom Corp. BCM2046B1 USB 2.0 Hub (part of BCM2046 Bluetooth) Bus 002 Device 005: ID 05ac:0262 Apple, Inc. --- [2] Unable to handle NULL pointer dereference 000508 RAX: 00500 RBX: 0 RCX: 00508 RDX: 0 RSI: 6 RDI: 0 RBP: 88026f203dc8 xhci_stream_id_to_ring+0x40 handle_cmd_completion+0x16a ? 
rebalance_domains+0x96 xhci_irq+0x27b --- [3] (gdb) disassemble xhci_stream_id_to_ring+0x40 0x813384e0 <+0>: mov%esi,%esi 0x813384e2 <+2>: push %rbp 0x813384e3 <+3>: lea(%rsi,%rsi,2),%rax 0x813384e7 <+7>: mov%rsp,%rbp 0x813384ea <+10>:lea(%rsi,%rax,4),%rax 0x813384ee <+14>:shl$0x4,%rax 0x813384f2 <+18>:test %edx,%edx 0x813384f4 <+20>:lea0x20(%rdi,%rax,1),%rax 0x813384f9 <+25>:lea0x8(%rax),%rcx 0x813384fd <+29>:je 0x81338520 0x813384ff <+31>:mov0x8(%rcx),%rax 0x81338503 <+35>:test %rax,%rax 0x81338506 <+38>:je 0x81338530 0x81338508 <+40>:cmp0x8(%rax),%edx // deref --- [4] struct xhci_ring *xhci_stream_id_to_ring( struct xhci_virt_device *dev, unsigned int ep_index, unsigned int stream_id) { struct xhci_virt_ep *ep = &dev->eps[ep_index]; if (stream_id == 0) return ep->ring; // deref if (!ep->stream_info) return NULL; if (stream_id > ep->stream_info->num_streams) return NULL; return ep->stream_info->stream_rings[stream_id]; } -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] Prevent USB hub remove oops
When initialisation of one or more USB hub ports fails, we can hit a null pointer dereference when dropping the hub. Analysis shows there's a false assumption about the ports being setup, so address this. hub 2-3:1.0: USB hub found hub 2-3:1.0: 7 ports detected hub 2-3:1.0: hub_hub_status failed (err = -11) hub 2-3:1.0: config failed, can't get hub status (err -11) BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] hub_quiesce+0x46/0xb0 PGD 0 Oops: [#1] SMP CPU 2 Pid: 3364, comm: khubd Not tainted 3.8.0-advanced+ #21 IBM IBM System X3755 M3 -[7164Z63]-/94Y6321 RIP: 0010:[] [] hub_quiesce+0x46/0xb0 RSP: 0018:88046c535978 EFLAGS: 00010246 RAX: 88046c4fc100 RBX: RCX: 000b RDX: RSI: 0001 RDI: RBP: 88046c6b8400 R08: 0002 R09: 129b R10: R11: 88046c53550e R12: 88046c098000 R13: 88046c0a8430 R14: 81af72e0 R15: 88046c6b8400 FS: 7fcbd36196e0() GS:88046fc8() knlGS: CS: 0010 DS: ES: CR0: 8005003b CR2: CR3: 01e0c000 CR4: 07e0 DR0: DR1: DR2: DR3: DR6: 0ff0 DR7: 0400 Process khubd (pid: 3364, threadinfo 88046c534000, task 88046c58ad00) Stack: 88046c0a8400 88046c6b8400 88046c098088 816901a5 88046c098088 88046c0a8400 88046c098088 88046c098000 88046c4fc180 816904c1 8169a261 88046c0a8430 Call Trace: [] ? hub_disconnect+0x75/0x140 [] ? hub_probe+0x251/0x260 [] ? usb_match_one_id+0x31/0x70 [] ? usb_probe_interface+0x1a6/0x260 [] ? driver_probe_device+0x68/0x210 [] ? __driver_attach+0xa0/0xa0 [] ? bus_for_each_drv+0x3e/0x80 [] ? device_attach+0x98/0xb0 [] ? bus_probe_device+0x80/0xb0 [] ? device_add+0x5be/0x680 [] ? usb_string+0x11e/0x1e0 [] ? usb_set_configuration+0x4cd/0x7c0 [] ? sysfs_do_create_link+0xed/0x220 [] ? generic_probe+0x2f/0x90 [] ? driver_probe_device+0x68/0x210 [] ? __driver_attach+0xa0/0xa0 [] ? bus_for_each_drv+0x3e/0x80 [] ? device_attach+0x98/0xb0 [] ? bus_probe_device+0x80/0xb0 [] ? device_add+0x5be/0x680 [] ? mix_pool_bytes.constprop.19+0x3f/0x60 [] ? usb_new_device+0x158/0x210 [] ? hub_port_connect_change+0x570/0x9c0 [] ? 
hub_thread+0x26f/0x7c0 [] ? __wake_up_common+0x4c/0x80 [] ? abort_exclusive_wait+0xb0/0xb0 [] ? usb_reset_device+0x140/0x140 [] ? kthread+0xb3/0xc0 [] ? __kthread_parkme+0x80/0x80 [] ? ret_from_fork+0x7c/0xb0 [] ? __kthread_parkme+0x80/0x80 Code: e4 00 00 00 02 83 fb 02 74 38 41 8b 84 24 40 04 00 00 85 c0 7e 2c 31 db 0f 1f 44 00 00 48 8b 85 f8 01 00 00 48 63 d3 48 8b 3c d0 <48> 83 3f 00 74 05 e8 9f fe ff ff ff c3 41 39 9c 24 40 04 00 00 RIP [] hub_quiesce+0x46/0xb0 RSP CR2: (gdb) list *(hub_quiesce+0x46) 0x81690056 is in hub_quiesce (drivers/usb/core/hub.c:1266). 1261hub->quiescing = 1; 1262 1263if (type != HUB_SUSPEND) { 1264/* Disconnect all the children */ 1265for (i = 0; i < hdev->maxchild; ++i) { 1266if (hub->ports[i]->child) 1267usb_disconnect(&hub->ports[i]->child); 1268 } 1269 } Signed-off-by: Daniel J Blueman --- drivers/usb/core/hub.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index cbf7168..a7abc57 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1263,7 +1263,7 @@ static void hub_quiesce(struct usb_hub *hub, enum hub_quiescing_type type) if (type != HUB_SUSPEND) { /* Disconnect all the children */ for (i = 0; i < hdev->maxchild; ++i) { - if (hub->ports[i]->child) + if (hub->ports[i] && hub->ports[i]->child) usb_disconnect(&hub->ports[i]->child); } } -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 4/4 v8] AMD64 EDAC: Fix type usage in NB IDs and memory ranges
Hi Boris, On 05/12/2012 00:01, Borislav Petkov wrote: On Tue, Dec 04, 2012 at 05:24:16PM +0800, Daniel J Blueman wrote: It works well on fam10h and fam15h boxes, with and without Numaconnect. Good, thanks for testing. I will send it upstream after the upcoming merge window closes since it is too late for this one now and I wouldn't want to rush it if it is not necessary to do so and it hasn't seen enough testing in linux-next and -tip trees. Which means that it will end up in 3.9; I hope that is OK with you guys. Alas your merges missed the v3.8 merge window, but it looks like your v3.9 pull request has dropped these patches [1]. Any chance you can get them in during this merge window? Many thanks, Daniel [1] https://groups.google.com/forum/?fromgroups=#!topic/linux.kernel/2DLVw1Rv8bQ -- Daniel J Blueman Principal Software Engineer, Numascale Asia -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 4/4 v8] AMD64 EDAC: Fix type usage in NB IDs and memory ranges
On 19/02/2013 22:59, Borislav Petkov wrote: On Tue, Feb 19, 2013 at 10:40:26PM +0800, Daniel J Blueman wrote: Alas your merges missed the v3.8 merge window, but it looks like your v3.9 pull request has dropped these patches [1]. Any chance you can get them in during this merge window? They should go in anytime now. They're in tip:x86/platform and Ingo is sending pull requests to Linus as we speak. Since they touch x86 code I asked x86 guys to send them upstream instead of me. Superb; thanks for your help Boris and Ingo! Daniel -- Daniel J Blueman Principal Software Engineer, Numascale Asia -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
False-positive RCU stall warnings on large systems...
Hi Paul, On some of our larger servers with many hundreds of cores and when under high duress, we can see scheduler RCU stall warnings [1], so we find we have to increase the hardcoded RCU_STALL_RAT_DELAY up from 2 and RCU_JIFFIES_TILL_FORCE_QS up from 3. Is there a more sustainable way to account for this to avoid it being hard-coded, such as making it and dependent timeouts a fraction of CONFIG_RCU_CPU_STALL_TIMEOUT? On the other hand, perhaps this is just caused by clock jitter (eg due to distance from a contended clock source)? So increasing these a bit may just be adequate in general... Many thanks, Daniel --- [1] [ 3939.010085] INFO: rcu_sched detected stalls on CPUs/tasks: {} (detected by 1, t=29662 jiffies, g=3053, c=3052, q=598) [ 3939.020008] INFO: Stall ended before state dump start -- Daniel J Blueman Principal Software Engineer, Numascale Asia -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: False-positive RCU stall warnings on large systems...
On 20/02/2013 02:16, Paul E. McKenney wrote: On Wed, Feb 20, 2013 at 12:34:12AM +0800, Daniel J Blueman wrote: Hi Paul, On some of our larger servers with many hundreds of cores and when under high duress, we can see scheduler RCU stall warnings [1], so find we have to increase the hardcoded RCU_STALL_RAT_DELAY up from 2 and RCU_JIFFIES_TILL_FORCE_QS up from 3. Is there a more sustainable way to account for this to avoid it being hard-coded, such as making it and dependent timeouts a fraction of CONFIG_RCU_CPU_STALL_TIMEOUT? On the other hand, perhaps this is just caused by clock jitter (eg due to distance from a contended clock source)? So increasing these a bit may just be adequate in general... Hmmm... What version of the kernel are you running? The example below occurs with v3.8, but we see the same with previous kernels eg v3.5. Of course, when using the local TSC, you'd see no jitter relative to coherent transactions (eg memory writes), but when the HPET is used across a large system, coherent transactions to distant cores are just so much faster, as there's massive congestion to the shared HPET behind various HT and PCIe bridges. This could be where the jitter arises from, if I'm guessing jitter is the problem here. Thanks, Daniel --- [1] [ 3939.010085] INFO: rcu_sched detected stalls on CPUs/tasks: {} (detected by 1, t=29662 jiffies, g=3053, c=3052, q=598) [ 3939.020008] INFO: Stall ended before state dump start -- Daniel J Blueman Principal Software Engineer, Numascale Asia -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] nouveau: Fix kernel log mangling
Add missing newline to prevent the following kernel log line getting appended to the current one. Signed-off-by: Daniel J Blueman --- drivers/gpu/drm/nouveau/nouveau_dp.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c index 7e289d2..2a9294f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dp.c +++ b/drivers/gpu/drm/nouveau/nouveau_dp.c @@ -61,7 +61,7 @@ auxch_init(struct drm_device *dev, int ch) ctrl = nv_rd32(dev, 0x00e4e4 + (ch * 0x50)); udelay(1); if (!timeout--) { - AUX_ERR("begin idle timeout 0x%08x", ctrl); + AUX_ERR("begin idle timeout 0x%08x\n", ctrl); return -EBUSY; } } while (ctrl & 0x0301); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] Prevent AMD MCE oops on multi-server system
On 02/10/2012 02:01, Borislav Petkov wrote: On Tue, Oct 02, 2012 at 12:12:31AM +0800, Daniel J Blueman wrote: On 01/10/2012 18:06, Borislav Petkov wrote: On Mon, Oct 01, 2012 at 02:42:05PM +0800, Daniel J Blueman wrote: When booting on a federated multi-server system, the processor Northbridge lookup returns NULL; add guards to prevent this causing an oops. Interesting. What does lspci say on those systems? Thanks. As NumaConnect remote-server I/O is in a pre-release stage, we only expose I/O on the first (root) server, so the lspci on eg my three server, single-socket C32 development system is uninteresting [1]. Yeah, I was looking for the NB devices: 00:18.0 Host bridge: Advanced Micro Devices [AMD] Family 10h Processor HyperTransport Configuration 00:18.1 Host bridge: Advanced Micro Devices [AMD] Family 10h Processor Address Map 00:18.2 Host bridge: Advanced Micro Devices [AMD] Family 10h Processor DRAM Controller 00:18.3 Host bridge: Advanced Micro Devices [AMD] Family 10h Processor Miscellaneous Control 00:18.4 Host bridge: Advanced Micro Devices [AMD] Family 10h Processor Link Control [ … ] We map MMCONFIG addresses in the global address map to the respective server, which is how we access the processor Northbridges in the bootloader before Linux loads, so they are accessible and get enumerated when we enable remote I/O with the ACPI SSDT we generate, however since the AMD APIC IDs (hence NB IDs) are only 8-bit, the present amd_get_nb_id will produce duplicate NB IDs at best (but in this case, as we disable I/O routing, there is no structure); later, we may propose to using eg bits 23:8 for the server ID. That's another discussion though. Ah yes, I remember now. We had this discussion already, AFAIR. So if you say you disable I/O routing, what actually doesn't work out as expected is the NB enumeration in amd_nb.c where pci_get_device simply fails? Because if you had duplicate APIC IDs, you'd atleast get some NB descriptor, even if not the correct one? 
With remote-I/O disabled, since only the first PCI domain has been enumerated, the array of Northbridge IDs has structures only for the root (first) server's northbridges, thus the lookup returns NULL for later ones. Yes, we see the duplicates with remote I/O enabled [1, 2], stemming from amd64_edac.h: static inline u8 get_node_id(struct pci_dev *pdev) { return PCI_SLOT(pdev->devfn) - 0x18; } How about a patch that would add the PCI domain eg in bits 8 and up? The minimal patch at least corrects the oops regression which didn't happen in earlier kernels. Right, I beefed it up a bit and added a stable tag, pls take a look and let me know if it is ok. I'll run it on a couple of machines but I don't expect any issues so I'll send it upstream soon. Looks good! Thanks Boris, Daniel --- [1] EDAC MC: Ver: 3.0.0 AMD64 EDAC driver v3.4.0 EDAC amd64: DRAM ECC enabled. EDAC amd64: F10h detected (node 0). EDAC MC: DCT0 chip selects: EDAC amd64: MC: 0: 0MB 1: 0MB EDAC amd64: MC: 2: 0MB 3: 0MB EDAC amd64: MC: 4: 2048MB 5: 2048MB EDAC amd64: MC: 6: 0MB 7: 0MB EDAC MC: DCT1 chip selects: EDAC amd64: MC: 0: 0MB 1: 0MB EDAC amd64: MC: 2: 0MB 3: 0MB EDAC amd64: MC: 4: 2048MB 5: 2048MB EDAC amd64: MC: 6: 0MB 7: 0MB EDAC amd64: using x4 syndromes. EDAC amd64: MCT channel count: 2 EDAC amd64: CS4: Unbuffered DDR3 RAM EDAC amd64: CS5: Unbuffered DDR3 RAM EDAC MC0: Giving out device to 'amd64_edac' 'F10h': DEV :00:18.2 EDAC amd64: DRAM ECC enabled. EDAC amd64: F10h detected (node 0). EDAC MC: DCT0 chip selects: EDAC amd64: MC: 0: 0MB 1: 0MB EDAC amd64: MC: 2: 0MB 3: 0MB EDAC amd64: MC: 4: 2048MB 5: 2048MB EDAC amd64: MC: 6: 0MB 7: 0MB EDAC MC: DCT1 chip selects: EDAC amd64: MC: 0: 0MB 1: 0MB EDAC amd64: MC: 2: 0MB 3: 0MB EDAC amd64: MC: 4: 2048MB 5: 2048MB EDAC amd64: MC: 6: 0MB 7: 0MB EDAC amd64: using x4 syndromes. 
EDAC amd64: MCT channel count: 2 EDAC amd64: CS4: Unbuffered DDR3 RAM EDAC amd64: CS5: Unbuffered DDR3 RAM EDAC MC: bug in low-level driver: attempt to assign duplicate mc_idx 0 in add_mc_to_global_list() EDAC amd64: Error probing instance: 0 EDAC amd64: DRAM ECC enabled. EDAC amd64: F10h detected (node 0). EDAC MC: DCT0 chip selects: EDAC amd64: MC: 0: 0MB 1: 0MB EDAC amd64: MC: 2: 0MB 3: 0MB EDAC amd64: MC: 4: 2048MB 5: 2048MB EDAC amd64: MC: 6: 0MB 7: 0MB EDAC MC: DCT1 chip selects: EDAC amd64: MC: 0: 0MB 1: 0MB EDAC amd64: MC: 2: 0MB 3: 0MB EDAC amd64: MC: 4: 2048MB 5: 2048MB EDAC amd64: MC: 6: 0MB 7: 0MB EDAC amd64: using x4 syndromes. EDAC amd64: MCT channel count: 2 EDAC amd64: CS4: Unbuffered DDR3 RAM EDAC amd64: CS5: Unbuffered DDR3 RAM EDAC MC: bug in low-level driver: attempt to assign duplicate mc_idx 0 in add_mc_to_global_list() EDAC amd64: Error probing instance: 0 EDAC PCI0: Giving out device to module 'amd64_edac' controller 'EDAC PCI
[PATCH] RFC: Fix AMD Northbridge-ID contiguity assumptions
The AMD Northbridge initialisation code and EDAC assume the Northbridge IDs are contiguous, which no longer holds on federated systems with multiple HyperTransport fabrics with multiple PCI domains. Address this assumption by searching the Northbridge ID array, rather than directly indexing it, using the upper bits for the PCI domain. Signed-off-by: Daniel J Blueman --- arch/x86/include/asm/amd_nb.h | 17 +++-- arch/x86/kernel/amd_nb.c | 15 --- drivers/edac/amd64_edac.c | 18 +- drivers/edac/amd64_edac.h |4 ++-- 4 files changed, 34 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index b3341e9..016448c 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -47,6 +47,7 @@ struct threshold_bank { }; struct amd_northbridge { + u32 node; struct pci_dev *misc; struct pci_dev *link; struct amd_l3_cache l3_cache; @@ -76,15 +77,27 @@ static inline bool amd_nb_has_feature(unsigned feature) return ((amd_northbridges.flags & feature) == feature); } -static inline struct amd_northbridge *node_to_amd_nb(int node) +static inline int node_to_amd_index(u32 node) { - return (node < amd_northbridges.num) ? 
&amd_northbridges.nb[node] : NULL; + int i; + + for (i = 0; i < amd_northbridges.num; i++) + if (amd_northbridges.nb[i].node == node) + return i; + + return 0; +} + +static inline struct amd_northbridge *node_to_amd_nb(u32 node) +{ + return &amd_northbridges.nb[node_to_amd_index(node)]; } #else #define amd_nb_num(x) 0 #define amd_nb_has_feature(x) false +#define node_to_amd_index(x) 0 #define node_to_amd_nb(x) NULL #endif diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index aadf335..011eca1 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -75,10 +75,9 @@ int amd_cache_northbridges(void) link = misc = NULL; for (i = 0; i != amd_nb_num(); i++) { - node_to_amd_nb(i)->misc = misc = - next_northbridge(misc, amd_nb_misc_ids); - node_to_amd_nb(i)->link = link = - next_northbridge(link, amd_nb_link_ids); + nb->misc = misc = next_northbridge(misc, amd_nb_misc_ids); + nb->link = link = next_northbridge(link, amd_nb_link_ids); + nb++; } /* some CPU families (e.g. 
family 0x11) do not support GART */ @@ -212,6 +211,7 @@ int amd_set_subcaches(int cpu, int mask) static int amd_cache_gart(void) { u16 i; + struct amd_northbridge *nb = amd_northbridges.nb; if (!amd_nb_has_feature(AMD_NB_GART)) return 0; @@ -222,9 +222,10 @@ static int amd_cache_gart(void) return -ENOMEM; } - for (i = 0; i != amd_nb_num(); i++) - pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c, - &flush_words[i]); + for (i = 0; i != amd_nb_num(); i++) { + pci_read_config_dword(nb->misc, 0x9c, &flush_words[i]); + nb++; + } return 0; } diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 5a297a2..9c35565 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2549,7 +2549,7 @@ static int amd64_init_one_instance(struct pci_dev *F2) struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; int err = 0, ret; - u8 nid = get_node_id(F2); + u32 nid = get_node_id(F2); ret = -ENOMEM; pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); @@ -2640,7 +2640,7 @@ err_ret: static int __devinit amd64_probe_one_instance(struct pci_dev *pdev, const struct pci_device_id *mc_type) { - u8 nid = get_node_id(pdev); + u32 nid = get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s; int ret = 0; @@ -2656,7 +2656,7 @@ static int __devinit amd64_probe_one_instance(struct pci_dev *pdev, if (!s) goto err_out; - ecc_stngs[nid] = s; + ecc_stngs[node_to_amd_index(nid)] = s; if (!ecc_enabled(F3, nid)) { ret = -ENODEV; @@ -2680,7 +2680,7 @@ static int __devinit amd64_probe_one_instance(struct pci_dev *pdev, err_enable: kfree(s); - ecc_stngs[nid] = NULL; + ecc_stngs[node_to_amd_index(nid)] = NULL; err_out: return ret; @@ -2690,9 +2690,9 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev) { struct mem_ctl_info *mci; struct amd64_pvt *pvt; - u8 nid = get_node_id(pdev); + u32 nid = get_node_id(pdev); struct pci_dev *F3 = node_to_am
[PATCH v2] Fix AMD Northbridge-ID contiguity assumptions
The AMD Northbridge initialisation code and EDAC assume the Northbridge IDs are contiguous, which no longer holds on federated systems with multiple HyperTransport fabrics and multiple PCI domains. Address this assumption by searching the Northbridge ID array, rather than directly indexing it, using the upper bits for the PCI domain. v2: Fix Northbridge entry initialisation Tested on a single-socket system and 3-server federated system. Signed-off-by: Daniel J Blueman --- arch/x86/include/asm/amd_nb.h | 23 +-- arch/x86/kernel/amd_nb.c | 16 +--- drivers/edac/amd64_edac.c | 18 +- drivers/edac/amd64_edac.h |6 -- 4 files changed, 39 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index b3341e9..0fd2f0c 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -47,6 +47,7 @@ struct threshold_bank { }; struct amd_northbridge { + u32 node; struct pci_dev *misc; struct pci_dev *link; struct amd_l3_cache l3_cache; @@ -76,15 +77,33 @@ static inline bool amd_nb_has_feature(unsigned feature) return ((amd_northbridges.flags & feature) == feature); } -static inline struct amd_northbridge *node_to_amd_nb(int node) +static inline int node_to_amd_index(u32 node) { - return (node < amd_northbridges.num) ? 
&amd_northbridges.nb[node] : NULL; + int i; + + for (i = 0; i < amd_northbridges.num; i++) + if (amd_northbridges.nb[i].node == node) + return i; + + return 0; +} + +static inline struct amd_northbridge *node_to_amd_nb(u32 node) +{ + return &amd_northbridges.nb[node_to_amd_index(node)]; +} + +/* AMD sets the first MC device at device ID 0x18 */ +static inline u32 get_node_id(struct pci_dev *pdev) +{ + return (pci_domain_nr(pdev->bus) << 8) | (PCI_SLOT(pdev->devfn) - 0x18); } #else #define amd_nb_num(x) 0 #define amd_nb_has_feature(x) false +#define node_to_amd_index(x) 0 #define node_to_amd_nb(x) NULL #endif diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index aadf335..c29ce39 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -75,10 +75,10 @@ int amd_cache_northbridges(void) link = misc = NULL; for (i = 0; i != amd_nb_num(); i++) { - node_to_amd_nb(i)->misc = misc = - next_northbridge(misc, amd_nb_misc_ids); - node_to_amd_nb(i)->link = link = - next_northbridge(link, amd_nb_link_ids); + nb->misc = misc = next_northbridge(misc, amd_nb_misc_ids); + nb->node = get_node_id(misc); + nb->link = link = next_northbridge(link, amd_nb_link_ids); + nb++; } /* some CPU families (e.g. 
family 0x11) do not support GART */ @@ -212,6 +212,7 @@ int amd_set_subcaches(int cpu, int mask) static int amd_cache_gart(void) { u16 i; + struct amd_northbridge *nb = amd_northbridges.nb; if (!amd_nb_has_feature(AMD_NB_GART)) return 0; @@ -222,9 +223,10 @@ static int amd_cache_gart(void) return -ENOMEM; } - for (i = 0; i != amd_nb_num(); i++) - pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c, - &flush_words[i]); + for (i = 0; i != amd_nb_num(); i++) { + pci_read_config_dword(nb->misc, 0x9c, &flush_words[i]); + nb++; + } return 0; } diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 5a297a2..9c35565 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2549,7 +2549,7 @@ static int amd64_init_one_instance(struct pci_dev *F2) struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; int err = 0, ret; - u8 nid = get_node_id(F2); + u32 nid = get_node_id(F2); ret = -ENOMEM; pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); @@ -2640,7 +2640,7 @@ err_ret: static int __devinit amd64_probe_one_instance(struct pci_dev *pdev, const struct pci_device_id *mc_type) { - u8 nid = get_node_id(pdev); + u32 nid = get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s; int ret = 0; @@ -2656,7 +2656,7 @@ static int __devinit amd64_probe_one_instance(struct pci_dev *pdev, if (!s) goto err_out; - ecc_stngs[nid] = s; + ecc_stngs[node_to_amd_index(nid)] = s; if (!ecc_enabled(F3, nid)) { ret = -ENODEV; @@ -2680,7 +2680,7 @@ static int __devinit amd64_probe_one_instance(struct pci_dev *pdev, err_enable: kfree(s); - ecc_stngs
[PATCH] Fix Intel PIIX4 I2C driver build failure
Fix build failure in Intel PIIX4 I2C driver. Signed-off-by: Daniel J Blueman --- drivers/i2c/busses/i2c-piix4.c |1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index ef511df..8bbd6ec 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3] Add support for AMD64 EDAC on multiple PCI domains
The AMD Northbridge initialisation code and EDAC assume the Northbridge IDs are contiguous, which no longer holds on federated systems with multiple HyperTransport fabrics and multiple PCI domains, eg on Numascale's Numaconnect systems with NumaChip. Address this assumption by searching the Northbridge ID array, rather than directly indexing it, using the upper bits for the PCI domain. RFC->v2: Correct array initialisation v2->v3: Add Boris's neater linked list approach Todo: 1. fix kobject/sysfs oops (see http://quora.org/2012/16-server-boot.txt later) 2. reorder amd64_edac.c or add amd64_per_family_init/pci_get_related_function forward declarations, based on feedback Signed-off-by: Daniel J Blueman --- arch/x86/include/asm/amd_nb.h| 63 +++- arch/x86/include/asm/numachip/numachip.h | 22 ++ arch/x86/kernel/amd_gart_64.c|8 +- arch/x86/kernel/amd_nb.c | 85 - arch/x86/pci/numachip.c | 121 ++ drivers/char/agp/amd64-agp.c | 12 +-- drivers/edac/amd64_edac.c| 34 + drivers/edac/amd64_edac.h|6 -- 8 files changed, 283 insertions(+), 68 deletions(-) create mode 100644 arch/x86/include/asm/numachip/numachip.h create mode 100644 arch/x86/pci/numachip.c diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index b3341e9..6a27226 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -4,6 +4,8 @@ #include #include +#define NUM_POSSIBLE_NBS 8 + struct amd_nb_bus_dev_range { u8 bus; u8 dev_base; @@ -51,12 +53,22 @@ struct amd_northbridge { struct pci_dev *link; struct amd_l3_cache l3_cache; struct threshold_bank *bank4; + u16 node; + struct list_head nbl; }; struct amd_northbridge_info { u16 num; u64 flags; - struct amd_northbridge *nb; + + /* +* The first 8 elems are for fast lookup of NB descriptors on single- +* system setups, i.e. "normal" boxes. The nb_list, OTOH, is list of +* additional NB descriptors which exist on confederate systems +* like using Numascale's Numaconnect/NumaChip. 
+*/ + struct amd_northbridge *nbs[NUM_POSSIBLE_NBS]; + struct list_head nb_list; }; extern struct amd_northbridge_info amd_northbridges; @@ -78,7 +90,54 @@ static inline bool amd_nb_has_feature(unsigned feature) static inline struct amd_northbridge *node_to_amd_nb(int node) { - return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; + struct amd_northbridge_info *nbi = &amd_northbridges; + struct amd_northbridge *nb; + int i; + + /* Quick search for first domain */ + if (node < NUM_POSSIBLE_NBS) { + if (node < nbi->num) + return nbi->nbs[node]; + else + return NULL; + } + + /* Search for NBs from later domains in array */ + for (i = 0; i < NUM_POSSIBLE_NBS; i++) + if (nbi->nbs[i]->node == node) + return nbi->nbs[i]; + + list_for_each_entry(nb, &nbi->nb_list, nbl) + if (node == nb->node) + return nb; + + return NULL; +} + +static inline struct amd_northbridge *index_to_amd_nb(int index) +{ + struct amd_northbridge_info *nbi = &amd_northbridges; + struct amd_northbridge *nb; + int count = NUM_POSSIBLE_NBS; + + if (index < NUM_POSSIBLE_NBS) { + if (index < nbi->num) + return nbi->nbs[index]; + else + return NULL; + } + + list_for_each_entry(nb, &nbi->nb_list, nbl) { + if (count++ == index) + return nb; + } + + return NULL; +} + +static inline u16 amd_get_node_id(struct pci_dev *pdev) +{ + return (pci_domain_nr(pdev->bus) << 3) | (PCI_SLOT(pdev->devfn) - 0x18); } #else diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index e663112..4f56487 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -573,7 +573,7 @@ static void enable_gart_translations(void) return; for (i = 0; i < amd_nb_num(); i++) { - struct pci_dev *dev = node_to_amd_nb(i)->misc; + struct pci_dev *dev = index_to_amd_nb(i)->misc; enable_gart_translation(dev, __pa(agp_gatt_table)); } @@ -610,7 +610,7 @@ static void gart_fixup_northbridges(void) pr_info("PCI-DMA: Restoring GART aperture settings\n"); for (i = 0; i < amd_nb_num(); i++) { 
- struct pci_dev *dev = node_to_amd_nb(i)->misc; +
[2.6.24-rc8] page allocation failure...
32 kB NFS_Unstable:0 kB Bounce: 0 kB CommitLimit:761392 kB Committed_AS: 541628 kB VmallocTotal: 34359738367 kB VmallocUsed:264820 kB VmallocChunk: 34359473499 kB --- /proc/zoneinfo Node 0, zone DMA pages free 1259 min 10 low 12 high 15 scanned 0 (a: 2 i: 16) spanned 4096 present 2559 nr_free_pages 1259 nr_inactive 66 nr_active7 nr_anon_pages 0 nr_mapped0 nr_file_pages 73 nr_dirty 0 nr_writeback 0 nr_slab_reclaimable 1296 nr_slab_unreclaimable 65 nr_page_table_pages 0 nr_unstable 0 nr_bounce0 nr_vmscan_write 787 protection: (0, 992, 992, 992) pagesets cpu: 0 pcp: 0 count: 0 high: 0 batch: 1 cpu: 0 pcp: 1 count: 0 high: 0 batch: 1 vm stats threshold: 4 cpu: 1 pcp: 0 count: 0 high: 0 batch: 1 cpu: 1 pcp: 1 count: 0 high: 0 batch: 1 vm stats threshold: 4 all_unreclaimable: 0 prev_priority: 12 start_pfn: 0 Node 0, zoneDMA32 pages free 12514 min 1002 low 1252 high 1503 scanned 0 (a: 0 i: 0) spanned 257504 present 253984 nr_free_pages 12514 nr_inactive 60499 nr_active115446 nr_anon_pages 93837 nr_mapped4550 nr_file_pages 82403 nr_dirty 7 nr_writeback 0 nr_slab_reclaimable 56383 nr_slab_unreclaimable 4290 nr_page_table_pages 1008 nr_unstable 0 nr_bounce0 nr_vmscan_write 7348 protection: (0, 0, 0, 0) pagesets cpu: 0 pcp: 0 count: 102 high: 186 batch: 31 cpu: 0 pcp: 1 count: 64 high: 62 batch: 15 vm stats threshold: 16 cpu: 1 pcp: 0 count: 102 high: 186 batch: 31 cpu: 1 pcp: 1 count: 65 high: 62 batch: 15 vm stats threshold: 16 all_unreclaimable: 0 prev_priority: 12 start_pfn: 4096 -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2] Add NumaChip remote PCI support
Add NumaChip-specific PCI access mechanism via MMCONFIG cycles, but preventing access to AMD Northbridges which shouldn't respond. v2: Use PCI_DEVFN in precomputed constant limit; drop unneeded includes Signed-off-by: Daniel J Blueman --- arch/x86/include/asm/numachip/numachip.h | 20 + arch/x86/kernel/apic/apic_numachip.c |2 + arch/x86/pci/Makefile|1 + arch/x86/pci/numachip.c | 134 ++ 4 files changed, 157 insertions(+) create mode 100644 arch/x86/include/asm/numachip/numachip.h create mode 100644 arch/x86/pci/numachip.c diff --git a/arch/x86/include/asm/numachip/numachip.h b/arch/x86/include/asm/numachip/numachip.h new file mode 100644 index 000..d35e71a --- /dev/null +++ b/arch/x86/include/asm/numachip/numachip.h @@ -0,0 +1,20 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Numascale NumaConnect-specific header file + * + * Copyright (C) 2012 Numascale AS. All rights reserved. 
+ * + * Send feedback to + * + */ + +#ifndef _ASM_X86_NUMACHIP_NUMACHIP_H +#define _ASM_X86_NUMACHIP_NUMACHIP_H + +extern int __init pci_numachip_init(void); + +#endif /* _ASM_X86_NUMACHIP_NUMACHIP_H */ + diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index a65829a..9c2aa89 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -179,6 +180,7 @@ static int __init numachip_system_init(void) return 0; x86_cpuinit.fixup_cpu_id = fixup_cpu_id; + x86_init.pci.arch_init = pci_numachip_init; map_csrs(); diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index 3af5a1e..ee0af58 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_STA2X11) += sta2x11-fixup.o obj-$(CONFIG_X86_VISWS)+= visws.o obj-$(CONFIG_X86_NUMAQ)+= numaq_32.o +obj-$(CONFIG_X86_NUMACHIP) += numachip.o obj-$(CONFIG_X86_INTEL_MID)+= mrst.o diff --git a/arch/x86/pci/numachip.c b/arch/x86/pci/numachip.c new file mode 100644 index 000..3773e05 --- /dev/null +++ b/arch/x86/pci/numachip.c @@ -0,0 +1,129 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Numascale NumaConnect-specific PCI code + * + * Copyright (C) 2012 Numascale AS. All rights reserved. 
+ * + * Send feedback to + * + * PCI accessor functions derived from mmconfig_64.c + * + */ + +#include +#include + +static u8 limit __read_mostly; + +static inline char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) +{ + struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus); + + if (cfg && cfg->virt) + return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12)); + return NULL; +} + +static int pci_mmcfg_read_numachip(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 *value) +{ + char __iomem *addr; + + /* Why do we have this when nobody checks it. How about a BUG()!? -AK */ + if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) { +err: *value = -1; + return -EINVAL; + } + + /* Ensure AMD Northbridges don't decode reads to other devices */ + if (unlikely(bus == 0 && devfn >= limit)) { + *value = -1; + return 0; + } + + rcu_read_lock(); + addr = pci_dev_base(seg, bus, devfn); + if (!addr) { + rcu_read_unlock(); + goto err; + } + + switch (len) { + case 1: + *value = mmio_config_readb(addr + reg); + break; + case 2: + *value = mmio_config_readw(addr + reg); + break; + case 4: + *value = mmio_config_readl(addr + reg); + break; + } + rcu_read_unlock(); + + return 0; +} + +static int pci_mmcfg_write_numachip(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 value) +{ + char __iomem *addr; + + /* Why do we have this when nobody checks it. How about a BUG()!? -AK */ + if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) + return -EINVAL; + + /* Ensure AMD Northbridges don't decode writes to other devices */ + if (unlikely(bus == 0 && devfn >= limit)) + return 0; + + rcu_read_lock(); + addr = pci_dev_base(seg, bus,
[PATCH] Fix nouveau hang after switcheroo
After switcherooing to integrated and starting X, when X fails to start and causes a console switch, we get hit with a hanger (below). Fix by checking if we're already in D3. BUG: soft lockup - CPU#0 stuck for 22s! [Xorg:1703] [] nv04_timer_read+0x28/0x70 [nouveau] [] nouveau_timer_wait_eq+0x7c/0xe0 [nouveau] [] nvd0_sor_dpms+0xde/0x1a0 [nouveau] [] ? fb_set_var+0xe9/0x3a0 [] ? __pte_alloc+0xa9/0x160 [] ? nvd0_sor_dp_link_set+0x2c0/0x2c0 [nouveau] [] drm_helper_connector_dpms+0xbc/0x100 [drm_kms_helper] [] drm_fb_helper_dpms.isra.13+0xa5/0xf0 [drm_kms_helper] [] drm_fb_helper_blank+0x49/0x80 [drm_kms_helper] [] fb_blank+0x56/0xc0 [] do_fb_ioctl+0x59b/0x5f0 [] ? vma_interval_tree_insert+0x83/0x90 [] fb_ioctl+0x45/0x50 [] do_vfs_ioctl+0x8a/0x340 [] sys_ioctl+0x91/0xb0 Signed-off-by: Daniel J Blueman --- drivers/gpu/drm/nouveau/nvd0_display.c |4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvd0_display.c b/drivers/gpu/drm/nouveau/nvd0_display.c index c402fca..c3285bf 100644 --- a/drivers/gpu/drm/nouveau/nvd0_display.c +++ b/drivers/gpu/drm/nouveau/nvd0_display.c @@ -1364,6 +1364,10 @@ nvd0_sor_dpms(struct drm_encoder *encoder, int mode) int or = nv_encoder->or; u32 dpms_ctrl; + /* prevent hanging after hardware is in D3 */ + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) + return; + nv_encoder->last_dpms = mode; list_for_each_entry(partner, &dev->mode_config.encoder_list, head) { -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/2] HDA: Fix digital microphone on CS420x
Correctly enable the digital microphones with the right bits in the right coefficient registers on Cirrus CS4206/7 codecs. It also prevents misconfiguring ADC1/2. This fixes the digital mic on the Macbook Pro 10,1/Retina. Based-on-patch-by: Alexander Stein Signed-off-by: Daniel J Blueman --- sound/pci/hda/patch_cirrus.c | 16 ++-- 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index 61a7113..859a119 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -101,8 +101,8 @@ enum { #define CS420X_VENDOR_NID 0x11 #define CS_DIG_OUT1_PIN_NID0x10 #define CS_DIG_OUT2_PIN_NID0x15 -#define CS_DMIC1_PIN_NID 0x12 -#define CS_DMIC2_PIN_NID 0x0e +#define CS_DMIC1_PIN_NID 0x0e +#define CS_DMIC2_PIN_NID 0x12 /* coef indices */ #define IDX_SPDIF_STAT 0x @@ -1079,14 +1079,18 @@ static void init_input(struct hda_codec *codec) cs_automic(codec, NULL); coef = 0x000a; /* ADC1/2 - Digital and Analog Soft Ramp */ + cs_vendor_coef_set(codec, IDX_ADC_CFG, coef); + + coef = cs_vendor_coef_get(codec, IDX_BEEP_CFG); if (is_active_pin(codec, CS_DMIC2_PIN_NID)) - coef |= 0x0500; /* DMIC2 2 chan on, GPIO1 off */ + coef |= 1 << 4; /* DMIC2 2 chan on, GPIO1 off */ if (is_active_pin(codec, CS_DMIC1_PIN_NID)) - coef |= 0x1800; /* DMIC1 2 chan on, GPIO0 off + coef |= 1 << 3; /* DMIC1 2 chan on, GPIO0 off * No effect if SPDIF_OUT2 is * selected in IDX_SPDIF_CTL. */ - cs_vendor_coef_set(codec, IDX_ADC_CFG, coef); + + cs_vendor_coef_set(codec, IDX_BEEP_CFG, coef); } else { if (spec->mic_detect) cs_automic(codec, NULL); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/2] HDA: Mark CS260x immutable structures const
Mark structures that won't change const. Signed-off-by: Daniel J Blueman --- sound/pci/hda/patch_cirrus.c |5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index 859a119..d5f3a26 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -1732,8 +1732,7 @@ static int cs421x_mux_enum_put(struct snd_kcontrol *kcontrol, } -static struct snd_kcontrol_new cs421x_capture_source = { - +static const struct snd_kcontrol_new cs421x_capture_source = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Capture Source", .access = SNDRV_CTL_ELEM_ACCESS_READWRITE, @@ -1950,7 +1949,7 @@ static int cs421x_suspend(struct hda_codec *codec) } #endif -static struct hda_codec_ops cs421x_patch_ops = { +static const struct hda_codec_ops cs421x_patch_ops = { .build_controls = cs421x_build_controls, .build_pcms = cs_build_pcms, .init = cs421x_init, -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/3, v5] AMD64 EDAC: Add multi-domain support
Fix the handling of memory controller detection to index the array of detected Northbridges, allowing memory controllers over multiple PCI domains in federated systems eg using Numascale's NumaConnect/ NumaChip. v4: Generate linear Northbridge ID by indexing detected Northbridges v5: Reorder functions to prevent extra function declaration; merge 4th patch; simplify Fam15h code; add detail to warning Signed-off-by: Daniel J Blueman --- arch/x86/include/asm/amd_nb.h | 13 +++ drivers/edac/amd64_edac.c | 49 ++--- drivers/edac/amd64_edac.h |6 - 3 files changed, 39 insertions(+), 29 deletions(-) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index b3341e9..9f5532a 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -81,6 +81,19 @@ static inline struct amd_northbridge *node_to_amd_nb(int node) return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; } +static inline u16 amd_get_node_id(struct pci_dev *pdev) +{ + int i; + + for (i = 0; i != amd_nb_num(); i++) + if (pci_domain_nr(node_to_amd_nb(i)->misc->bus) == pci_domain_nr(pdev->bus) && + PCI_SLOT(node_to_amd_nb(i)->misc->devfn) == PCI_SLOT(pdev->devfn)) + return i; + + WARN(1, "Unable to find AMD Northbridge identifier for %s\n", pci_name(pdev)); + return 0; +} + #else #define amd_nb_num(x) 0 diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index cc8e7c7..852f1cd 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -982,6 +982,24 @@ static u64 get_error_address(struct mce *m) return addr; } +static struct pci_dev *pci_get_related_function(unsigned int vendor, + unsigned int device, + struct pci_dev *related) +{ + struct pci_dev *dev = NULL; + + dev = pci_get_device(vendor, device, dev); + while (dev) { + if (pci_domain_nr(dev->bus) == pci_domain_nr(related->bus) && + (dev->bus->number == related->bus->number) && + (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn))) + break; + dev = pci_get_device(vendor, 
device, dev); + } + + return dev; +} + static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -1001,11 +1019,13 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) /* Factor in CC6 save area by reading dst node's limit reg */ if (c->x86 == 0x15) { - struct pci_dev *f1 = NULL; - u8 nid = dram_dst_node(pvt, range); + struct pci_dev *misc, *f1 = NULL; + struct amd64_family_type *fam_type; + u16 nid = dram_dst_node(pvt, range); u32 llim; - f1 = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x18 + nid, 1)); + misc = node_to_amd_nb(nid)->misc; + f1 = pci_get_related_function(misc->vendor, PCI_DEVICE_ID_AMD_15H_NB_F1, misc); if (WARN_ON(!f1)) return; @@ -1712,23 +1732,6 @@ static struct amd64_family_type amd64_family_types[] = { }, }; -static struct pci_dev *pci_get_related_function(unsigned int vendor, - unsigned int device, - struct pci_dev *related) -{ - struct pci_dev *dev = NULL; - - dev = pci_get_device(vendor, device, dev); - while (dev) { - if ((dev->bus->number == related->bus->number) && - (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn))) - break; - dev = pci_get_device(vendor, device, dev); - } - - return dev; -} - /* * These are tables of eigenvectors (one per line) which can be used for the * construction of the syndrome tables. The modified syndrome search algorithm @@ -2546,7 +2549,7 @@ static int amd64_init_one_instance(struct pci_dev *F2) struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; int err = 0, ret; - u8 nid = get_node_id(F2); + u8 nid = amd_get_node_id(F2); ret = -ENOMEM; pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); @@ -2637,7 +2640,7 @@ err_ret: static int __devinit amd64_probe_one_instance(struct pci_dev *pdev, const struct pci_device_id *mc_type) { - u8 nid = get_node_id(pdev); + u8 nid = amd_get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
[PATCH 2/3, v3] AMD64 EDAC: Support >255 memory controllers
As the AMD64 last-level-cache ID is 16-bits and federated systems eg using Numascale's NumaConnect/NumaChip can have more than 255 memory controllers, use 16-bits to store the ID. v2: Avoid change to intlv_en variable v3: Drop unneeded change to index Signed-off-by: Daniel J Blueman --- drivers/edac/amd64_edac.c | 19 ++- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 852f1cd..5dfe452 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -942,7 +942,8 @@ static u64 get_error_address(struct mce *m) struct amd64_pvt *pvt; u64 cc6_base, tmp_addr; u32 tmp; - u8 mce_nid, intlv_en; + u16 mce_nid; + u8 intlv_en; if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7) return addr; @@ -2299,7 +2300,7 @@ out: return ret; } -static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) +static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on) { cpumask_var_t cmask; int cpu; @@ -2337,7 +2338,7 @@ static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) return 0; } -static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid, +static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid, struct pci_dev *F3) { bool ret = true; @@ -2389,7 +2390,7 @@ static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid, return ret; } -static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid, +static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid, struct pci_dev *F3) { u32 value, mask = 0x3; /* UECC/CECC enable */ @@ -2428,7 +2429,7 @@ static const char *ecc_msg = "'ecc_enable_override'.\n" " (Note that use of the override may cause unknown side effects.)\n"; -static bool ecc_enabled(struct pci_dev *F3, u8 nid) +static bool ecc_enabled(struct pci_dev *F3, u16 nid) { u32 value; u8 ecc_en = 0; @@ -2549,7 +2550,7 @@ static int amd64_init_one_instance(struct pci_dev *F2) struct mem_ctl_info *mci 
= NULL; struct edac_mc_layer layers[2]; int err = 0, ret; - u8 nid = amd_get_node_id(F2); + u16 nid = amd_get_node_id(F2); ret = -ENOMEM; pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); @@ -2640,7 +2641,7 @@ err_ret: static int __devinit amd64_probe_one_instance(struct pci_dev *pdev, const struct pci_device_id *mc_type) { - u8 nid = amd_get_node_id(pdev); + u16 nid = amd_get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s; int ret = 0; @@ -2690,7 +2691,7 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev) { struct mem_ctl_info *mci; struct amd64_pvt *pvt; - u8 nid = amd_get_node_id(pdev); + u16 nid = amd_get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s = ecc_stngs[nid]; -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 3/3, v2] AMD64 EDAC: Cleanup type usage to be consistent
As the Northbridge IDs are at most 16-bits, use the same type consistently and cleanup some indexes to use smaller types. v2: Drop unneeded changes and changes Boris will cleanup later Signed-off-by: Daniel J Blueman --- arch/x86/include/asm/amd_nb.h|2 +- arch/x86/include/asm/processor.h |2 +- arch/x86/kernel/cpu/amd.c|4 ++-- drivers/edac/amd64_edac.c| 14 +++--- drivers/edac/amd64_edac.h|6 +++--- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 9f5532a..b0815a0 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -76,7 +76,7 @@ static inline bool amd_nb_has_feature(unsigned feature) return ((amd_northbridges.flags & feature) == feature); } -static inline struct amd_northbridge *node_to_amd_nb(int node) +static inline struct amd_northbridge *node_to_amd_nb(u16 node) { return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; } diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index ad1fc85..eb3ba58 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -934,7 +934,7 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, extern int get_tsc_mode(unsigned long adr); extern int set_tsc_mode(unsigned int val); -extern int amd_get_nb_id(int cpu); +extern u16 amd_get_nb_id(int cpu); struct aperfmperf { u64 aperf, mperf; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f7e98a2..52cab1f 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -364,9 +364,9 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) #endif } -int amd_get_nb_id(int cpu) +u16 amd_get_nb_id(int cpu) { - int id = 0; + u16 id = 0; #ifdef CONFIG_SMP id = per_cpu(cpu_llc_id, cpu); #endif diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 5dfe452..a3e297a 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ 
-239,7 +239,7 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci) * DRAM base/limit associated with node_id */ static bool amd64_base_limit_match(struct amd64_pvt *pvt, u64 sys_addr, - unsigned nid) + u8 nid) { u64 addr; @@ -265,7 +265,7 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci, u64 sys_addr) { struct amd64_pvt *pvt; - unsigned node_id; + u8 node_id; u32 intlv_en, bits; /* @@ -1349,7 +1349,7 @@ static u8 f1x_determine_channel(struct amd64_pvt *pvt, u64 sys_addr, } /* Convert the sys_addr to the normalized DCT address */ -static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, unsigned range, +static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range, u64 sys_addr, bool hi_rng, u32 dct_sel_base_addr) { @@ -1400,7 +1400,7 @@ static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, unsigned range, * checks if the csrow passed in is marked as SPARED, if so returns the new * spare row */ -static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow) +static int f10_process_possible_spare(struct amd64_pvt *pvt, u16 dct, int csrow) { int tmp_cs; @@ -1425,7 +1425,7 @@ static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow) * -EINVAL: NOT FOUND * 0..csrow = Chip-Select Row */ -static int f1x_lookup_addr_in_dct(u64 in_addr, u32 nid, u8 dct) +static int f1x_lookup_addr_in_dct(u64 in_addr, u16 nid, u8 dct) { struct mem_ctl_info *mci; struct amd64_pvt *pvt; @@ -2257,7 +2257,7 @@ static int init_csrows(struct mem_ctl_info *mci) } /* get all cores on this DCT */ -static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, unsigned nid) +static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid) { int cpu; @@ -2267,7 +2267,7 @@ static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, unsigned nid) } /* check MCG_CTL on all the cpus on this node */ -static bool amd64_nb_mce_bank_enabled_on_node(unsigned nid) +static bool amd64_nb_mce_bank_enabled_on_node(u16 nid) { 
cpumask_var_t mask; int cpu, nbe; diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 90cae61..a2ea6a4 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -332,7 +332,7 @@ struct amd64_pvt { /* pci_device handles which we utilize */ struct pci_dev *F1, *F2, *F3; - unsigned mc_node_id;/* MC index of this MC node */ + u16 mc_node_id; /* MC index of this MC node */ int ext_model; /* extended model value of this node */
[PATCH] Add Etron XHCI quirk to avoid warning spam
When using various USB3 devices with Etron XHCI controllers, we see a bunch of warnings: xhci_hcd 0000:02:00.0: WARN Successful completion on short TX: needs XHCI_TRUST_TX_LENGTH quirk? Acknowledge the issue by adding the quirk. Signed-off-by: Daniel J Blueman --- drivers/usb/host/xhci-pci.c | 11 +++ 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 18b231b..715ad11 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -95,10 +95,13 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->limit_active_eps = 64; xhci->quirks |= XHCI_SW_BW_CHECKING; } - if (pdev->vendor == PCI_VENDOR_ID_ETRON && - pdev->device == PCI_DEVICE_ID_ASROCK_P67) { - xhci->quirks |= XHCI_RESET_ON_RESUME; - xhci_dbg(xhci, "QUIRK: Resetting on resume\n"); + if (pdev->vendor == PCI_VENDOR_ID_ETRON) { + xhci->quirks |= XHCI_TRUST_TX_LENGTH; + + if (pdev->device == PCI_DEVICE_ID_ASROCK_P67) { + xhci->quirks |= XHCI_RESET_ON_RESUME; + xhci_dbg(xhci, "QUIRK: Resetting on resume\n"); + } } if (pdev->vendor == PCI_VENDOR_ID_VIA) xhci->quirks |= XHCI_RESET_ON_RESUME; -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] Add Etron XHCI quirk to avoid warning spam
On 27 July 2012 14:14, Sarah Sharp wrote: > On Fri, Jul 27, 2012 at 12:03:44PM +0800, Daniel J Blueman wrote: >> When various USB3 devices with Etron XHCI controllers, we see a bunch of >> warnings: >> xhci_hcd :02:00.0: WARN Successful completion on short TX: needs >> XHCI_TRUST_TX_LENGTH quirk? >> >> Acknowledge the issue by adding the quirk. >> >> Signed-off-by: Daniel J Blueman [] > I already have a patch in my queue for this. However, it keys off the > PCI_DEVICE_ID_ASROCK_P67 PCI device ID. Do you have another Etron > device with a different device ID that needs this quirk? Yes, the subsystem ID is different [1] (but Zotac program it the same as the vendor and device IDs here), however what you say suggests the problem is general to this Etron XHCI controller (1b6f:7023), as we'd suspect anyway. Thus the more general patch I posted makes better sense perhaps? Thanks, Daniel --- [1] # lspci -vs 02:00.0 02:00.0 USB controller: Etron Technology, Inc. EJ168 USB 3.0 Host Controller (rev 01) (prog-if 30 [XHCI]) Subsystem: Etron Technology, Inc. EJ168 USB 3.0 Host Controller [] # lspci -vns 02:00.0 02:00.0 0c03: 1b6f:7023 (rev 01) (prog-if 30 [XHCI]) Subsystem: 1b6f:7023 -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] Add Etron XHCI quirk to avoid warning spam
On 28 July 2012 01:10, Sarah Sharp wrote: > On Fri, Jul 27, 2012 at 02:40:56PM +0800, Daniel J Blueman wrote: >> On 27 July 2012 14:14, Sarah Sharp wrote: >> > On Fri, Jul 27, 2012 at 12:03:44PM +0800, Daniel J Blueman wrote: >> >> When various USB3 devices with Etron XHCI controllers, we see a bunch of >> >> warnings: >> >> xhci_hcd :02:00.0: WARN Successful completion on short TX: needs >> >> XHCI_TRUST_TX_LENGTH quirk? >> >> >> >> Acknowledge the issue by adding the quirk. >> >> >> >> Signed-off-by: Daniel J Blueman >> [] >> > I already have a patch in my queue for this. However, it keys off the >> > PCI_DEVICE_ID_ASROCK_P67 PCI device ID. Do you have another Etron >> > device with a different device ID that needs this quirk? >> >> Yes, the subsystem ID is different [1] (but Zotac program it the same >> as the vendor and device IDs here), however what you say suggests the >> problem is general to this Etron XHCI controller (1b6f:7023), as we'd >> suspect anyway. >> >> Thus the more general patch I posted makes better sense perhaps? > > I'd really like to keep this quirk specific to the particular PCI vendor > and device ID. It's possible that their next chip version will have the > opposite issue (short TX completion code and bad untransferred length). > > Your patch turned it on for all Etron hosts, so I would rather keep my > version: > > http://git.kernel.org/?p=linux/kernel/git/sarah/xhci.git;a=commit;h=12751f75720391bb2b607acdb2537f02e313251e [] Ok, the patch is correct since PCI_DEVICE_ID_ASROCK_P67 evaluates to 0x7023, which is the Etron EJ168 device ID. Board-specific IDs (as the definition name suggests) are used for the subsystem IDs, so this name is misleading (and misled me at least). Can you fix this up with a patch to change PCI_DEVICE_ID_ASROCK_P67 to PCI_DEVICE_ID_ETRON_EJ168, else I can cook and test a patch? 
Thanks, Daniel -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[3.6-rc7] switcheroo race with Intel HDA...
On my Macbook with a discrete Nvidia GPU, there is a race between selecting the integrated GPU and putting the discrete GPU into D3 [1], reliably causing a kernel oops [2]. Introducing a delay of ~1s between the calls prevents this. When the second 'OFF' write path executes, it looks like struct azx at card->private_data hasn't been allocated yet [3], so there is likely some locking missing. I'm happy to perform further testing and debugging, of course... Thanks, Daniel --- [1] echo IGD > /sys/kernel/debug/vgaswitcheroo/switch echo OFF > /sys/kernel/debug/vgaswitcheroo/switch --- [2] BUG: unable to handle kernel NULL pointer dereference at 0170 IP: [] azx_vs_set_state+0x26/0x178 [snd_hda_intel] PGD 259c26067 PUD 25a0fd067 PMD 0 Oops: [#1] SMP DEBUG_PAGEALLOC Modules linked in: snd_hda_codec_hdmi bnep rfcomm b43 joydev nfsd ssb nfs_acl auth_rpcgss binfmt_misc nfs lockd sunrpc uvcvideo bcm5974 videobuf2_core videobuf2_vmalloc videobuf2_memops coretemp kvm_intel snd_hda_codec_cirrus kvm applesmc input_polldev microcode bcma lpc_ich mfd_core mei snd_hda_intel(+) snd_hda_codec snd_hwdep snd_pcm snd_timer snd snd_page_alloc nls_iso8859_1 apple_gmux mac_hid apple_bl btrfs hid_apple sdhci_pci ghash_clmulni_intel tg3 sdhci i915 nouveau ttm drm_kms_helper hwmon mxm_wmi video CPU 2 Pid: 961, comm: sh Not tainted 3.6.0-rc7 #2 Apple Inc. 
MacBookPro10,1/Mac-C3EC7CD22292981F RIP: 0010:[] [] azx_vs_set_state+0x26/0x178 [snd_hda_intel] RSP: 0018:880264271e48 EFLAGS: 00010286 RAX: RBX: 88025a2f5280 RCX: RDX: 0006 RSI: RDI: 880265479098 RBP: 880264271e68 R08: R09: R10: R11: R12: 880265479098 R13: R14: 880264271f50 R15: FS: 7fa4fe183700() GS:88026f28() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 0170 CR3: 0002641a7000 CR4: 001407e0 DR0: DR1: DR2: DR3: DR6: 0ff0 DR7: 0400 Process sh (pid: 961, threadinfo 88026427, task 880264503a00) Stack: 88025a2f5280 880264271e98 880264271e88 812e83a7 8802622835c0 0004 880264271ef8 812e89ac 88020a46464f 880264503a00 Call Trace: [] set_audio_state+0x67/0x70 [] vga_switcheroo_debugfs_write+0xbc/0x380 [] vfs_write+0xa3/0x160 [] sys_write+0x45/0xa0 [] system_call_fastpath+0x1a/0x1f Code: 00 00 00 00 00 55 48 89 e5 48 83 ec 20 4c 89 65 f0 4c 8d a7 98 00 00 00 4c 89 e7 48 89 5d e8 4c 89 6d f8 41 89 f5 e8 2a 35 13 e1 <48> 8b 98 70 01 00 00 0f b6 83 55 02 00 00 a8 08 75 34 45 85 ed RIP [] azx_vs_set_state+0x26/0x178 [snd_hda_intel] RSP CR2: 0170 --- [3] (gdb) list *(azx_vs_set_state+0x26) 0x2936 is in azx_vs_set_state (sound/pci/hda/hda_intel.c:2505). 2500 2501static void azx_vs_set_state(struct pci_dev *pci, 2502 enum vga_switcheroo_state state) 2503{ 2504struct snd_card *card = pci_get_drvdata(pci); 2505struct azx *chip = card->private_data; 2506bool disabled; 2507 2508if (chip->init_failed) 2509 return; -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v3] Add support for AMD64 EDAC on multiple PCI domains
On 25/10/2012 19:03, Borislav Petkov wrote: On Thu, Oct 25, 2012 at 04:32:52PM +0800, Daniel J Blueman wrote: The AMD Northbridge initialisation code and EDAC assume the Northbridge IDs are contiguous, which no longer holds on federated systems with multiple HyperTransport fabrics and multiple PCI domains, eg on Numascale's Numaconnect systems with NumaChip. Address this assumption by searching the Northbridge ID array, rather than directly indexing it, using the upper bits for the PCI domain. RFC->v2: Correct array initialisation v2->v3: Add Boris's neater linked list approach Todo: 1. fix kobject/sysfs oops (see http://quora.org/2012/16-server-boot.txt later) 2. reorder amd64_edac.c or add amd64_per_family_init/pci_get_related_function forward declarations, based on feedback Signed-off-by: Daniel J Blueman This patch contains code from both of us and thus needs both our SOBs: Signed-off-by: Borislav Petkov I'll use "Based-on-patch-from: Borislav Petkov ", great. --- arch/x86/include/asm/amd_nb.h| 63 +++- arch/x86/include/asm/numachip/numachip.h | 22 ++ arch/x86/kernel/amd_gart_64.c|8 +- arch/x86/kernel/amd_nb.c | 85 - arch/x86/pci/numachip.c | 121 ++ drivers/char/agp/amd64-agp.c | 12 +-- drivers/edac/amd64_edac.c| 34 + drivers/edac/amd64_edac.h|6 -- 8 files changed, 283 insertions(+), 68 deletions(-) create mode 100644 arch/x86/include/asm/numachip/numachip.h create mode 100644 arch/x86/pci/numachip.c diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index b3341e9..6a27226 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -4,6 +4,8 @@ #include #include +#define NUM_POSSIBLE_NBS 8 + struct amd_nb_bus_dev_range { u8 bus; u8 dev_base; @@ -51,12 +53,22 @@ struct amd_northbridge { struct pci_dev *link; struct amd_l3_cache l3_cache; struct threshold_bank *bank4; + u16 node; + struct list_head nbl; }; struct amd_northbridge_info { u16 num; u64 flags; - struct amd_northbridge *nb; + + /* +* The first 8 elems are 
for fast lookup of NB descriptors on single- +* system setups, i.e. "normal" boxes. The nb_list, OTOH, is list of +* additional NB descriptors which exist on confederate systems +* like using Numascale's Numaconnect/NumaChip. +*/ + struct amd_northbridge *nbs[NUM_POSSIBLE_NBS]; + struct list_head nb_list; }; extern struct amd_northbridge_info amd_northbridges; @@ -78,7 +90,54 @@ static inline bool amd_nb_has_feature(unsigned feature) static inline struct amd_northbridge *node_to_amd_nb(int node) { - return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; + struct amd_northbridge_info *nbi = &amd_northbridges; + struct amd_northbridge *nb; + int i; + + /* Quick search for first domain */ + if (node < NUM_POSSIBLE_NBS) { + if (node < nbi->num) + return nbi->nbs[node]; + else + return NULL; + } Why change that here from what I had before? nbi->nbs[node] will either return a valid descriptor or NULL because it is statically allocated in amd_northbridge_info. So why add a conditional where you clearly don't need it? True; fixed up. + /* Search for NBs from later domains in array */ + for (i = 0; i < NUM_POSSIBLE_NBS; i++) + if (nbi->nbs[i]->node == node) + return nbi->nbs[i]; And then this is not needed. Eg with two servers with two Northbridges per server, interconnected, Linux sees two PCI domains (bits 15:3) and the nbs array would have node IDs: [0x00] [0x01] [0x08] [0x09] Without that check, searching for node 0x08 would only hit the linked list, though this doesn't affect the fast-path (id < 0x8) of course. We can use the static array for only the first PCI domain by changing _alloc_nb_desc to use the list when nbi->node > NUM_POSSIBLE_NBS, rather than nbi->num; we'd then need to introduce a variable to struct amd_northbridge_info to keep track of how many static array entries are used, for a linear lookup in index_to_amd_nb. 
+ + list_for_each_entry(nb, &nbi->nb_list, nbl) + if (node == nb->node) + return nb; And why change the list_for_each_entry_safe variant? It is not needed now but who knows what code changes where in the future. Changed also. + + return NULL; +} + +static inline struct amd_northbridge *index_to_amd_nb(int index) +{ + struct amd_northbridge_inf
Re: [PATCH v3] Add support for AMD64 EDAC on multiple PCI domains
On 29/10/2012 14:17, Daniel J Blueman wrote: On 25/10/2012 19:03, Borislav Petkov wrote: On Thu, Oct 25, 2012 at 04:32:52PM +0800, Daniel J Blueman wrote: The AMD Northbridge initialisation code and EDAC assume the Northbridge IDs are contiguous, which no longer holds on federated systems with multiple HyperTransport fabrics and multiple PCI domains, eg on Numascale's Numaconnect systems with NumaChip. Address this assumption by searching the Northbridge ID array, rather than directly indexing it, using the upper bits for the PCI domain. RFC->v2: Correct array initialisation v2->v3: Add Boris's neater linked list approach Todo: 1. fix kobject/sysfs oops (see http://quora.org/2012/16-server-boot.txt later) 2. reorder amd64_edac.c or add amd64_per_family_init/pci_get_related_function forward declarations, based on feedback Signed-off-by: Daniel J Blueman This patch contains code from both of us and thus needs both our SOBs: Signed-off-by: Borislav Petkov I'll use "Based-on-patch-from: Borislav Petkov ", great. 
--- arch/x86/include/asm/amd_nb.h| 63 +++- arch/x86/include/asm/numachip/numachip.h | 22 ++ arch/x86/kernel/amd_gart_64.c|8 +- arch/x86/kernel/amd_nb.c | 85 - arch/x86/pci/numachip.c | 121 ++ drivers/char/agp/amd64-agp.c | 12 +-- drivers/edac/amd64_edac.c| 34 + drivers/edac/amd64_edac.h|6 -- 8 files changed, 283 insertions(+), 68 deletions(-) create mode 100644 arch/x86/include/asm/numachip/numachip.h create mode 100644 arch/x86/pci/numachip.c diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index b3341e9..6a27226 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -4,6 +4,8 @@ #include #include +#define NUM_POSSIBLE_NBS8 + struct amd_nb_bus_dev_range { u8 bus; u8 dev_base; @@ -51,12 +53,22 @@ struct amd_northbridge { struct pci_dev *link; struct amd_l3_cache l3_cache; struct threshold_bank *bank4; +u16 node; +struct list_head nbl; }; struct amd_northbridge_info { u16 num; u64 flags; -struct amd_northbridge *nb; + +/* + * The first 8 elems are for fast lookup of NB descriptors on single- + * system setups, i.e. "normal" boxes. The nb_list, OTOH, is list of + * additional NB descriptors which exist on confederate systems + * like using Numascale's Numaconnect/NumaChip. + */ +struct amd_northbridge *nbs[NUM_POSSIBLE_NBS]; +struct list_head nb_list; }; extern struct amd_northbridge_info amd_northbridges; @@ -78,7 +90,54 @@ static inline bool amd_nb_has_feature(unsigned feature) static inline struct amd_northbridge *node_to_amd_nb(int node) { -return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; +struct amd_northbridge_info *nbi = &amd_northbridges; +struct amd_northbridge *nb; +int i; + +/* Quick search for first domain */ +if (node < NUM_POSSIBLE_NBS) { +if (node < nbi->num) +return nbi->nbs[node]; +else +return NULL; +} Why change that here from what I had before? nbi->nbs[node] will either return a valid descriptor or NULL because it is statically allocated in amd_northbridge_info. 
So why add a conditional where you clearly don't need it? True; fixed up. +/* Search for NBs from later domains in array */ +for (i = 0; i < NUM_POSSIBLE_NBS; i++) +if (nbi->nbs[i]->node == node) +return nbi->nbs[i]; And then this is not needed. Eg with two servers with two Northbridges per server, interconnected, Linux sees two PCI domains (bits 15:3) and the nbs array would have node IDs: [0x00] [0x01] [0x08] [0x09] Without that check, searching for node 0x08 would only hit the linked list, though this doesn't affect the fast-path (id < 0x8) of course. We can use the static array for only the first PCI domain by changing _alloc_nb_desc to use the list when nbi->node > NUM_POSSIBLE_NBS, rather than nbi->num; we'd then need to introduce a variable to struct amd_northbridge_info to keep track of how many static array entries are used, for a linear lookup in index_to_amd_nb. + +list_for_each_entry(nb, &nbi->nb_list, nbl) +if (node == nb->node) +return nb; And why change the list_for_each_entry_safe variant? It is not needed now but who knows what code changes where in the future. Changed also. + +return NULL; +} + +static inline struct amd_northbridge *index_to_amd_nb(int index) +{ +struct amd_northbridge_info *nbi = &amd_northbridges; +struct amd_northbridge *nb; +int count = NUM_POSSIBLE_NBS; + +if (index <
Re: [PATCH v3] Add support for AMD64 EDAC on multiple PCI domains
On 29/10/2012 18:32, Borislav Petkov wrote: + Andreas. Dude, look at this boot log below: http://quora.org/2012/16-server-boot-2.txt That's 192 F10h's! We were booting 384 a while back, but I'll let you know when we reach 4096! On Mon, Oct 29, 2012 at 04:54:59PM +0800, Daniel J Blueman wrote: A number of other callers look up the PCI device based on index 0..amd_nb_num(), but we can't easily allocate contiguous northbridge IDs from the PCI device in the first place. OTOH we can simplify this code by changing amd_get_node_id to generate a linear northbridge ID from the index of the matching entry in the northbridge array. I'll get a patch together to see if there are any snags. I suspected that after we have this nice approach, you guys would come with non-contiguous node numbers. Maan, can't you build your systems so that software people can have it easy at least for once??! It depends on the definition of node, of course. The only change we're considering is compliance with the Intel x2apic spec with using the upper 16-bits of the APIC ID as the server ("cluster") ID, since there are optimisations in Linux for this. This really is a lot less intrusive [1] and boots well on top of 3.7-rc3 on one of our 16-server/192-core/512GB systems [2]. If you're happy with this simpler approach for now, I'll present this and a separate patch cleaning up the inconsistent use of unsigned and u8 node ID variables to u16? Sure, bring it on. Yes, I've prepared a patch series and it tests out well. diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index b3341e9..b88fc7a 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -81,6 +81,18 @@ static inline struct amd_northbridge *node_to_amd_nb(int node) return (node < amd_northbridges.num) ? 
&amd_northbridges.nb[node] : NULL; } +static inline u8 get_node_id(struct pci_dev *pdev) +{ + int i; + + for (i = 0; i != amd_nb_num(); i++) + if (pci_domain_nr(node_to_amd_nb(i)->misc->bus) == pci_domain_nr(pdev->bus) && + PCI_SLOT(node_to_amd_nb(i)->misc->devfn) == PCI_SLOT(pdev->devfn)) + return i; Looks ok, can you send the whole patch please? + BUG(); I'm not sure about this - maybe WARN()? Are we absolutely sure we unconditionally should panic after not finding an NB descriptor? It looks like the only way we could be looking up a non-existent NB descriptor is if the array or variable in hand was corrupted. Maybe it is better to panic immediately than have debugging be elusive later. I've tweaked this to warn and return the first Northbridge ID to avoid further issues, but even that isn't ideal. Btw, this shouldn't happen on those CPUs: [ 39.279131] TSC synchronization [CPU#0 -> CPU#12]: [ 39.287223] Measured 22750019569 cycles TSC warp between CPUs, turning off TSC clock. [0.03] tsc: Marking TSC unstable due to check_tsc_sync_source failed I guess TSCs are not starting at the same moment on all boards. As these are physically separate servers (off-the-shelf servers in fact, a key benefit of NumaConnect), the TSC clocks diverge. Later, I'll be cooking up a patch series to keep them in sync, allowing fast TSC use. You definitely need ucode on those too: [ 113.392460] microcode: CPU0: patch_level=0x Good tip! Thanks, Daniel -- Daniel J Blueman Principal Software Engineer, Numascale Asia -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/4, v4] AMD64 EDAC: Add multi-domain support to AMD EDAC
Fix the handling of memory controller detection to index the array of detected Northbridges, allowing memory controllers over multiple PCI domains in federated systems eg using Numascale's NumaConnect/ NumaChip. v4: Generate linear Northbridge ID by indexing detected Northbridges Signed-off-by: Daniel J Blueman --- arch/x86/include/asm/amd_nb.h | 12 drivers/edac/amd64_edac.c | 18 ++ drivers/edac/amd64_edac.h |6 -- 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index b3341e9..b88fc7a 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -81,6 +81,19 @@ static inline struct amd_northbridge *node_to_amd_nb(int node) return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; } +static inline u16 get_node_id(struct pci_dev *pdev) +{ + int i; + + for (i = 0; i != amd_nb_num(); i++) + if (pci_domain_nr(node_to_amd_nb(i)->misc->bus) == pci_domain_nr(pdev->bus) && + PCI_SLOT(node_to_amd_nb(i)->misc->devfn) == PCI_SLOT(pdev->devfn)) + return i; + + WARN(1, "Unable to find AMD Northbridge identifier\n"); + return 0; +} + #else #define amd_nb_num(x) 0 diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index cc8e7c7..18d404a 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -982,6 +982,9 @@ static u64 get_error_address(struct mce *m) return addr; } +static struct amd64_family_type *amd64_per_family_init(struct amd64_pvt *pvt); +static struct pci_dev *pci_get_related_function(unsigned int vendor, unsigned int device, struct pci_dev *related); + static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -1001,11 +1004,17 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) /* Factor in CC6 save area by reading dst node's limit reg */ if (c->x86 == 0x15) { - struct pci_dev *f1 = NULL; - u8 nid = dram_dst_node(pvt, range); + struct 
pci_dev *misc, *f1 = NULL; + struct amd64_family_type *fam_type; + u16 nid = dram_dst_node(pvt, range); u32 llim; - f1 = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x18 + nid, 1)); + misc = node_to_amd_nb(nid)->misc; + fam_type = amd64_per_family_init(pvt); + if (WARN_ON(!f1)) + return; + + f1 = pci_get_related_function(misc->vendor, fam_type->f1_id, misc); if (WARN_ON(!f1)) return; @@ -1720,7 +1729,8 @@ static struct pci_dev *pci_get_related_function(unsigned int vendor, dev = pci_get_device(vendor, device, dev); while (dev) { - if ((dev->bus->number == related->bus->number) && + if (pci_domain_nr(dev->bus) == pci_domain_nr(related->bus) && + (dev->bus->number == related->bus->number) && (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn))) break; dev = pci_get_device(vendor, device, dev); diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 8d48047..90cae61 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -290,12 +290,6 @@ /* MSRs */ #define MSR_MCGCTL_NBE BIT(4) -/* AMD sets the first MC device at device ID 0x18. */ -static inline u8 get_node_id(struct pci_dev *pdev) -{ - return PCI_SLOT(pdev->devfn) - 0x18; -} - enum amd_families { K8_CPUS = 0, F10_CPUS, -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/4] AMD64 EDAC: Add support for >255 memory controllers
As the AMD64 last-level-cache ID is 16-bits and federated systems eg using Numascale's NumaConnect/NumaChip can have more than 255 memory controllers, use 16-bits to store the ID. Signed-off-by: Daniel J Blueman --- drivers/edac/amd64_edac.c | 18 +- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 18d404a..9920dfd 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -942,7 +942,7 @@ static u64 get_error_address(struct mce *m) struct amd64_pvt *pvt; u64 cc6_base, tmp_addr; u32 tmp; - u8 mce_nid, intlv_en; + u16 mce_nid, intlv_en; if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7) return addr; @@ -1499,7 +1499,7 @@ static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range, u8 channel; bool high_range = false; - u8 node_id= dram_dst_node(pvt, range); + u16 node_id = dram_dst_node(pvt, range); u8 intlv_en = dram_intlv_en(pvt, range); u32 intlv_sel = dram_intlv_sel(pvt, range); @@ -2306,7 +2306,7 @@ out: return ret; } -static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) +static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on) { cpumask_var_t cmask; int cpu; @@ -2344,7 +2344,7 @@ static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) return 0; } -static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid, +static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid, struct pci_dev *F3) { bool ret = true; @@ -2396,7 +2396,7 @@ static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid, return ret; } -static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid, +static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid, struct pci_dev *F3) { u32 value, mask = 0x3; /* UECC/CECC enable */ @@ -2435,7 +2435,7 @@ static const char *ecc_msg = "'ecc_enable_override'.\n" " (Note that use of the override may cause unknown side effects.)\n"; -static bool 
ecc_enabled(struct pci_dev *F3, u8 nid) +static bool ecc_enabled(struct pci_dev *F3, u16 nid) { u32 value; u8 ecc_en = 0; @@ -2556,7 +2556,7 @@ static int amd64_init_one_instance(struct pci_dev *F2) struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; int err = 0, ret; - u8 nid = get_node_id(F2); + u16 nid = get_node_id(F2); ret = -ENOMEM; pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); @@ -2647,7 +2647,7 @@ err_ret: static int __devinit amd64_probe_one_instance(struct pci_dev *pdev, const struct pci_device_id *mc_type) { - u8 nid = get_node_id(pdev); + u16 nid = get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s; int ret = 0; @@ -2697,7 +2697,7 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev) { struct mem_ctl_info *mci; struct amd64_pvt *pvt; - u8 nid = get_node_id(pdev); + u16 nid = get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s = ecc_stngs[nid]; -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 3/4] AMD64 EDAC: Cleanup type usage to be consistent
As the Northbridge IDs are at most 16-bits, use the same type consistently. Signed-off-by: Daniel J Blueman --- arch/x86/include/asm/amd_nb.h|2 +- arch/x86/include/asm/processor.h |2 +- arch/x86/kernel/cpu/amd.c|4 ++-- drivers/edac/amd64_edac.c| 26 ++ drivers/edac/amd64_edac.h|2 +- 5 files changed, 19 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index b88fc7a..0cc1045 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -76,7 +76,7 @@ static inline bool amd_nb_has_feature(unsigned feature) return ((amd_northbridges.flags & feature) == feature); } -static inline struct amd_northbridge *node_to_amd_nb(int node) +static inline struct amd_northbridge *node_to_amd_nb(u16 node) { return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; } diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index ad1fc85..eb3ba58 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -934,7 +934,7 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, extern int get_tsc_mode(unsigned long adr); extern int set_tsc_mode(unsigned int val); -extern int amd_get_nb_id(int cpu); +extern u16 amd_get_nb_id(int cpu); struct aperfmperf { u64 aperf, mperf; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f7e98a2..52cab1f 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -364,9 +364,9 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) #endif } -int amd_get_nb_id(int cpu) +u16 amd_get_nb_id(int cpu) { - int id = 0; + u16 id = 0; #ifdef CONFIG_SMP id = per_cpu(cpu_llc_id, cpu); #endif diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 9920dfd..12cd675 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -239,7 +239,7 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci) * DRAM base/limit associated with node_id */ 
static bool amd64_base_limit_match(struct amd64_pvt *pvt, u64 sys_addr, - unsigned nid) + u16 nid) { u64 addr; @@ -265,7 +265,7 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci, u64 sys_addr) { struct amd64_pvt *pvt; - unsigned node_id; + u16 node_id; u32 intlv_en, bits; /* @@ -613,7 +613,8 @@ static u64 sys_addr_to_input_addr(struct mem_ctl_info *mci, u64 sys_addr) static u64 input_addr_to_dram_addr(struct mem_ctl_info *mci, u64 input_addr) { struct amd64_pvt *pvt; - unsigned node_id, intlv_shift; + u16 node_id; + unsigned intlv_shift; u64 bits, dram_addr; u32 intlv_sel; @@ -1337,7 +1338,7 @@ static u8 f1x_determine_channel(struct amd64_pvt *pvt, u64 sys_addr, } /* Convert the sys_addr to the normalized DCT address */ -static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, unsigned range, +static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u16 range, u64 sys_addr, bool hi_rng, u32 dct_sel_base_addr) { @@ -1413,7 +1414,7 @@ static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow) * -EINVAL: NOT FOUND * 0..csrow = Chip-Select Row */ -static int f1x_lookup_addr_in_dct(u64 in_addr, u32 nid, u8 dct) +static int f1x_lookup_addr_in_dct(u64 in_addr, u16 nid, u8 dct) { struct mem_ctl_info *mci; struct amd64_pvt *pvt; @@ -1491,7 +1492,7 @@ static u64 f1x_swap_interleaved_region(struct amd64_pvt *pvt, u64 sys_addr) /* For a given @dram_range, check if @sys_addr falls within it. 
*/ static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range, - u64 sys_addr, int *nid, int *chan_sel) + u64 sys_addr, u16 *nid, int *chan_sel) { int cs_found = -EINVAL; u64 chan_addr; @@ -1572,10 +1573,10 @@ static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range, } static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr, - int *node, int *chan_sel) + u16 *node, int *chan_sel) { int cs_found = -EINVAL; - unsigned range; + u16 range; for (range = 0; range < DRAM_RANGES; range++) { @@ -1607,7 +1608,8 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, { struct amd64_pvt *pvt = mci->pvt_info; u32 page, offset; - int nid, csrow, chan = 0; + int csrow, chan = 0; + u16 nid; error_address_to_page_and_offset(sys_addr, &page, &offs
[PATCH 4/4] AMD64 EDAC: Use appropriate name for NB indexing
Use the same 'amd' prefix as related functions for clarity. Signed-off-by: Daniel J Blueman --- arch/x86/include/asm/amd_nb.h |2 +- drivers/edac/amd64_edac.c |6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 0cc1045..39b5ddd 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -81,7 +81,7 @@ static inline struct amd_northbridge *node_to_amd_nb(u16 node) return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; } -static inline u16 get_node_id(struct pci_dev *pdev) +static inline u16 amd_get_node_id(struct pci_dev *pdev) { int i; diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 12cd675..59658b9 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2558,7 +2558,7 @@ static int amd64_init_one_instance(struct pci_dev *F2) struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; int err = 0, ret; - u16 nid = get_node_id(F2); + u16 nid = amd_get_node_id(F2); ret = -ENOMEM; pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); @@ -2649,7 +2649,7 @@ err_ret: static int __devinit amd64_probe_one_instance(struct pci_dev *pdev, const struct pci_device_id *mc_type) { - u16 nid = get_node_id(pdev); + u16 nid = amd_get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s; int ret = 0; @@ -2699,7 +2699,7 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev) { struct mem_ctl_info *mci; struct amd64_pvt *pvt; - u16 nid = get_node_id(pdev); + u16 nid = amd_get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s = ecc_stngs[nid]; -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/4] AMD64 EDAC: Add support for >255 memory controllers
On 31/10/2012 16:18, Torsten Kaiser wrote: On Wed, Oct 31, 2012 at 6:55 AM, Daniel J Blueman wrote: As the AMD64 last-level-cache ID is 16-bits and federated systems eg using Numascale's NumaConnect/NumaChip can have more than 255 memory controllers, use 16-bits to store the ID. Signed-off-by: Daniel J Blueman --- drivers/edac/amd64_edac.c | 18 +- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 18d404a..9920dfd 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -942,7 +942,7 @@ static u64 get_error_address(struct mce *m) struct amd64_pvt *pvt; u64 cc6_base, tmp_addr; u32 tmp; - u8 mce_nid, intlv_en; + u16 mce_nid, intlv_en; Is the change of intlv_en to u16 intentional? I assume its not, because... It's unintentional. Elsewhere, intlv_en is declared as unsigned, so perhaps that should be cleaned up later too. I'll issue an updated patch. if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7) return addr; @@ -1499,7 +1499,7 @@ static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range, u8 channel; bool high_range = false; - u8 node_id= dram_dst_node(pvt, range); + u16 node_id = dram_dst_node(pvt, range); u8 intlv_en = dram_intlv_en(pvt, range); ... here you keep it at u8. 
u32 intlv_sel = dram_intlv_sel(pvt, range); @@ -2306,7 +2306,7 @@ out: return ret; } -static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) +static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on) { cpumask_var_t cmask; int cpu; @@ -2344,7 +2344,7 @@ static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) return 0; } -static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid, +static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid, struct pci_dev *F3) { bool ret = true; @@ -2396,7 +2396,7 @@ static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid, return ret; } -static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid, +static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid, struct pci_dev *F3) { u32 value, mask = 0x3; /* UECC/CECC enable */ @@ -2435,7 +2435,7 @@ static const char *ecc_msg = "'ecc_enable_override'.\n" " (Note that use of the override may cause unknown side effects.)\n"; -static bool ecc_enabled(struct pci_dev *F3, u8 nid) +static bool ecc_enabled(struct pci_dev *F3, u16 nid) { u32 value; u8 ecc_en = 0; @@ -2556,7 +2556,7 @@ static int amd64_init_one_instance(struct pci_dev *F2) struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; int err = 0, ret; - u8 nid = get_node_id(F2); + u16 nid = get_node_id(F2); ret = -ENOMEM; pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); @@ -2647,7 +2647,7 @@ err_ret: static int __devinit amd64_probe_one_instance(struct pci_dev *pdev, const struct pci_device_id *mc_type) { - u8 nid = get_node_id(pdev); + u16 nid = get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s; int ret = 0; @@ -2697,7 +2697,7 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev) { struct mem_ctl_info *mci; struct amd64_pvt *pvt; - u8 nid = get_node_id(pdev); + u16 nid = get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; 
struct ecc_settings *s = ecc_stngs[nid]; -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ -- Daniel J Blueman Principal Software Engineer, Numascale Asia -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/4, v2] AMD64 EDAC: Add support for >255 memory controllers
As the AMD64 last-level-cache ID is 16-bits and federated systems eg using Numascale's NumaConnect/NumaChip can have more than 255 memory controllers, use 16-bits to store the ID. v2: Avoid change to intlv_en variable Signed-off-by: Daniel J Blueman --- drivers/edac/amd64_edac.c | 19 ++- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 18d404a..28b2005 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -942,7 +942,8 @@ static u64 get_error_address(struct mce *m) struct amd64_pvt *pvt; u64 cc6_base, tmp_addr; u32 tmp; - u8 mce_nid, intlv_en; + u16 mce_nid; + u8 intlv_en; if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7) return addr; @@ -1499,7 +1500,7 @@ static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range, u8 channel; bool high_range = false; - u8 node_id= dram_dst_node(pvt, range); + u16 node_id = dram_dst_node(pvt, range); u8 intlv_en = dram_intlv_en(pvt, range); u32 intlv_sel = dram_intlv_sel(pvt, range); @@ -2306,7 +2307,7 @@ out: return ret; } -static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) +static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on) { cpumask_var_t cmask; int cpu; @@ -2344,7 +2345,7 @@ static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) return 0; } -static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid, +static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid, struct pci_dev *F3) { bool ret = true; @@ -2396,7 +2397,7 @@ static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid, return ret; } -static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid, +static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid, struct pci_dev *F3) { u32 value, mask = 0x3; /* UECC/CECC enable */ @@ -2435,7 +2436,7 @@ static const char *ecc_msg = "'ecc_enable_override'.\n" " (Note that use of the override may 
cause unknown side effects.)\n"; -static bool ecc_enabled(struct pci_dev *F3, u8 nid) +static bool ecc_enabled(struct pci_dev *F3, u16 nid) { u32 value; u8 ecc_en = 0; @@ -2556,7 +2557,7 @@ static int amd64_init_one_instance(struct pci_dev *F2) struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; int err = 0, ret; - u8 nid = get_node_id(F2); + u16 nid = get_node_id(F2); ret = -ENOMEM; pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); @@ -2647,7 +2648,7 @@ err_ret: static int __devinit amd64_probe_one_instance(struct pci_dev *pdev, const struct pci_device_id *mc_type) { - u8 nid = get_node_id(pdev); + u16 nid = get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s; int ret = 0; @@ -2697,7 +2698,7 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev) { struct mem_ctl_info *mci; struct amd64_pvt *pvt; - u8 nid = get_node_id(pdev); + u16 nid = get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s = ecc_stngs[nid]; -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] Fix printing when no interrupt is allocated
Hi Len, On 19/10/2012 09:21, Joe Perches wrote: On Fri, 2012-10-19 at 08:33 +0800, Daniel J Blueman wrote: Previously a new line is implicitly added in the no GSI case: [7.185182] pci 0001:00:12.0: can't derive routing for PCI INT A [7.191352] pci 0001:00:12.0: PCI INT A: no GSI [7.195956] - using ISA IRQ 10 The code thus prints a blank line where no legacy IRQ is available: [1.650124] pci 0000:00:14.0: can't derive routing for PCI INT A [1.650126] pci 0000:00:14.0: PCI INT A: no GSI [1.650126] [1.650180] pci 0000:00:14.0: can't derive routing for PCI INT A Fix this by making the newline explicit and removing the superfluous one. I think this is a better fix: drivers/acpi/pci_irq.c | 11 ++- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index 0eefa12..9b98f9f 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -459,19 +459,20 @@ int acpi_pci_irq_enable(struct pci_dev *dev) */ if (gsi < 0) { u32 dev_gsi; - dev_warn(&dev->dev, "PCI INT %c: no GSI", pin_name(pin)); /* Interrupt Line values above 0xF are forbidden */ if (dev->irq > 0 && (dev->irq <= 0xF) && (acpi_isa_irq_to_gsi(dev->irq, &dev_gsi) == 0)) { - printk(" - using ISA IRQ %d\n", dev->irq); + dev_warn(&dev->dev, +"PCI INT %c: no GSI - using ISA IRQ %d\n", +pin_name(pin), dev->irq); acpi_register_gsi(&dev->dev, dev_gsi, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW); - return 0; } else { - printk("\n"); - return 0; + dev_warn(&dev->dev, "PCI INT %c: no GSI\n", +pin_name(pin)); } + return 0; } rc = acpi_register_gsi(&dev->dev, gsi, triggering, polarity); We're still seeing this in 3.7-rc3. Is there a preference between the two approaches? Thanks, Daniel -- Daniel J Blueman Principal Software Engineer, Numascale Asia -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
BCM57765 card reader: irq nobody cared
With the Broadcom BCM57765 card reader [1] in my Macbook Pro 10,1 (Mid 2012), we see the sdhci-pci interrupt handler not claim the interrupt generated when the module initialises [2]. Beyond the MMC subsystem output, what other data may be useful in diagnosing this? Many thanks, Daniel --- [1] $ sudo lspci -s 03:00.1 -v 03:00.1 SD Host controller: Broadcom Corporation NetXtreme BCM57765 Memory Card Reader (rev 10) (prog-if 01) Subsystem: Broadcom Corporation Device 96bc Flags: bus master, fast devsel, latency 0, IRQ 17 Memory at c182 (64-bit, prefetchable) [size=64K] Capabilities: [48] Power Management version 3 Capabilities: [58] MSI: Enable- Count=1/1 Maskable- 64bit+ Capabilities: [ac] Express Endpoint, MSI 00 Capabilities: [100] Advanced Error Reporting Capabilities: [150] Power Budgeting Capabilities: [160] Virtual Channel Kernel driver in use: sdhci-pci --- [2] sdhci: Secure Digital Host Controller Interface driver sdhci: Copyright(c) Pierre Ossman sdhci-pci :03:00.1: SDHCI controller found [14e4:16bc] (rev 10) mmc0: no vqmmc regulator found mmc0: no vmmc regulator found mmc0: SDHCI controller on PCI [:03:00.1] using ADMA [...] irq 17: nobody cared (try booting with the "irqpoll" option) CPU: 0 PID: 0 Comm: swapper/0 Tainted: GW 3.10.0-031000-generic #201306301935 Hardware name: Apple Inc. MacBookPro10,1/Mac-C3EC7CD22292981F, BIOS MBP101.88Z.00EE.B02.1208081132 08/08/2012 8802635cd89c 88026f203e48 81703460 88026f203e78 810f0d6d 00029d3daf00 8802635cd800 0011 88026f203ea8 810f1195 0011 Call Trace: [] dump_stack+0x19/0x1b [] __report_bad_irq+0x3d/0xe0 [] note_interrupt+0x135/0x190 [] handle_irq_event_percpu+0xa9/0x210 [] ? sched_clock+0x9/0x10 [] handle_irq_event+0x4e/0x80 [] handle_fasteoi_irq+0x64/0x120 [] handle_irq+0x22/0x40 [] do_IRQ+0x5a/0xe0 [] common_interrupt+0x6d/0x6d [] ? rcu_eqs_enter_common.isra.48+0x43/0x100 [] ? cpuidle_enter_state+0x61/0xe0 [] ? 
cpuidle_enter_state+0x57/0xe0 [] cpuidle_idle_call+0xc0/0x220 [] arch_cpu_idle+0xe/0x30 [] cpu_idle_loop+0x7e/0x250 [] cpu_startup_entry+0x6b/0x70 [] rest_init+0x77/0x80 [] start_kernel+0x40c/0x419 [] ? do_early_param+0x87/0x87 [] ? early_idt_handlers+0x120/0x120 [] x86_64_start_reservations+0x2a/0x2c [] x86_64_start_kernel+0xf3/0x102 handlers: [] sdhci_irq [sdhci] Disabling IRQ #17 -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/2] thp: support split page table lock
On Saturday, 7 September 2013 02:10:02 UTC+8, Naoya Horiguchi wrote: Hi Alex, On Fri, Sep 06, 2013 at 11:04:23AM -0500, Alex Thorlton wrote: > On Thu, Sep 05, 2013 at 05:27:46PM -0400, Naoya Horiguchi wrote: > > Thp related code also uses per process mm->page_table_lock now. > > So making it fine-grained can provide better performance. > > > > This patch makes thp support split page table lock by using page->ptl > > of the pages storing "pmd_trans_huge" pmds. > > > > Some functions like pmd_trans_huge_lock() and page_check_address_pmd() > > are expected by their caller to pass back the pointer of ptl, so this > > patch adds to those functions new arguments for that. Rather than that, > > this patch gives only straightforward replacement. > > > > ChangeLog v3: > > - fixed argument of huge_pmd_lockptr() in copy_huge_pmd() > > - added missing declaration of ptl in do_huge_pmd_anonymous_page() > > I've applied these and tested them using the same tests program that I > used when I was working on the same issue, and I'm running into some > bugs. Here's a stack trace: Thank you for helping testing. This bug is new to me. With 3.11, this patch series and CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS, I consistently hit the same failure when exiting one of my stress-testers [1] when using e.g. 24 cores. It doesn't happen with 8 cores, so it likely needs enough virtual memory to use multiple split locks. Otherwise, this is very promising work! [1] http://quora.org/2013/fft3d.c -- Daniel J Blueman Principal Software Engineer, Numascale -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: BCM57765: timeout waiting for hardware interrupt
On 3 September 2013 19:04, Chris Ball wrote: > On Tue, Sep 03 2013, Daniel J Blueman wrote: >> Please let me know if there's a better vector for reporting and >> looking into this issue, if you can. > > Do you know whether it's ever worked on this hardware? If so, could > you try bisecting to find the first bad commit? Yes; the card reader works when an ethernet cable is plugged into the first PCI function on the chip, the Broadcom NIC (see comment #3): https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1067222 Google also use this in one of their Chromebook models: https://groups.google.com/a/chromium.org/forum/#!msg/chromium-os-reviews/nwFj3KVQy_Y/FOckx1trSkUJ > If not, I suspect it's going to be very difficult to debug/fix > remotely, and we need to wait for someone who has the hardware and > wants to create the fix. (Or get the hardware into the hands of > someone who's willing to take a look.) I have the hardware and am willing to take a look; I was just hoping for some tips on where to look, given your experience with SDHCI/MMC. Doing some further debugging, the card reader's parent device (the NIC) is in D3 [1] while the card reader is in D0. I'll add the missing device IDs to the tg3 driver and force it to bring the NIC into D0 and see what happens. 
Daniel --- [1] $ sudo lspci -s 3:0 -vv 03:00.0 Ethernet controller: Broadcom Corporation Device 16a3 (rev 10) Subsystem: Broadcom Corporation Device 16b4 Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx- Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- Capabilities: [160 v1] Virtual Channel Caps:LPEVC=0 RefClk=100ns PATEntryBits=1 Arb:Fixed- WRR32- WRR64- WRR128- Ctrl:ArbSelect=Fixed Status:InProgress- VC0:Caps:PATOffset=00 MaxTimeSlots=1 RejSnoopTrans- Arb:Fixed- WRR32- WRR64- WRR128- TWRR128- WRR256- Ctrl:Enable+ ID=0 ArbSelect=Fixed TC/VC=ff Status:NegoPending- InProgress- 03:00.1 SD Host controller: Broadcom Corporation NetXtreme BCM57765 Memory Card Reader (rev 10) (prog-if 01) Subsystem: Broadcom Corporation Device 96bc Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx- Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- Capabilities: [160 v1] Virtual Channel Caps:LPEVC=0 RefClk=100ns PATEntryBits=1 Arb:Fixed- WRR32- WRR64- WRR128- Ctrl:ArbSelect=Fixed Status:InProgress- VC0:Caps:PATOffset=00 MaxTimeSlots=1 RejSnoopTrans- Arb:Fixed- WRR32- WRR64- WRR128- TWRR128- WRR256- Ctrl:Enable+ ID=0 ArbSelect=Fixed TC/VC=ff Status:NegoPending- InProgress- Kernel driver in use: sdhci-pci -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [3.6-rc7] switcheroo race with Intel HDA...
On 8 October 2012 20:58, Takashi Iwai wrote: > At Tue, 25 Sep 2012 13:20:05 +0800, > Daniel J Blueman wrote: >> On my Macbook with a discrete Nvidia GPU, there is a race between >> selecting the integrated GPU and putting the discrete GPU into D3 [1], >> reliably causing a kernel oops [2]. >> >> Introducing a delay of ~1s between the calls prevents this. When the >> second 'OFF' write path executes, it looks like struct azx at >> card->private_data hasn't yet been allocated yet [3], so there is >> likely some locking missing. > > It's rather pci_get_drvdata() returning NULL (i.e. card is NULL, thus > card->private_data causes Oops). Could you check the patch like below > and see whether you get a kernel warning (but no Oops) or the problem > gets fixed by shifting the assignment of pci drvdata? [...] Good patching. Calling pci_set_drvdata later prevents the oops in HDA, though we see unexpected 0x0 responses in the response ring buffer [1], which we don't see when there's a >~1.5s delay between IGD and OFF. Thanks, Daniel --- [1] snd_hda_intel :00:1b.0: enabling device ( -> 0002) snd_hda_intel :00:1b.0: irq 55 for MSI/MSI-X vga_switcheroo: enabled input: HDA Intel PCH Headphone as /devices/pci:00/:00:1b.0/sound/card0/input11 snd_hda_intel :01:00.1: enabling device ( -> 0002) {echo IGD >/sys/kernel/debug/vgaswitcheroo/switch} {echo OFF >/sys/kernel/debug/vgaswitcheroo/switch} hda_intel: Disabling MSI hda-intel: :01:00.1: Handle VGA-switcheroo audio client hda-intel: Disabling :01:00.1 via VGA-switcheroo VGA switcheroo: switched nouveau off [drm] nouveau :01:00.0: Disabling display... [drm] nouveau :01:00.0: Disabling fbcon... [drm] nouveau :01:00.0: Unpinning framebuffer(s)... [drm] nouveau :01:00.0: Evicting buffers... [drm] nouveau :01:00.0: Idling channels... [drm] nouveau :01:00.0: Suspending GPU objects... [drm] nouveau :01:00.0: And we're gone! 
hda-intel: spurious response 0x0:0x0, last cmd=0x1f0004 {repeats 220 times} hda-intel: spurious response 0x0:0x0, last cmd=0x1f0004 HDMI: failed to get afg sub nodes -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [3.6-rc7] switcheroo race with Intel HDA...
On 9 October 2012 18:07, Takashi Iwai wrote: > At Tue, 09 Oct 2012 12:04:08 +0200, > Takashi Iwai wrote: >> >> At Tue, 9 Oct 2012 00:34:09 +0800, >> Daniel J Blueman wrote: >> > >> > On 8 October 2012 20:58, Takashi Iwai wrote: >> > > At Tue, 25 Sep 2012 13:20:05 +0800, >> > > Daniel J Blueman wrote: >> > >> On my Macbook with a discrete Nvidia GPU, there is a race between >> > >> selecting the integrated GPU and putting the discrete GPU into D3 [1], >> > >> reliably causing a kernel oops [2]. >> > >> >> > >> Introducing a delay of ~1s between the calls prevents this. When the >> > >> second 'OFF' write path executes, it looks like struct azx at >> > >> card->private_data hasn't yet been allocated yet [3], so there is >> > >> likely some locking missing. >> > > >> > > It's rather pci_get_drvdata() returning NULL (i.e. card is NULL, thus >> > > card->private_data causes Oops). Could you check the patch like below >> > > and see whether you get a kernel warning (but no Oops) or the problem >> > > gets fixed by shifting the assignment of pci drvdata? >> > [...] >> > >> > Good patching. Calling pci_set_drvdata later prevents the oops in HDA, >> > though we see unexpected 0x0 responses in the response ring buffer >> > [1], which we don't see when there's a >~1.5s delay between IGD and >> > OFF. >> >> If the previous patch fixed, it means that the switching occurred >> during the device was being probed. Maybe a better approach to >> register the VGA switcheroo after the proper initialization. >> >> The patch below is a revised one. Please give it a try. > > Also, it's not clear which card spews the spurious response. > Apply the patch below in addition. [...] hda-intel: :01:00.1: spurious response 0x0:0x0, last cmd=0x1f0004 $ lspci -s :1:0.1 01:00.1 Audio device: NVIDIA Corporation Device 0e1b (rev ff) It's the NVIDIA device which presumably hasn't completed its transition to D3 at the time the OFF is executed. 
Thanks, Daniel -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [3.6-rc7] switcheroo race with Intel HDA...
On 9 October 2012 21:04, Takashi Iwai wrote: > At Tue, 9 Oct 2012 19:23:56 +0800, > Daniel J Blueman wrote: >> On 9 October 2012 18:07, Takashi Iwai wrote: >> > At Tue, 09 Oct 2012 12:04:08 +0200, >> > Takashi Iwai wrote: >> >> At Tue, 9 Oct 2012 00:34:09 +0800, >> >> Daniel J Blueman wrote: >> >> > On 8 October 2012 20:58, Takashi Iwai wrote: >> >> > > At Tue, 25 Sep 2012 13:20:05 +0800, >> >> > > Daniel J Blueman wrote: >> >> > >> On my Macbook with a discrete Nvidia GPU, there is a race between >> >> > >> selecting the integrated GPU and putting the discrete GPU into D3 >> >> > >> [1], >> >> > >> reliably causing a kernel oops [2]. >> >> > >> >> >> > >> Introducing a delay of ~1s between the calls prevents this. When the >> >> > >> second 'OFF' write path executes, it looks like struct azx at >> >> > >> card->private_data hasn't yet been allocated yet [3], so there is >> >> > >> likely some locking missing. >> >> > > >> >> > > It's rather pci_get_drvdata() returning NULL (i.e. card is NULL, thus >> >> > > card->private_data causes Oops). Could you check the patch like below >> >> > > and see whether you get a kernel warning (but no Oops) or the problem >> >> > > gets fixed by shifting the assignment of pci drvdata? >> >> > [...] >> >> > >> >> > Good patching. Calling pci_set_drvdata later prevents the oops in HDA, >> >> > though we see unexpected 0x0 responses in the response ring buffer >> >> > [1], which we don't see when there's a >~1.5s delay between IGD and >> >> > OFF. >> >> >> >> If the previous patch fixed, it means that the switching occurred >> >> during the device was being probed. Maybe a better approach to >> >> register the VGA switcheroo after the proper initialization. >> >> >> >> The patch below is a revised one. Please give it a try. >> > >> > Also, it's not clear which card spews the spurious response. >> > Apply the patch below in addition. >> [...] 
>> >> hda-intel: :01:00.1: spurious response 0x0:0x0, last cmd=0x1f0004 >> $ lspci -s :1:0.1 >> 01:00.1 Audio device: NVIDIA Corporation Device 0e1b (rev ff) >> >> It's the NVIDIA device which presumably hasn't completed it's >> transition to D3 at the time the OFF is executed. > > OK, then could you try the patch below on the top of previous two > patches? The first IGD switcheroo command fails to switch to the integrated GPU: # cat /sys/kernel/debug/vgaswitcheroo/switch 0:DIS:+:Pwr::01:00.0 1:IGD: :Pwr::00:02.0 2:DIS-Audio: :Pwr::01:00.1 # echo IGD >/sys/kernel/debug/vgaswitcheroo/switch vga_switcheroo: client 1 refused switch I also instrumented snd_hda_lock_devices, but none of the failure paths are being taken, which would leave inconsistent state, as the return value isn't checked. Thanks, Daniel -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: switcheroo registration vs switching race...
On 3 December 2012 19:17, Takashi Iwai wrote: > At Wed, 28 Nov 2012 09:45:39 +0100, > Takashi Iwai wrote: >> >> At Wed, 28 Nov 2012 11:45:07 +0800, >> Daniel J Blueman wrote: >> > >> > Hi Seth, Dave, Takashi, >> > >> > If I power down the unused discrete GPU before lightdm starts by >> > fiddling with the sysfs file [1] in the upstart script, I see a race >> > manifesting as the discrete GPU's HDA controller timing out to >> > commands [2]. >> > >> > Adding some debug, I see that the registered audio devices are put >> > into D3 before the GPU is, but it turns out that the discrete (and >> > internal) GPU's HDA controller gets registered a bit later, so the >> > list is empty. The symptom is since the HDA driver it's talking to >> > hardware which is now in D3. >> > >> > We could add a mutex to nouveau to allow us to wait for the DGPU HDA >> > controller, but perhaps this should be solved at a higher level in the >> > vgaswitcheroo code; what do you think? >> >> Maybe it's a side effect for the recent effort to fix another race in >> the probe. A part of them problem is that the registration is done at >> the very last of probing. >> >> Instead of delaying the registration, how about the patch below? > > Ping. If this really works, I'd like to queue it for 3.8 merge, at > least... Ping ack; I was trying to find time to understand another race that occurs with GPU probing after switching, but is separate from the situation before switching, here. In the context of writing the switch, it looks like struct azx isn't allocated by the time azx_vs_set_state accesses it [1,2]; racing with azx_codec_create? 
The full dmesg output is at: http://quora.org/2012/hda-switch-oops.txt Thanks, Daniel --- [1] BUG: unable to handle kernel NULL pointer dereference at 0170 IP: [] azx_vs_set_state+0x26/0x1a0 [snd_hda_intel] PGD 26323d067 PUD 264f58067 PMD 0 Oops: [#1] SMP Modules linked in: snd_hda_codec_hdmi snd_hda_codec_cirrus rfcomm bnep nls_iso8859_1 joydev hid_apple bcm5974 nouveau coretemp kvm_intel b43 kvm uvcvideo videobuf2_core videobuf2_vmalloc videobuf2_memops ghash_clmulni_intel smsc75xx usbnet mii ttm snd_hda_intel(+) snd_hda_codec snd_hwdep ssb i915 snd_pcm mxm_wmi snd_timer apple_gmux applesmc mei lpc_ich microcode hwmon mfd_core input_polldev bcma snd drm_kms_helper snd_page_alloc video apple_bl sdhci_pci sdhci mmc_core CPU 1 Pid: 967, comm: sh Not tainted 3.7.0-rc7-expert+ #8 Apple Inc. MacBookPro10,1/Mac-C3EC7CD22292981F RIP: 0010:[] [] azx_vs_set_state+0x26/0x1a0 [snd_hda_intel] RSP: 0018:88025198de48 EFLAGS: 00010286 RAX: RBX: 880251960a00 RCX: RDX: RSI: RDI: 880265b41098 RBP: 88025198de68 R08: 0003 R09: 1000 R10: 7fffe481b730 R11: 0246 R12: 880265b41098 R13: R14: 88025198df50 R15: FS: 7f4961480700() GS:88026f24() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 0170 CR3: 000263cd3000 CR4: 001407e0 DR0: DR1: DR2: DR3: DR6: 0ff0 DR7: 0400 Process sh (pid: 967, threadinfo 88025198c000, task 88025d635820) Stack: 88025d635820 880251960a00 88025198de98 88025198de88 812b8e77 880263ef1740 0004 88025198def8 812b947c 88020a46464f 81107982 Call Trace: [] set_audio_state+0x67/0x70 [] vga_switcheroo_debugfs_write+0xbc/0x380 [] ? __alloc_fd+0x42/0x110 [] ? __fd_install+0x29/0x60 [] vfs_write+0xa3/0x160 [] sys_write+0x4d/0xa0 [] ? 
do_page_fault+0x9/0x10 [] system_call_fastpath+0x1a/0x1f Code: 00 00 00 00 00 55 48 89 e5 48 83 ec 20 4c 89 65 f0 4c 8d a7 98 00 00 00 4c 89 e7 48 89 5d e8 4c 89 6d f8 41 89 f5 e8 fa a4 0d e1 <48> 8b 98 70 01 00 00 0f b6 83 dd 01 00 00 a8 10 75 34 45 85 ed RIP [] azx_vs_set_state+0x26/0x1a0 [snd_hda_intel] RSP CR2: 0170 --- [2] $ gdb ./sound/pci/hda/snd-hda-intel.ko (gdb) list *(azx_vs_set_state+0x26) 0x3036 is in azx_vs_set_state (sound/pci/hda/hda_intel.c:2628). 2623 2624static void azx_vs_set_state(struct pci_dev *pci, 2625 enum vga_switcheroo_state state) 2626{ 2627struct snd_card *card = pci_get_drvdata(pci); 2628struct azx *chip = card->private_data; 2629bool disabled; 2630 2631if (chip->init_failed) 2632return; -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: switcheroo registration vs switching race...
On 3 December 2012 22:40, Takashi Iwai wrote: > At Mon, 3 Dec 2012 22:25:52 +0800, > Daniel J Blueman wrote: >> >> On 3 December 2012 19:17, Takashi Iwai wrote: >> > At Wed, 28 Nov 2012 09:45:39 +0100, >> > Takashi Iwai wrote: >> >> >> >> At Wed, 28 Nov 2012 11:45:07 +0800, >> >> Daniel J Blueman wrote: >> >> > >> >> > Hi Seth, Dave, Takashi, >> >> > >> >> > If I power down the unused discrete GPU before lightdm starts by >> >> > fiddling with the sysfs file [1] in the upstart script, I see a race >> >> > manifesting as the discrete GPU's HDA controller timing out to >> >> > commands [2]. >> >> > >> >> > Adding some debug, I see that the registered audio devices are put >> >> > into D3 before the GPU is, but it turns out that the discrete (and >> >> > internal) GPU's HDA controller gets registered a bit later, so the >> >> > list is empty. The symptom is since the HDA driver it's talking to >> >> > hardware which is now in D3. >> >> > >> >> > We could add a mutex to nouveau to allow us to wait for the DGPU HDA >> >> > controller, but perhaps this should be solved at a higher level in the >> >> > vgaswitcheroo code; what do you think? >> >> >> >> Maybe it's a side effect for the recent effort to fix another race in >> >> the probe. A part of them problem is that the registration is done at >> >> the very last of probing. >> >> >> >> Instead of delaying the registration, how about the patch below? >> > >> > Ping. If this really works, I'd like to queue it for 3.8 merge, at >> > least... >> >> Ping ack; I was trying to find time to understand another race that >> occurs with GPU probing after switching, but is separate from the >> situation before switching, here. >> >> In the context of writing the switch, it looks like struct azx isn't >> allocated by the time azx_vs_set_state accesses it [1,2]; racing with >> azx_codec_create? > > It was allocated, but it wasn't assigned properly in pci drvdata. > > Below is the revised patch. 
Just moved pci_set_drvdata() before > register_vga_switcheroo(). Could you retest with it? Superb; this addresses the oops. ~1 second after the DGPU is put into D3, I still often see "hda-intel: spurious response 0x0:0x0, last cmd=0x170500": http://quora.org/2012/hda-switch-spurious.txt Presumably this implies the read of the ring-buffer pointer returned 0x, so the HDA driver understands the pointer to have wrapped and processes the 191 unwritten entries? Daniel -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: switcheroo registration vs switching race...
On 4 December 2012 00:23, Takashi Iwai wrote: > At Mon, 3 Dec 2012 23:08:28 +0800, > Daniel J Blueman wrote: >> >> On 3 December 2012 22:40, Takashi Iwai wrote: >> > At Mon, 3 Dec 2012 22:25:52 +0800, >> > Daniel J Blueman wrote: >> >> >> >> On 3 December 2012 19:17, Takashi Iwai wrote: >> >> > At Wed, 28 Nov 2012 09:45:39 +0100, >> >> > Takashi Iwai wrote: >> >> >> >> >> >> At Wed, 28 Nov 2012 11:45:07 +0800, >> >> >> Daniel J Blueman wrote: >> >> >> > >> >> >> > Hi Seth, Dave, Takashi, >> >> >> > >> >> >> > If I power down the unused discrete GPU before lightdm starts by >> >> >> > fiddling with the sysfs file [1] in the upstart script, I see a race >> >> >> > manifesting as the discrete GPU's HDA controller timing out to >> >> >> > commands [2]. >> >> >> > >> >> >> > Adding some debug, I see that the registered audio devices are put >> >> >> > into D3 before the GPU is, but it turns out that the discrete (and >> >> >> > internal) GPU's HDA controller gets registered a bit later, so the >> >> >> > list is empty. The symptom is since the HDA driver it's talking to >> >> >> > hardware which is now in D3. >> >> >> > >> >> >> > We could add a mutex to nouveau to allow us to wait for the DGPU HDA >> >> >> > controller, but perhaps this should be solved at a higher level in >> >> >> > the >> >> >> > vgaswitcheroo code; what do you think? >> >> >> >> >> >> Maybe it's a side effect for the recent effort to fix another race in >> >> >> the probe. A part of them problem is that the registration is done at >> >> >> the very last of probing. >> >> >> >> >> >> Instead of delaying the registration, how about the patch below? >> >> > >> >> > Ping. If this really works, I'd like to queue it for 3.8 merge, at >> >> > least... >> >> >> >> Ping ack; I was trying to find time to understand another race that >> >> occurs with GPU probing after switching, but is separate from the >> >> situation before switching, here. 
>> >> >> >> In the context of writing the switch, it looks like struct azx isn't >> >> allocated by the time azx_vs_set_state accesses it [1,2]; racing with >> >> azx_codec_create? >> > >> > It was allocated, but it wasn't assigned properly in pci drvdata. >> > >> > Below is the revised patch. Just moved pci_set_drvdata() before >> > register_vga_switcheroo(). Could you retest with it? >> >> Superb; this addresses the oops. > > OK, I'll queue it to sound tree for 3.8 kernel with Cc to stable. > >> ~1 second after the DGPU is put into D3, I still often see "hda-intel: >> spurious response 0x0:0x0, last cmd=0x170500": >> http://quora.org/2012/hda-switch-spurious.txt > > Hm, it's not clear who triggers these messages. I'll try to check the > code paths. > >> Presumably this implies the read of the ring-buffer pointer returned >> 0x, so the HDA driver understands the pointer to have wrapped >> and processes the 191 unwritten entries? > > Good point. Actually there is one bug that looks obviously wrong > (writing 32bit value to CORBWP). Maybe it has been working just > because writing CORBRP doesn't influence except for the reset bit. > > Reading CORBWP as a byte is OK, but this could be better in a word so > that we can check 0x as invalid. > > A test patch is below. Hopefully this improves the situation... I'll check this out tomorrow and also instrument the code to get a backtrace, since there may still be an underlying race with the previous patches: [8.203827] snd_hda_intel :00:1b.0: enabling device ( -> 0002) [8.203936] snd_hda_intel :00:1b.0: irq 51 for MSI/MSI-X [ 10.981297] VGA switcheroo: switched nouveau off [ 10.981383] nouveau [ DRM] suspending fbcon... [ 10.981388] nouveau [ DRM] suspending display... [ 10.981687] nouveau [ DRM] unpinning framebuffer(s)... [ 10.981825] nouveau [ DRM] evicting buffers... [ 10.992948] nouveau [ DRM] suspending client object trees... [ 11.310697] hda-intel: azx_get_response timeout, switching to polling mode: l
Re: [PATCH 4/4 v8] AMD64 EDAC: Fix type usage in NB IDs and memory ranges
On 01/12/2012 01:17, Borislav Petkov wrote: On Fri, Nov 30, 2012 at 04:44:20PM +0800, Daniel J Blueman wrote: Use appropriate types for northbridge IDs and memory ranges. Mark immutable data const and keep within compilation unit on related structures. Tested on multi-socket server and multi-server, multi-socket NumaConnect setup. v7: Refactor patches grouping changes v8: Drop unneeded change; use const and static where appropriate Signed-off-by: Daniel J Blueman --- arch/x86/include/asm/amd_nb.h |2 +- drivers/edac/amd64_edac.c | 26 +- drivers/edac/amd64_edac.h |6 +++--- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 417eb24..d2e703b 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -76,7 +76,7 @@ static inline bool amd_nb_has_feature(unsigned feature) return ((amd_northbridges.flags & feature) == feature); } -static inline struct amd_northbridge *node_to_amd_nb(int node) +static inline struct amd_northbridge *node_to_amd_nb(u16 node) I'm dropping this change from this patch because if we go with it, we'll have to change all callsites of node_to_amd_nb which would cause unnecessary churn. So, I've applied the final patchset and uploaded a branch here: git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git numascale Please give it a run on both configurations and let me know if something is still amiss. It works well on fam10h and fam15h boxes, with and without NumaConnect. Thanks, Daniel -- Daniel J Blueman Principal Software Engineer, Numascale Asia -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3] Add NumaChip remote PCI support
Add NumaChip-specific PCI access mechanism via MMCONFIG cycles, while preventing access to AMD Northbridges which shouldn't respond. v2: Use PCI_DEVFN in precomputed constant limit; drop unneeded includes v3: Express dependency on MMCONFIG Signed-off-by: Daniel J Blueman --- arch/x86/Kconfig |2 + arch/x86/include/asm/numachip/numachip.h | 20 + arch/x86/kernel/apic/apic_numachip.c |2 + arch/x86/pci/Makefile|1 + arch/x86/pci/numachip.c | 129 ++ 5 files changed, 154 insertions(+) create mode 100644 arch/x86/include/asm/numachip/numachip.h create mode 100644 arch/x86/pci/numachip.c diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 46c3bff..50e8700 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -374,6 +374,7 @@ config X86_NUMACHIP depends on NUMA depends on SMP depends on X86_X2APIC + depends on PCI_MMCONFIG ---help--- Adds support for Numascale NumaChip large-SMP systems. Needed to enable more than ~168 cores. diff --git a/arch/x86/include/asm/numachip/numachip.h b/arch/x86/include/asm/numachip/numachip.h new file mode 100644 index 000..fe7f60c --- /dev/null +++ b/arch/x86/include/asm/numachip/numachip.h @@ -0,0 +1,20 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Numascale NumaConnect-specific header file + * + * Copyright (C) 2012 Numascale AS. All rights reserved. 
+ * + * Send feedback to + * + */ + +#ifndef _ASM_X86_NUMACHIP_NUMACHIP_H +#define _ASM_X86_NUMACHIP_NUMACHIP_H + +extern int __init pci_numachip_init(void); + +#endif /* _ASM_X86_NUMACHIP_NUMACHIP_H */ + diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index a65829a..9c2aa89 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -179,6 +180,7 @@ static int __init numachip_system_init(void) return 0; x86_cpuinit.fixup_cpu_id = fixup_cpu_id; + x86_init.pci.arch_init = pci_numachip_init; map_csrs(); diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index 3af5a1e..ee0af58 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_STA2X11) += sta2x11-fixup.o obj-$(CONFIG_X86_VISWS)+= visws.o obj-$(CONFIG_X86_NUMAQ)+= numaq_32.o +obj-$(CONFIG_X86_NUMACHIP) += numachip.o obj-$(CONFIG_X86_INTEL_MID)+= mrst.o diff --git a/arch/x86/pci/numachip.c b/arch/x86/pci/numachip.c new file mode 100644 index 000..7307d9d --- /dev/null +++ b/arch/x86/pci/numachip.c @@ -0,0 +1,129 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Numascale NumaConnect-specific PCI code + * + * Copyright (C) 2012 Numascale AS. All rights reserved. 
+ * + * Send feedback to + * + * PCI accessor functions derived from mmconfig_64.c + * + */ + +#include +#include + +static u8 limit __read_mostly; + +static inline char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) +{ + struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus); + + if (cfg && cfg->virt) + return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12)); + return NULL; +} + +static int pci_mmcfg_read_numachip(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 *value) +{ + char __iomem *addr; + + /* Why do we have this when nobody checks it. How about a BUG()!? -AK */ + if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) { +err: *value = -1; + return -EINVAL; + } + + /* Ensure AMD Northbridges don't decode reads to other devices */ + if (unlikely(bus == 0 && devfn >= limit)) { + *value = -1; + return 0; + } + + rcu_read_lock(); + addr = pci_dev_base(seg, bus, devfn); + if (!addr) { + rcu_read_unlock(); + goto err; + } + + switch (len) { + case 1: + *value = mmio_config_readb(addr + reg); + break; + case 2: + *value = mmio_config_readw(addr + reg); + break; + case 4: + *value = mmio_config_readl(addr + reg); + break; + } + rcu_read_unlock(); + + return 0; +} + +static int pci_mmcfg_write_numachip(unsigned int seg, unsigned int bus, + unsigned
Re: switcheroo registration vs switching race...
On 4 December 2012 01:10, Takashi Iwai wrote: > At Tue, 4 Dec 2012 00:50:56 +0800, > Daniel J Blueman wrote: >> >> On 4 December 2012 00:23, Takashi Iwai wrote: >> > At Mon, 3 Dec 2012 23:08:28 +0800, >> > Daniel J Blueman wrote: >> >> >> >> On 3 December 2012 22:40, Takashi Iwai wrote: >> >> > At Mon, 3 Dec 2012 22:25:52 +0800, >> >> > Daniel J Blueman wrote: >> >> >> >> >> >> On 3 December 2012 19:17, Takashi Iwai wrote: >> >> >> > At Wed, 28 Nov 2012 09:45:39 +0100, >> >> >> > Takashi Iwai wrote: >> >> >> >> >> >> >> >> At Wed, 28 Nov 2012 11:45:07 +0800, >> >> >> >> Daniel J Blueman wrote: >> >> >> >> > >> >> >> >> > Hi Seth, Dave, Takashi, >> >> >> >> > >> >> >> >> > If I power down the unused discrete GPU before lightdm starts by >> >> >> >> > fiddling with the sysfs file [1] in the upstart script, I see a >> >> >> >> > race >> >> >> >> > manifesting as the discrete GPU's HDA controller timing out to >> >> >> >> > commands [2]. >> >> >> >> > >> >> >> >> > Adding some debug, I see that the registered audio devices are put >> >> >> >> > into D3 before the GPU is, but it turns out that the discrete (and >> >> >> >> > internal) GPU's HDA controller gets registered a bit later, so the >> >> >> >> > list is empty. The symptom is since the HDA driver it's talking to >> >> >> >> > hardware which is now in D3. >> >> >> >> > >> >> >> >> > We could add a mutex to nouveau to allow us to wait for the DGPU >> >> >> >> > HDA >> >> >> >> > controller, but perhaps this should be solved at a higher level >> >> >> >> > in the >> >> >> >> > vgaswitcheroo code; what do you think? >> >> >> >> >> >> >> >> Maybe it's a side effect for the recent effort to fix another race >> >> >> >> in >> >> >> >> the probe. A part of them problem is that the registration is done >> >> >> >> at >> >> >> >> the very last of probing. >> >> >> >> >> >> >> >> Instead of delaying the registration, how about the patch below? >> >> >> > >> >> >> > Ping. 
If this really works, I'd like to queue it for 3.8 merge, at >> >> >> > least... >> >> >> >> >> >> Ping ack; I was trying to find time to understand another race that >> >> >> occurs with GPU probing after switching, but is separate from the >> >> >> situation before switching, here. >> >> >> >> >> >> In the context of writing the switch, it looks like struct azx isn't >> >> >> allocated by the time azx_vs_set_state accesses it [1,2]; racing with >> >> >> azx_codec_create? >> >> > >> >> > It was allocated, but it wasn't assigned properly in pci drvdata. >> >> > >> >> > Below is the revised patch. Just moved pci_set_drvdata() before >> >> > register_vga_switcheroo(). Could you retest with it? >> >> >> >> Superb; this addresses the oops. >> > >> > OK, I'll queue it to sound tree for 3.8 kernel with Cc to stable. >> > >> >> ~1 second after the DGPU is put into D3, I still often see "hda-intel: >> >> spurious response 0x0:0x0, last cmd=0x170500": >> >> http://quora.org/2012/hda-switch-spurious.txt >> > >> > Hm, it's not clear who triggers these messages. I'll try to check the >> > code paths. >> > >> >> Presumably this implies the read of the ring-buffer pointer returned >> >> 0x, so the HDA driver understands the pointer to have wrapped >> >> and processes the 191 unwritten entries? >> > >> > Good point. Actually there is one bug that looks obviously wrong >> > (writing 32bit value to CORBWP). Maybe it has been working just >> > because writing CORBRP doesn't influence
[PATCH] HDA: Add PCI device prefix for clarity
When printing, use a prefix of the PCI domain, bus, device and function as in other drivers, to differentiate multiple devices. Important for reporting and debugging. Signed-off-by: Daniel J Blueman --- sound/pci/hda/hda_intel.c | 110 +++-- 1 file changed, 57 insertions(+), 53 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index f9d870e..cdfebbd 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -185,7 +185,7 @@ MODULE_DESCRIPTION("Intel HDA driver"); #ifdef CONFIG_SND_VERBOSE_PRINTK #define SFX/* nop */ #else -#define SFX"hda-intel: " +#define SFX"hda-intel %s: " #endif #if defined(CONFIG_PM) && defined(CONFIG_VGA_SWITCHEROO) @@ -703,7 +703,7 @@ static int azx_alloc_cmd_io(struct azx *chip) snd_dma_pci_data(chip->pci), PAGE_SIZE, &chip->rb); if (err < 0) { - snd_printk(KERN_ERR SFX "cannot allocate CORB/RIRB\n"); + snd_printk(KERN_ERR SFX "cannot allocate CORB/RIRB\n", pci_name(chip->pci)); return err; } mark_pages_wc(chip, &chip->rb, true); @@ -836,7 +836,7 @@ static void azx_update_rirb(struct azx *chip) chip->rirb.cmds[addr]--; } else snd_printk(KERN_ERR SFX "spurious response %#x:%#x, " - "last cmd=%#08x\n", + "last cmd=%#08x\n", pci_name(chip->pci), res, res_ex, chip->last_cmd[addr]); } @@ -881,7 +881,7 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus, if (!chip->polling_mode && chip->poll_count < 2) { snd_printdd(SFX "azx_get_response timeout, " "polling the codec once: last cmd=0x%08x\n", - chip->last_cmd[addr]); + pci_name(chip->pci), chip->last_cmd[addr]); do_poll = 1; chip->poll_count++; goto again; @@ -891,7 +891,7 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus, if (!chip->polling_mode) { snd_printk(KERN_WARNING SFX "azx_get_response timeout, " "switching to polling mode: last cmd=0x%08x\n", - chip->last_cmd[addr]); + pci_name(chip->pci), chip->last_cmd[addr]); chip->polling_mode = 1; goto again; } @@ -899,7 +899,7 @@ static unsigned int 
azx_rirb_get_response(struct hda_bus *bus, if (chip->msi) { snd_printk(KERN_WARNING SFX "No response from codec, " "disabling MSI: last cmd=0x%08x\n", - chip->last_cmd[addr]); + pci_name(chip->pci), chip->last_cmd[addr]); free_irq(chip->irq, chip); chip->irq = -1; pci_disable_msi(chip->pci); @@ -966,7 +966,7 @@ static int azx_single_wait_for_response(struct azx *chip, unsigned int addr) } if (printk_ratelimit()) snd_printd(SFX "get_response timeout: IRS=0x%x\n", - azx_readw(chip, IRS)); + pci_name(chip->pci), azx_readw(chip, IRS)); chip->rirb.res[addr] = -1; return -EIO; } @@ -994,7 +994,7 @@ static int azx_single_send_cmd(struct hda_bus *bus, u32 val) } if (printk_ratelimit()) snd_printd(SFX "send_cmd timeout: IRS=0x%x, val=0x%x\n", - azx_readw(chip, IRS), val); + pci_name(chip->pci), azx_readw(chip, IRS), val); return -EIO; } @@ -1080,7 +1080,7 @@ static int azx_reset(struct azx *chip, int full_reset) __skip: /* check to see if controller is ready */ if (!azx_readb(chip, GCTL)) { - snd_printd(SFX "azx_reset: controller not ready!\n"); + snd_printd(SFX "azx_reset: controller not ready!\n", pci_name(chip->pci)); return -EBUSY; } @@ -1092,7 +1092,7 @@ static int azx_reset(struct azx *chip, int full_reset) /* detect codecs */ if (!chip->codec_mask) { chip->codec_mask = azx_readw(chip, STATESTS); - snd_printdd(SFX "codec_mask = 0x%x\n", chip->codec_mask); + snd_printdd(SFX "codec_mask = 0x%x\n", pci_name(chip->pci), chip->codec_mask); } re
Re: switcheroo registration vs switching race...
On 4 December 2012 21:55, Takashi Iwai wrote: > At Tue, 04 Dec 2012 14:23:05 +0100, > Takashi Iwai wrote: >> >> At Tue, 4 Dec 2012 20:58:55 +0800, >> Daniel J Blueman wrote: >> > >> > On 4 December 2012 01:10, Takashi Iwai wrote: >> > > At Tue, 4 Dec 2012 00:50:56 +0800, >> > > Daniel J Blueman wrote: >> > >> >> > >> On 4 December 2012 00:23, Takashi Iwai wrote: >> > >> > At Mon, 3 Dec 2012 23:08:28 +0800, >> > >> > Daniel J Blueman wrote: >> > >> >> >> > >> >> On 3 December 2012 22:40, Takashi Iwai wrote: >> > >> >> > At Mon, 3 Dec 2012 22:25:52 +0800, >> > >> >> > Daniel J Blueman wrote: >> > >> >> >> >> > >> >> >> On 3 December 2012 19:17, Takashi Iwai wrote: >> > >> >> >> > At Wed, 28 Nov 2012 09:45:39 +0100, >> > >> >> >> > Takashi Iwai wrote: >> > >> >> >> >> >> > >> >> >> >> At Wed, 28 Nov 2012 11:45:07 +0800, >> > >> >> >> >> Daniel J Blueman wrote: >> > >> >> >> >> > >> > >> >> >> >> > Hi Seth, Dave, Takashi, >> > >> >> >> >> > >> > >> >> >> >> > If I power down the unused discrete GPU before lightdm >> > >> >> >> >> > starts by >> > >> >> >> >> > fiddling with the sysfs file [1] in the upstart script, I >> > >> >> >> >> > see a race >> > >> >> >> >> > manifesting as the discrete GPU's HDA controller timing out >> > >> >> >> >> > to >> > >> >> >> >> > commands [2]. >> > >> >> >> >> > >> > >> >> >> >> > Adding some debug, I see that the registered audio devices >> > >> >> >> >> > are put >> > >> >> >> >> > into D3 before the GPU is, but it turns out that the >> > >> >> >> >> > discrete (and >> > >> >> >> >> > internal) GPU's HDA controller gets registered a bit later, >> > >> >> >> >> > so the >> > >> >> >> >> > list is empty. The symptom is since the HDA driver it's >> > >> >> >> >> > talking to >> > >> >> >> >> > hardware which is now in D3. 
>> > >> >> >> >> > >> > >> >> >> >> > We could add a mutex to nouveau to allow us to wait for the >> > >> >> >> >> > DGPU HDA >> > >> >> >> >> > controller, but perhaps this should be solved at a higher >> > >> >> >> >> > level in the >> > >> >> >> >> > vgaswitcheroo code; what do you think? >> > >> >> >> >> >> > >> >> >> >> Maybe it's a side effect for the recent effort to fix another >> > >> >> >> >> race in >> > >> >> >> >> the probe. A part of them problem is that the registration is >> > >> >> >> >> done at >> > >> >> >> >> the very last of probing. >> > >> >> >> >> >> > >> >> >> >> Instead of delaying the registration, how about the patch >> > >> >> >> >> below? >> > >> >> >> > >> > >> >> >> > Ping. If this really works, I'd like to queue it for 3.8 >> > >> >> >> > merge, at >> > >> >> >> > least... >> > >> >> >> >> > >> >> >> Ping ack; I was trying to find time to understand another race >> > >> >> >> that >> > >> >> >> occurs with GPU probing after switching, but is separate from the >> > >> >> >> situation before switching, here. >> > >> >> >> >> > >> >> >> In the context of writing the switch, it looks like struct azx >> > >> >> >
[PATCH v2] HDA: Add PCI device prefix for clarity
When printing, use a prefix of the PCI domain, bus, device and function as in other drivers, to differentiate multiple devices. Important for reporting and debugging. A future step is to tidy this up with dev_printk et al. v2: Move conversion specifier into call site, preventing build issues Signed-off-by: Daniel J Blueman --- sound/pci/hda/hda_intel.c | 134 +++-- 1 file changed, 69 insertions(+), 65 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index f9d870e..eb92ab4 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -185,7 +185,7 @@ MODULE_DESCRIPTION("Intel HDA driver"); #ifdef CONFIG_SND_VERBOSE_PRINTK #define SFX/* nop */ #else -#define SFX"hda-intel: " +#define SFX"hda-intel " #endif #if defined(CONFIG_PM) && defined(CONFIG_VGA_SWITCHEROO) @@ -703,7 +703,7 @@ static int azx_alloc_cmd_io(struct azx *chip) snd_dma_pci_data(chip->pci), PAGE_SIZE, &chip->rb); if (err < 0) { - snd_printk(KERN_ERR SFX "cannot allocate CORB/RIRB\n"); + snd_printk(KERN_ERR SFX "%s: cannot allocate CORB/RIRB\n", pci_name(chip->pci)); return err; } mark_pages_wc(chip, &chip->rb, true); @@ -835,8 +835,8 @@ static void azx_update_rirb(struct azx *chip) smp_wmb(); chip->rirb.cmds[addr]--; } else - snd_printk(KERN_ERR SFX "spurious response %#x:%#x, " - "last cmd=%#08x\n", + snd_printk(KERN_ERR SFX "%s: spurious response %#x:%#x, " + "last cmd=%#08x\n", pci_name(chip->pci), res, res_ex, chip->last_cmd[addr]); } @@ -879,9 +879,9 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus, } if (!chip->polling_mode && chip->poll_count < 2) { - snd_printdd(SFX "azx_get_response timeout, " + snd_printdd(SFX "%s: azx_get_response timeout, " "polling the codec once: last cmd=0x%08x\n", - chip->last_cmd[addr]); + pci_name(chip->pci), chip->last_cmd[addr]); do_poll = 1; chip->poll_count++; goto again; @@ -889,17 +889,17 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus, if (!chip->polling_mode) { - snd_printk(KERN_WARNING 
SFX "azx_get_response timeout, " + snd_printk(KERN_WARNING SFX "%s: azx_get_response timeout, " "switching to polling mode: last cmd=0x%08x\n", - chip->last_cmd[addr]); + pci_name(chip->pci), chip->last_cmd[addr]); chip->polling_mode = 1; goto again; } if (chip->msi) { - snd_printk(KERN_WARNING SFX "No response from codec, " + snd_printk(KERN_WARNING SFX "%s: No response from codec, " "disabling MSI: last cmd=0x%08x\n", - chip->last_cmd[addr]); + pci_name(chip->pci), chip->last_cmd[addr]); free_irq(chip->irq, chip); chip->irq = -1; pci_disable_msi(chip->pci); @@ -965,8 +965,8 @@ static int azx_single_wait_for_response(struct azx *chip, unsigned int addr) udelay(1); } if (printk_ratelimit()) - snd_printd(SFX "get_response timeout: IRS=0x%x\n", - azx_readw(chip, IRS)); + snd_printd(SFX "%s: get_response timeout: IRS=0x%x\n", + pci_name(chip->pci), azx_readw(chip, IRS)); chip->rirb.res[addr] = -1; return -EIO; } @@ -993,8 +993,8 @@ static int azx_single_send_cmd(struct hda_bus *bus, u32 val) udelay(1); } if (printk_ratelimit()) - snd_printd(SFX "send_cmd timeout: IRS=0x%x, val=0x%x\n", - azx_readw(chip, IRS), val); + snd_printd(SFX "%s: send_cmd timeout: IRS=0x%x, val=0x%x\n", + pci_name(chip->pci), azx_readw(chip, IRS), val); return -EIO; } @@ -1080,7 +1080,7 @@ static int azx_reset(struct azx *chip, int full_reset) __skip: /* check to see if controller is ready */ if (!azx_readb(chip, GCTL)) { -
Re: switcheroo registration vs switching race...
On 4 December 2012 23:03, Takashi Iwai wrote: > At Tue, 4 Dec 2012 22:46:47 +0800, > Daniel J Blueman wrote: >> >> On 4 December 2012 21:55, Takashi Iwai wrote: >> > At Tue, 04 Dec 2012 14:23:05 +0100, >> > Takashi Iwai wrote: >> >> >> >> At Tue, 4 Dec 2012 20:58:55 +0800, >> >> Daniel J Blueman wrote: >> >> > >> >> > On 4 December 2012 01:10, Takashi Iwai wrote: >> >> > > At Tue, 4 Dec 2012 00:50:56 +0800, >> >> > > Daniel J Blueman wrote: >> >> > >> >> >> > >> On 4 December 2012 00:23, Takashi Iwai wrote: >> >> > >> > At Mon, 3 Dec 2012 23:08:28 +0800, >> >> > >> > Daniel J Blueman wrote: >> >> > >> >> >> >> > >> >> On 3 December 2012 22:40, Takashi Iwai wrote: >> >> > >> >> > At Mon, 3 Dec 2012 22:25:52 +0800, >> >> > >> >> > Daniel J Blueman wrote: >> >> > >> >> >> >> >> > >> >> >> On 3 December 2012 19:17, Takashi Iwai wrote: >> >> > >> >> >> > At Wed, 28 Nov 2012 09:45:39 +0100, >> >> > >> >> >> > Takashi Iwai wrote: >> >> > >> >> >> >> >> >> > >> >> >> >> At Wed, 28 Nov 2012 11:45:07 +0800, >> >> > >> >> >> >> Daniel J Blueman wrote: >> >> > >> >> >> >> > >> >> > >> >> >> >> > Hi Seth, Dave, Takashi, >> >> > >> >> >> >> > >> >> > >> >> >> >> > If I power down the unused discrete GPU before lightdm >> >> > >> >> >> >> > starts by >> >> > >> >> >> >> > fiddling with the sysfs file [1] in the upstart script, I >> >> > >> >> >> >> > see a race >> >> > >> >> >> >> > manifesting as the discrete GPU's HDA controller timing >> >> > >> >> >> >> > out to >> >> > >> >> >> >> > commands [2]. >> >> > >> >> >> >> > >> >> > >> >> >> >> > Adding some debug, I see that the registered audio >> >> > >> >> >> >> > devices are put >> >> > >> >> >> >> > into D3 before the GPU is, but it turns out that the >> >> > >> >> >> >> > discrete (and >> >> > >> >> >> >> > internal) GPU's HDA controller gets registered a bit >> >> > >> >> >> >> > later, so the >> >> > >> >> >> >> > list is empty. 
The symptom is since the HDA driver it's >> >> > >> >> >> >> > talking to >> >> > >> >> >> >> > hardware which is now in D3. >> >> > >> >> >> >> > >> >> > >> >> >> >> > We could add a mutex to nouveau to allow us to wait for >> >> > >> >> >> >> > the DGPU HDA >> >> > >> >> >> >> > controller, but perhaps this should be solved at a higher >> >> > >> >> >> >> > level in the >> >> > >> >> >> >> > vgaswitcheroo code; what do you think? >> >> > >> >> >> >> >> >> > >> >> >> >> Maybe it's a side effect for the recent effort to fix >> >> > >> >> >> >> another race in >> >> > >> >> >> >> the probe. A part of them problem is that the registration >> >> > >> >> >> >> is done at >> >> > >> >> >> >> the very last of probing. >> >> > >> >> >> >> >> >> > >> >> >> >> Instead of delaying the registration, how about the patch >> >> > >> >> >> >> below? >> >> > >> >> >> > >> >> > >> >> >> > Ping. If this really works, I'd like to queue it f
Re: switcheroo registration vs switching race...
On 5 December 2012 00:04, Takashi Iwai wrote: > At Tue, 4 Dec 2012 23:54:39 +0800, > Daniel J Blueman wrote: >> >> On 4 December 2012 23:03, Takashi Iwai wrote: >> > At Tue, 4 Dec 2012 22:46:47 +0800, >> > Daniel J Blueman wrote: >> >> >> >> On 4 December 2012 21:55, Takashi Iwai wrote: >> >> > At Tue, 04 Dec 2012 14:23:05 +0100, >> >> > Takashi Iwai wrote: >> >> >> >> >> >> At Tue, 4 Dec 2012 20:58:55 +0800, >> >> >> Daniel J Blueman wrote: >> >> >> > >> >> >> > On 4 December 2012 01:10, Takashi Iwai wrote: >> >> >> > > At Tue, 4 Dec 2012 00:50:56 +0800, >> >> >> > > Daniel J Blueman wrote: >> >> >> > >> >> >> >> > >> On 4 December 2012 00:23, Takashi Iwai wrote: >> >> >> > >> > At Mon, 3 Dec 2012 23:08:28 +0800, >> >> >> > >> > Daniel J Blueman wrote: >> >> >> > >> >> >> >> >> > >> >> On 3 December 2012 22:40, Takashi Iwai wrote: >> >> >> > >> >> > At Mon, 3 Dec 2012 22:25:52 +0800, >> >> >> > >> >> > Daniel J Blueman wrote: >> >> >> > >> >> >> >> >> >> > >> >> >> On 3 December 2012 19:17, Takashi Iwai >> >> >> > >> >> >> wrote: >> >> >> > >> >> >> > At Wed, 28 Nov 2012 09:45:39 +0100, >> >> >> > >> >> >> > Takashi Iwai wrote: >> >> >> > >> >> >> >> >> >> >> > >> >> >> >> At Wed, 28 Nov 2012 11:45:07 +0800, >> >> >> > >> >> >> >> Daniel J Blueman wrote: >> >> >> > >> >> >> >> > >> >> >> > >> >> >> >> > Hi Seth, Dave, Takashi, >> >> >> > >> >> >> >> > >> >> >> > >> >> >> >> > If I power down the unused discrete GPU before lightdm >> >> >> > >> >> >> >> > starts by >> >> >> > >> >> >> >> > fiddling with the sysfs file [1] in the upstart >> >> >> > >> >> >> >> > script, I see a race >> >> >> > >> >> >> >> > manifesting as the discrete GPU's HDA controller >> >> >> > >> >> >> >> > timing out to >> >> >> > >> >> >> >> > commands [2]. 
>> >> >> > >> >> >> >> > >> >> >> > >> >> >> >> > Adding some debug, I see that the registered audio >> >> >> > >> >> >> >> > devices are put >> >> >> > >> >> >> >> > into D3 before the GPU is, but it turns out that the >> >> >> > >> >> >> >> > discrete (and >> >> >> > >> >> >> >> > internal) GPU's HDA controller gets registered a bit >> >> >> > >> >> >> >> > later, so the >> >> >> > >> >> >> >> > list is empty. The symptom is since the HDA driver >> >> >> > >> >> >> >> > it's talking to >> >> >> > >> >> >> >> > hardware which is now in D3. >> >> >> > >> >> >> >> > >> >> >> > >> >> >> >> > We could add a mutex to nouveau to allow us to wait >> >> >> > >> >> >> >> > for the DGPU HDA >> >> >> > >> >> >> >> > controller, but perhaps this should be solved at a >> >> >> > >> >> >> >> > higher level in the >> >> >> > >> >> >> >> > vgaswitcheroo code; what do you think? >> >> >> > >> >> >> >> >> >> >> > >> >> >> >> Maybe it's a side effect for the recent effort to fix >> >> >> > >> >> >>
[PATCH v3] HDA: Add PCI device prefix for clarity
When printing, use a prefix of the PCI domain, bus, device and function as in other drivers, to differentiate multiple devices. Important for reporting and debugging. A future step is to tidy this up with dev_printk et al. v2: Move conversion specifier into call site, preventing build issues v3: Refactor for Takashi's for-next branch Signed-off-by: Daniel J Blueman --- sound/pci/hda/hda_intel.c | 125 +++-- 1 file changed, 64 insertions(+), 61 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 22ecadc..eb48109 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -192,7 +192,7 @@ MODULE_DESCRIPTION("Intel HDA driver"); #ifdef CONFIG_SND_VERBOSE_PRINTK #define SFX/* nop */ #else -#define SFX"hda-intel: " +#define SFX"hda-intel " #endif #if defined(CONFIG_PM) && defined(CONFIG_VGA_SWITCHEROO) @@ -717,7 +717,7 @@ static int azx_alloc_cmd_io(struct azx *chip) snd_dma_pci_data(chip->pci), PAGE_SIZE, &chip->rb); if (err < 0) { - snd_printk(KERN_ERR SFX "cannot allocate CORB/RIRB\n"); + snd_printk(KERN_ERR SFX "%s: cannot allocate CORB/RIRB\n", pci_name(chip->pci)); return err; } mark_pages_wc(chip, &chip->rb, true); @@ -894,9 +894,9 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus, } if (!chip->polling_mode && chip->poll_count < 2) { - snd_printdd(SFX "azx_get_response timeout, " + snd_printdd(SFX "%s: azx_get_response timeout, " "polling the codec once: last cmd=0x%08x\n", - chip->last_cmd[addr]); + pci_name(chip->pci), chip->last_cmd[addr]); do_poll = 1; chip->poll_count++; goto again; @@ -904,17 +904,17 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus, if (!chip->polling_mode) { - snd_printk(KERN_WARNING SFX "azx_get_response timeout, " + snd_printk(KERN_WARNING SFX "%s: azx_get_response timeout, " "switching to polling mode: last cmd=0x%08x\n", - chip->last_cmd[addr]); + pci_name(chip->pci), chip->last_cmd[addr]); chip->polling_mode = 1; goto again; } if (chip->msi) { - 
snd_printk(KERN_WARNING SFX "No response from codec, " + snd_printk(KERN_WARNING SFX "%s: No response from codec, " "disabling MSI: last cmd=0x%08x\n", - chip->last_cmd[addr]); + pci_name(chip->pci), chip->last_cmd[addr]); free_irq(chip->irq, chip); chip->irq = -1; pci_disable_msi(chip->pci); @@ -980,8 +980,8 @@ static int azx_single_wait_for_response(struct azx *chip, unsigned int addr) udelay(1); } if (printk_ratelimit()) - snd_printd(SFX "get_response timeout: IRS=0x%x\n", - azx_readw(chip, IRS)); + snd_printd(SFX "%s: get_response timeout: IRS=0x%x\n", + pci_name(chip->pci), azx_readw(chip, IRS)); chip->rirb.res[addr] = -1; return -EIO; } @@ -1008,8 +1008,8 @@ static int azx_single_send_cmd(struct hda_bus *bus, u32 val) udelay(1); } if (printk_ratelimit()) - snd_printd(SFX "send_cmd timeout: IRS=0x%x, val=0x%x\n", - azx_readw(chip, IRS), val); + snd_printd(SFX "%s: send_cmd timeout: IRS=0x%x, val=0x%x\n", + pci_name(chip->pci), azx_readw(chip, IRS), val); return -EIO; } @@ -1095,7 +1095,7 @@ static int azx_reset(struct azx *chip, int full_reset) __skip: /* check to see if controller is ready */ if (!azx_readb(chip, GCTL)) { - snd_printd(SFX "azx_reset: controller not ready!\n"); + snd_printd(SFX "%s: azx_reset: controller not ready!\n", pci_name(chip->pci)); return -EBUSY; } @@ -1107,7 +1107,7 @@ static int azx_reset(struct azx *chip, int full_reset) /* detect codecs */ if (!chip->codec_mask) { chip->codec_mask = azx_readw(chip, STATESTS); - snd_printdd(SFX "codec_mask = 0x%x\n", chip->codec_mask); + snd_printdd(SFX "%s: codec_mask = 0x%x\n", pci_name(c
[PATCH] Fix printing when no interrupt is allocated
Previously a new line is implicitly added in the no GSI case: [7.185182] pci 0001:00:12.0: can't derive routing for PCI INT A [7.191352] pci 0001:00:12.0: PCI INT A: no GSI [7.195956] - using ISA IRQ 10 The code thus prints a blank line where no legacy IRQ is available: [1.650124] pci :00:14.0: can't derive routing for PCI INT A [1.650126] pci :00:14.0: PCI INT A: no GSI [1.650126] [1.650180] pci :00:14.0: can't derive routing for PCI INT A Fix this by making the newline explicit and removing the superfluous one. Signed-off-by: Daniel J Blueman --- drivers/acpi/pci_irq.c |8 +++- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index 0eefa12..2c37996 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -459,7 +459,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev) */ if (gsi < 0) { u32 dev_gsi; - dev_warn(&dev->dev, "PCI INT %c: no GSI", pin_name(pin)); + dev_warn(&dev->dev, "PCI INT %c: no GSI\n", pin_name(pin)); /* Interrupt Line values above 0xF are forbidden */ if (dev->irq > 0 && (dev->irq <= 0xF) && (acpi_isa_irq_to_gsi(dev->irq, &dev_gsi) == 0)) { @@ -467,11 +467,9 @@ int acpi_pci_irq_enable(struct pci_dev *dev) acpi_register_gsi(&dev->dev, dev_gsi, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW); - return 0; - } else { - printk("\n"); - return 0; } + + return 0; } rc = acpi_register_gsi(&dev->dev, gsi, triggering, polarity); -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 RESEND] Add NumaChip remote PCI support
On 01/12/2012 00:45, Bjorn Helgaas wrote: On Thu, Nov 29, 2012 at 10:28 PM, Daniel J Blueman On 29/11/2012 07:08, Bjorn Helgaas wrote: On Wed, Nov 21, 2012 at 1:39 AM, Daniel J Blueman wrote: Add NumaChip-specific PCI access mechanism via MMCONFIG cycles, but preventing access to AMD Northbridges which shouldn't respond. v2: Use PCI_DEVFN in precomputed constant limit; drop unneeded includes Signed-off-by: Daniel J Blueman --- arch/x86/include/asm/numachip/numachip.h | 20 + arch/x86/kernel/apic/apic_numachip.c |2 + arch/x86/pci/Makefile|1 + arch/x86/pci/numachip.c | 134 ++ 4 files changed, 157 insertions(+) create mode 100644 arch/x86/include/asm/numachip/numachip.h create mode 100644 arch/x86/pci/numachip.c diff --git a/arch/x86/include/asm/numachip/numachip.h b/arch/x86/include/asm/numachip/numachip.h new file mode 100644 index 000..d35e71a --- /dev/null +++ b/arch/x86/include/asm/numachip/numachip.h @@ -0,0 +1,20 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Numascale NumaConnect-specific header file + * + * Copyright (C) 2012 Numascale AS. All rights reserved. 
+ * + * Send feedback to + * + */ + +#ifndef _ASM_X86_NUMACHIP_NUMACHIP_H +#define _ASM_X86_NUMACHIP_NUMACHIP_H + +extern int __init pci_numachip_init(void); + +#endif /* _ASM_X86_NUMACHIP_NUMACHIP_H */ + diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index a65829a..9c2aa89 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -179,6 +180,7 @@ static int __init numachip_system_init(void) return 0; x86_cpuinit.fixup_cpu_id = fixup_cpu_id; + x86_init.pci.arch_init = pci_numachip_init; map_csrs(); diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index 3af5a1e..ee0af58 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_STA2X11) += sta2x11-fixup.o obj-$(CONFIG_X86_VISWS)+= visws.o obj-$(CONFIG_X86_NUMAQ)+= numaq_32.o +obj-$(CONFIG_X86_NUMACHIP) += numachip.o It looks like this depends on CONFIG_PCI_MMCONFIG for pci_mmconfig_lookup(). Are there config constraints that force CONFIG_PCI_MMCONFIG=y when CONFIG_X86_NUMACHIP=y? I'll revise the patch with this constraint after we work out the best approach for below. obj-$(CONFIG_X86_INTEL_MID)+= mrst.o diff --git a/arch/x86/pci/numachip.c b/arch/x86/pci/numachip.c new file mode 100644 index 000..3773e05 --- /dev/null +++ b/arch/x86/pci/numachip.c @@ -0,0 +1,129 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Numascale NumaConnect-specific PCI code + * + * Copyright (C) 2012 Numascale AS. All rights reserved. 
+ * + * Send feedback to + * + * PCI accessor functions derived from mmconfig_64.c + * + */ + +#include +#include + +static u8 limit __read_mostly; + +static inline char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) +{ + struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus); + + if (cfg && cfg->virt) + return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12)); + return NULL; +} Most of this file is copied directly from mmconfig_64.c (as you mentioned above). I wonder if we could avoid the code duplication by making the pci_dev_base() implementation in mmconfig_64.c a weak definition. Then you could just supply a non-weak pci_dev_base() here that would override that default version. Your version would look something like: char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) { struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus); if (cfg && cfg->virt && devfn < limit) return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12)); return NULL; } That would be different from what you have in this patch because reads & writes to devices above "limit" would return -EINVAL rather than 0 as you do here. Would that be a problem? That would work nicely (pointer lookup and inlining etc aside) if there was the runtime ability to override pci_dev_base only if the NumaChip signature was detected. We could expose pci_dev_base via struct x86_init_pci; the extra complexity and performance tradeoff may not be worth it for a single case perhaps? Oh, right, I forgot that you can't decide this at
[PATCH] nouveau: Fix crash after D3
In 3.7-rc4, when starting X with the integrated GPU and suspending the discrete GPU, after one or more 32-bit applications are used (eg Skype) and X is stopped, we hit a panic. Prevent this by testing if the fini function is valid. Full panic bootlog is at: http://quora.org/2012/nouveau/dmesg-crash.txt Xorg.log is at: http://quora.org/2012/nouveau/Xorg.0.log-crash.txt Kernel log after fix is at: http://quora.org/2012/nouveau/dmesg-fix.txt Signed-off-by: Daniel J Blueman --- drivers/gpu/drm/nouveau/core/core/object.c | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/core/object.c b/drivers/gpu/drm/nouveau/core/core/object.c index 0daab62..3da3525 100644 --- a/drivers/gpu/drm/nouveau/core/core/object.c +++ b/drivers/gpu/drm/nouveau/core/core/object.c @@ -354,12 +354,16 @@ static int nouveau_object_decf(struct nouveau_object *object) { int ret; + struct nouveau_ofuncs *pfuncs; nv_trace(object, "stopping...\n"); - ret = nv_ofuncs(object)->fini(object, false); - if (ret) - nv_warn(object, "failed fini, %d\n", ret); + pfuncs = nv_ofuncs(object); + if (pfuncs->fini) { + ret = nv_ofuncs(object)->fini(object, false); + if (ret) + nv_warn(object, "failed fini, %d\n", ret); + } if (object->engine) { mutex_lock(&nv_subdev(object->engine)->mutex); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH, RESEND] nouveau: Prevent kernel log mangling
On 3.7-rc4, add missing newline to prevent the following kernel log line getting appended to the current one after switching to the integrated GPU and suspending the discrete GPU. Signed-off-by: Daniel J Blueman --- drivers/gpu/drm/nouveau/core/subdev/i2c/aux.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/core/subdev/i2c/aux.c b/drivers/gpu/drm/nouveau/core/subdev/i2c/aux.c index fe1ebf1..dc27e79 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/i2c/aux.c +++ b/drivers/gpu/drm/nouveau/core/subdev/i2c/aux.c @@ -50,7 +50,7 @@ auxch_init(struct nouveau_i2c *aux, int ch) ctrl = nv_rd32(aux, 0x00e4e4 + (ch * 0x50)); udelay(1); if (!timeout--) { - AUX_ERR("begin idle timeout 0x%08x", ctrl); + AUX_ERR("begin idle timeout 0x%08x\n", ctrl); return -EBUSY; } } while (ctrl & 0x0301); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
3.8-rc2: EFI framebuffer lock inversion...
obe_device+0x76/0x240 [] __driver_attach+0xa3/0xb0 [] bus_for_each_dev+0x4d/0x90 [] driver_attach+0x19/0x20 [] bus_add_driver+0x1a0/0x270 [] driver_register+0x72/0x170 [] platform_driver_register+0x41/0x50 [] platform_driver_probe+0x16/0xa0 [] efifb_init+0x273/0x292 [] do_one_initcall+0x11a/0x170 [] kernel_init+0x11c/0x290 [] ret_from_fork+0x7c/0xb0 -> #0 ((fb_notifier_list).rwsem){.+}: [] validate_chain.isra.33+0x1000/0x10d0 [] __lock_acquire+0x3a1/0xb60 [] lock_acquire+0x5a/0x70 [] down_read+0x47/0x5c [] __blocking_notifier_call_chain+0x51/0xc0 [] blocking_notifier_call_chain+0x11/0x20 [] fb_notifier_call_chain+0x16/0x20 [] fb_set_suspend+0x46/0x60 [] nouveau_fbcon_set_suspend+0x92/0xc0 [nouveau] [] nouveau_do_suspend+0x51/0x200 [nouveau] [] nouveau_pmops_suspend+0x2f/0x80 [nouveau] [] nouveau_switcheroo_set_state+0x5c/0xc0 [nouveau] [] vga_switchoff+0x17/0x40 [] vga_switcheroo_debugfs_write+0xca/0x380 [] vfs_write+0xa3/0x160 [] sys_write+0x4d/0xa0 [] system_call_fastpath+0x1a/0x1f other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 lock(console_lock); lock((fb_notifier_list).rwsem); lock(console_lock); lock((fb_notifier_list).rwsem); *** DEADLOCK *** 2 locks held by sh/1017: #0: (vgasr_mutex){+.+.+.}, at: [] vga_switcheroo_debugfs_write+0x57/0x380 #1: (console_lock){+.+.+.}, at: [] nouveau_fbcon_set_suspend+0x25/0xc0 [nouveau] stack backtrace: Pid: 1017, comm: sh Not tainted 3.8.0-rc2-expert #1 Call Trace: [] print_circular_bug+0x28e/0x29f [] validate_chain.isra.33+0x1000/0x10d0 [] __lock_acquire+0x3a1/0xb60 [] ? __lock_is_held+0x54/0x80 [] lock_acquire+0x5a/0x70 [] ? __blocking_notifier_call_chain+0x51/0xc0 [] down_read+0x47/0x5c [] ? __blocking_notifier_call_chain+0x51/0xc0 [] __blocking_notifier_call_chain+0x51/0xc0 [] blocking_notifier_call_chain+0x11/0x20 [] fb_notifier_call_chain+0x16/0x20 [] fb_set_suspend+0x46/0x60 [] ? console_lock+0x77/0x80 [] ? 
nouveau_fbcon_set_suspend+0x25/0xc0 [nouveau] [] nouveau_fbcon_set_suspend+0x92/0xc0 [nouveau] [] nouveau_do_suspend+0x51/0x200 [nouveau] [] nouveau_pmops_suspend+0x2f/0x80 [nouveau] [] nouveau_switcheroo_set_state+0x5c/0xc0 [nouveau] [] vga_switchoff+0x17/0x40 [] vga_switcheroo_debugfs_write+0xca/0x380 [] vfs_write+0xa3/0x160 [] sys_write+0x4d/0xa0 [] system_call_fastpath+0x1a/0x1f nouveau [ DRM] suspending display... nouveau [ DRM] unpinning framebuffer(s)... nouveau [ DRM] evicting buffers... nouveau [ DRM] suspending client object trees... tg3 :0a:00.0 eth0: Link is up at 1000 Mbps, full duplex tg3 :0a:00.0 eth0: Flow control is on for TX and on for RX nouveau E[ I2C][:01:00.0] AUXCH(3): begin idle timeout 0x nouveau E[ I2C][:01:00.0] AUXCH(2): begin idle timeout 0x nouveau E[ I2C][:01:00.0] AUXCH(1): begin idle timeout 0xffff -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 3.8-rc2: EFI framebuffer lock inversion...
On 3 January 2013 21:11, Alan Cox wrote: > On Thu, 3 Jan 2013 20:56:30 +0800 > Daniel J Blueman wrote: > >> On 3.8-rc2 with lockdep enabled and dual-GPU setup (Macbook Pro >> Retina), I see two related lock inversion issues with the EFI >> framebuffer, leading to possible deadlock: when X takes over from the >> EFI framebuffer [1] and when nouveau releases the framebuffer when >> being vgaswitcherood [2]. >> >> Let me know if you'd like any testing or analysis when I can get the time. > > The fb layer locking was broken. I posted patches early December which > should have fixed the ones we know about. ('fb: Rework locking to fix > lock ordering on takeover'). Superb work, Alan! The only patch I could find [1] (mid Nov) looks like it needs other sites updating, since we now see an i915 vs efifb lock ordering issue [2]. I can get some time next week to take a look if it helps. Thanks, Daniel --- [1] https://patchwork.kernel.org/patch/1757061/ --- [2] [drm] Memory usable by graphics device = 2048M checking generic (b000 144) vs hw (b000 1000) fb: conflicting fb hw usage inteldrmfb vs EFI VGA - removing generic driver == [ INFO: possible circular locking dependency detected ] 3.8.0-rc2-expert+ #2 Not tainted --- modprobe/603 is trying to acquire lock: (console_lock){+.+.+.}, at: [] unbind_con_driver+0x3f/0x200 but task is already holding lock: ((fb_notifier_list).rwsem){.+}, at: [] __blocking_notifier_call_chain+0x51/0xc0 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #1 ((fb_notifier_list).rwsem){.+}: [] __lock_acquire+0x3a1/0xb60 [] lock_acquire+0x5a/0x70 [] down_read+0x47/0x5c [] __blocking_notifier_call_chain+0x51/0xc0 [] blocking_notifier_call_chain+0x11/0x20 [] fb_notifier_call_chain+0x16/0x20 [] register_framebuffer+0x1c0/0x300 [] efifb_probe+0x40f/0x496 [] platform_drv_probe+0x3e/0x70 [] driver_probe_device+0x76/0x240 [] __driver_attach+0xa3/0xb0 [] bus_for_each_dev+0x4d/0x90 [] driver_attach+0x19/0x20 [] bus_add_driver+0x1a0/0x270 [] driver_register+0x72/0x170 [] platform_driver_register+0x41/0x50 [] platform_driver_probe+0x16/0xa0 [] efifb_init+0x273/0x292 [] do_one_initcall+0x11a/0x170 [] kernel_init+0x11c/0x290 [] ret_from_fork+0x7c/0xb0 -> #0 (console_lock){+.+.+.}: [] validate_chain.isra.33+0x1000/0x10d0 [] __lock_acquire+0x3a1/0xb60 [] lock_acquire+0x5a/0x70 [] console_lock+0x77/0x80 [] unbind_con_driver+0x3f/0x200 [] fbcon_event_notify+0x447/0x8b0 [] notifier_call_chain+0x55/0x110 [] __blocking_notifier_call_chain+0x67/0xc0 [] blocking_notifier_call_chain+0x11/0x20 [] fb_notifier_call_chain+0x16/0x20 [] do_unregister_framebuffer+0x5b/0x110 [] do_remove_conflicting_framebuffers+0x158/0x190 [] remove_conflicting_framebuffers+0x3a/0x60 [] i915_driver_load+0x7d4/0xe70 [i915] [] drm_get_pci_dev+0x17e/0x2b0 [] i915_pci_probe+0x36/0x90 [i915] [] local_pci_probe+0x46/0x80 [] pci_device_probe+0x101/0x110 [] driver_probe_device+0x76/0x240 [] __driver_attach+0xa3/0xb0 [] bus_for_each_dev+0x4d/0x90 [] driver_attach+0x19/0x20 [] bus_add_driver+0x1a0/0x270 [] driver_register+0x72/0x170 [] __pci_register_driver+0x5f/0x70 [] drm_pci_init+0x115/0x130 [] i915_init+0x66/0x68 [i915] [] do_one_initcall+0x11a/0x170 [] load_module+0xfd4/0x13c0 [] sys_init_module+0xb7/0xe0 [] system_call_fastpath+0x1a/0x1f other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 lock((fb_notifier_list).rwsem); lock(console_lock); lock((fb_notifier_list).rwsem); 
lock(console_lock); *** DEADLOCK *** 6 locks held by modprobe/603: #0: (&__lockdep_no_validate__){..}, at: [] __driver_attach+0x53/0xb0 #1: (&__lockdep_no_validate__){..}, at: [] __driver_attach+0x61/0xb0 #2: (drm_global_mutex){+.+.+.}, at: [] drm_get_pci_dev+0xbc/0x2b0 #3: (registration_lock){+.+.+.}, at: [] remove_conflicting_framebuffers+0x2b/0x60 #4: (&fb_info->lock){+.+.+.}, at: [] lock_fb_info+0x21/0x60 #5: ((fb_notifier_list).rwsem){.+}, at: [] __blocking_notifier_call_chain+0x51/0xc0 stack backtrace: Pid: 603, comm: modprobe Not tainted 3.8.0-rc2-expert+ #2 Call Trace: [] print_circular_bug+0x28e/0x29f [] validate_chain.isra.33+0x1000/0x10d0 [] __lock_acquire+0x3a1/0xb60 [] ? _raw_spin_unlock_irqrestore+0x3a/0x70 [] ? trace_hardirqs_on_caller+0x10d/0x1a0 [] lock_acquire+0x5a/0x70 [] ? unbind_con_driver+0x3f/0x200 [] console_lock+0x77/0x80 [] ? unbind_con_driver+0x3f/0x200 [] unbind_con_driver+0x3f/0x200 []
Re: 3.8-rc2: EFI framebuffer lock inversion...
On 3 January 2013 22:11, Sedat Dilek wrote: > Hi Daniel, > > just wanted to test the fb-fix [2] from Alan and followed the thread in [1]. > Me is also working with i915 KMS. > > I looked at nouveau KMS driver and adapted the part for i915: > > drivers/gpu/drm/nouveau/nouveau_drm.c-200- /* remove conflicting > drivers (vesafb, efifb etc) */ > drivers/gpu/drm/nouveau/nouveau_drm.c:201: aper = alloc_apertures(3); > drivers/gpu/drm/nouveau/nouveau_drm.c-202- if (!aper) > drivers/gpu/drm/nouveau/nouveau_drm.c-203- return -ENOMEM; > > Untested by me, feel free to test. > > Maybe some of the i915 and/or fb driver experts can comment on the problem. The structure array from alloc_apertures is just used for the PCI base address registers, so it's important here. I'll take a look at the efifb locking later. Thanks, Daniel -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
3.8-rc2: pciehp waitqueue hang...
When the Apple thunderbolt ethernet adapter comes loose on my Macbook Pro Retina (Intel DSL3510), we see pci_slot_name return non-deterministic data (ie varying each boot), and we see pciehp_wp remain armed with events causing the kthread to get stuck: tg3 :0a:00.0 eth0: Link is up at 1000 Mbps, full duplex tg3 :0a:00.0 eth0: Flow control is on for TX and on for RX pciehp :06:03.0:pcie24: Card not present on Slot(3) tg3 :0a:00.0: tg3_abort_hw timed out, TX_MODE_ENABLE will not clear MAC_TX_MODE= tg3 :0a:00.0 eth0: No firmware running tg3 :0a:00.0 eth0: Link is down pcieport :00:01.1: System wakeup enabled by ACPI pciehp :09:00.0:pcie24: unloading service driver pciehp pciehp :09:00.0:pcie24: Latch open on Slot(\xfff89\xffbbe\x02\xff88\x\x\xffe09\xffbbe\x02\xff88\x\xfbcon) pciehp :09:00.0:pcie24: Button pressed on Slot(\xfff89\xffbbe\x02\xff88\x\x\xffe09\xffbbe\x02\xff88\x\xfbcon) pciehp :09:00.0:pcie24: Card present on Slot(\xfff89\xffbbe\x02\xff88\x\x\xffe09\xffbbe\x02\xff88\x\xfbcon) pciehp :09:00.0:pcie24: Power fault on slot \xfff89\xffbbe\x02\xff88\x\x\xffe09\xffbbe\x02\xff88\x\xfbcon pciehp :09:00.0:pcie24: Power fault bit 0 set pciehp :09:00.0:pcie24: PCI slot #\xfff89\xffbbe\x02\xff88\x\x\xffe09\xffbbe\x02\xff88\x\xfbcon - powering on due to button press. pciehp :09:00.0:pcie24: Link Training Error occurs pciehp :09:00.0:pcie24: Failed to check link status INFO: task kworker/0:1:52 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. kworker/0:1 D 880265893090 0 52 2 0x 8802655456f8 0046 81a21a60 880265545fd8 4000 880265545fd8 880265892bb0 880265adc8d0 059e 0082 880265545668 810415aa Call Trace: [] ? console_unlock+0x1fa/0x4a0 [] ? trace_hardirqs_off+0xd/0x10 [] ? vprintk_emit+0x1c9/0x510 [] schedule+0x24/0x70 [] schedule_timeout+0x19c/0x1e0 [] wait_for_common+0xe3/0x180 [] ? flush_workqueue+0x111/0x4d0 [] ? try_to_wake_up+0x2d0/0x2d0 [] wait_for_completion+0x18/0x20 [] flush_workqueue+0x1d6/0x4d0 [] ? 
flush_workqueue_prep_cwqs+0x200/0x200 [] pciehp_release_ctrl+0x39/0x90 [] pciehp_remove+0x25/0x30 [] pcie_port_remove_service+0x52/0x70 [] __device_release_driver+0x77/0xe0 [] device_release_driver+0x29/0x40 [] bus_remove_device+0xf1/0x140 [] device_del+0x127/0x1c0 [] ? resume_iter+0x40/0x40 [] device_unregister+0x11/0x20 [] remove_iter+0x35/0x40 [] device_for_each_child+0x36/0x70 [] pcie_port_device_remove+0x21/0x40 [] pcie_portdrv_remove+0x28/0x50 [] pci_device_remove+0x41/0xc0 [] __device_release_driver+0x77/0xe0 [] device_release_driver+0x29/0x40 [] bus_remove_device+0xf1/0x140 [] device_del+0x127/0x1c0 [] device_unregister+0x11/0x20 [] pci_stop_bus_device+0x8c/0xa0 [] pci_stop_bus_device+0x35/0xa0 [] pci_stop_and_remove_bus_device+0x11/0x20 [] pciehp_unconfigure_device+0x91/0x190 [] ? pciehp_power_thread+0x2d/0x110 [] pciehp_disable_slot+0x71/0x220 [] pciehp_power_thread+0xe6/0x110 [] process_one_work+0x193/0x550 [] ? process_one_work+0x131/0x550 [] ? pciehp_disable_slot+0x220/0x220 [] worker_thread+0x15d/0x400 [] ? trace_hardirqs_on+0xd/0x10 [] ? rescuer_thread+0x210/0x210 [] kthread+0xd6/0xe0 [] ? _raw_spin_unlock_irq+0x2b/0x50 [] ? __init_kthread_worker+0x70/0x70 [] ret_from_fork+0x7c/0xb0 [] ? __init_kthread_worker+0x70/0x70 -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 3.8-rc2: pciehp waitqueue hang...
On 3 January 2013 23:41, Jiang Liu wrote: > On 01/03/2013 11:11 PM, Daniel J Blueman wrote: >> When the Apple thunderbolt ethernet adapter comes loose on my Macbook >> Pro Retina (Intel DSL3510), we see pci_slot_name return >> non-deterministic data (ie varying each boot), and we see pciehp_wp >> remain armed with events causing the kthread to get stuck: >> >> tg3 :0a:00.0 eth0: Link is up at 1000 Mbps, full duplex >> tg3 :0a:00.0 eth0: Flow control is on for TX and on for RX >> >> pciehp :06:03.0:pcie24: Card not present on Slot(3) >> tg3 :0a:00.0: tg3_abort_hw timed out, TX_MODE_ENABLE will not >> clear MAC_TX_MODE= >> tg3 :0a:00.0 eth0: No firmware running >> tg3 :0a:00.0 eth0: Link is down >> pcieport :00:01.1: System wakeup enabled by ACPI >> pciehp :09:00.0:pcie24: unloading service driver pciehp >> pciehp :09:00.0:pcie24: Latch open on >> Slot(\xfff89\xffbbe\x02\xff88\x\x\xffe09\xffbbe\x02\xff88\x\xfbcon) >> pciehp :09:00.0:pcie24: Button pressed on >> Slot(\xfff89\xffbbe\x02\xff88\x\x\xffe09\xffbbe\x02\xff88\x\xfbcon) >> pciehp :09:00.0:pcie24: Card present on >> Slot(\xfff89\xffbbe\x02\xff88\x\x\xffe09\xffbbe\x02\xff88\x\xfbcon) >> pciehp :09:00.0:pcie24: Power fault on slot >> \xfff89\xffbbe\x02\xff88\x\x\xffe09\xffbbe\x02\xff88\x\xfbcon >> pciehp :09:00.0:pcie24: Power fault bit 0 set >> pciehp :09:00.0:pcie24: PCI slot >> #\xfff89\xffbbe\x02\xff88\x\x\xffe09\xffbbe\x02\xff88\x\xfbcon >> - powering on due to button press. >> pciehp :09:00.0:pcie24: Link Training Error occurs >> pciehp :09:00.0:pcie24: Failed to check link status >> INFO: task kworker/0:1:52 blocked for more than 120 seconds. [...] > Hi Daniel, > It seems like an issue caused by recursive PCIe HPC. > Could you please help to try the patch from: > http://www.spinics.net/lists/linux-pci/msg18625.html > Thanks! > Gerry (adding Yijing) Splendid; this fixes this failure nicely [1], finally releasing the bus. If nothing else, I feel this should be queued for 3.8-rc3. 
Many thanks, Daniel --- [1] pciehp :06:03.0:pcie24: Card not present on Slot(3) tg3 :0a:00.0: tg3_abort_hw timed out, TX_MODE_ENABLE will not clear MAC_TX_MODE= tg3 :0a:00.0 eth0: No firmware running tg3 :0a:00.0 eth0: Link is down [sched_delayed] sched: RT throttling activated pcieport :00:01.1: System wakeup enabled by ACPI pciehp :09:00.0:pcie24: unloading service driver pciehp pciehp :09:00.0:pcie24: Latch open on Slot(\xffb0\x04Pd\x02\xff88\x\x\xff98\x04Pd\x02\xff88\x\xfbcon) pciehp :09:00.0:pcie24: Button pressed on Slot(\xffb0\x04Pd\x02\xff88\x\x\xff98\x04Pd\x02\xff88\x\xfbcon) pciehp :09:00.0:pcie24: Card present on Slot(\xffb0\x04Pd\x02\xff88\x\x\xff98\x04Pd\x02\xff88\x\xfbcon) pciehp :09:00.0:pcie24: Power fault on slot \xffb0\x04Pd\x02\xff88\x\x\xff98\x04Pd\x02\xff88\x\xfbcon pciehp :09:00.0:pcie24: Power fault bit 0 set pciehp :09:00.0:pcie24: PCI slot #\xffb0\x04Pd\x02\xff88\x\x\xff98\x04Pd\x02\xff88\x\xfbcon - powering on due to button press. pciehp :09:00.0:pcie24: Link Training Error occurs pciehp :09:00.0:pcie24: Failed to check link status pci_bus :0a: busn_res: [bus 0a] is released pci_bus :09: busn_res: [bus 09-0a] is released -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] HDA: Fix sound resume hang
Resuming a switcheroo'd HDA controller hangs since the completion is one-shot (thus works the first time). Fix by using complete_all(), so that the completion explicitly needs rearming and remains fired for later waiters. Signed-off-by: Daniel J Blueman --- sound/pci/hda/hda_intel.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 22ecadc..e12b939 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2856,7 +2856,7 @@ static int azx_free(struct azx *chip) azx_notifier_unregister(chip); chip->init_failed = 1; /* to be sure */ - complete(&chip->probe_wait); + complete_all(&chip->probe_wait); if (use_vga_switcheroo(chip)) { if (chip->disabled && chip->bus) @@ -3482,7 +3482,7 @@ static int __devinit azx_probe(struct pci_dev *pci, pm_runtime_put_noidle(&pci->dev); dev++; - complete(&chip->probe_wait); + complete_all(&chip->probe_wait); return 0; out_free: -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/3, v5] AMD64 EDAC: Add multi-domain support
On 12/11/2012 21:24, Borislav Petkov wrote: On Mon, Nov 05, 2012 at 02:05:24PM +0800, Daniel J Blueman wrote: Fix the handling of memory controller detection to index the array of detected Northbridges, allowing memory controllers over multiple PCI domains in federated systems eg using Numascale's NumaConnect/ NumaChip. v4: Generate linear Northbridge ID by indexing detected Northbridges v5: Reorder functions to prevent extra function declaration; merge 4th patch; simplify Fam15h code; add detail to warning Signed-off-by: Daniel J Blueman Acked-by: Borislav Petkov Btw, I don't have access to a multi-socket single-board AMD system right now so would you please test the patchset on such a system too, if you haven't done so yet? Thanks a lot. Yep, the expected memory controller indexes, population, column-strobe rows, banks and sysfs paths are detected on my hex-northbridge fam10h box with 3.7-rc5 with these patches: EDAC MC: Ver: 3.0.0 AMD64 EDAC driver v3.4.0 EDAC amd64: DRAM ECC enabled. EDAC amd64: F10h detected (node 0). EDAC MC: DCT0 chip selects: EDAC amd64: MC: 0: 0MB 1: 0MB EDAC amd64: MC: 2: 4096MB 3: 4096MB EDAC amd64: MC: 4: 0MB 5: 0MB EDAC amd64: MC: 6: 0MB 7: 0MB EDAC MC: DCT1 chip selects: EDAC amd64: MC: 0: 0MB 1: 0MB EDAC amd64: MC: 2: 4096MB 3: 4096MB EDAC amd64: MC: 4: 0MB 5: 0MB EDAC amd64: MC: 6: 0MB 7: 0MB EDAC amd64: using x8 syndromes. EDAC amd64: MCT channel count: 2 EDAC amd64: CS2: Registered DDR3 RAM EDAC amd64: CS3: Registered DDR3 RAM EDAC MC0: Giving out device to 'amd64_edac' 'F10h': DEV :00:18.2 EDAC amd64: DRAM ECC enabled. EDAC amd64: F10h detected (node 1). EDAC MC: DCT0 chip selects: EDAC amd64: MC: 0: 0MB 1: 0MB EDAC amd64: MC: 2: 4096MB 3: 4096MB EDAC amd64: MC: 4: 0MB 5: 0MB EDAC amd64: MC: 6: 0MB 7: 0MB EDAC MC: DCT1 chip selects: EDAC amd64: MC: 0: 0MB 1: 0MB EDAC amd64: MC: 2: 4096MB 3: 4096MB EDAC amd64: MC: 4: 0MB 5: 0MB EDAC amd64: MC: 6: 0MB 7: 0MB EDAC amd64: using x8 syndromes. 
EDAC amd64: MCT channel count: 2 EDAC amd64: CS2: Registered DDR3 RAM EDAC amd64: CS3: Registered DDR3 RAM EDAC MC1: Giving out device to 'amd64_edac' 'F10h': DEV :00:19.2 EDAC amd64: DRAM ECC enabled. EDAC amd64: F10h detected (node 2). EDAC MC: DCT0 chip selects: EDAC amd64: MC: 0: 0MB 1: 0MB EDAC amd64: MC: 2: 4096MB 3: 4096MB EDAC amd64: MC: 4: 0MB 5: 0MB EDAC amd64: MC: 6: 0MB 7: 0MB EDAC MC: DCT1 chip selects: EDAC amd64: MC: 0: 0MB 1: 0MB EDAC amd64: MC: 2: 4096MB 3: 4096MB EDAC amd64: MC: 4: 0MB 5: 0MB EDAC amd64: MC: 6: 0MB 7: 0MB EDAC amd64: using x8 syndromes. EDAC amd64: MCT channel count: 2 EDAC amd64: CS2: Registered DDR3 RAM EDAC amd64: CS3: Registered DDR3 RAM EDAC MC2: Giving out device to 'amd64_edac' 'F10h': DEV :00:1a.2 EDAC amd64: DRAM ECC enabled. EDAC amd64: F10h detected (node 3). EDAC MC: DCT0 chip selects: EDAC amd64: MC: 0: 0MB 1: 0MB EDAC amd64: MC: 2: 4096MB 3: 4096MB EDAC amd64: MC: 4: 0MB 5: 0MB EDAC amd64: MC: 6: 0MB 7: 0MB EDAC MC: DCT1 chip selects: EDAC amd64: MC: 0: 0MB 1: 0MB EDAC amd64: MC: 2: 4096MB 3: 4096MB EDAC amd64: MC: 4: 0MB 5: 0MB EDAC amd64: MC: 6: 0MB 7: 0MB EDAC amd64: using x8 syndromes. EDAC amd64: MCT channel count: 2 EDAC amd64: CS2: Registered DDR3 RAM EDAC amd64: CS3: Registered DDR3 RAM EDAC MC3: Giving out device to 'amd64_edac' 'F10h': DEV :00:1b.2 EDAC amd64: DRAM ECC enabled. EDAC amd64: F10h detected (node 4). EDAC MC: DCT0 chip selects: EDAC amd64: MC: 0: 0MB 1: 0MB EDAC amd64: MC: 2: 4096MB 3: 4096MB EDAC amd64: MC: 4: 0MB 5: 0MB EDAC amd64: MC: 6: 0MB 7: 0MB EDAC MC: DCT1 chip selects: EDAC amd64: MC: 0: 0MB 1: 0MB EDAC amd64: MC: 2: 4096MB 3: 4096MB EDAC amd64: MC: 4: 0MB 5: 0MB EDAC amd64: MC: 6: 0MB 7: 0MB EDAC amd64: using x8 syndromes. EDAC amd64: MCT channel count: 2 EDAC amd64: CS2: Registered DDR3 RAM EDAC amd64: CS3: Registered DDR3 RAM EDAC MC4: Giving out device to 'amd64_edac' 'F10h': DEV :00:1c.2 EDAC amd64: DRAM ECC enabled. EDAC amd64: F10h detected (node 5). 
EDAC MC: DCT0 chip selects: EDAC amd64: MC: 0: 0MB 1: 0MB EDAC amd64: MC: 2: 4096MB 3: 4096MB EDAC amd64: MC: 4: 0MB 5: 0MB EDAC amd64: MC: 6: 0MB 7: 0MB EDAC MC: DCT1 chip selects: EDAC amd64: MC: 0: 0MB 1: 0MB EDAC amd64: MC: 2: 4096MB 3: 4096MB EDAC amd64: MC: 4: 0MB 5: 0MB EDAC amd64: MC: 6: 0MB 7: 0MB EDAC amd64: using x8 syndromes. EDAC amd64: MCT channel count: 2 EDAC amd64: CS2: Registered DDR3 RAM EDAC amd64: CS3: Registered DDR3 RAM EDAC MC5: Giving out device to 'amd64_edac' 'F10h': DEV :00:1d.2 EDAC PCI0: Giving out device to module 'amd64_edac' controller 'EDAC PCI controller': DEV ':00:18.2' (POLLED) root@ibm-x3755-01:/sys/devices/system/edac# ls -d mc/mc*/{rank*,csrow*} mc/mc0/csrow
[PATCH] AHCI: fix build warning when PM && !PM_SLEEP
Change the conditional around ahci_suspend/resume to the same as the SIMPLE_DEV_PM_OPS macro that uses these functions, fixing an unused build warning. Signed-off-by: Daniel J Blueman --- drivers/ata/ahci_platform.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index b1ae480..b7078af 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -238,7 +238,7 @@ static int __devexit ahci_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int ahci_suspend(struct device *dev) { struct ahci_platform_data *pdata = dev_get_platdata(dev); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/3, v6] AMD64 EDAC: Add multi-domain support
Fix the handling of memory controller detection to index the array of detected Northbridges, allowing memory controllers over multiple PCI domains in federated systems eg using Numascale's NumaConnect/ NumaChip. v4: Generate linear Northbridge ID by indexing detected Northbridges v5: Reorder functions to prevent extra function declaration; merge 4th patch; simplify Fam15h code; add detail to warning v6: Remove unused variable after simplification Signed-off-by: Daniel J Blueman --- arch/x86/include/asm/amd_nb.h | 13 +++ drivers/edac/amd64_edac.c | 48 + drivers/edac/amd64_edac.h |6 -- 3 files changed, 38 insertions(+), 29 deletions(-) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index b3341e9..9f5532a 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -81,6 +81,19 @@ static inline struct amd_northbridge *node_to_amd_nb(int node) return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; } +static inline u16 amd_get_node_id(struct pci_dev *pdev) +{ + int i; + + for (i = 0; i != amd_nb_num(); i++) + if (pci_domain_nr(node_to_amd_nb(i)->misc->bus) == pci_domain_nr(pdev->bus) && + PCI_SLOT(node_to_amd_nb(i)->misc->devfn) == PCI_SLOT(pdev->devfn)) + return i; + + WARN(1, "Unable to find AMD Northbridge identifier for %s\n", pci_name(pdev)); + return 0; +} + #else #define amd_nb_num(x) 0 diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index cc8e7c7..8de8873 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -982,6 +982,24 @@ static u64 get_error_address(struct mce *m) return addr; } +static struct pci_dev *pci_get_related_function(unsigned int vendor, + unsigned int device, + struct pci_dev *related) +{ + struct pci_dev *dev = NULL; + + dev = pci_get_device(vendor, device, dev); + while (dev) { + if (pci_domain_nr(dev->bus) == pci_domain_nr(related->bus) && + (dev->bus->number == related->bus->number) && + (PCI_SLOT(dev->devfn) == 
PCI_SLOT(related->devfn))) + break; + dev = pci_get_device(vendor, device, dev); + } + + return dev; +} + static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -1001,11 +1019,12 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) /* Factor in CC6 save area by reading dst node's limit reg */ if (c->x86 == 0x15) { - struct pci_dev *f1 = NULL; - u8 nid = dram_dst_node(pvt, range); + struct pci_dev *misc, *f1 = NULL; + u16 nid = dram_dst_node(pvt, range); u32 llim; - f1 = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x18 + nid, 1)); + misc = node_to_amd_nb(nid)->misc; + f1 = pci_get_related_function(misc->vendor, PCI_DEVICE_ID_AMD_15H_NB_F1, misc); if (WARN_ON(!f1)) return; @@ -1712,23 +1731,6 @@ static struct amd64_family_type amd64_family_types[] = { }, }; -static struct pci_dev *pci_get_related_function(unsigned int vendor, - unsigned int device, - struct pci_dev *related) -{ - struct pci_dev *dev = NULL; - - dev = pci_get_device(vendor, device, dev); - while (dev) { - if ((dev->bus->number == related->bus->number) && - (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn))) - break; - dev = pci_get_device(vendor, device, dev); - } - - return dev; -} - /* * These are tables of eigenvectors (one per line) which can be used for the * construction of the syndrome tables. The modified syndrome search algorithm @@ -2546,7 +2548,7 @@ static int amd64_init_one_instance(struct pci_dev *F2) struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; int err = 0, ret; - u8 nid = get_node_id(F2); + u8 nid = amd_get_node_id(F2); ret = -ENOMEM; pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); @@ -2637,7 +2639,7 @@ err_ret: static int __devinit amd64_probe_one_instance(struct pci_dev *pdev, const struct pci_device_id *mc_type) { - u8 nid = get_node_id(pdev); + u8 nid = amd_get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; s
[PATCH 2/3, v3] AMD64 EDAC: Support >255 memory controllers
As the AMD64 last-level-cache ID is 16-bits and federated systems eg using Numascale's NumaConnect/NumaChip can have more than 255 memory controllers, use 16-bits to store the ID. v2: Avoid change to intlv_en variable v3: Drop unneeded change to index Signed-off-by: Daniel J Blueman --- drivers/edac/amd64_edac.c | 17 + 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 8de8873..6e3f002 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -942,7 +942,8 @@ static u64 get_error_address(struct mce *m) struct amd64_pvt *pvt; u64 cc6_base, tmp_addr; u32 tmp; - u8 mce_nid, intlv_en; + u16 mce_nid; + u8 intlv_en; if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7) return addr; @@ -2298,7 +2299,7 @@ out: return ret; } -static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) +static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on) { cpumask_var_t cmask; int cpu; @@ -2336,7 +2337,7 @@ static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) return 0; } -static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid, +static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid, struct pci_dev *F3) { bool ret = true; @@ -2388,7 +2389,7 @@ static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid, return ret; } -static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid, +static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid, struct pci_dev *F3) { u32 value, mask = 0x3; /* UECC/CECC enable */ @@ -2427,7 +2428,7 @@ static const char *ecc_msg = "'ecc_enable_override'.\n" " (Note that use of the override may cause unknown side effects.)\n"; -static bool ecc_enabled(struct pci_dev *F3, u8 nid) +static bool ecc_enabled(struct pci_dev *F3, u16 nid) { u32 value; u8 ecc_en = 0; @@ -2548,7 +2549,7 @@ static int amd64_init_one_instance(struct pci_dev *F2) struct mem_ctl_info *mci = 
NULL; struct edac_mc_layer layers[2]; int err = 0, ret; - u8 nid = amd_get_node_id(F2); + u16 nid = amd_get_node_id(F2); ret = -ENOMEM; pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); @@ -2639,7 +2640,7 @@ err_ret: static int __devinit amd64_probe_one_instance(struct pci_dev *pdev, const struct pci_device_id *mc_type) { - u8 nid = amd_get_node_id(pdev); + u16 nid = amd_get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s; int ret = 0; @@ -2689,7 +2690,7 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev) { struct mem_ctl_info *mci; struct amd64_pvt *pvt; - u8 nid = amd_get_node_id(pdev); + u16 nid = amd_get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s = ecc_stngs[nid]; -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 3/3, v3] AMD64 EDAC: Cleanup type usage to be consistent
As the Northbridge IDs are at most 16-bits, use the same type consistently and cleanup some indexes to use smaller types. v2: Drop changes for later cleanups v3: Further changes suggested by Boris Signed-off-by: Daniel J Blueman --- arch/x86/include/asm/amd_nb.h|2 +- arch/x86/include/asm/processor.h |2 +- arch/x86/kernel/cpu/amd.c|4 ++-- drivers/edac/amd64_edac.c| 16 drivers/edac/amd64_edac.h|6 +++--- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 9f5532a..b0815a0 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -76,7 +76,7 @@ static inline bool amd_nb_has_feature(unsigned feature) return ((amd_northbridges.flags & feature) == feature); } -static inline struct amd_northbridge *node_to_amd_nb(int node) +static inline struct amd_northbridge *node_to_amd_nb(u16 node) { return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; } diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index ad1fc85..eb3ba58 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -934,7 +934,7 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, extern int get_tsc_mode(unsigned long adr); extern int set_tsc_mode(unsigned int val); -extern int amd_get_nb_id(int cpu); +extern u16 amd_get_nb_id(int cpu); struct aperfmperf { u64 aperf, mperf; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f7e98a2..52cab1f 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -364,9 +364,9 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) #endif } -int amd_get_nb_id(int cpu) +u16 amd_get_nb_id(int cpu) { - int id = 0; + u16 id = 0; #ifdef CONFIG_SMP id = per_cpu(cpu_llc_id, cpu); #endif diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 6e3f002..b27412a 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c 
@@ -239,7 +239,7 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci) * DRAM base/limit associated with node_id */ static bool amd64_base_limit_match(struct amd64_pvt *pvt, u64 sys_addr, - unsigned nid) + u8 nid) { u64 addr; @@ -265,7 +265,7 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci, u64 sys_addr) { struct amd64_pvt *pvt; - unsigned node_id; + u8 node_id; u32 intlv_en, bits; /* @@ -1021,7 +1021,7 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) /* Factor in CC6 save area by reading dst node's limit reg */ if (c->x86 == 0x15) { struct pci_dev *misc, *f1 = NULL; - u16 nid = dram_dst_node(pvt, range); + u8 nid = dram_dst_node(pvt, range); u32 llim; misc = node_to_amd_nb(nid)->misc; @@ -1348,7 +1348,7 @@ static u8 f1x_determine_channel(struct amd64_pvt *pvt, u64 sys_addr, } /* Convert the sys_addr to the normalized DCT address */ -static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, unsigned range, +static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range, u64 sys_addr, bool hi_rng, u32 dct_sel_base_addr) { @@ -1399,7 +1399,7 @@ static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, unsigned range, * checks if the csrow passed in is marked as SPARED, if so returns the new * spare row */ -static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow) +static int f10_process_possible_spare(struct amd64_pvt *pvt, u16 dct, int csrow) { int tmp_cs; @@ -1424,7 +1424,7 @@ static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow) * -EINVAL: NOT FOUND * 0..csrow = Chip-Select Row */ -static int f1x_lookup_addr_in_dct(u64 in_addr, u32 nid, u8 dct) +static int f1x_lookup_addr_in_dct(u64 in_addr, u8 nid, u8 dct) { struct mem_ctl_info *mci; struct amd64_pvt *pvt; @@ -2256,7 +2256,7 @@ static int init_csrows(struct mem_ctl_info *mci) } /* get all cores on this DCT */ -static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, unsigned nid) +static void 
get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid) { int cpu; @@ -2266,7 +2266,7 @@ static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, unsigned nid) } /* check MCG_CTL on all the cpus on this node */ -static bool amd64_nb_mce_bank_enabled_on_node(unsigned nid) +static bool amd64_nb_mce_bank_enabled_on_node(u16 nid) { cpumask_var_t mask; int cpu, nbe; diff --git a/drivers/edac/amd64_edac.h b/dri
Re: 3.9-rc6 ext4: free_rb_tree_fname oops
On 16 April 2013 15:37, Daniel J Blueman wrote: > When using e4defrag on a ext4 filesystem created a month ago, I ran > into this fatal page fault [1] > while running e4defrag on 3.9-rc6 (Ubuntu mainline). > > e2fsdump output is at http://quora.org/2012/e2fsdump.txt ; let me know > if you need any more info. With 3.9.6 mainline, I got the exact same protection fault at free_rb_tree_fname() from ext4_htree_free_dir_info() [1]. This suggests use-after-free, as there's no pagetable mapping. There is nothing special with my setups, so there is fair chance it's reproducible there with e4defrag on a few month old filesystem and recent kernels. Thanks, Daniel > --- [1] > > general protection fault: [#1] SMP > Modules linked in: btrfs raid6_pq zlib_deflate xor ufs qnx4 hfsplus > hfs minix ntfs msdos jfs xfs libcrc32c reiserfs ext2 8021q garp > parport_pc ppdev rfcomm bnep nfsd auth_rpcgss nfs_acl nfs lockd sunrpc > fscache snd_hda_codec_hdmi snd_hda_codec_realtek coretemp kvm_intel > kvm snd_hda_intel snd_hda_codec snd_hwdep ghash_clmulni_intel arc4 > bridge iwldvm joydev i915 cryptd snd_pcm mac80211 stp llc > snd_page_alloc drm_kms_helper drm snd_seq_midi snd_seq_midi_event > snd_rawmidi snd_seq psmouse snd_seq_device btusb ir_sony_decoder > ir_rc5_decoder ir_lirc_codec lirc_dev ir_sanyo_decoder > ir_mce_kbd_decoder ir_jvc_decoder serio_raw ir_rc6_decoder iwlwifi > ir_nec_decoder snd_timer i2c_algo_bit rc_rc6_mce microcode nuvoton_cir > snd rc_core bluetooth soundcore mac_hid cfg80211 mei lpc_ich video lp > parport hid_generic usbhid hid r8169 ahci libahci > CPU 0 > Pid: 18139, comm: e4defrag Not tainted 3.9.0-030900rc6-generic > #201304080035 ZOTAC XX/XX > RIP: 0010:[] [] > free_rb_tree_fname+0x28/0xb0 > RSP: 0018:8801134a9e28 EFLAGS: 00010202 > RAX: 0036b44b8001 RBX: 880080e09018 RCX: 000180400028 > RDX: 0036b44b8001 RSI: 0001 RDI: 88013b001700 > RBP: 8801134a9e48 R08: R09: eadbe380 > R10: 812381bc R11: 0206 R12: > R13: 880036f8ec80 R14: 880036f8ebc8 R15: 8800ade074c0 > 
FS: 7fd1923d7740() GS:88013fa0() knlGS: > CS: 0010 DS: ES: CR0: 80050033 > CR2: 013974d8 CR3: 0001352f2000 CR4: 000407f0 > DR0: DR1: DR2: > DR3: DR6: 0ff0 DR7: 0400 > Process e4defrag (pid: 18139, threadinfo 8801134a8000, task > 880138d9c5f0) > Stack: > 880036f8ec80 4010 880021a2f900 8800ade074c0 > 8801134a9e68 81238f36 4010 88013890f000 > 8801134a9e78 81238f6a 8801134a9ec8 8119f57a > Call Trace: > [] ext4_htree_free_dir_info+0x16/0x30 > [] ext4_release_dir+0x1a/0x20 > [] __fput+0xba/0x240 > [] fput+0xe/0x10 > [] task_work_run+0xc8/0xf0 > [] do_notify_resume+0xaa/0xc0 > [] int_signal+0x12/0x17 > Code: 90 90 90 66 66 66 66 90 55 48 89 e5 41 56 41 55 49 89 fd 41 54 > 53 48 8b 1f 48 85 db 74 67 48 8b 43 10 eb 11 0f 1f 80 00 00 00 00 <48> > 8b 50 10 48 89 c3 48 89 d0 48 85 c0 75 f1 48 8b 43 08 48 85 > RIP [] free_rb_tree_fname+0x28/0xb0 > RSP > ---[ end trace 02741f61e6b3c24b ]--- > general protection fault: [#2] SMP > Modules linked in: btrfs raid6_pq zlib_deflate xor ufs qnx4 hfsplus > hfs minix ntfs msdos jfs xfs libcrc32c reiserfs ext2 8021q garp > parport_pc ppdev rfcomm bnep nfsd auth_rpcgss nfs_acl nfs lockd sunrpc > fscache snd_hda_codec_hdmi snd_hda_codec_realtek coretemp kvm_intel > kvm snd_hda_intel snd_hda_codec snd_hwdep ghash_clmulni_intel arc4 > bridge iwldvm joydev i915 cryptd snd_pcm mac80211 stp llc > snd_page_alloc drm_kms_helper drm snd_seq_midi snd_seq_midi_event > snd_rawmidi snd_seq psmouse snd_seq_device btusb ir_sony_decoder > ir_rc5_decoder ir_lirc_codec lirc_dev ir_sanyo_decoder > ir_mce_kbd_decoder ir_jvc_decoder serio_raw ir_rc6_decoder iwlwifi > ir_nec_decoder snd_timer i2c_algo_bit rc_rc6_mce microcode nuvoton_cir > snd rc_core bluetooth soundcore mac_hid cfg80211 mei lpc_ich video lp > parport hid_generic usbhid hid r8169 ahci libahci > CPU 0 > Pid: 18139, comm: e4defrag Tainted: G D 3.9.0-030900rc6-generic > #201304080035 ZOTAC XX/XX > RIP: 0010:[] [] > free_rb_tree_fname+0x28/0xb0 > RSP: 0018:8801134a9b78 EFLAGS: 00010202 > RAX: 
0036b44b8001 RBX: 880080e09018 RCX: 0001 > RDX: 0036b44b8001 RSI: 88013890fb00 RDI: 880036f8ef80 > RBP: 8801134a9b98 R08: R09: > R10: 88013890fb10 R11: R12: 4010 > R13: 880036f8ef80 R14: 8800ade07108 R15
13GB dcache+inode cache hash tables
As memory capacity increases, we see the dentry and inode cache hash tables grow to wild sizes [1], eg 13GB is consumed on a 4.5TB system. Perhaps a better approach adds a linear component to an exponent to give tuned scaling, given that spatial locality is an advantage in hash table and careful use of resources. The same approach would fit to other hash tables (mount-cache, TCP established, TCP bind, UDP, UDP-Lite, Dquot-cache) with different coefficients, so perhaps we could generalise. If so what are reasonable reference points and assumptions? Thanks, Daniel --- [1] 1GB: Dentry cache hash table entries: 131072 (order: 7, 524288 bytes) Inode-cache hash table entries: 65536 (order: 6, 262144 bytes) 8GB: Dentry cache hash table entries: 1048576 (order: 11, 8388608 bytes) Inode-cache hash table entries: 524288 (order: 10, 4194304 bytes) 1TB: Dentry cache hash table entries: 134217728 (order: 18, 1073741824 bytes) Inode-cache hash table entries: 67108864 (order: 17, 536870912 bytes) 4.5TB Dentry cache hash table entries: 1073741824 (order: 21, 8589934592 bytes) Inode-cache hash table entries: 536870912 (order: 20, 4294967296 bytes) -- Daniel J Blueman Principal Software Engineer, Numascale Asia -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] Transparent on-demand memory setup initialization embedded in the (GFP) buddy allocator
On Wednesday, June 26, 2013 9:30:02 PM UTC+8, Andrew Morton wrote: > > On Wed, 26 Jun 2013 11:22:48 +0200 Ingo Molnar wrote: > > > except that on 32 TB > > systems we don't spend ~2 hours initializing 8,589,934,592 page heads. > > That's about a million a second which is crazy slow - even my prehistoric desktop > is 100x faster than that. > > Where's all this time actually being spent? The complexity of a directory-lookup architecture to make the (intrinsically unscalable) cache-coherency protocol scalable gives you a ~1us roundtrip to remote NUMA nodes. Probably a lot of time is spent in some memsets, and RMW cycles which are setting page bits, which are intrinsically synchronous, so the initialising core can't get to 12 or so outstanding memory transactions. Since EFI memory ranges have a flag to state if they are zeroed (which may be a fair assumption for memory on non-bootstrap processor NUMA nodes), we can probably collapse the RMWs to just writes. A normal write will require a coherency cycle, then a fetch and a writeback when it's evicted from the cache. For this purpose, non-temporal writes would eliminate the cache line fetch and give a massive increase in bandwidth. We wouldn't even need a store-fence as the initialising core is the only one online. Daniel -- Daniel J Blueman Principal Software Engineer, Numascale Asia -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 13GB dcache+inode cache hash tables
On 25/06/2013 17:48, Eric Dumazet wrote: On Tue, 2013-06-25 at 16:56 +0800, Daniel J Blueman wrote: As memory capacity increases, we see the dentry and inode cache hash tables grow to wild sizes [1], eg 13GB is consumed on a 4.5TB system. Perhaps a better approach adds a linear component to an exponent to give tuned scaling, given that spatial locality is an advantage in hash table and careful use of resources. The same approach would fit to other hash tables (mount-cache, TCP established, TCP bind, UDP, UDP-Lite, Dquot-cache) with different coefficients, so perhaps we could generalise. TCP hash table is limited to 512K slots, unless overridden. TCP bind limited to 64K slots. UDP limited to 64K slots. If so what are reasonable reference points and assumptions? I do not know what you have in mind, please show us a patch ;) [...] Alright, I'll see what I can get together in the next week or so when I can fit it in. Dan -- Daniel J Blueman Principal Software Engineer, Numascale Asia -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
BCM57765: timeout waiting for hardware interrupt
Chris et al, with 3.11-rc6 and the Broadcom BCM57765 card reader [1] in my Macbook Retina, interrupts are not seen during card insertion: sdhci: Secure Digital Host Controller Interface driver sdhci: Copyright(c) Pierre Ossman sdhci-pci :03:00.1: SDHCI controller found [14e4:16bc] (rev 10) mmc0: no vqmmc regulator found mmc0: no vmmc regulator found mmc0: SDHCI controller on PCI [:03:00.1] using ADMA [...] mmc0: Timeout waiting for hardware interrupt. However /proc/interrupts shows 4 interrupts occurring at insertion time: 17: 6 0 0 0 0 0 0 0 IO-APIC-fasteoi mmc0 Debugging shows the interrupt handler inspecting the state of the hardware and finding no work to do; are the missing regulators unexpected? --- [1] $ sudo lspci -s 03:00.1 -v 03:00.1 SD Host controller: Broadcom Corporation NetXtreme BCM57765 Memory Card Reader (rev 10) (prog-if 01) Subsystem: Broadcom Corporation Device 96bc Flags: bus master, fast devsel, latency 0, IRQ 17 Memory at c182 (64-bit, prefetchable) [size=64K] Capabilities: [48] Power Management version 3 Capabilities: [58] MSI: Enable- Count=1/1 Maskable- 64bit+ Capabilities: [ac] Express Endpoint, MSI 00 Capabilities: [100] Advanced Error Reporting Capabilities: [150] Power Budgeting Capabilities: [160] Virtual Channel Kernel driver in use: sdhci-pci -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: False-positive RCU stall warnings on large systems...
On 02/26/2013 12:32 AM, Paul E. McKenney wrote: On Wed, Feb 20, 2013 at 11:35:57AM +0800, Daniel J Blueman wrote: On 20/02/2013 02:16, Paul E. McKenney wrote: On Wed, Feb 20, 2013 at 12:34:12AM +0800, Daniel J Blueman wrote: Hi Paul, On some of our larger servers with many hundreds of cores and when under high duress, we can see scheduler RCU stall warnings [1], so find we have to increase the hardcoded RCU_STALL_RAT_DELAY up from 2 and RCU_JIFFIES_TILL_FORCE_QS up from 3. Disabling RCU_FAST_NO_HZ will likely remove the need to adjust RCU_JIFFIES_TILL_FORCE_QS. Changes in my -rcu tree will likely remove the need to adjust these two in 3.10 or 3.11, depending on how testing goes. Is there a more sustainable way to account for this to avoid it being hard-coded, such as making it and dependent timeouts a fraction of CONFIG_RCU_CPU_STALL_TIMEOUT? Maybe... But what this means is that your system is so heavily loaded that the CPU in question is failing to make it to RCU's softirq handler in two jiffies worth of time. This is a function of workload rather than of the number of CPUs. On the other hand, perhaps this is just caused by clock jitter (eg due to distance from a contended clock source)? So increasing these a bit may just be adequate in general... Hmmm... What version of the kernel are you running? The example below occurs with v3.8, but we see the same with previous kernels eg v3.5. There is always the rcutree.rcu_cpu_stall_timeout parameter that sets the stall timeout in seconds. This may be specified at boot time or via sysfs at runtime. The default is now 21 seconds. Of course, when using the local TSC, you'd see no jitter relative to coherent transactions (eg memory writes), but when the HPET is used across a large system, coherent transactions to distant cores are just so much faster, as there's massive congestion to the shared HPET behind various HT and PCIe bridges. This could be where the jitter arises from, if I'm guessing jitter is the problem here. 
Agreed, timing jitter could cause problems. That said, the code uses the jiffies counter to compute these timings. Are you seeing similar memory contention on the jiffies counter itself? The contention we see in general are when cores contend for a spinlock and when there are lots of concurrent HPET reads (Opterons allow only 4 outstanding reads to the IO hub). It's probably possible to reproduce rcu_sched stalls on a quad-socket box with 64 cores and the right workload with the TSC disabled. In 3.9-rc1 with RCU_FAST_NO_HZ disabled, we've seen stalls of 4 jiffies [2], but without the "Stall ended" message. This is with a workload which allocates ~256GB of memory over 192 cores. Thanks, Daniel --- [1] [ 3939.010085] INFO: rcu_sched detected stalls on CPUs/tasks: {} (detected by 1, t=29662 jiffies, g=3053, c=3052, q=598) [ 3939.020008] INFO: Stall ended before state dump start --- [2] [10660.110620] INFO: rcu_sched self-detected stall on CPU { 39} (t=4 jiffies g=1169 c=1168 q=8) [10660.110620] Pid: 11747, comm: sp.B Not tainted 3.9.0-rc1-advanced #6 [10660.110620] Call Trace: [10660.110620][] ? rcu_check_callbacks+0x2d2/0x5f0 [10660.110620] [] ? run_posix_cpu_timers+0x3a/0x790 [10660.110620] [] ? update_process_times+0x3f/0x80 [10660.110620] [] ? tick_sched_handle.isra.8+0x30/0x40 [10660.110620] [] ? tick_sched_timer+0x42/0x70 [10660.110620] [] ? __run_hrtimer.isra.30+0x4a/0xe0 [10660.110620] [] ? hrtimer_interrupt+0xe5/0x220 [10660.110620] [] ? smp_apic_timer_interrupt+0x63/0xa0 [10660.110620] [] ? apic_timer_interrupt+0x67/0x70 -- Daniel J Blueman Principal Software Engineer, Numascale Asia -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
3.9-rc6 ext4: free_rb_tree_fname oops
/0x480 [] oops_end+0xb9/0x100 [] die+0x58/0x90 [] do_general_protection+0xdc/0x160 [] general_protection+0x28/0x30 [] ? free_rb_tree_fname+0x5c/0xb0 [] ? free_rb_tree_fname+0x28/0xb0 [] ? free_rb_tree_fname+0x5c/0xb0 [] ext4_htree_free_dir_info+0x16/0x30 [] ext4_release_dir+0x1a/0x20 [] __fput+0xba/0x240 [] fput+0xe/0x10 [] task_work_run+0xc8/0xf0 [] do_notify_resume+0xaa/0xc0 [] int_signal+0x12/0x17 Code: 90 90 90 66 66 66 66 90 55 48 89 e5 41 56 41 55 49 89 fd 41 54 53 48 8b 1f 48 85 db 74 67 48 8b 43 10 eb 11 0f 1f 80 00 00 00 00 <48> 8b 50 10 48 89 c3 48 89 d0 48 85 c0 75 f1 48 8b 43 08 48 85 RIP [] free_rb_tree_fname+0x28/0xb0 RSP ---[ end trace 02741f61e6b3c24c ]--- Fixing recursive fault but reboot is needed! -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
rts5139 polling overhead...
Hi Edwin, The Realsil/Realtek rts5139 card reader driver polls its hardware at 20Hz and has racked up 25 minutes of processor time over a few days on this Sandy Bridge media centre box, without any cards inserted. This is more than the sum of all the other processes (including X): $ ps -ef | grep rts5139 root 691 2 0 Aug03 ?00:00:36 [rts5139-control] root 693 2 0 Aug03 ?00:25:36 [rts5139-polling] The kernel is stock 3.5.0 without debug; would it help if I log a bug report or eg collect further detail? Thanks, Daniel -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[3.5.1] tg3 waitqueue hang on hotplug remove...
Hi Matt, Michael, On my Macbook Retina with 3.5.1, I see the tg3 external adapter (via Thunderbolt) get logically disconnected after a while despite remaining connected (Thunderbolt issues). The problem is though, that the pciehp_wq workqueue fails to complete flushing from the call to pcie_cleanup_slot (inlined in pciehp_release_ctrl) [1]; looks like tg3_tx or so is missing a finish_wait(), no? Daniel --- [1] pcieport :00:01.0: irq 42 for MSI/MSI-X pcieport :00:01.1: irq 43 for MSI/MSI-X pcieport :00:01.2: irq 44 for MSI/MSI-X pcieport :05:00.0: irq 45 for MSI/MSI-X pcieport :06:00.0: irq 46 for MSI/MSI-X pcieport :06:03.0: irq 47 for MSI/MSI-X pcieport :06:04.0: irq 48 for MSI/MSI-X pcieport :06:05.0: irq 49 for MSI/MSI-X pcieport :06:06.0: irq 50 for MSI/MSI-X pcieport :08:00.0: irq 51 for MSI/MSI-X pcieport :09:00.0: irq 52 for MSI/MSI-X pci_hotplug: PCI Hot Plug PCI Core version: 0.5 pciehp :06:00.0:pcie24: HPC vendor_id 8086 device_id 1547 ss_vid ss_did pciehp :06:00.0:pcie24: service driver pciehp loaded pciehp :06:03.0:pcie24: HPC vendor_id 8086 device_id 1547 ss_vid ss_did pciehp :06:03.0:pcie24: service driver pciehp loaded pciehp :06:04.0:pcie24: HPC vendor_id 8086 device_id 1547 ss_vid ss_did pciehp :06:04.0:pcie24: service driver pciehp loaded pciehp :06:05.0:pcie24: HPC vendor_id 8086 device_id 1547 ss_vid ss_did pciehp :06:05.0:pcie24: service driver pciehp loaded pciehp :06:06.0:pcie24: HPC vendor_id 8086 device_id 1547 ss_vid ss_did pciehp :06:06.0:pcie24: service driver pciehp loaded pciehp :09:00.0:pcie24: HPC vendor_id 8086 device_id 1549 ss_vid 0 ss_did 0 pciehp :09:00.0:pcie24: service driver pciehp loaded pciehp: PCI Express Hot Plug Controller Driver version: 0.4 tg3 :0a:00.0: eth0: Tigon3 [partno(BCM957762) rev 57766000] (PCI Express) MAC address 40:6c:8f:36:1a:67 tg3 :0a:00.0: eth0: attached PHY is 57765 (10/100/1000Base-T Ethernet) (WireSpeed[1], EEE[0]) tg3 :0a:00.0: eth0: RXcsums[1] LinkChgREG[0] MIirq[0] ASF[0] TSOcap[1] tg3 :0a:00.0: 
eth0: dma_rwctrl[0001] dma_mask[64-bit] ... pciehp :06:03.0:pcie24: Card not present on Slot(3) tg3 :0a:00.0: tg3_abort_hw timed out, TX_MODE_ENABLE will not clear MAC_TX_MODE= tg3 :0a:00.0: eth1: No firmware running tg3 :0a:00.0: eth1: Link is down [sched_delayed] sched: RT throttling activated pciehp :09:00.0:pcie24: unloading service driver pciehp INFO: task kworker/0:2:3072 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. kworker/0:2 D 8180cc20 0 3072 2 0x 880237f75800 0046 0001 880237f757b0 880237f75fd8 880237f75fd8 880237f75fd8 00013940 88025f3b5c00 880237eb5c00 7fff Call Trace: [] schedule+0x29/0x70 [] schedule_timeout+0x2a5/0x320 [] ? default_spin_lock_flags+0x9/0x10 [] ? pde_put+0x79/0xa0 [] wait_for_common+0xdf/0x180 [] ? pde_put+0x79/0xa0 [] ? try_to_wake_up+0x200/0x200 [] wait_for_completion+0x1d/0x20 [] flush_workqueue+0x143/0x400 [] ? pciehp_disable_slot+0x1f0/0x1f0 [] pciehp_release_ctrl+0x46/0xa0 [] pciehp_remove+0x27/0x30 [] pcie_port_remove_service+0x57/0x70 [] __device_release_driver+0x7c/0xe0 [] device_release_driver+0x2c/0x40 [] bus_remove_device+0xe1/0x120 [] ? resume_iter+0x40/0x40 [] device_del+0x120/0x1b0 [] ? resume_iter+0x40/0x40 [] device_unregister+0x16/0x30 [] remove_iter+0x3d/0x50 [] device_for_each_child+0x44/0x70 [] pcie_port_device_remove+0x26/0x40 [] pcie_portdrv_remove+0x16/0x30 [] pci_device_remove+0x46/0x110 [] __device_release_driver+0x7c/0xe0 [] device_release_driver+0x2c/0x40 [] bus_remove_device+0xe1/0x120 [] device_del+0x120/0x1b0 [] device_unregister+0x16/0x30 [] pci_stop_bus_device+0x94/0xa0 [] pci_stop_bus_device+0x43/0xa0 [] pci_stop_and_remove_bus_device+0x16/0x30 [] pciehp_unconfigure_device+0x91/0x190 [] pciehp_disable_slot+0x75/0x1f0 [] pciehp_power_thread+0xe3/0x110 [] process_one_work+0x11a/0x480 [] worker_thread+0x165/0x370 [] ? manage_workers.isra.29+0x130/0x130 [] kthread+0x93/0xa0 [] kernel_thread_helper+0x4/0x10 [] ? 
kthread_freezable_should_stop+0x70/0x70 [] ? gs_change+0x13/0x13 -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [2.6.25-rc2, 2.6.24-rc8] page allocation failure...
I'm still hitting this with e1000e on 2.6.25-rc2, 10 times again. It's clearly non-fatal, but then do we expect it to occur? Daniel --- [dmesg] [ 1250.822786] swapper: page allocation failure. order:3, mode:0x4020 [ 1250.822786] Pid: 0, comm: swapper Not tainted 2.6.25-rc2-119 #2 [ 1250.822786] [ 1250.822786] Call Trace: [ 1250.822786][] __alloc_pages+0x34e/0x3a0 [ 1250.822786] [] ? __netdev_alloc_skb+0x1f/0x40 [ 1250.822786] [] __slab_alloc+0x102/0x3d0 [ 1250.822786] [] ? __netdev_alloc_skb+0x1f/0x40 [ 1250.822786] [] __kmalloc_track_caller+0x7b/0xc0 [ 1250.822786] [] __alloc_skb+0x6f/0x160 [ 1250.822786] [] __netdev_alloc_skb+0x1f/0x40 [ 1250.822786] [] e1000_alloc_rx_buffers+0x1ed/0x260 [ 1250.822786] [] e1000_clean_rx_irq+0x22a/0x330 [ 1250.822786] [] e1000_clean+0x1e1/0x540 [ 1250.822786] [] ? tick_program_event+0x45/0x70 [ 1250.822786] [] net_rx_action+0x9a/0x150 [ 1250.822786] [] __do_softirq+0x74/0xf0 [ 1250.822786] [] call_softirq+0x1c/0x30 [ 1250.822786] [] do_softirq+0x3d/0x80 [ 1250.822786] [] irq_exit+0x85/0x90 [ 1250.822786] [] do_IRQ+0x85/0x100 [ 1250.822786] [] ? mwait_idle+0x0/0x50 [ 1250.822786] [] ret_from_intr+0x0/0xa [ 1250.822786][] ? mwait_idle+0x45/0x50 [ 1250.822786] [] ? enter_idle+0x22/0x30 [ 1250.822786] [] ? cpu_idle+0x74/0xa0 [ 1250.822786] [] ? rest_init+0x55/0x60 [ 1250.822786] [ 1250.822786] Mem-info: [ 1250.822786] DMA per-cpu: [ 1250.822786] CPU0: hi:0, btch: 1 usd: 0 [ 1250.822786] CPU1: hi:0, btch: 1 usd: 0 [ 1250.822786] DMA32 per-cpu: [ 1250.822786] CPU0: hi: 186, btch: 31 usd: 179 [ 1250.822786] CPU1: hi: 186, btch: 31 usd: 159 [ 1250.822786] Active:59792 inactive:67236 dirty:4775 writeback:4779 unstable:0 [ 1250.822786] free:2232 slab:122927 mapped:3846 pagetables:715 bounce:0 [ 1250.822786] DMA free:3984kB min:36kB low:44kB high:52kB active:4kB inactive:560kB present:10076kB pages_scanned:0 all_unreclaimable? 
no [ 1250.822786] lowmem_reserve[]: 0 992 992 992 [ 1250.822786] DMA32 free:4944kB min:4008kB low:5008kB high:6012kB active:239164kB inactive:268384kB present:1015936kB pages_scanned:0 all_unreclaimable? no [ 1250.822786] lowmem_reserve[]: 0 0 0 0 [ 1250.822786] DMA: 6*4kB 1*8kB 1*16kB 1*32kB 1*64kB 0*128kB 1*256kB 1*512kB 1*1024kB 1*2048kB 0*4096kB = 3984kB [ 1250.822786] DMA32: 836*4kB 148*8kB 18*16kB 0*32kB 1*64kB 1*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB = 5008kB [ 1250.822786] 88530 total pagecache pages [ 1250.822786] Swap cache: add 33, delete 32, find 0/0 [ 1250.822786] Free swap = 248872kB [ 1250.822786] Total swap = 248996kB [ 1250.822786] Free swap: 248872kB [ 1250.822786] 261600 pages of RAM [ 1250.822786] 5448 reserved pages [ 1250.822786] 55715 pages shared [ 1250.822786] 1 pages swap cached On Feb 14, 2008 8:40 PM, Daniel J Blueman <[EMAIL PROTECTED]> wrote: > One of my x86-64 (1GB) systems experienced order 1 page alloc failure > after ~4 days of uptime and 9 times again in the hours since. > > I understand the behaviour is that the reclaim kthread should have had > a couple of free pages around for this type allocation, but obviously > didn't (a number of times). > > What other information may help with this? > > Daniel > > --- dmesg > swapper: page allocation failure. 
order:1, mode:0x4020 > Pid: 0, comm: swapper Not tainted 2.6.24-rc8-117 #1 > > Call Trace: > [] __alloc_pages+0x336/0x390 > [] __netdev_alloc_skb+0x17/0x40 > [] __slab_alloc+0x145/0x3d0 > [] __netdev_alloc_skb+0x17/0x40 > [] __kmalloc_track_caller+0xf6/0x100 > [] __alloc_skb+0x6f/0x150 > [] __netdev_alloc_skb+0x17/0x40 > [] e1000_alloc_rx_buffers+0x17a/0x3a0 > [] ip_local_deliver_finish+0x83/0x1a0 > [] e1000_clean_rx_irq+0x34d/0x550 > [] e1000_intr_msi+0x81/0x110 > [] handle_IRQ_event+0x34/0x70 > [] handle_edge_irq+0xc9/0x150 > [] do_IRQ+0x7b/0x100 > [] mwait_idle+0x0/0x50 > [] ret_from_intr+0x0/0xa > [] lapic_next_event+0x0/0x10 > [] mwait_idle+0x42/0x50 > [] cpu_idle+0x75/0xa0 > [] start_kernel+0x25a/0x2e0 > [] _sinittext+0x117/0x120 > > Mem-info: > DMA per-cpu: > CPU0: Hot: hi:0, btch: 1 usd: 0 Cold: hi:0, btch: 1 usd: > 0 > CPU1: Hot: hi:0, btch: 1 usd: 0 Cold: hi:0, btch: 1 usd: > 0 > DMA32 per-cpu: > CPU0: Hot: hi: 186, btch: 31 usd: 155 Cold: hi: 62, btch: 15 usd: > 60 > CPU1: Hot: hi: 186, btch: 31 usd: 14 Cold: hi: 62, btch: 15 usd: > 42 > Active:114794 inactive:53917 dirty:8137 writeback:5025 unstable:0 > free:3549 slab:79629 mapped:4643 pagetables:1017 bounce:0 > DMA free:3968kB min:40kB low:48kB high:60kB active:72kB inactive:120kB > present:10236kB pages_scanned:0 all_unreclaimable? no > lowmem_reserve[]: 0 992 992 992 > DMA32 free:10228kB min:4008kB low:5008kB high:6012kB active:459104kB > inactive:215548kB present:
3.5.0 iwlagn AP crash...
Hi Johannes et al, When running my Centrino Wireless-N 130 BGN (rev 0xb0) card in nl80211 AP mode with hostapd on linux 3.5.0, I immediately hit this fatal pagefault [1]. I can cook a debug kernel, reproduce, disassemble the code and do some quick analysis, if that helps get the ball rolling? Thanks! Daniel --- [1] BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] ieee80211_ave_rssi+0xd/0x50 [mac80211] PGD 116616067 PUD 115c22067 PMD 0 Oops: [#1] SMP CPU 0 Modules linked in: netconsole configfs snd_hda_codec_hdmi snd_hda_codec_realtek xt_hl ip6t_rt nf_conntrack_ipv6 nf_defrag_ipv6 ipt_REJECT snd_hda_intel snd_hda_codec snd_hwdep xt_limit xt_tcpudp xt_addrtype snd_pcm ir_lirc_codec lirc_dev ir_mce_kbd_decoder ir_sanyo_decoder ir_sony_decoder xt_state ir_jvc_decoder snd_seq_midi snd_rawmidi ip6table_filter ip6_tables joydev ir_rc6_decoder snd_seq_midi_event nf_conntrack_netbios_ns nf_conntrack_broadcast snd_seq hid_generic arc4 ir_rc5_decoder nf_nat_ftp nf_nat snd_timer nf_conntrack_ipv4 snd_seq_device nf_defrag_ipv4 usbhid i915 hid coretemp drm_kms_helper iwlwifi mac80211 nf_conntrack_ftp ir_nec_decoder drm i2c_algo_bit rts5139(C) kvm_intel btusb snd nf_conntrack kvm psmouse bluetooth cfg80211 mac_hid ghash_clmulni_intel rc_rc6_mce lpc_ich soundcore iptable_filter snd_page_alloc mei ip_tables x_tables nuvoton_cir rc_core serio_raw cryptd microcode video r8169 Pid: 0, comm: swapper/0 Tainted: G C 3.5.0-030500-generic #201207211835 ZOTAC XX /XX RIP: 0010:[] [] ieee80211_ave_rssi+0xd/0x50 [mac80211] RSP: 0018:88011fa03c60 EFLAGS: 00010286 RDX: RSI: 880115b26008 RDI: RBP: 88011fa03c70 R08: a03b82e8 R09: R10: R11: 0001 R12: 880115b26008 R13: 880115b26008 R14: 880117bd1f50 R15: 880115b26000 FS: () GS:88011fa0() knlGS: CS: 0010 DS: ES: CR0: 8005003b CR2: CR3: 000116371000 CR4: 000407f0 DR0: DR1: DR2: DR3: DR6: 0ff0 DR7: 0400 Process swapper/0 (pid: 0, threadinfo 81c0, task 81c13440) Stack: 8801 8801 88011fa03c90 a037d997 880117bd1f50 880115b26000 
88011fa03cc0 a037de09 880117bd1f40 880117bd1f40 88011fa03d98 Call Trace: [] iwlagn_fill_txpower_mode+0x27/0x100 [iwlwifi] [] iwlagn_bt_coex_profile_notif+0x189/0x250 [iwlwifi] [] iwl_rx_dispatch+0xbc/0x120 [iwlwifi] [] iwl_rx_handle+0xcf/0x190 [iwlwifi] [] iwl_irq_tasklet+0x353/0x9b0 [iwlwifi] [] tasklet_action+0x64/0xe0 [] __do_softirq+0xa8/0x210 [] ? _raw_spin_lock+0xe/0x20 [] call_softirq+0x1c/0x30 [] do_softirq+0x65/0xa0 [] irq_exit+0x8e/0xb0 [] do_IRQ+0x63/0xe0 [] common_interrupt+0x6a/0x6a [] ? default_spin_lock_flags+0x9/0x10 [] ? intel_idle+0xea/0x150 [] ? intel_idle+0xcc/0x150 [] cpuidle_enter+0x19/0x20 [] cpuidle_idle_call+0xac/0x2a0 [] cpu_idle+0xcf/0x120 [] rest_init+0x72/0x74 [] start_kernel+0x3b7/0x3c4 [] ? repair_env_string+0x5a/0x5a [] x86_64_start_reservations+0x131/0x135 [] ? early_idt_handlers+0x120/0x120 [] x86_64_start_kernel+0xcd/0xdc Code: 48 89 45 d8 48 8b 5d d8 4c 39 e3 75 c1 90 48 83 c4 10 5b 41 5c 41 5d 41 5e 5d c3 0f 1f 00 55 48 89 e5 48 83 ec 10 66 66 90 3f 02 75 05 8b 47 8c c9 c3 31 c0 80 3d 85 52 04 00 01 74 f3 RIP [] ieee80211_ave_rssi+0xd/0x50 [mac80211] RSP CR2: -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 3.5.0 iwlagn AP crash...
On 22 July 2012 17:15, Daniel J Blueman wrote: > Hi Johannes et al, > > When running my Centrino Wireless-N 130 BGN (rev 0xb0) card in nl80211 > AP mode with hostapd on linux 3.5.0, I immediately hit this fatal > pagefault [1]. > > I can cook a debug kernel, reproduce, disassemble the code and do some > quick analysis, if that helps get the ball rolling? > > Thanks! > Daniel > > --- [1] > > BUG: unable to handle kernel NULL pointer dereference at (null) > IP: [] ieee80211_ave_rssi+0xd/0x50 [mac80211] From my debug kernel, sdata is clearly NULL: (gdb) list *0x815b74f8 0x815b74f8 is in ieee80211_ave_rssi (net/mac80211/util.c:1801). 1796 int ieee80211_ave_rssi(struct ieee80211_vif *vif) 1797 { 1798 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); 1799 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 1800 1801 if (WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION)) { 1802 /* non-managed type inferfaces */ 1803 return 0; 1804 } 1805 return ifmgd->ave_beacon_signal; -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH, repost] Prevent interface errors with Seagate FreeAgent GoFlex
When using my Seagate FreeAgent GoFlex eSATAp external disk enclosure, interface errors are always seen until 1.5Gbps is negotiated [1]. This occurs using any disk in the enclosure, and when the disk is connected directly with a generic passive eSATAp cable, we see stable 3Gbps operation as expected. Blacklist 3Gbps mode to avoid data loss and the ~30s delay that the bus reset and renegotiation incur. Signed-off-by: Daniel J Blueman --- drivers/ata/libata-core.c |1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 28db50b..0781510 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4119,6 +4119,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* Devices which aren't very happy with higher link speeds */ { "WD My Book", NULL, ATA_HORKAGE_1_5_GBPS, }, + { "Seagate FreeAgent GoFlex", NULL, ATA_HORKAGE_1_5_GBPS, }, /* * Devices which choke on SETXFER. Applies only if both the --- [1] ata5: exception Emask 0x10 SAct 0x0 SErr 0x405 action 0xe frozen ata5: irq_stat 0x00400040, connection status changed ata5: SError: { PHYRdyChg CommWake DevExch } ata5: hard resetting link ata5: SATA link up 3.0 Gbps (SStatus 123 SControl 300) ata5.00: ATA-8: Seagate FreeAgent GoFlex, 0110, max UDMA/133 ata5.00: 2930277168 sectors, multi 0: LBA48 ata5.00: configured for UDMA/133 ata5: EH complete scsi 4:0:0:0: Direct-Access ATA Seagate FreeAgen 0110 PQ: 0 ANSI: 5 sd 4:0:0:0: [sdb] 2930277168 512-byte logical blocks: (1.50 TB/1.36 TiB) sd 4:0:0:0: [sdb] 4096-byte physical blocks sd 4:0:0:0: [sdb] Write Protect is off sd 4:0:0:0: [sdb] Mode Sense: 00 3a 00 00 sd 4:0:0:0: [sdb] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA sd 4:0:0:0: Attached scsi generic sg1 type 0 sdb: unknown partition table sd 4:0:0:0: [sdb] Attached SCSI disk EXT4-fs (dm-0): mounted filesystem with ordered data mode.
Opts: (null) ata5.00: exception Emask 0x10 SAct 0x0 SErr 0x400100 action 0x6 frozen ata5.00: irq_stat 0x0800, interface fatal error ata5: SError: { UnrecovData Handshk } ata5.00: failed command: WRITE DMA EXT ata5.00: cmd 35/00:00:38:db:61/00:04:6d:00:00/e0 tag 0 dma 524288 out res 50/00:00:37:db:61/00:00:6d:00:00/e0 Emask 0x10 (ATA bus error) ata5.00: status: { DRDY } ata5: hard resetting link ata5: link is slow to respond, please be patient (ready=0) ata5: COMRESET failed (errno=-16) ata5: hard resetting link ata5: link is slow to respond, please be patient (ready=0) ata5: COMRESET failed (errno=-16) ata5: hard resetting link ata5: link is slow to respond, please be patient (ready=0) ata5: COMRESET failed (errno=-16) ata5: limiting SATA link speed to 1.5 Gbps ata5: hard resetting link ata5: SATA link up 1.5 Gbps (SStatus 113 SControl 310) ata5.00: configured for UDMA/133 sd 4:0:0:0: [sdb] Result: hostbyte=0x00 driverbyte=0x08 sd 4:0:0:0: [sdb] Sense Key : 0xb [current] [descriptor] Descriptor sense data with sense descriptors (in hex): 72 0b 00 00 00 00 00 0c 00 0a 80 00 00 00 00 00 6d 61 db 37 sd 4:0:0:0: [sdb] ASC=0x0 ASCQ=0x0 sd 4:0:0:0: [sdb] CDB: cdb[0]=0x2a: 2a 00 6d 61 db 38 00 04 00 00 end_request: I/O error, dev sdb, sector 1835129656 Buffer I/O error on device dm-0, logical block 229390950 Buffer I/O error on device dm-0, logical block 229390951 Buffer I/O error on device dm-0, logical block 229390952 Buffer I/O error on device dm-0, logical block 229390953 Buffer I/O error on device dm-0, logical block 229390954 Buffer I/O error on device dm-0, logical block 229390955 Buffer I/O error on device dm-0, logical block 229390956 Buffer I/O error on device dm-0, logical block 229390957 Buffer I/O error on device dm-0, logical block 229390958 Buffer I/O error on device dm-0, logical block 229390959 Buffer I/O error on device dm-0, logical block 229390960 Buffer I/O error on device dm-0, logical block 229390961 Buffer I/O error on device dm-0, logical block 
229390962 Buffer I/O error on device dm-0, logical block 229390963 Buffer I/O error on device dm-0, logical block 229390964 Buffer I/O error on device dm-0, logical block 229390965 Buffer I/O error on device dm-0, logical block 229390966 Buffer I/O error on device dm-0, logical block 229390967 Buffer I/O error on device dm-0, logical block 229390968 Buffer I/O error on device dm-0, logical block 229390969 Buffer I/O error on device dm-0, logical block 229390970 Buffer I/O error on device dm-0, logical block 229390971 Buffer I/O error on device dm-0, logical block 229390972 Buffer I/O error on device dm-0, logical block 229390973 Buffer I/O error on device dm-0, logical block 229390974 Buffer I/O error on device dm-0, logical block 229390975 Buffer I/O error on device dm-0, logical block 229390976 Buffer I/O error on device dm-0, logical block 229390977 Buffer I/O error on device dm-0, logical block 229390978 Buffer I/O error on device dm-0, lo
[PATCH v3] Add device ID for Bluetooth on Macbook Pro 2012
Add the device ID for supporting the Macbook Pro 2012 'MacBookPro10,1'. The bluetooth device presents itself as: T: Bus=02 Lev=04 Prnt=04 Port=02 Cnt=03 Dev#= 8 Spd=12 MxCh= 0 D: Ver= 2.00 Cls=ff(vend.) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=05ac ProdID=8286 Rev=00.86 S: Manufacturer=Apple Inc. S: Product=Bluetooth USB Host Controller C: #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=0mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=01 Prot=01 Driver=(none) I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=(none) I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) I: If#= 3 Alt= 0 #EPs= 0 Cls=fe(app. ) Sub=01 Prot=01 Driver=(none) Patch originally written and tested by clipcarl (forums.opensuse.org). Signed-off-by: Daniel J Blueman Signed-off-by: Henrik Rydberg --- drivers/bluetooth/btusb.c |3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index e272214..61f4eb7 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -76,6 +76,9 @@ static struct usb_device_id btusb_table[] = { /* Apple MacMini5,1 */ { USB_DEVICE(0x05ac, 0x8281) }, + /* Apple MacBookPro10,1 */ + { USB_DEVICE(0x05ac, 0x8286) }, + /* AVM BlueFRITZ! USB v2.0 */ { USB_DEVICE(0x057c, 0x3800) }, -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
nouveau Macbook EDID fix upstreaming...
It looks like Ryan's Nouveau patch [1,2] to get the I2C working for EDID (thus modelines) is crucial for avoiding the Nvidia binary drivers on MacbookPro 2012s. Any plans/chance for it to be upstreamed to e.g. 3.6-rc3? Many thanks, Daniel --- [1] diff -urNp a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c --- a/drivers/gpu/drm/nouveau/nouveau_bios.c 2012-07-08 22:48:05.589828510 -0500 +++ b/drivers/gpu/drm/nouveau/nouveau_bios.c 2012-07-15 00:28:18.578864693 -0500 @@ -6461,6 +6461,9 @@ nouveau_run_vbios_init(struct drm_device } } + if (!bios->execute) + nouveau_gpio_reset(dev); + return ret; } [2] https://bugs.freedesktop.org/show_bug.cgi?id=51971 -- Daniel J Blueman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] Add MSI support for SDHCI PCI hosts
Allow module parameter 'enable_msi' to request an MSI interrupt for hosts where available (presently PCI). Useful as a workaround on platforms where the legacy interrupt is broken. Signed-off-by: Daniel J Blueman --- drivers/mmc/host/sdhci-pci.c | 30 ++ drivers/mmc/host/sdhci.c | 23 +++ drivers/mmc/host/sdhci.h |2 ++ 3 files changed, 55 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index 504da71..fbde589 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -934,6 +934,34 @@ static int sdhci_pci_enable_dma(struct sdhci_host *host) return 0; } +static int sdhci_pci_enable_msi(struct sdhci_host *host) +{ + struct sdhci_pci_slot *slot; + struct pci_dev *pdev; + int ret; + + slot = sdhci_priv(host); + pdev = slot->chip->pdev; + + ret = pci_enable_msi(pdev); + if (ret) + return ret; + + host->irq = pdev->irq; + return 0; +} + +static void sdhci_pci_disable_msi(struct sdhci_host *host) +{ + struct sdhci_pci_slot *slot; + struct pci_dev *pdev; + + slot = sdhci_priv(host); + pdev = slot->chip->pdev; + + pci_disable_msi(pdev); +} + static int sdhci_pci_8bit_width(struct sdhci_host *host, int width) { u8 ctrl; @@ -976,6 +1004,8 @@ static void sdhci_pci_hw_reset(struct sdhci_host *host) static struct sdhci_ops sdhci_pci_ops = { .enable_dma = sdhci_pci_enable_dma, + .enable_msi = sdhci_pci_enable_msi, + .disable_msi= sdhci_pci_disable_msi, .platform_8bit_width= sdhci_pci_8bit_width, .hw_reset = sdhci_pci_hw_reset, }; diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 9a11dc3..9fa2180 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -45,6 +45,7 @@ static unsigned int debug_quirks = 0; static unsigned int debug_quirks2; +static bool enable_msi; static void sdhci_finish_data(struct sdhci_host *); @@ -2433,6 +2434,9 @@ int sdhci_suspend_host(struct sdhci_host *host) free_irq(host->irq, host); + if (host->ops->disable_msi && enable_msi) + host->ops->disable_msi(host); + 
return ret; } @@ -2447,6 +2451,12 @@ int sdhci_resume_host(struct sdhci_host *host) host->ops->enable_dma(host); } + if (host->ops->enable_msi && enable_msi) { + ret = host->ops->enable_msi(host); + if (ret) + return ret; + } + ret = request_irq(host->irq, sdhci_irq, IRQF_SHARED, mmc_hostname(host->mmc), host); if (ret) @@ -3024,6 +3034,12 @@ int sdhci_add_host(struct sdhci_host *host) host->tuning_timer.function = sdhci_tuning_timer; } + if (host->ops->enable_msi && enable_msi) { + ret = host->ops->enable_msi(host); + if (ret) + return ret; + } + ret = request_irq(host->irq, sdhci_irq, IRQF_SHARED, mmc_hostname(mmc), host); if (ret) { @@ -3071,6 +3087,8 @@ int sdhci_add_host(struct sdhci_host *host) reset: sdhci_reset(host, SDHCI_RESET_ALL); free_irq(host->irq, host); + if (host->ops->disable_msi && enable_msi) + host->ops->disable_msi(host); #endif untasklet: tasklet_kill(&host->card_tasklet); @@ -3114,6 +3132,9 @@ void sdhci_remove_host(struct sdhci_host *host, int dead) free_irq(host->irq, host); + if (host->ops->disable_msi && enable_msi) + host->ops->disable_msi(host); + del_timer_sync(&host->timer); tasklet_kill(&host->card_tasklet); @@ -3162,6 +3183,7 @@ module_exit(sdhci_drv_exit); module_param(debug_quirks, uint, 0444); module_param(debug_quirks2, uint, 0444); +module_param(enable_msi, bool, 0444); MODULE_AUTHOR("Pierre Ossman "); MODULE_DESCRIPTION("Secure Digital Host Controller Interface core driver"); @@ -3169,3 +3191,4 @@ MODULE_LICENSE("GPL"); MODULE_PARM_DESC(debug_quirks, "Force certain quirks."); MODULE_PARM_DESC(debug_quirks2, "Force certain other quirks."); +MODULE_PARM_DESC(debug_quirks2, "Enable MSI interrupt support where possible."); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 97653ea..df4e003 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -264,6 +264,8 @@ struct sdhci_ops { void(*set_clock)(struct sdhci_host *host, unsigned int clock); int (*enable_dma)(struct sdhci_host *host); + int 
(*enable_msi)(struct sd