[PATCH 1/4 v7] AMD64 EDAC: Add multi-domain support

2012-11-26 Thread Daniel J Blueman
Fix get_node_id to match northbridge IDs from the array of detected ones,
allowing multi-server support such as with Numascale's NumaConnect, renaming
to 'amd_get_node_id' for consistency.

v7: Refactor patches grouping changes

Signed-off-by: Daniel J Blueman 
---
 arch/x86/include/asm/amd_nb.h |   17 +
 drivers/edac/amd64_edac.c |6 +++---
 drivers/edac/amd64_edac.h |6 --
 3 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index b3341e9..417eb24 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -81,6 +81,23 @@ static inline struct amd_northbridge *node_to_amd_nb(int 
node)
return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : 
NULL;
 }
 
+static inline u16 amd_get_node_id(struct pci_dev *pdev)
+{
+   struct pci_dev *misc;
+   int i;
+
+   for (i = 0; i != amd_nb_num(); i++) {
+   misc = node_to_amd_nb(i)->misc;
+
+   if (pci_domain_nr(misc->bus) == pci_domain_nr(pdev->bus) &&
+   PCI_SLOT(misc->devfn) == PCI_SLOT(pdev->devfn))
+   return i;
+   }
+
+   WARN(1, "Unable to find AMD Northbridge identifier for %s\n", 
pci_name(pdev));
+   return 0;
+}
+
 #else
 
 #define amd_nb_num(x)  0
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index cc8e7c7..9ba70a5 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2546,7 +2546,7 @@ static int amd64_init_one_instance(struct pci_dev *F2)
struct mem_ctl_info *mci = NULL;
struct edac_mc_layer layers[2];
int err = 0, ret;
-   u8 nid = get_node_id(F2);
+   u16 nid = amd_get_node_id(F2);
 
ret = -ENOMEM;
pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
@@ -2637,7 +2637,7 @@ err_ret:
 static int __devinit amd64_probe_one_instance(struct pci_dev *pdev,
 const struct pci_device_id 
*mc_type)
 {
-   u8 nid = get_node_id(pdev);
+   u16 nid = amd_get_node_id(pdev);
struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
struct ecc_settings *s;
int ret = 0;
@@ -2687,7 +2687,7 @@ static void __devexit amd64_remove_one_instance(struct 
pci_dev *pdev)
 {
struct mem_ctl_info *mci;
struct amd64_pvt *pvt;
-   u8 nid = get_node_id(pdev);
+   u16 nid = amd_get_node_id(pdev);
struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
struct ecc_settings *s = ecc_stngs[nid];
 
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 8c41396..cecd0c4 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -290,12 +290,6 @@
 /* MSRs */
 #define MSR_MCGCTL_NBE BIT(4)
 
-/* AMD sets the first MC device at device ID 0x18. */
-static inline u8 get_node_id(struct pci_dev *pdev)
-{
-   return PCI_SLOT(pdev->devfn) - 0x18;
-}
-
 enum amd_families {
K8_CPUS = 0,
F10_CPUS,
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/4 v7] AMD64 EDAC: Consistently use u16 for northbridge IDs in amd_get_nb_id

2012-11-26 Thread Daniel J Blueman
Change amd_get_nb_id to return u16 to support >255 memory controllers,
and related consistency fixes.

v7: Refactor patches grouping changes

Signed-off-by: Daniel J Blueman 
---
 arch/x86/include/asm/processor.h |2 +-
 arch/x86/kernel/cpu/amd.c|4 ++--
 drivers/edac/amd64_edac.c|5 +++--
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index ad1fc85..eb3ba58 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -934,7 +934,7 @@ extern void start_thread(struct pt_regs *regs, unsigned 
long new_ip,
 extern int get_tsc_mode(unsigned long adr);
 extern int set_tsc_mode(unsigned int val);
 
-extern int amd_get_nb_id(int cpu);
+extern u16 amd_get_nb_id(int cpu);
 
 struct aperfmperf {
u64 aperf, mperf;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 1b7d165..2e298e9 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -364,9 +364,9 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
 #endif
 }
 
-int amd_get_nb_id(int cpu)
+u16 amd_get_nb_id(int cpu)
 {
-   int id = 0;
+   u16 id = 0;
 #ifdef CONFIG_SMP
id = per_cpu(cpu_llc_id, cpu);
 #endif
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 9ba70a5..60e93fa 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -942,7 +942,8 @@ static u64 get_error_address(struct mce *m)
struct amd64_pvt *pvt;
u64 cc6_base, tmp_addr;
u32 tmp;
-   u8 mce_nid, intlv_en;
+   u16 mce_nid;
+   u8 intlv_en;
 
if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7)
return addr;
@@ -2253,7 +2254,7 @@ static int init_csrows(struct mem_ctl_info *mci)
 }
 
 /* get all cores on this DCT */
-static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, unsigned nid)
+static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid)
 {
int cpu;
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/4 v7] AMD64 EDAC: Fix PCI function lookup

2012-11-26 Thread Daniel J Blueman
Fix locating sibling memory controller PCI functions by using the
correct PCI domain.

v7: Refactor patches grouping changes

Signed-off-by: Daniel J Blueman 
---
 drivers/edac/amd64_edac.c |   40 +---
 1 file changed, 21 insertions(+), 19 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 60e93fa..62b7b17 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -983,6 +983,24 @@ static u64 get_error_address(struct mce *m)
return addr;
 }
 
+static struct pci_dev *pci_get_related_function(unsigned int vendor,
+   unsigned int device,
+   struct pci_dev *related)
+{
+   struct pci_dev *dev = NULL;
+
+   dev = pci_get_device(vendor, device, dev);
+   while (dev) {
+   if (pci_domain_nr(dev->bus) == pci_domain_nr(related->bus) &&
+   (dev->bus->number == related->bus->number) &&
+   (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
+   break;
+   dev = pci_get_device(vendor, device, dev);
+   }
+
+   return dev;
+}
+
 static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
 {
struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -1002,11 +1020,12 @@ static void read_dram_base_limit_regs(struct amd64_pvt 
*pvt, unsigned range)
 
/* Factor in CC6 save area by reading dst node's limit reg */
if (c->x86 == 0x15) {
-   struct pci_dev *f1 = NULL;
+   struct pci_dev *misc, *f1 = NULL;
u8 nid = dram_dst_node(pvt, range);
u32 llim;
 
-   f1 = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x18 + nid, 
1));
+   misc = node_to_amd_nb(nid)->misc;
+   f1 = pci_get_related_function(misc->vendor, 
PCI_DEVICE_ID_AMD_15H_NB_F1, misc);
if (WARN_ON(!f1))
return;
 
@@ -1713,23 +1732,6 @@ static struct amd64_family_type amd64_family_types[] = {
},
 };
 
-static struct pci_dev *pci_get_related_function(unsigned int vendor,
-   unsigned int device,
-   struct pci_dev *related)
-{
-   struct pci_dev *dev = NULL;
-
-   dev = pci_get_device(vendor, device, dev);
-   while (dev) {
-   if ((dev->bus->number == related->bus->number) &&
-   (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
-   break;
-   dev = pci_get_device(vendor, device, dev);
-   }
-
-   return dev;
-}
-
 /*
  * These are tables of eigenvectors (one per line) which can be used for the
  * construction of the syndrome tables. The modified syndrome search algorithm
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 4/4 v7] AMD64 EDAC: Fix type usage in NB IDs and memory ranges

2012-11-26 Thread Daniel J Blueman
Use appropriate types for northbridge IDs and memory ranges.

v7: Refactor patches grouping changes

Signed-off-by: Daniel J Blueman 
---
 arch/x86/include/asm/amd_nb.h |2 +-
 drivers/edac/amd64_edac.c |   20 ++--
 drivers/edac/amd64_edac.h |6 +++---
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 417eb24..d2e703b 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -76,7 +76,7 @@ static inline bool amd_nb_has_feature(unsigned feature)
return ((amd_northbridges.flags & feature) == feature);
 }
 
-static inline struct amd_northbridge *node_to_amd_nb(int node)
+static inline struct amd_northbridge *node_to_amd_nb(u16 node)
 {
return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : 
NULL;
 }
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 62b7b17..b27412a 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -239,7 +239,7 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci)
  * DRAM base/limit associated with node_id
  */
 static bool amd64_base_limit_match(struct amd64_pvt *pvt, u64 sys_addr,
-  unsigned nid)
+  u8 nid)
 {
u64 addr;
 
@@ -265,7 +265,7 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct 
mem_ctl_info *mci,
u64 sys_addr)
 {
struct amd64_pvt *pvt;
-   unsigned node_id;
+   u8 node_id;
u32 intlv_en, bits;
 
/*
@@ -1348,7 +1348,7 @@ static u8 f1x_determine_channel(struct amd64_pvt *pvt, 
u64 sys_addr,
 }
 
 /* Convert the sys_addr to the normalized DCT address */
-static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, unsigned range,
+static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range,
 u64 sys_addr, bool hi_rng,
 u32 dct_sel_base_addr)
 {
@@ -1399,7 +1399,7 @@ static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, 
unsigned range,
  * checks if the csrow passed in is marked as SPARED, if so returns the new
  * spare row
  */
-static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow)
+static int f10_process_possible_spare(struct amd64_pvt *pvt, u16 dct, int 
csrow)
 {
int tmp_cs;
 
@@ -1424,7 +1424,7 @@ static int f10_process_possible_spare(struct amd64_pvt 
*pvt, u8 dct, int csrow)
  * -EINVAL:  NOT FOUND
  * 0..csrow = Chip-Select Row
  */
-static int f1x_lookup_addr_in_dct(u64 in_addr, u32 nid, u8 dct)
+static int f1x_lookup_addr_in_dct(u64 in_addr, u8 nid, u8 dct)
 {
struct mem_ctl_info *mci;
struct amd64_pvt *pvt;
@@ -2266,7 +2266,7 @@ static void get_cpus_on_this_dct_cpumask(struct cpumask 
*mask, u16 nid)
 }
 
 /* check MCG_CTL on all the cpus on this node */
-static bool amd64_nb_mce_bank_enabled_on_node(unsigned nid)
+static bool amd64_nb_mce_bank_enabled_on_node(u16 nid)
 {
cpumask_var_t mask;
int cpu, nbe;
@@ -2299,7 +2299,7 @@ out:
return ret;
 }
 
-static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on)
+static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on)
 {
cpumask_var_t cmask;
int cpu;
@@ -2337,7 +2337,7 @@ static int toggle_ecc_err_reporting(struct ecc_settings 
*s, u8 nid, bool on)
return 0;
 }
 
-static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid,
+static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid,
   struct pci_dev *F3)
 {
bool ret = true;
@@ -2389,7 +2389,7 @@ static bool enable_ecc_error_reporting(struct 
ecc_settings *s, u8 nid,
return ret;
 }
 
-static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid,
+static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid,
struct pci_dev *F3)
 {
u32 value, mask = 0x3;  /* UECC/CECC enable */
@@ -2428,7 +2428,7 @@ static const char *ecc_msg =
"'ecc_enable_override'.\n"
" (Note that use of the override may cause unknown side effects.)\n";
 
-static bool ecc_enabled(struct pci_dev *F3, u8 nid)
+static bool ecc_enabled(struct pci_dev *F3, u16 nid)
 {
u32 value;
u8 ecc_en = 0;
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index cecd0c4..a558084 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -332,7 +332,7 @@ struct amd64_pvt {
/* pci_device handles which we utilize */
struct pci_dev *F1, *F2, *F3;
 
-   unsigned mc_node_id;/* MC index of this MC node */
+   u16 mc_node_id; /* MC index of this MC node */
int ext_model;  /* extended model value of this node */
int channel_coun

switcheroo registration vs switching race...

2012-11-27 Thread Daniel J Blueman
hda-codec: out of range cmd 0:0:5:707:fffc
hda-codec: out of range cmd 0:0:5:707:ffbf
hda-codec: out of range cmd 0:0:5:707:
hda-codec: out of range cmd 0:0:5:707:fffc
hda-codec: out of range cmd 0:0:5:707:
hda-codec: out of range cmd 0:0:5:707:fffc
hda-codec: out of range cmd 0:0:5:707:ffbf
hda-codec: out of range cmd 0:0:7:707:
hda-codec: out of range cmd 0:0:7:707:fffc
hda-codec: out of range cmd 0:0:7:707:
hda-codec: out of range cmd 0:0:7:707:fffc
hda-codec: out of range cmd 0:0:7:707:ffbf
hda-codec: out of range cmd 0:0:7:707:
hda-codec: out of range cmd 0:0:7:707:fffc
hda-codec: out of range cmd 0:0:7:707:
hda-codec: out of range cmd 0:0:7:707:fffc
hda-codec: out of range cmd 0:0:7:707:ffbf
hda-codec: out of range cmd 0:0:4:707:
hda-codec: out of range cmd 0:0:4:707:fffc
hda-codec: out of range cmd 0:0:4:707:
hda-codec: out of range cmd 0:0:4:707:fffc
azx_single_send_cmd: 179 callbacks suppressed
hda-codec: out of range cmd 0:0:4:707:ffbf
hda-codec: out of range cmd 0:0:4:707:
hda-codec: out of range cmd 0:0:4:707:fffc
hda-codec: out of range cmd 0:0:4:707:
hda-codec: out of range cmd 0:0:4:707:fffc
hda-codec: out of range cmd 0:0:4:707:ffbf
hda-codec: out of range cmd 0:0:4:707:
hda-codec: out of range cmd 0:0:4:707:fffc
hda-codec: out of range cmd 0:0:4:707:
hda-codec: out of range cmd 0:0:4:707:fffc
hda-codec: out of range cmd 0:0:4:707:ffbf
hda-codec: out of range cmd 0:0:5:707:
hda-codec: out of range cmd 0:0:5:707:fffc
hda-codec: out of range cmd 0:0:5:707:
hda-codec: out of range cmd 0:0:5:707:fffc
hda-codec: out of range cmd 0:0:5:707:ffbf
hda-codec: out of range cmd 0:0:5:707:
hda-codec: out of range cmd 0:0:5:707:fffc
hda-codec: out of range cmd 0:0:5:707:
hda-codec: out of range cmd 0:0:5:707:fffc
hda-codec: out of range cmd 0:0:5:707:ffbf
hda-codec: out of range cmd 0:0:7:707:
hda-codec: out of range cmd 0:0:7:707:fffc
hda-codec: out of range cmd 0:0:7:707:
hda-codec: out of range cmd 0:0:7:707:fffc
hda-codec: out of range cmd 0:0:7:707:ffbf
hda-codec: out of range cmd 0:0:7:707:
hda-codec: out of range cmd 0:0:7:707:fffc
hda-codec: out of range cmd 0:0:7:707:
hda-codec: out of range cmd 0:0:7:707:fffc
hda-codec: out of range cmd 0:0:7:707:ffbf
--
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 RESEND] Add NumaChip remote PCI support

2012-11-29 Thread Daniel J Blueman

Hi Bjorn,

On 29/11/2012 07:08, Bjorn Helgaas wrote:

On Wed, Nov 21, 2012 at 1:39 AM, Daniel J Blueman
 wrote:

Add a NumaChip-specific PCI access mechanism via MMCONFIG cycles, while
preventing access to AMD Northbridges which shouldn't respond.

v2: Use PCI_DEVFN in precomputed constant limit; drop unneeded includes

Signed-off-by: Daniel J Blueman 
---
  arch/x86/include/asm/numachip/numachip.h |   20 +
  arch/x86/kernel/apic/apic_numachip.c |2 +
  arch/x86/pci/Makefile|1 +
  arch/x86/pci/numachip.c  |  134 ++
  4 files changed, 157 insertions(+)
  create mode 100644 arch/x86/include/asm/numachip/numachip.h
  create mode 100644 arch/x86/pci/numachip.c

diff --git a/arch/x86/include/asm/numachip/numachip.h 
b/arch/x86/include/asm/numachip/numachip.h
new file mode 100644
index 000..d35e71a
--- /dev/null
+++ b/arch/x86/include/asm/numachip/numachip.h
@@ -0,0 +1,20 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Numascale NumaConnect-specific header file
+ *
+ * Copyright (C) 2012 Numascale AS. All rights reserved.
+ *
+ * Send feedback to 
+ *
+ */
+
+#ifndef _ASM_X86_NUMACHIP_NUMACHIP_H
+#define _ASM_X86_NUMACHIP_NUMACHIP_H
+
+extern int __init pci_numachip_init(void);
+
+#endif /* _ASM_X86_NUMACHIP_NUMACHIP_H */
+
diff --git a/arch/x86/kernel/apic/apic_numachip.c 
b/arch/x86/kernel/apic/apic_numachip.c
index a65829a..9c2aa89 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -22,6 +22,7 @@
  #include 
  #include 

+#include 
  #include 
  #include 
  #include 
@@ -179,6 +180,7 @@ static int __init numachip_system_init(void)
 return 0;

 x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
+   x86_init.pci.arch_init = pci_numachip_init;

 map_csrs();

diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index 3af5a1e..ee0af58 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_STA2X11)   += sta2x11-fixup.o
  obj-$(CONFIG_X86_VISWS)+= visws.o

  obj-$(CONFIG_X86_NUMAQ)+= numaq_32.o
+obj-$(CONFIG_X86_NUMACHIP) += numachip.o


It looks like this depends on CONFIG_PCI_MMCONFIG for
pci_mmconfig_lookup().  Are there config constraints that force
CONFIG_PCI_MMCONFIG=y when CONFIG_X86_NUMACHIP=y?


I'll revise the patch with this constraint after we work out the best 
approach for below.



  obj-$(CONFIG_X86_INTEL_MID)+= mrst.o

diff --git a/arch/x86/pci/numachip.c b/arch/x86/pci/numachip.c
new file mode 100644
index 000..3773e05
--- /dev/null
+++ b/arch/x86/pci/numachip.c
@@ -0,0 +1,129 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Numascale NumaConnect-specific PCI code
+ *
+ * Copyright (C) 2012 Numascale AS. All rights reserved.
+ *
+ * Send feedback to 
+ *
+ * PCI accessor functions derived from mmconfig_64.c
+ *
+ */
+
+#include 
+#include 
+
+static u8 limit __read_mostly;
+
+static inline char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, 
unsigned int devfn)
+{
+   struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus);
+
+   if (cfg && cfg->virt)
+   return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12));
+   return NULL;
+}


Most of this file is copied directly from mmconfig_64.c (as you
mentioned above).  I wonder if we could avoid the code duplication by
making the pci_dev_base() implementation in mmconfig_64.c a weak
definition.  Then you could just supply a non-weak pci_dev_base() here
that would override that default version.  Your version would look
something like:

   char __iomem *pci_dev_base(unsigned int seg, unsigned int bus,
unsigned int devfn)
   {
   struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus);

   if (cfg && cfg->virt && devfn < limit)
   return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12));
   return NULL;
   }

That would be different from what you have in this patch because reads
& writes to devices above "limit" would return -EINVAL rather than 0
as you do here.  Would that be a problem?


That would work nicely (pointer lookup and inlining etc aside) if there 
were a runtime ability to override pci_dev_base only when the NumaChip 
signature is detected.


We could expose pci_dev_base via struct x86_init_pci; the extra 
complexity and performance tradeoff may not be worth it for a single 
case perhaps?


Thanks,
  Daniel
--
Daniel J Blueman
Principal Software Engineer, Numascale Asia
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel"

[PATCH 3/4 v8] AMD64 EDAC: Fix PCI function lookup

2012-11-30 Thread Daniel J Blueman
Fix locating sibling memory controller PCI functions by using the
correct PCI domain and use Northbridge only if found. Tested on
multi-socket server and multi-server, multi-socket NumaConnect setup.

v7: Refactor patches grouping changes
v8: Restructure searching for PCI function for clarity; use Northbridge
only if found

Signed-off-by: Daniel J Blueman 
---
 drivers/edac/amd64_edac.c |   43 ---
 1 file changed, 24 insertions(+), 19 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 60e93fa..6c1005f 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -983,6 +983,22 @@ static u64 get_error_address(struct mce *m)
return addr;
 }
 
+static struct pci_dev *pci_get_related_function(unsigned int vendor,
+   unsigned int device,
+   struct pci_dev *related)
+{
+   struct pci_dev *dev = NULL;
+
+   while ((dev = pci_get_device(vendor, device, dev))) {
+   if (pci_domain_nr(dev->bus) == pci_domain_nr(related->bus) &&
+   (dev->bus->number == related->bus->number) &&
+   (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
+   break;
+   }
+
+   return dev;
+}
+
 static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
 {
struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -1002,11 +1018,17 @@ static void read_dram_base_limit_regs(struct amd64_pvt 
*pvt, unsigned range)
 
/* Factor in CC6 save area by reading dst node's limit reg */
if (c->x86 == 0x15) {
-   struct pci_dev *f1 = NULL;
+   struct pci_dev *misc, *f1 = NULL;
u8 nid = dram_dst_node(pvt, range);
+   struct amd_northbridge *nb = node_to_amd_nb(nid);
u32 llim;
 
-   f1 = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x18 + nid, 
1));
+   /* If DRAM base/limit registers point to a non-AMD device, nb 
won't have been found */
+   if (!nb)
+   return;
+
+   misc = nb->misc;
+   f1 = pci_get_related_function(misc->vendor, 
PCI_DEVICE_ID_AMD_15H_NB_F1, misc);
if (WARN_ON(!f1))
return;
 
@@ -1713,23 +1735,6 @@ static struct amd64_family_type amd64_family_types[] = {
},
 };
 
-static struct pci_dev *pci_get_related_function(unsigned int vendor,
-   unsigned int device,
-   struct pci_dev *related)
-{
-   struct pci_dev *dev = NULL;
-
-   dev = pci_get_device(vendor, device, dev);
-   while (dev) {
-   if ((dev->bus->number == related->bus->number) &&
-   (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
-   break;
-   dev = pci_get_device(vendor, device, dev);
-   }
-
-   return dev;
-}
-
 /*
  * These are tables of eigenvectors (one per line) which can be used for the
  * construction of the syndrome tables. The modified syndrome search algorithm
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 4/4 v8] AMD64 EDAC: Fix type usage in NB IDs and memory ranges

2012-11-30 Thread Daniel J Blueman
Use appropriate types for northbridge IDs and memory ranges. Mark immutable
data const and keep within compilation unit on related structures. Tested on
multi-socket server and multi-server, multi-socket NumaConnect setup.

v7: Refactor patches grouping changes
v8: Drop unneeded change; use const and static where appropriate

Signed-off-by: Daniel J Blueman 
---
 arch/x86/include/asm/amd_nb.h |2 +-
 drivers/edac/amd64_edac.c |   26 +-
 drivers/edac/amd64_edac.h |6 +++---
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 417eb24..d2e703b 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -76,7 +76,7 @@ static inline bool amd_nb_has_feature(unsigned feature)
return ((amd_northbridges.flags & feature) == feature);
 }
 
-static inline struct amd_northbridge *node_to_amd_nb(int node)
+static inline struct amd_northbridge *node_to_amd_nb(u16 node)
 {
return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : 
NULL;
 }
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 6c1005f..30149e4 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -31,7 +31,7 @@ static struct ecc_settings **ecc_stngs;
  *
  *FIXME: Produce a better mapping/linearisation.
  */
-struct scrubrate {
+static const struct scrubrate {
u32 scrubval;   /* bit pattern for scrub rate */
u32 bandwidth;  /* bandwidth consumed (bytes/sec) */
 } scrubrates[] = {
@@ -239,7 +239,7 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci)
  * DRAM base/limit associated with node_id
  */
 static bool amd64_base_limit_match(struct amd64_pvt *pvt, u64 sys_addr,
-  unsigned nid)
+  u8 nid)
 {
u64 addr;
 
@@ -265,7 +265,7 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct 
mem_ctl_info *mci,
u64 sys_addr)
 {
struct amd64_pvt *pvt;
-   unsigned node_id;
+   u8 node_id;
u32 intlv_en, bits;
 
/*
@@ -1351,7 +1351,7 @@ static u8 f1x_determine_channel(struct amd64_pvt *pvt, 
u64 sys_addr,
 }
 
 /* Convert the sys_addr to the normalized DCT address */
-static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, unsigned range,
+static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range,
 u64 sys_addr, bool hi_rng,
 u32 dct_sel_base_addr)
 {
@@ -1427,7 +1427,7 @@ static int f10_process_possible_spare(struct amd64_pvt 
*pvt, u8 dct, int csrow)
  * -EINVAL:  NOT FOUND
  * 0..csrow = Chip-Select Row
  */
-static int f1x_lookup_addr_in_dct(u64 in_addr, u32 nid, u8 dct)
+static int f1x_lookup_addr_in_dct(u64 in_addr, u8 nid, u8 dct)
 {
struct mem_ctl_info *mci;
struct amd64_pvt *pvt;
@@ -1742,7 +1742,7 @@ static struct amd64_family_type amd64_family_types[] = {
  *
  * Algorithm courtesy of Ross LaFetra from AMD.
  */
-static u16 x4_vectors[] = {
+static const u16 x4_vectors[] = {
0x2f57, 0x1afe, 0x66cc, 0xdd88,
0x11eb, 0x3396, 0x7f4c, 0xeac8,
0x0001, 0x0002, 0x0004, 0x0008,
@@ -1781,7 +1781,7 @@ static u16 x4_vectors[] = {
0x19a9, 0x2efe, 0xb5cc, 0x6f88,
 };
 
-static u16 x8_vectors[] = {
+static const u16 x8_vectors[] = {
0x0145, 0x028a, 0x2374, 0x43c8, 0xa1f0, 0x0520, 0x0a40, 0x1480,
0x0211, 0x0422, 0x0844, 0x1088, 0x01b0, 0x44e0, 0x23c0, 0xed80,
0x1011, 0x0116, 0x022c, 0x0458, 0x08b0, 0x8c60, 0x2740, 0x4e80,
@@ -1803,7 +1803,7 @@ static u16 x8_vectors[] = {
0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, 0x8000,
 };
 
-static int decode_syndrome(u16 syndrome, u16 *vectors, unsigned num_vecs,
+static int decode_syndrome(u16 syndrome, const u16 *vectors, unsigned num_vecs,
   unsigned v_dim)
 {
unsigned int i, err_sym;
@@ -2269,7 +2269,7 @@ static void get_cpus_on_this_dct_cpumask(struct cpumask 
*mask, u16 nid)
 }
 
 /* check MCG_CTL on all the cpus on this node */
-static bool amd64_nb_mce_bank_enabled_on_node(unsigned nid)
+static bool amd64_nb_mce_bank_enabled_on_node(u16 nid)
 {
cpumask_var_t mask;
int cpu, nbe;
@@ -2302,7 +2302,7 @@ out:
return ret;
 }
 
-static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on)
+static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on)
 {
cpumask_var_t cmask;
int cpu;
@@ -2340,7 +2340,7 @@ static int toggle_ecc_err_reporting(struct ecc_settings 
*s, u8 nid, bool on)
return 0;
 }
 
-static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid,
+static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid,
   struct pci_dev *F3)
 {
bool ret = true;
@@ -2392,7 +2392,7 @@ 

Re: [PATCH, resubmit] ax88179_178a: ASIX AX88179_178A USB 3.0/2.0 to gigabit ethernet adapter driver

2013-02-03 Thread Daniel J Blueman
Hi Freddy,

> Michael, could you give me more information about how do you test this driver?
> I have tried to reproduce the issue by using "ifconfig ethX mtu 1500", but I 
> didn't confront the same issue.
> Thank you in advance for your help.

I found the same by just starting with 'ifconfig eth0 1500' and
testing as high as 4000; pinging another host with a large payload of
size mtu-40 starts failing; after ~30s, I see the transmit time out
trace [1].

Of course, a default MTU size of 1500 is essential to avoid
fragmentation issues, so should be fixed too. Jumbo frames support is
essential these days too.

Thanks,
  Daniel

--- [1]

usb 4-1: new SuperSpeed USB device number 3 using xhci_hcd
ax88179_178a 4-1:1.0 eth0: register 'ax88179_178a' at
usb-:00:14.0-1, ASIX AX88179 USB 3.0 Gigibit Ethernet,
00:0a:cd:21:46:a7
ax88179_178a 4-1:1.0 eth2: ax88179 - Link status is: 1
[ cut here ]
WARNING: at net/sched/sch_generic.c:254 dev_watchdog+0x26b/0x280()
Hardware name: MacBookPro10,1
NETDEV WATCHDOG: eth2 (ax88179_178a): transmit queue 0 timed out
Modules linked in: fuse snd_hda_codec_hdmi snd_hda_codec_cirrus joydev
hid_apple bcm5974 coretemp kvm_intel kvm ghash_clmulni_intel b43 ssb
ax88179_178a usbnet mii uvcvideo videobuf2_core videobuf2_vmalloc
videobuf2_memops applesmc input_polldev microcode bcma bnep rfcomm
lpc_ich mfd_core snd_hda_intel snd_hda_codec snd_hwdep snd_pcm nouveau
apple_gmux snd_timer i915 ttm drm_kms_helper snd hwmon binfmt_misc
mxm_wmi snd_page_alloc video apple_bl nls_iso8859_1
Pid: 0, comm: swapper/0 Not tainted 3.8.0-rc6-expert+ #2
Call Trace:
  [] ? dev_watchdog+0x250/0x280
 [] warn_slowpath_common+0x7a/0xb0
 [] warn_slowpath_fmt+0x41/0x50
 [] dev_watchdog+0x26b/0x280
 [] ? pfifo_fast_dequeue+0xe0/0xe0
 [] call_timer_fn+0x74/0xf0
 [] ? usleep_range+0x40/0x40
 [] ? pfifo_fast_dequeue+0xe0/0xe0
 [] run_timer_softirq+0x18b/0x220
 [] __do_softirq+0xc2/0x180
 [] ? tick_program_event+0x1f/0x30
 [] ? read_measured_perf_ctrs+0x70/0x70
 [] call_softirq+0x1c/0x30
 [] do_softirq+0x7d/0xb0
 [] irq_exit+0x9e/0xc0
 [] smp_apic_timer_interrupt+0x69/0xa0
 [] apic_timer_interrupt+0x6c/0x80
  [] ? get_next_timer_interrupt+0x1c4/0x290
 [] ? cpuidle_wrap_enter+0x50/0x90
 [] ? cpuidle_wrap_enter+0x4c/0x90
 [] cpuidle_enter_tk+0x10/0x20
 [] cpuidle_idle_call+0x7c/0x110
 [] cpu_idle+0x7a/0xf0
 [] rest_init+0x144/0x150
 [] ? csum_partial_copy_generic+0x170/0x170
 [] ? efi_free_boot_services+0x53/0x58
 [] start_kernel+0x359/0x366
 [] ? repair_env_string+0x5e/0x5e
 [] x86_64_start_reservations+0x131/0x135
 [] ? early_idt_handlers+0x120/0x120
 [] x86_64_start_kernel+0xd3/0xd7
---[ end trace 58c12634a365560a ]---
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


3.8-rc6: nouveau lockdep recursive lock acquisition

2013-02-03 Thread Daniel J Blueman
[   DRM] 3: core 1000MHz shader 2000MHz memory 1080MHz voltage 630mV
nouveau [   DRM] 4: core 1254MHz shader 2508MHz memory 1080MHz voltage 630mV
nouveau [   DRM] c:
nouveau [   DRM] MM: using COPY for buffer copies
nouveau :01:00.0: No connectors reported connected with modes
[drm] Cannot find any crtc or sizes - going 1024x768
nouveau [   DRM] allocated 1024x768 fb: 0x8, bo 88025b966800
nouveau :01:00.0: fb1: nouveaufb frame buffer device
[drm] Initialized nouveau 1.1.0 20120801 for :01:00.0 on minor 1
snd_hda_intel :01:00.1: enabling device ( -> 0002)
hda-intel :01:00.1: Handle VGA-switcheroo audio client
snd_hda_intel :01:00.1: irq 49 for MSI/MSI-X
input: HDA NVidia HDMI/DP,pcm=8 as
/devices/pci:00/:00:01.0/:01:00.1/sound/card1/input11
input: HDA NVidia HDMI/DP,pcm=7 as
/devices/pci:00/:00:01.0/:01:00.1/sound/card1/input12
input: HDA NVidia HDMI/DP,pcm=3 as
/devices/pci:00/:00:01.0/:01:00.1/sound/card1/input13
hda-intel :01:00.1: Disabling via VGA-switcheroo
VGA switcheroo: switched nouveau off
nouveau [   DRM] suspending fbcon...
nouveau [   DRM] suspending display...
nouveau [   DRM] unpinning framebuffer(s)...
nouveau [   DRM] evicting buffers...
nouveau [   DRM] suspending client object trees...
nouveau E[   I2C][:01:00.0] AUXCH(3): begin idle timeout 0x
nouveau E[   I2C][:01:00.0] AUXCH(2): begin idle timeout 0x
nouveau E[   I2C][:01:00.0] AUXCH(1): begin idle timeout 0x
applesmc: light sensor data length set to 10
nouveau E[   I2C][:01:00.0] AUXCH(1): begin idle timeout 0x
nouveau E[   I2C][:01:00.0] AUXCH(3): begin idle timeout 0x
nouveau E[   I2C][:01:00.0] AUXCH(2): begin idle timeout 0x
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


3.8-rc1 patch_cirrus 4.0 regression...

2013-02-05 Thread Daniel J Blueman
Hi Takashi,

The v3.8-rc kernels have regressed from v3.7 with the quad-speaker
arrangement on my Macbook Pro 10,1 - only the higher-frequency
speakers work despite the front and rear channels being exposed in the
mixer.

Reverting f37bc7 [1] restored the correct behaviour ([2] was reverted
to compile, but the problem still occurs with only [2] reverted).

How can I help to debug this and find the right approach?

Thanks,
  Daniel

--- [1]

commit f37bc7a88d374448a1f4bba9267d308606d78bf2
Author: Takashi Iwai 
Date:   Thu Nov 8 15:59:23 2012 +0100

ALSA: hda - Give standard "Bass Speaker" mixer for 2.1 speakers

When two built-in speakers are found on the machine, we can suppose
it's rather a 2.1 speaker system with a bass output instead of
front/surround channels.

Signed-off-by: Takashi Iwai 

--- [2]

commit ee81abb623cb5e03c182d16871bb4fb34fdc9b4f
Author: Takashi Iwai 
Date:   Thu Nov 8 17:12:10 2012 +0100

ALSA: hda - Apply a proper chmap for built-in 2.1 speakers

When 2.1 speakers are detected, use the corresponding channel map
instead of the standard map with front+rear surrounds.

Signed-off-by: Takashi Iwai 
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 3.8-rc1 patch_cirrus 4.0 regression...

2013-02-05 Thread Daniel J Blueman
On 6 February 2013 00:16, Takashi Iwai  wrote:
> At Wed, 6 Feb 2013 00:10:30 +0800,
> Daniel J Blueman wrote:
>>
>> Hi Takashi,
>>
>> The v3.8-rc kernels have regressed from v3.7 with the quad-speaker
>> arrangement on my Macbook Pro 10,1 - only the higher-frequency
>> speakers work despite the front and rear channels being exposed in the
>> mixer.
>
> Interesting.  So you have a machine with 4.0 built-in speaker instead
> of 2.1?  Then we need to add a device-specific flag for it.  Currently
> the driver assumes 2.1 system blindly because majority of machines
> have that.
>
> FWIW, the codec parser code has been totally rewritten for 3.9, so any
> patch to 3.8 won't be applied to 3.9 (and vice versa)...
>
> Could you give alsa-info.sh output on 3.8 kernel?  Then I'll try to
> cook it for 3.9 (and maybe backport to 3.8).

Here's the output from the current alsa-info.sh on 3.8-rc6 with the
two cited patches reverted [1]; let me know if you'd like 3.8-rc6
pure.

The sound is pretty impressive for a laptop when the low-frequency
speakers are enabled.

Thanks, Takashi!
  Daniel

--- [1]

upload=true&script=true&cardinfo=
!!
!!ALSA Information Script v 0.4.61
!!

!!Script ran on: Tue Feb  5 16:22:01 UTC 2013


!!Linux Distribution
!!--

Ubuntu 12.10 \n \l DISTRIB_ID=Ubuntu DISTRIB_DESCRIPTION="Ubuntu
12.10" NAME="Ubuntu" ID=ubuntu ID_LIKE=debian PRETTY_NAME="Ubuntu
quantal (12.10)"


!!DMI Information
!!---

Manufacturer:  Apple Inc.
Product Name:  MacBookPro10,1
Product Version:   1.0
Firmware Version:  MBP101.88Z.00EE.B02.1208081132


!!Kernel Information
!!--

Kernel release:3.8.0-rc6-ninja+
Operating System:  GNU/Linux
Architecture:  x86_64
Processor: x86_64
SMP Enabled:   Yes


!!ALSA Version
!!

Driver version: k3.8.0-rc6-ninja+
Library version:1.0.25
Utilities version:  1.0.25


!!Loaded ALSA modules
!!---

snd_hda_intel
snd_hda_intel


!!Sound Servers on this system
!!

Pulseaudio:
  Installed - Yes (/usr/bin/pulseaudio)
  Running - Yes


!!Soundcards recognised by ALSA
!!-

 0 [PCH]: HDA-Intel - HDA Intel PCH
  HDA Intel PCH at 0xc1c1 irq 50
 1 [NVidia ]: HDA-Intel - HDA NVidia
  HDA NVidia at 0xc108 irq 51


!!PCI Soundcards installed in the system
!!--

00:1b.0 Audio device: Intel Corporation 7 Series/C210 Series Chipset
Family High Definition Audio Controller (rev 04)
01:00.1 Audio device: NVIDIA Corporation Device 0e1b (rev ff)


!!Advanced information - PCI Vendor/Device/Subsystem ID's
!!---

00:1b.0 0403: 8086:1e20 (rev 04)
Subsystem: 8086:7270
--
01:00.1 0403: 10de:0e1b (rev ff) (prog-if ff)
!!! Unknown header type 7f


!!Modprobe options (Sound related)
!!

snd-atiixp-modem: index=-2
snd-intel8x0m: index=-2
snd-via82xx-modem: index=-2
snd-usb-audio: index=-2
snd-usb-caiaq: index=-2
snd-usb-ua101: index=-2
snd-usb-us122l: index=-2
snd-usb-usx2y: index=-2
snd-cmipci: mpu_port=0x330 fm_port=0x388
snd-pcsp: index=-2
snd-usb-audio: index=-2
snd_hda_intel: enable_msi=1 power_save=1


!!Loaded sound module options
!!---

!!Module: snd_hda_intel
align_buffer_size : -1
bdl_pos_adj : 
1,32,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
enable : Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y
enable_msi : 1
id : 
(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null)
index : 
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
jackpoll_ms : 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
model : 
(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null)
patch : 
(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null)
position_fix :
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
power_save : 0
power_save_control

Re: 3.8-rc1 patch_cirrus 4.0 regression...

2013-02-05 Thread Daniel J Blueman
On 6 February 2013 01:40, Takashi Iwai  wrote:
> At Tue, 05 Feb 2013 17:34:15 +0100,
> Takashi Iwai wrote:
>>
>> At Wed, 6 Feb 2013 00:29:54 +0800,
>> Daniel J Blueman wrote:
>> >
>> > On 6 February 2013 00:16, Takashi Iwai  wrote:
>> > > At Wed, 6 Feb 2013 00:10:30 +0800,
>> > > Daniel J Blueman wrote:
>> > >>
>> > >> Hi Takashi,
>> > >>
>> > >> The v3.8-rc kernels have regressed from v3.7 with the quad-speaker
>> > >> arrangement on my Macbook Pro 10,1 - only the higher-frequency
>> > >> speakers work despite the front and rear channels being exposed in the
>> > >> mixer.
>> > >
>> > > Interesting.  So you have a machine with 4.0 built-in speaker instead
>> > > of 2.1?  Then we need to add a device-specific flag for it.  Currently
>> > > the driver assumes 2.1 system blindly because majority of machines
>> > > have that.
>> > >
>> > > FWIW, the codec parser code has been totally rewritten for 3.9, so any
>> > > patch to 3.8 won't be applied to 3.9 (and vice versa)...
>> > >
>> > > Could you give alsa-info.sh output on 3.8 kernel?  Then I'll try to
>> > > cook it for 3.9 (and maybe backport to 3.8).
>> >
>> > Here's the output from the current alsa-info.sh on 3.8-rc6 with the
>> > two cited patches reverted [1]; let me know if you'd like 3.8-rc6
>> > pure.
>>
>> Thanks.
>>
>> > The sound is pretty impressive for a laptop when the low-frequecy
>> > speakers are enabled.
>>
>> Which program are you using for testing the surrounds?
>> I'm interested in it because the commit you reverted is basically
>> providing only an additional information for the channel map, and it
>> doesn't change anything else.  It implies that some applications are
>> really referring to the chmap info.
>
> Or, it might be that the mixer value is simply not set correct.
>
> To be sure, could you try again 3.8-rc6 without reversing patches,
> adjust "Speaker" and "Bass Speaker" volumes properly, and retest?
> If it still doesn't work, please take alsa-info.sh snapshot at this
> state for comparing with the previous result.

My apologies! We do now have "Bass Speaker" which affects both bass
speakers. It was always being restored to level 0 and works when set
up.

There is a "Subwoofer" slider in addition to "Balance" and "Fade" in
the GNOME mixer UI which is greyed out; presumably this is intended as
the same mixer control?

Dan
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[3.8-rc7] PCI hotplug wakeup oops

2013-02-11 Thread Daniel J Blueman
With 3.8-rc7, when unplugging the Thunderbolt ethernet adapter (bus 0a
[1]) on a Macbook Pro 10,1, we see the PCIe port correctly released:

pciehp :06:03.0:pcie24: Card not present on Slot(3)
tg3 :0a:00.0: tg3_abort_hw timed out, TX_MODE_ENABLE will not
clear MAC_TX_MODE=
tg3 :0a:00.0 eth0: No firmware running
tg3 :0a:00.0 eth0: Link is down
[sched_delayed] sched: RT throttling activated
pcieport :00:01.1: System wakeup enabled by ACPI
pciehp :09:00.0:pcie24: unloading service driver pciehp
pci_bus :0a: busn_res: [bus 0a] is released
pci_bus :09: busn_res: [bus 09-0a] is released

After some activity later (eg I can reproduce this by switching to a
text console and back), often we'll see an oops:

Unable to handle kernel paging request at 1070
pci_pme_list_scan+0x3d/0xe0
Call Trace:
process_one_work+0x193
? process_one_work+0x131
? pci_pme_wakeup+0x60
worker_thread+0x15d

(gdb) list *(pci_pme_list_scan+0x3d)
0x8123f6dd is in pci_pme_list_scan (drivers/pci/pci.c:1556).
1551/*
1552 * If bridge is in low power state, the
1553 * configuration space of subordinate 
devices
1554 * may be not accessible
1555 */
1556if (bridge && bridge->current_state != 
PCI_D0)
1557continue;
1558pci_pme_wakeup(pme_dev->dev, NULL);
1559} else {
1560list_del(&pme_dev->list);

Since a panic in vsnprintf happens after the oops (hence I can't catch
it with EFI pstore), it is almost certainly significant heap
corruption; this would explain why pme_dev became null (the load has
been ordered ahead).

I'll see what I can find out with memory poisoning and list debugging.

Thanks,
  Daniel

--- [1]

$ lspci
00:00.0 Host bridge: Intel Corporation 3rd Gen Core processor DRAM
Controller (rev 09)
00:01.0 PCI bridge: Intel Corporation Xeon E3-1200 v2/3rd Gen Core
processor PCI Express Root Port (rev 09)
00:01.1 PCI bridge: Intel Corporation Xeon E3-1200 v2/3rd Gen Core
processor PCI Express Root Port (rev 09)
00:01.2 PCI bridge: Intel Corporation Xeon E3-1200 v2/3rd Gen Core
processor PCI Express Root Port (rev 09)
00:02.0 VGA compatible controller: Intel Corporation 3rd Gen Core
processor Graphics Controller (rev 09)
00:14.0 USB controller: Intel Corporation 7 Series/C210 Series Chipset
Family USB xHCI Host Controller (rev 04)
00:16.0 Communication controller: Intel Corporation 7 Series/C210
Series Chipset Family MEI Controller #1 (rev 04)
00:1a.0 USB controller: Intel Corporation 7 Series/C210 Series Chipset
Family USB Enhanced Host Controller #2 (rev 04)
00:1b.0 Audio device: Intel Corporation 7 Series/C210 Series Chipset
Family High Definition Audio Controller (rev 04)
00:1c.0 PCI bridge: Intel Corporation 7 Series/C210 Series Chipset
Family PCI Express Root Port 1 (rev c4)
00:1c.1 PCI bridge: Intel Corporation 7 Series/C210 Series Chipset
Family PCI Express Root Port 2 (rev c4)
00:1d.0 USB controller: Intel Corporation 7 Series/C210 Series Chipset
Family USB Enhanced Host Controller #1 (rev 04)
00:1f.0 ISA bridge: Intel Corporation HM77 Express Chipset LPC
Controller (rev 04)
00:1f.2 SATA controller: Intel Corporation 7 Series Chipset Family
6-port SATA Controller [AHCI mode] (rev 04)
00:1f.3 SMBus: Intel Corporation 7 Series/C210 Series Chipset Family
SMBus Controller (rev 04)
01:00.0 VGA compatible controller: NVIDIA Corporation Device 0fd5 (rev ff)
01:00.1 Audio device: NVIDIA Corporation Device 0e1b (rev ff)
03:00.0 Ethernet controller: Broadcom Corporation Device 16a3 (rev 10)
03:00.1 SD Host controller: Broadcom Corporation NetXtreme BCM57765
Memory Card Reader (rev 10)
04:00.0 Network controller: Broadcom Corporation BCM4331 802.11a/b/g/n (rev 02)
05:00.0 PCI bridge: Intel Corporation DSL3510 Thunderbolt Port [Cactus
Ridge] (rev 03)
06:00.0 PCI bridge: Intel Corporation DSL3510 Thunderbolt Port [Cactus
Ridge] (rev 03)
06:03.0 PCI bridge: Intel Corporation DSL3510 Thunderbolt Port [Cactus
Ridge] (rev 03)
06:04.0 PCI bridge: Intel Corporation DSL3510 Thunderbolt Port [Cactus
Ridge] (rev 03)
06:05.0 PCI bridge: Intel Corporation DSL3510 Thunderbolt Port [Cactus
Ridge] (rev 03)
06:06.0 PCI bridge: Intel Corporation DSL3510 Thunderbolt Port [Cactus
Ridge] (rev 03)
07:00.0 System peripheral: Intel Corporation DSL3510 Thunderbolt Port
[Cactus Ridge] (rev 03)
08:00.0 PCI bridge: Intel Corporation DSL3510 Thunderbolt Controller
[Cactus Ridge]
09:00.0 PCI bridge: Intel Corporation DSL3510 Thunderbolt Controller
[Cactus Ridge]
0a:00.0 Ethernet controller: Broadcom Corporation NetXtreme BCM57762
Gigabit Ethernet PCIe
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line "u

Re: [3.8-rc7] PCI hotplug wakeup oops

2013-02-11 Thread Daniel J Blueman
On 11 February 2013 21:03, Daniel J Blueman  wrote:
> With 3.8-rc7, when unplugging the Thunderbolt ethernet adapter (bus 0a
> [1]) on a Macbook Pro 10,1, we see the PCIe port correctly released:
>
> pciehp :06:03.0:pcie24: Card not present on Slot(3)
> tg3 :0a:00.0: tg3_abort_hw timed out, TX_MODE_ENABLE will not
> clear MAC_TX_MODE=
> tg3 :0a:00.0 eth0: No firmware running
> tg3 :0a:00.0 eth0: Link is down
> [sched_delayed] sched: RT throttling activated
> pcieport :00:01.1: System wakeup enabled by ACPI
> pciehp :09:00.0:pcie24: unloading service driver pciehp
> pci_bus :0a: busn_res: [bus 0a] is released
> pci_bus :09: busn_res: [bus 09-0a] is released
>
> After some activity later (eg I can reproduce this by switching to a
> text console and back), often we'll see an oops:
>
> Unable to handle kernel paging request at 1070
> pci_pme_list_scan+0x3d/0xe0
> Call Trace:
> process_one_work+0x193
> ? process_one_work+0x131
> ? pci_pme_wakeup+0x60
> worker_thread+0x15d
>
> (gdb) list *(pci_pme_list_scan+0x3d)
> 0x8123f6dd is in pci_pme_list_scan (drivers/pci/pci.c:1556).
> 1551/*
> 1552 * If bridge is in low power state, 
> the
> 1553 * configuration space of subordinate 
> devices
> 1554 * may be not accessible
> 1555 */
> 1556if (bridge && bridge->current_state 
> != PCI_D0)
> 1557continue;
> 1558pci_pme_wakeup(pme_dev->dev, NULL);
> 1559} else {
> 1560list_del(&pme_dev->list);
>
> Since a panic in vsnprintf happens after the oops (hence I can't catch
> it with EFI pstore), it is almost certainly significant heap
> corruption; this would explain why pme_dev became null (the load has
> been ordered ahead).
>
> I'll see what I can find out with memory poisoning and list debugging.

Enabling a bunch of related debugging, we see pme_dev is non-null and:

BUG: Unable to handle NULL pointer dereference at
pci_bus_read_config_word+0x6c
PGD 26314c067 PUD 2633f9067 PMD 0
Oops:  [#1] SMP
pci_check_pme_status+0x4f
pci_pme_wakeup+0x21
pci_pme_list_scan+0xd5
process_one_work+0x1ca
? process_one_work+0x160
? pci_pme_wakeup+0x60
worker_thread+0x14e

Anyway, it looks like the device being unplugged wasn't removed from
pci_pme_list as pci_pme_active(dev, false) wasn't called.

From a quick review, I wasn't able to find the right place in the
call-chain which I only see releases the child busses and PCIe port
drivers. Anyone?

Thanks,
  Daniel
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [3.8-rc7] PCI hotplug wakeup oops

2013-02-11 Thread Daniel J Blueman
On 12 February 2013 03:49, Rafael J. Wysocki  wrote:
> On Monday, February 11, 2013 08:27:49 PM Rafael J. Wysocki wrote:
>> On Monday, February 11, 2013 12:01:37 PM Bjorn Helgaas wrote:
>> > [+cc Rafael]
>> >
>> > On Mon, Feb 11, 2013 at 10:08 AM, Daniel J Blueman  
>> > wrote:
>> > > On 11 February 2013 21:03, Daniel J Blueman  wrote:
>> > >> With 3.8-rc7, when unplugging the Thunderbolt ethernet adapter (bus 0a
>> > >> [1]) on a Macbook Pro 10,1, we see the PCIe port correctly released:
>> > >>
>> > >> pciehp :06:03.0:pcie24: Card not present on Slot(3)
>> > >> tg3 :0a:00.0: tg3_abort_hw timed out, TX_MODE_ENABLE will not
>> > >> clear MAC_TX_MODE=
>> > >> tg3 :0a:00.0 eth0: No firmware running
>> > >> tg3 :0a:00.0 eth0: Link is down
>> > >> [sched_delayed] sched: RT throttling activated
>> > >> pcieport :00:01.1: System wakeup enabled by ACPI
>> > >> pciehp :09:00.0:pcie24: unloading service driver pciehp
>> > >> pci_bus :0a: busn_res: [bus 0a] is released
>> > >> pci_bus :09: busn_res: [bus 09-0a] is released
>> > >>
>> > >> After some activity later (eg I can reproduce this by switching to a
>> > >> text console and back), often we'll see an oops:
>> > >>
>> > >> Unable to handle kernel paging request at 1070
>> > >> pci_pme_list_scan+0x3d/0xe0
>> > >> Call Trace:
>> > >> process_one_work+0x193
>> > >> ? process_one_work+0x131
>> > >> ? pci_pme_wakeup+0x60
>> > >> worker_thread+0x15d
>> > >>
>> > >> (gdb) list *(pci_pme_list_scan+0x3d)
>> > >> 0x8123f6dd is in pci_pme_list_scan (drivers/pci/pci.c:1556).
>> > >> 1551/*
>> > >> 1552 * If bridge is in low power 
>> > >> state, the
>> > >> 1553 * configuration space of 
>> > >> subordinate devices
>> > >> 1554 * may be not accessible
>> > >> 1555 */
>> > >> 1556if (bridge && 
>> > >> bridge->current_state != PCI_D0)
>> > >> 1557continue;
>> > >> 1558pci_pme_wakeup(pme_dev->dev, 
>> > >> NULL);
>> > >> 1559} else {
>> > >> 1560list_del(&pme_dev->list);
>> > >>
>> > >> Since a panic in vsnprintf happens after the oops (hence I can't catch
>> > >> it with EFI pstore), it is almost certainly significant heap
>> > >> corruption; this would explain why pme_dev became null (the load has
>> > >> been ordered ahead).
>> > >>
>> > >> I'll see what I can find out with memory poisoning and list debugging.
>> > >
>> > > Enabling a bunch of related debugging, we see pme_dev is non-null and:
>> > >
>> > > BUG: Unable to handle NULL pointer dereference at
>> > > pci_bus_read_config_word+0x6c
>> > > PGD 26314c067 PUD 2633f9067 PMD 0
>> > > Oops:  [#1] SMP
>> > > pci_check_pme_status+0x4f
>> > > pci_pme_wakeup+0x21
>> > > pci_pme_list_scan+0xd5
>> > > process_one_work+0x1ca
>> > > ? process_one_work+0x160
>> > > ? pci_pme_wakeup+0x60
>> > > worker_thread+0x14e
>> > >
>> > > Anyway, it looks like the device being unplugged wasn't removed from
>> > > pci_pme_list as pci_pme_active(dev, false) wasn't called.
>> > >
>> > > From a quick review, I wasn't able to find the right place in the
>> > > call-chain which I only see releases the child busses and PCIe port
>> > > drivers. Anyone?
>> >
>> > It looks like drivers *add* devices to pci_pme_list when they use
>> > pci_enable_wake() or pci_wake_from_d3().  But many drivers never
>> > remove their devices, and I don't see any place where the core does it
>> > either.  My guess is we need to remove it in pci_stop_dev() (we
>> > already do pcie_aspm_exit_link_state() there) or somewhere similar

[PATCH] x86, amd, mce: Prevent potential cpu-online oops

2013-04-04 Thread Daniel J Blueman
On platforms where not all Northbridges may be visible (due to routing, eg on
NumaConnect systems), prevent oopsing due to stale pointer access when
offlining cores.

Signed-off-by: Steffen Persvold 
Signed-off-by: Daniel J Blueman 

---
 arch/x86/kernel/cpu/mcheck/mce_amd.c |   11 ++-
 1 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c 
b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 1ac581f..53a58c2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -578,8 +578,11 @@ static __cpuinit int threshold_create_bank(unsigned int 
cpu, unsigned int bank)
if (shared_bank[bank]) {
nb = node_to_amd_nb(amd_get_nb_id(cpu));
 
+   if (WARN_ON_ONCE(!nb))
+   goto out;
+
/* threshold descriptor already initialized on this node? */
-   if (nb && nb->bank4) {
+   if (nb->bank4) {
/* yes, use it */
b = nb->bank4;
err = kobject_add(b->kobj, &dev->kobj, name);
@@ -613,10 +616,8 @@ static __cpuinit int threshold_create_bank(unsigned int 
cpu, unsigned int bank)
atomic_set(&b->cpus, 1);
 
/* nb is already initialized, see above */
-   if (nb) {
-   WARN_ON(nb->bank4);
-   nb->bank4 = b;
-   }
+   WARN_ON(nb->bank4);
+   nb->bank4 = b;
}
 
err = allocate_threshold_blocks(cpu, bank, 0,
-- 
1.7.4.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[3.8.0-rc6] xhci NULL deref

2013-02-02 Thread Daniel J Blueman
With a couple of communication devices attached [1] on 3.8.0-rc6, XHCI
ran into a zero-page access [2].

A quick check of the code [3,4] suggests that struct xhci_virt_device
dev was NULL.

I'll see if I can have netconsole enabled for when it occurs again, to
catch any related error information.

Thanks,
  Daniel

--- [1]

$ lsusb
Bus 001 Device 002: ID 8087:0024 Intel Corp. Integrated Rate Matching Hub
Bus 002 Device 002: ID 8087:0024 Intel Corp. Integrated Rate Matching Hub
Bus 003 Device 004: ID 0424:2412 Standard Microsystems Corp.
Bus 003 Device 003: ID 04e8:6863 Samsung Electronics Co., Ltd
Bus 001 Device 001: ID 1d6b:0002 Linux Foundation 2.0 root hub
Bus 002 Device 001: ID 1d6b:0002 Linux Foundation 2.0 root hub
Bus 003 Device 001: ID 1d6b:0002 Linux Foundation 2.0 root hub
Bus 004 Device 001: ID 1d6b:0003 Linux Foundation 3.0 root hub
Bus 001 Device 003: ID 05ac:8510 Apple, Inc.
Bus 002 Device 003: ID 0424:2512 Standard Microsystems Corp. USB 2.0 Hub
Bus 003 Device 005: ID 0403:6010 Future Technology Devices
International, Ltd FT2232C Dual USB-UART/FIFO IC
Bus 002 Device 008: ID 05ac:8286 Apple, Inc.
Bus 002 Device 004: ID 0a5c:4500 Broadcom Corp. BCM2046B1 USB 2.0 Hub
(part of BCM2046 Bluetooth)
Bus 002 Device 005: ID 05ac:0262 Apple, Inc.

--- [2]

Unable to handle NULL pointer dereference 000508

RAX: 00500
RBX: 0
RCX: 00508
RDX: 0
RSI: 6
RDI: 0
RBP: 88026f203dc8

xhci_stream_id_to_ring+0x40
handle_cmd_completion+0x16a
? rebalance_domains+0x96
xhci_irq+0x27b

--- [3]

(gdb) disassemble xhci_stream_id_to_ring+0x40
   0x813384e0 <+0>: mov%esi,%esi
   0x813384e2 <+2>: push   %rbp
   0x813384e3 <+3>: lea(%rsi,%rsi,2),%rax
   0x813384e7 <+7>: mov%rsp,%rbp
   0x813384ea <+10>:lea(%rsi,%rax,4),%rax
   0x813384ee <+14>:shl$0x4,%rax
   0x813384f2 <+18>:test   %edx,%edx
   0x813384f4 <+20>:lea0x20(%rdi,%rax,1),%rax
   0x813384f9 <+25>:lea0x8(%rax),%rcx
   0x813384fd <+29>:je 0x81338520

   0x813384ff <+31>:mov0x8(%rcx),%rax
   0x81338503 <+35>:test   %rax,%rax
   0x81338506 <+38>:je 0x81338530

   0x81338508 <+40>:cmp0x8(%rax),%edx // deref

--- [4]

struct xhci_ring *xhci_stream_id_to_ring(
struct xhci_virt_device *dev,
unsigned int ep_index,
unsigned int stream_id)
{
struct xhci_virt_ep *ep = &dev->eps[ep_index];

if (stream_id == 0)
return ep->ring; // deref
if (!ep->stream_info)
return NULL;

if (stream_id > ep->stream_info->num_streams)
return NULL;
return ep->stream_info->stream_rings[stream_id];
}
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] Prevent USB hub remove oops

2013-02-26 Thread Daniel J Blueman
When initialisation of one or more USB hub ports fails, we can hit a null
pointer dereference when dropping the hub. Analysis shows there's a false
assumption about the ports being set up, so address this.

hub 2-3:1.0: USB hub found
hub 2-3:1.0: 7 ports detected
hub 2-3:1.0: hub_hub_status failed (err = -11)
hub 2-3:1.0: config failed, can't get hub status (err -11)
BUG: unable to handle kernel NULL pointer dereference at  (null)
IP: [] hub_quiesce+0x46/0xb0
PGD 0 
Oops:  [#1] SMP 
CPU 2 
Pid: 3364, comm: khubd Not tainted 3.8.0-advanced+ #21 IBM IBM System X3755 M3 
-[7164Z63]-/94Y6321   
RIP: 0010:[] [] hub_quiesce+0x46/0xb0
RSP: 0018:88046c535978 EFLAGS: 00010246
RAX: 88046c4fc100 RBX:  RCX: 000b
RDX:  RSI: 0001 RDI: 
RBP: 88046c6b8400 R08: 0002 R09: 129b
R10:  R11: 88046c53550e R12: 88046c098000
R13: 88046c0a8430 R14: 81af72e0 R15: 88046c6b8400
FS: 7fcbd36196e0() GS:88046fc8() knlGS:
CS: 0010 DS:  ES:  CR0: 8005003b
CR2:  CR3: 01e0c000 CR4: 07e0
DR0:  DR1:  DR2: 
DR3:  DR6: 0ff0 DR7: 0400
Process khubd (pid: 3364, threadinfo 88046c534000, task 88046c58ad00)
Stack:
 88046c0a8400 88046c6b8400 88046c098088 816901a5
 88046c098088 88046c0a8400 88046c098088 88046c098000
 88046c4fc180 816904c1 8169a261 88046c0a8430
Call Trace:
 [] ? hub_disconnect+0x75/0x140
 [] ? hub_probe+0x251/0x260
 [] ? usb_match_one_id+0x31/0x70
 [] ? usb_probe_interface+0x1a6/0x260
 [] ? driver_probe_device+0x68/0x210
 [] ? __driver_attach+0xa0/0xa0
 [] ? bus_for_each_drv+0x3e/0x80
 [] ? device_attach+0x98/0xb0
 [] ? bus_probe_device+0x80/0xb0
 [] ? device_add+0x5be/0x680
 [] ? usb_string+0x11e/0x1e0
 [] ? usb_set_configuration+0x4cd/0x7c0
 [] ? sysfs_do_create_link+0xed/0x220
 [] ? generic_probe+0x2f/0x90
 [] ? driver_probe_device+0x68/0x210
 [] ? __driver_attach+0xa0/0xa0
 [] ? bus_for_each_drv+0x3e/0x80
 [] ? device_attach+0x98/0xb0
 [] ? bus_probe_device+0x80/0xb0
 [] ? device_add+0x5be/0x680
 [] ? mix_pool_bytes.constprop.19+0x3f/0x60
 [] ? usb_new_device+0x158/0x210
 [] ? hub_port_connect_change+0x570/0x9c0
 [] ? hub_thread+0x26f/0x7c0
 [] ? __wake_up_common+0x4c/0x80
 [] ? abort_exclusive_wait+0xb0/0xb0
 [] ? usb_reset_device+0x140/0x140
 [] ? kthread+0xb3/0xc0
 [] ? __kthread_parkme+0x80/0x80
 [] ? ret_from_fork+0x7c/0xb0
 [] ? __kthread_parkme+0x80/0x80
Code: e4 00 00 00 02 83 fb 02 74 38 41 8b 84 24 40 04 00 00 85 c0 7e 2c 31 db 
0f 1f 44 00 00 48 8b 85 f8 01 00 00 48 63 d3 48 8b 3c d0 <48> 83 3f 00 74 05 e8 
9f fe ff ff ff c3 41 39 9c 24 40 04 00 00 
RIP [] hub_quiesce+0x46/0xb0
 RSP 
CR2: 

(gdb) list *(hub_quiesce+0x46)
0x81690056 is in hub_quiesce (drivers/usb/core/hub.c:1266).
1261hub->quiescing = 1;
1262
1263if (type != HUB_SUSPEND) {
1264/* Disconnect all the children */
1265for (i = 0; i < hdev->maxchild; ++i) {
1266if (hub->ports[i]->child)
1267usb_disconnect(&hub->ports[i]->child);
1268    }
1269    }

Signed-off-by: Daniel J Blueman 
---
 drivers/usb/core/hub.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index cbf7168..a7abc57 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1263,7 +1263,7 @@ static void hub_quiesce(struct usb_hub *hub, enum 
hub_quiescing_type type)
if (type != HUB_SUSPEND) {
/* Disconnect all the children */
for (i = 0; i < hdev->maxchild; ++i) {
-   if (hub->ports[i]->child)
+   if (hub->ports[i] && hub->ports[i]->child)
usb_disconnect(&hub->ports[i]->child);
}
}
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 4/4 v8] AMD64 EDAC: Fix type usage in NB IDs and memory ranges

2013-02-19 Thread Daniel J Blueman

Hi Boris,

On 05/12/2012 00:01, Borislav Petkov wrote:

On Tue, Dec 04, 2012 at 05:24:16PM +0800, Daniel J Blueman wrote:

It works well on fam10h and fam15h boxes, with and without Numaconnect.


Good, thanks for testing.

I will send it upstream after the upcoming merge window closes since it
is too late for this one now and I wouldn't want to rush it if it is not
necessary to do so and it hasn't seen enough testing in linux-next and
-tip trees. Which means that it will end up in 3.9; I hope that is OK
with you guys.


Alas your merges missed the v3.8 merge window, but it looks like your 
v3.9 pull request has dropped these patches [1].


Any chance you can get them in during this merge window?

Many thanks,
  Daniel

[1] 
https://groups.google.com/forum/?fromgroups=#!topic/linux.kernel/2DLVw1Rv8bQ

--
Daniel J Blueman
Principal Software Engineer, Numascale Asia
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 4/4 v8] AMD64 EDAC: Fix type usage in NB IDs and memory ranges

2013-02-19 Thread Daniel J Blueman

On 19/02/2013 22:59, Borislav Petkov wrote:

On Tue, Feb 19, 2013 at 10:40:26PM +0800, Daniel J Blueman wrote:

Alas your merges missed the v3.8 merge window, but it looks like your
v3.9 pull request has dropped these patches [1].

Any chance you can get them in during this merge window?


They should go in anytime now. They're in tip:x86/platform and Ingo is
sending pull requests to Linus as we speak. Since they touch x86 code I
asked x86 guys to send them upstream instead of me.


Superb; thanks for your help Boris and Ingo!

Daniel
--
Daniel J Blueman
Principal Software Engineer, Numascale Asia
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


False-positive RCU stall warnings on large systems...

2013-02-19 Thread Daniel J Blueman

Hi Paul,

On some of our larger servers with many hundreds of cores and when under 
high duress, we can see scheduler RCU stall warnings [1], so find we 
have to increase the hardcoded RCU_STALL_RAT_DELAY up from 2 and 
RCU_JIFFIES_TILL_FORCE_QS up from 3.


Is there a more sustainable way to account for this to avoid it being 
hard-coded, such as making it and dependent timeouts a fraction of 
CONFIG_RCU_CPU_STALL_TIMEOUT?


On the other hand, perhaps this is just caused by clock jitter (eg due 
to distance from a contended clock source)? So increasing these a bit 
may just be adequate in general...


Many thanks,
  Daniel

--- [1]

[ 3939.010085] INFO: rcu_sched detected stalls on CPUs/tasks: {} 
(detected by 1, t=29662 jiffies, g=3053, c=3052, q=598)

[ 3939.020008] INFO: Stall ended before state dump start
--
Daniel J Blueman
Principal Software Engineer, Numascale Asia
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: False-positive RCU stall warnings on large systems...

2013-02-19 Thread Daniel J Blueman

On 20/02/2013 02:16, Paul E. McKenney wrote:

On Wed, Feb 20, 2013 at 12:34:12AM +0800, Daniel J Blueman wrote:

Hi Paul,

On some of our larger servers with many hundreds of cores and when
under high duress, we can see scheduler RCU stall warnings [1], so
find we have to increase the hardcoded RCU_STALL_RAT_DELAY up from 2
and RCU_JIFFIES_TILL_FORCE_QS up from 3.

Is there a more sustainable way to account for this to avoid it
being hard-coded, such as making it and dependent timeouts a
fraction of CONFIG_RCU_CPU_STALL_TIMEOUT?

On the other hand, perhaps this is just caused by clock jitter (eg
due to distance from a contended clock source)? So increasing these
a bit may just be adequate in general...


Hmmm...  What version of the kernel are you running?


The example below occurs with v3.8, but we see the same with previous 
kernels eg v3.5.


Of course, when using the local TSC, you'd see no jitter relative to 
coherent transactions (eg memory writes), but when the HPET is used 
across a large system, coherent transactions to distant cores are just 
so much faster, as there's massive congestion to the shared HPET behind 
various HT and PCIe bridges. This could be where the jitter arises from, 
if I'm guessing jitter is the problem here.


Thanks,
  Daniel


--- [1]

[ 3939.010085] INFO: rcu_sched detected stalls on CPUs/tasks: {}
(detected by 1, t=29662 jiffies, g=3053, c=3052, q=598)
[ 3939.020008] INFO: Stall ended before state dump start

--
Daniel J Blueman
Principal Software Engineer, Numascale Asia
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] nouveau: Fix kernel log mangling

2012-09-12 Thread Daniel J Blueman
Add missing newline to prevent the following kernel log line getting
appended to the current one.

Signed-off-by: Daniel J Blueman 
---
 drivers/gpu/drm/nouveau/nouveau_dp.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c 
b/drivers/gpu/drm/nouveau/nouveau_dp.c
index 7e289d2..2a9294f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dp.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dp.c
@@ -61,7 +61,7 @@ auxch_init(struct drm_device *dev, int ch)
ctrl = nv_rd32(dev, 0x00e4e4 + (ch * 0x50));
udelay(1);
if (!timeout--) {
-   AUX_ERR("begin idle timeout 0x%08x", ctrl);
+   AUX_ERR("begin idle timeout 0x%08x\n", ctrl);
return -EBUSY;
}
} while (ctrl & 0x0301);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Prevent AMD MCE oops on multi-server system

2012-10-02 Thread Daniel J Blueman

On 02/10/2012 02:01, Borislav Petkov wrote:

On Tue, Oct 02, 2012 at 12:12:31AM +0800, Daniel J Blueman wrote:

On 01/10/2012 18:06, Borislav Petkov wrote:

On Mon, Oct 01, 2012 at 02:42:05PM +0800, Daniel J Blueman wrote:

When booting on a federated multi-server system, the processor Northbridge
lookup returns NULL; add guards to prevent this causing an oops.

Interesting.

What does lspci say on those systems?

Thanks.

As NumaConnect remote-server I/O is in a pre-release stage, we only
expose I/O on the first (root) server, so the lspci on eg my three
server, single-socket C32 development system is uninteresting [1].


Yeah, I was looking for the NB devices:


00:18.0 Host bridge: Advanced Micro Devices [AMD] Family 10h Processor 
HyperTransport Configuration
00:18.1 Host bridge: Advanced Micro Devices [AMD] Family 10h Processor Address 
Map
00:18.2 Host bridge: Advanced Micro Devices [AMD] Family 10h Processor DRAM 
Controller
00:18.3 Host bridge: Advanced Micro Devices [AMD] Family 10h Processor 
Miscellaneous Control
00:18.4 Host bridge: Advanced Micro Devices [AMD] Family 10h Processor Link 
Control


[ … ]


We map MMCONFIG addresses in the global address map to the
respective server, which is how we access the processor Northbridges
in the bootloader before Linux loads, so they are accessible and get
enumerated when we enable remote I/O with the ACPI SSDT we generate,
however since the AMD APIC IDs (hence NB IDs) are only 8-bit, the
present amd_get_nb_id will produce duplicate NB IDs at best (but in
this case, as we disable I/O routing, there is no structure); later,
we may propose using eg bits 23:8 for the server ID. That's
another discussion though.


Ah yes, I remember now. We had this discussion already, AFAIR. So if you
say you disable I/O routing, what actually doesn't work out as expected
is the NB enumeration in amd_nb.c where pci_get_device simply fails?

Because if you had duplicate APIC IDs, you'd at least get some NB
descriptor, even if not the correct one?


With remote-I/O disabled, since only the first PCI domain has been 
enumerated, the array of Northbridge IDs has structures only for the 
root (first) server's northbridges, thus the lookup returns NULL for 
later ones.


Yes, we see the duplicates with remote I/O enabled [1, 2], stemming from 
amd64_edac.h:


static inline u8 get_node_id(struct pci_dev *pdev)
{
return PCI_SLOT(pdev->devfn) - 0x18;
}

How about a patch that would add the PCI domain eg in bits 8 and up?


The minimal patch at least corrects the oops regression which didn't
happen in earlier kernels.


Right, I beefed it up a bit and added a stable tag, pls take a look and
let me know if it is ok. I'll run it on a couple of machines but I don't
expect any issues so I'll send it upstream soon.


Looks good!

Thanks Boris,
   Daniel

--- [1]

EDAC MC: Ver: 3.0.0
AMD64 EDAC driver v3.4.0
EDAC amd64: DRAM ECC enabled.
EDAC amd64: F10h detected (node 0).
EDAC MC: DCT0 chip selects:
EDAC amd64: MC: 0:   0MB 1:   0MB
EDAC amd64: MC: 2:   0MB 3:   0MB
EDAC amd64: MC: 4: 2048MB 5: 2048MB
EDAC amd64: MC: 6:   0MB 7:   0MB
EDAC MC: DCT1 chip selects:
EDAC amd64: MC: 0:   0MB 1:   0MB
EDAC amd64: MC: 2:   0MB 3:   0MB
EDAC amd64: MC: 4: 2048MB 5: 2048MB
EDAC amd64: MC: 6:   0MB 7:   0MB
EDAC amd64: using x4 syndromes.
EDAC amd64: MCT channel count: 2
EDAC amd64: CS4: Unbuffered DDR3 RAM
EDAC amd64: CS5: Unbuffered DDR3 RAM
EDAC MC0: Giving out device to 'amd64_edac' 'F10h': DEV :00:18.2
EDAC amd64: DRAM ECC enabled.
EDAC amd64: F10h detected (node 0).
EDAC MC: DCT0 chip selects:
EDAC amd64: MC: 0:   0MB 1:   0MB
EDAC amd64: MC: 2:   0MB 3:   0MB
EDAC amd64: MC: 4: 2048MB 5: 2048MB
EDAC amd64: MC: 6:   0MB 7:   0MB
EDAC MC: DCT1 chip selects:
EDAC amd64: MC: 0:   0MB 1:   0MB
EDAC amd64: MC: 2:   0MB 3:   0MB
EDAC amd64: MC: 4: 2048MB 5: 2048MB
EDAC amd64: MC: 6:   0MB 7:   0MB
EDAC amd64: using x4 syndromes.
EDAC amd64: MCT channel count: 2
EDAC amd64: CS4: Unbuffered DDR3 RAM
EDAC amd64: CS5: Unbuffered DDR3 RAM
EDAC MC: bug in low-level driver: attempt to assign
  duplicate mc_idx 0 in add_mc_to_global_list()
EDAC amd64: Error probing instance: 0
EDAC amd64: DRAM ECC enabled.
EDAC amd64: F10h detected (node 0).
EDAC MC: DCT0 chip selects:
EDAC amd64: MC: 0:   0MB 1:   0MB
EDAC amd64: MC: 2:   0MB 3:   0MB
EDAC amd64: MC: 4: 2048MB 5: 2048MB
EDAC amd64: MC: 6:   0MB 7:   0MB
EDAC MC: DCT1 chip selects:
EDAC amd64: MC: 0:   0MB 1:   0MB
EDAC amd64: MC: 2:   0MB 3:   0MB
EDAC amd64: MC: 4: 2048MB 5: 2048MB
EDAC amd64: MC: 6:   0MB 7:   0MB
EDAC amd64: using x4 syndromes.
EDAC amd64: MCT channel count: 2
EDAC amd64: CS4: Unbuffered DDR3 RAM
EDAC amd64: CS5: Unbuffered DDR3 RAM
EDAC MC: bug in low-level driver: attempt to assign
  duplicate mc_idx 0 in add_mc_to_global_list()
EDAC amd64: Error probing instance: 0
EDAC PCI0: Giving out device to module 'amd64_edac' controller 'EDAC PCI

[PATCH] RFC: Fix AMD Northbridge-ID contiguity assumptions

2012-10-03 Thread Daniel J Blueman
The AMD Northbridge initialisation code and EDAC assume the Northbridge IDs
are contiguous, which no longer holds on federated systems with multiple
HyperTransport fabrics with multiple PCI domains.

Address this assumption by searching the Northbridge ID array, rather than
directly indexing it, using the upper bits for the PCI domain.

Signed-off-by: Daniel J Blueman 
---
 arch/x86/include/asm/amd_nb.h |   17 +++--
 arch/x86/kernel/amd_nb.c  |   15 ---
 drivers/edac/amd64_edac.c |   18 +-
 drivers/edac/amd64_edac.h |4 ++--
 4 files changed, 34 insertions(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index b3341e9..016448c 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -47,6 +47,7 @@ struct threshold_bank {
 };
 
 struct amd_northbridge {
+   u32 node;
struct pci_dev *misc;
struct pci_dev *link;
struct amd_l3_cache l3_cache;
@@ -76,15 +77,27 @@ static inline bool amd_nb_has_feature(unsigned feature)
return ((amd_northbridges.flags & feature) == feature);
 }
 
-static inline struct amd_northbridge *node_to_amd_nb(int node)
+static inline int node_to_amd_index(u32 node)
 {
-   return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : 
NULL;
+   int i;
+
+   for (i = 0; i < amd_northbridges.num; i++)
+   if (amd_northbridges.nb[i].node == node)
+   return i;
+
+   return 0;
+}
+
+static inline struct amd_northbridge *node_to_amd_nb(u32 node)
+{
+   return &amd_northbridges.nb[node_to_amd_index(node)];
 }
 
 #else
 
 #define amd_nb_num(x)  0
 #define amd_nb_has_feature(x)  false
+#define node_to_amd_index(x)   0
 #define node_to_amd_nb(x)  NULL
 
 #endif
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index aadf335..011eca1 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -75,10 +75,9 @@ int amd_cache_northbridges(void)
 
link = misc = NULL;
for (i = 0; i != amd_nb_num(); i++) {
-   node_to_amd_nb(i)->misc = misc =
-   next_northbridge(misc, amd_nb_misc_ids);
-   node_to_amd_nb(i)->link = link =
-   next_northbridge(link, amd_nb_link_ids);
+   nb->misc = misc = next_northbridge(misc, amd_nb_misc_ids);
+   nb->link = link = next_northbridge(link, amd_nb_link_ids);
+   nb++;
 }
 
/* some CPU families (e.g. family 0x11) do not support GART */
@@ -212,6 +211,7 @@ int amd_set_subcaches(int cpu, int mask)
 static int amd_cache_gart(void)
 {
u16 i;
+   struct amd_northbridge *nb = amd_northbridges.nb;
 
if (!amd_nb_has_feature(AMD_NB_GART))
return 0;
@@ -222,9 +222,10 @@ static int amd_cache_gart(void)
return -ENOMEM;
}
 
-   for (i = 0; i != amd_nb_num(); i++)
-   pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c,
- &flush_words[i]);
+   for (i = 0; i != amd_nb_num(); i++) {
+   pci_read_config_dword(nb->misc, 0x9c, &flush_words[i]);
+   nb++;
+   }
 
return 0;
 }
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 5a297a2..9c35565 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2549,7 +2549,7 @@ static int amd64_init_one_instance(struct pci_dev *F2)
struct mem_ctl_info *mci = NULL;
struct edac_mc_layer layers[2];
int err = 0, ret;
-   u8 nid = get_node_id(F2);
+   u32 nid = get_node_id(F2);
 
ret = -ENOMEM;
pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
@@ -2640,7 +2640,7 @@ err_ret:
 static int __devinit amd64_probe_one_instance(struct pci_dev *pdev,
 const struct pci_device_id 
*mc_type)
 {
-   u8 nid = get_node_id(pdev);
+   u32 nid = get_node_id(pdev);
struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
struct ecc_settings *s;
int ret = 0;
@@ -2656,7 +2656,7 @@ static int __devinit amd64_probe_one_instance(struct 
pci_dev *pdev,
if (!s)
goto err_out;
 
-   ecc_stngs[nid] = s;
+   ecc_stngs[node_to_amd_index(nid)] = s;
 
if (!ecc_enabled(F3, nid)) {
ret = -ENODEV;
@@ -2680,7 +2680,7 @@ static int __devinit amd64_probe_one_instance(struct 
pci_dev *pdev,
 
 err_enable:
kfree(s);
-   ecc_stngs[nid] = NULL;
+   ecc_stngs[node_to_amd_index(nid)] = NULL;
 
 err_out:
return ret;
@@ -2690,9 +2690,9 @@ static void __devexit amd64_remove_one_instance(struct 
pci_dev *pdev)
 {
struct mem_ctl_info *mci;
struct amd64_pvt *pvt;
-   u8 nid = get_node_id(pdev);
+   u32 nid = get_node_id(pdev);
struct pci_dev *F3 = node_to_am

[PATCH v2] Fix AMD Northbridge-ID contiguity assumptions

2012-10-03 Thread Daniel J Blueman
The AMD Northbridge initialisation code and EDAC assume the Northbridge IDs
are contiguous, which no longer holds on federated systems with multiple
HyperTransport fabrics and multiple PCI domains.

Address this assumption by searching the Northbridge ID array, rather than
directly indexing it, using the upper bits for the PCI domain.

v2: Fix Northbridge entry initialisation

Tested on a single-socket system and 3-server federated system.

Signed-off-by: Daniel J Blueman 
---
 arch/x86/include/asm/amd_nb.h |   23 +--
 arch/x86/kernel/amd_nb.c  |   16 +---
 drivers/edac/amd64_edac.c |   18 +-
 drivers/edac/amd64_edac.h |6 --
 4 files changed, 39 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index b3341e9..0fd2f0c 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -47,6 +47,7 @@ struct threshold_bank {
 };
 
 struct amd_northbridge {
+   u32 node;
struct pci_dev *misc;
struct pci_dev *link;
struct amd_l3_cache l3_cache;
@@ -76,15 +77,33 @@ static inline bool amd_nb_has_feature(unsigned feature)
return ((amd_northbridges.flags & feature) == feature);
 }
 
-static inline struct amd_northbridge *node_to_amd_nb(int node)
+static inline int node_to_amd_index(u32 node)
 {
-   return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : 
NULL;
+   int i;
+
+   for (i = 0; i < amd_northbridges.num; i++)
+   if (amd_northbridges.nb[i].node == node)
+   return i;
+
+   return 0;
+}
+
+static inline struct amd_northbridge *node_to_amd_nb(u32 node)
+{
+   return &amd_northbridges.nb[node_to_amd_index(node)];
+}
+
+/* AMD sets the first MC device at device ID 0x18 */
+static inline u32 get_node_id(struct pci_dev *pdev)
+{
+   return (pci_domain_nr(pdev->bus) << 8) | (PCI_SLOT(pdev->devfn) - 0x18);
 }
 
 #else
 
 #define amd_nb_num(x)  0
 #define amd_nb_has_feature(x)  false
+#define node_to_amd_index(x)   0
 #define node_to_amd_nb(x)  NULL
 
 #endif
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index aadf335..c29ce39 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -75,10 +75,10 @@ int amd_cache_northbridges(void)
 
link = misc = NULL;
for (i = 0; i != amd_nb_num(); i++) {
-   node_to_amd_nb(i)->misc = misc =
-   next_northbridge(misc, amd_nb_misc_ids);
-   node_to_amd_nb(i)->link = link =
-   next_northbridge(link, amd_nb_link_ids);
+   nb->misc = misc = next_northbridge(misc, amd_nb_misc_ids);
+   nb->node = get_node_id(misc);
+   nb->link = link = next_northbridge(link, amd_nb_link_ids);
+   nb++;
 }
 
/* some CPU families (e.g. family 0x11) do not support GART */
@@ -212,6 +212,7 @@ int amd_set_subcaches(int cpu, int mask)
 static int amd_cache_gart(void)
 {
u16 i;
+   struct amd_northbridge *nb = amd_northbridges.nb;
 
if (!amd_nb_has_feature(AMD_NB_GART))
return 0;
@@ -222,9 +223,10 @@ static int amd_cache_gart(void)
return -ENOMEM;
}
 
-   for (i = 0; i != amd_nb_num(); i++)
-   pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c,
- &flush_words[i]);
+   for (i = 0; i != amd_nb_num(); i++) {
+   pci_read_config_dword(nb->misc, 0x9c, &flush_words[i]);
+   nb++;
+   }
 
return 0;
 }
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 5a297a2..9c35565 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2549,7 +2549,7 @@ static int amd64_init_one_instance(struct pci_dev *F2)
struct mem_ctl_info *mci = NULL;
struct edac_mc_layer layers[2];
int err = 0, ret;
-   u8 nid = get_node_id(F2);
+   u32 nid = get_node_id(F2);
 
ret = -ENOMEM;
pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
@@ -2640,7 +2640,7 @@ err_ret:
 static int __devinit amd64_probe_one_instance(struct pci_dev *pdev,
 const struct pci_device_id 
*mc_type)
 {
-   u8 nid = get_node_id(pdev);
+   u32 nid = get_node_id(pdev);
struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
struct ecc_settings *s;
int ret = 0;
@@ -2656,7 +2656,7 @@ static int __devinit amd64_probe_one_instance(struct 
pci_dev *pdev,
if (!s)
goto err_out;
 
-   ecc_stngs[nid] = s;
+   ecc_stngs[node_to_amd_index(nid)] = s;
 
if (!ecc_enabled(F3, nid)) {
ret = -ENODEV;
@@ -2680,7 +2680,7 @@ static int __devinit amd64_probe_one_instance(struct 
pci_dev *pdev,
 
 err_enable:
kfree(s);
-   ecc_stngs

[PATCH] Fix Intel PIIX4 I2C driver build failure

2012-10-05 Thread Daniel J Blueman
Fix build failure in Intel PIIX4 I2C driver.

Signed-off-by: Daniel J Blueman 
---
 drivers/i2c/busses/i2c-piix4.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
index ef511df..8bbd6ec 100644
--- a/drivers/i2c/busses/i2c-piix4.c
+++ b/drivers/i2c/busses/i2c-piix4.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v3] Add support for AMD64 EDAC on multiple PCI domains

2012-10-25 Thread Daniel J Blueman
The AMD Northbridge initialisation code and EDAC assume the Northbridge IDs
are contiguous, which no longer holds on federated systems with multiple
HyperTransport fabrics and multiple PCI domains, eg on Numascale's
Numaconnect systems with NumaChip.

Address this assumption by searching the Northbridge ID array, rather than
directly indexing it, using the upper bits for the PCI domain.

RFC->v2: Correct array initialisation
v2->v3: Add Boris's neater linked list approach

Todo:
1. fix kobject/sysfs oops (see http://quora.org/2012/16-server-boot.txt later)
2. reorder amd64_edac.c or add amd64_per_family_init/pci_get_related_function
   forward declarations, based on feedback

Signed-off-by: Daniel J Blueman 
---
 arch/x86/include/asm/amd_nb.h|   63 +++-
 arch/x86/include/asm/numachip/numachip.h |   22 ++
 arch/x86/kernel/amd_gart_64.c|8 +-
 arch/x86/kernel/amd_nb.c |   85 -
 arch/x86/pci/numachip.c  |  121 ++
 drivers/char/agp/amd64-agp.c |   12 +--
 drivers/edac/amd64_edac.c|   34 +
 drivers/edac/amd64_edac.h|6 --
 8 files changed, 283 insertions(+), 68 deletions(-)
 create mode 100644 arch/x86/include/asm/numachip/numachip.h
 create mode 100644 arch/x86/pci/numachip.c

diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index b3341e9..6a27226 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -4,6 +4,8 @@
 #include 
 #include 
 
+#define NUM_POSSIBLE_NBS   8
+
 struct amd_nb_bus_dev_range {
u8 bus;
u8 dev_base;
@@ -51,12 +53,22 @@ struct amd_northbridge {
struct pci_dev *link;
struct amd_l3_cache l3_cache;
struct threshold_bank *bank4;
+   u16 node;
+   struct list_head nbl;
 };
 
 struct amd_northbridge_info {
u16 num;
u64 flags;
-   struct amd_northbridge *nb;
+
+   /*
+* The first 8 elems are for fast lookup of NB descriptors on single-
+* system setups, i.e. "normal" boxes. The nb_list, OTOH, is list of
+* additional NB descriptors which exist on confederate systems
+* like using Numascale's Numaconnect/NumaChip.
+*/
+   struct amd_northbridge *nbs[NUM_POSSIBLE_NBS];
+   struct list_head nb_list;
 };
 extern struct amd_northbridge_info amd_northbridges;
 
@@ -78,7 +90,54 @@ static inline bool amd_nb_has_feature(unsigned feature)
 
 static inline struct amd_northbridge *node_to_amd_nb(int node)
 {
-   return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : 
NULL;
+   struct amd_northbridge_info *nbi = &amd_northbridges;
+   struct amd_northbridge *nb;
+   int i;
+
+   /* Quick search for first domain */
+   if (node < NUM_POSSIBLE_NBS) {
+   if (node < nbi->num)
+   return nbi->nbs[node];
+   else
+   return NULL;
+   }
+
+   /* Search for NBs from later domains in array */
+   for (i = 0; i < NUM_POSSIBLE_NBS; i++)
+   if (nbi->nbs[i]->node == node)
+   return nbi->nbs[i];
+
+   list_for_each_entry(nb, &nbi->nb_list, nbl)
+   if (node == nb->node)
+   return nb;
+
+   return NULL;
+}
+
+static inline struct amd_northbridge *index_to_amd_nb(int index)
+{
+   struct amd_northbridge_info *nbi = &amd_northbridges;
+   struct amd_northbridge *nb;
+   int count = NUM_POSSIBLE_NBS;
+
+   if (index < NUM_POSSIBLE_NBS) {
+   if (index < nbi->num)
+   return nbi->nbs[index];
+   else
+   return NULL;
+   }
+
+   list_for_each_entry(nb, &nbi->nb_list, nbl) {
+   if (count++ == index)
+   return nb;
+   }
+
+   return NULL;
+}
+
+static inline u16 amd_get_node_id(struct pci_dev *pdev)
+{
+   return (pci_domain_nr(pdev->bus) << 3) | (PCI_SLOT(pdev->devfn) - 0x18);
 }
 
 #else
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index e663112..4f56487 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -573,7 +573,7 @@ static void enable_gart_translations(void)
return;
 
for (i = 0; i < amd_nb_num(); i++) {
-   struct pci_dev *dev = node_to_amd_nb(i)->misc;
+   struct pci_dev *dev = index_to_amd_nb(i)->misc;
 
enable_gart_translation(dev, __pa(agp_gatt_table));
}
@@ -610,7 +610,7 @@ static void gart_fixup_northbridges(void)
pr_info("PCI-DMA: Restoring GART aperture settings\n");
 
for (i = 0; i < amd_nb_num(); i++) {
-   struct pci_dev *dev = node_to_amd_nb(i)->misc;
+ 

[2.6.24-rc8] page allocation failure...

2008-02-14 Thread Daniel J Blueman
32 kB
NFS_Unstable:0 kB
Bounce:  0 kB
CommitLimit:761392 kB
Committed_AS:   541628 kB
VmallocTotal: 34359738367 kB
VmallocUsed:264820 kB
VmallocChunk: 34359473499 kB

--- /proc/zoneinfo
Node 0, zone  DMA
  pages free 1259
min  10
low  12
high 15
scanned  0 (a: 2 i: 16)
spanned  4096
present  2559
nr_free_pages 1259
nr_inactive  66
nr_active7
nr_anon_pages 0
nr_mapped0
nr_file_pages 73
nr_dirty 0
nr_writeback 0
nr_slab_reclaimable 1296
nr_slab_unreclaimable 65
nr_page_table_pages 0
nr_unstable  0
nr_bounce0
nr_vmscan_write 787
protection: (0, 992, 992, 992)
  pagesets
cpu: 0 pcp: 0
  count: 0
  high:  0
  batch: 1
cpu: 0 pcp: 1
  count: 0
  high:  0
  batch: 1
  vm stats threshold: 4
cpu: 1 pcp: 0
  count: 0
  high:  0
  batch: 1
cpu: 1 pcp: 1
  count: 0
  high:  0
  batch: 1
  vm stats threshold: 4
  all_unreclaimable: 0
  prev_priority: 12
  start_pfn: 0
Node 0, zoneDMA32
  pages free 12514
min  1002
low  1252
high 1503
scanned  0 (a: 0 i: 0)
spanned  257504
present  253984
nr_free_pages 12514
nr_inactive  60499
nr_active115446
nr_anon_pages 93837
nr_mapped4550
nr_file_pages 82403
nr_dirty 7
nr_writeback 0
nr_slab_reclaimable 56383
nr_slab_unreclaimable 4290
nr_page_table_pages 1008
nr_unstable  0
nr_bounce0
nr_vmscan_write 7348
protection: (0, 0, 0, 0)
  pagesets
cpu: 0 pcp: 0
  count: 102
  high:  186
  batch: 31
cpu: 0 pcp: 1
  count: 64
  high:  62
  batch: 15
  vm stats threshold: 16
cpu: 1 pcp: 0
  count: 102
  high:  186
  batch: 31
cpu: 1 pcp: 1
  count: 65
  high:  62
  batch: 15
  vm stats threshold: 16
  all_unreclaimable: 0
  prev_priority: 12
  start_pfn: 4096
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2] Add NumaChip remote PCI support

2012-10-31 Thread Daniel J Blueman
Add NumaChip-specific PCI access mechanism via MMCONFIG cycles, but
preventing access to AMD Northbridges which shouldn't respond.

v2: Use PCI_DEVFN in precomputed constant limit; drop unneeded includes

Signed-off-by: Daniel J Blueman 
---
 arch/x86/include/asm/numachip/numachip.h |   20 +
 arch/x86/kernel/apic/apic_numachip.c |2 +
 arch/x86/pci/Makefile|1 +
 arch/x86/pci/numachip.c  |  134 ++
 4 files changed, 157 insertions(+)
 create mode 100644 arch/x86/include/asm/numachip/numachip.h
 create mode 100644 arch/x86/pci/numachip.c

diff --git a/arch/x86/include/asm/numachip/numachip.h 
b/arch/x86/include/asm/numachip/numachip.h
new file mode 100644
index 000..d35e71a
--- /dev/null
+++ b/arch/x86/include/asm/numachip/numachip.h
@@ -0,0 +1,20 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Numascale NumaConnect-specific header file
+ *
+ * Copyright (C) 2012 Numascale AS. All rights reserved.
+ *
+ * Send feedback to 
+ *
+ */
+
+#ifndef _ASM_X86_NUMACHIP_NUMACHIP_H
+#define _ASM_X86_NUMACHIP_NUMACHIP_H
+
+extern int __init pci_numachip_init(void);
+
+#endif /* _ASM_X86_NUMACHIP_NUMACHIP_H */
+
diff --git a/arch/x86/kernel/apic/apic_numachip.c 
b/arch/x86/kernel/apic/apic_numachip.c
index a65829a..9c2aa89 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 #include 
@@ -179,6 +180,7 @@ static int __init numachip_system_init(void)
return 0;
 
x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
+   x86_init.pci.arch_init = pci_numachip_init;
 
map_csrs();
 
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index 3af5a1e..ee0af58 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_STA2X11)   += sta2x11-fixup.o
 obj-$(CONFIG_X86_VISWS)+= visws.o
 
 obj-$(CONFIG_X86_NUMAQ)+= numaq_32.o
+obj-$(CONFIG_X86_NUMACHIP) += numachip.o
 
 obj-$(CONFIG_X86_INTEL_MID)+= mrst.o
 
diff --git a/arch/x86/pci/numachip.c b/arch/x86/pci/numachip.c
new file mode 100644
index 000..3773e05
--- /dev/null
+++ b/arch/x86/pci/numachip.c
@@ -0,0 +1,129 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Numascale NumaConnect-specific PCI code
+ *
+ * Copyright (C) 2012 Numascale AS. All rights reserved.
+ *
+ * Send feedback to 
+ *
+ * PCI accessor functions derived from mmconfig_64.c
+ *
+ */
+
+#include 
+#include 
+
+static u8 limit __read_mostly;
+
+static inline char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, 
unsigned int devfn)
+{
+   struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus);
+
+   if (cfg && cfg->virt)
+   return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12));
+   return NULL;
+}
+
+static int pci_mmcfg_read_numachip(unsigned int seg, unsigned int bus,
+ unsigned int devfn, int reg, int len, u32 *value)
+{
+   char __iomem *addr;
+
+   /* Why do we have this when nobody checks it. How about a BUG()!? -AK */
+   if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) {
+err:   *value = -1;
+   return -EINVAL;
+   }
+
+   /* Ensure AMD Northbridges don't decode reads to other devices */
+   if (unlikely(bus == 0 && devfn >= limit)) {
+   *value = -1;
+   return 0;
+   }
+
+   rcu_read_lock();
+   addr = pci_dev_base(seg, bus, devfn);
+   if (!addr) {
+   rcu_read_unlock();
+   goto err;
+   }
+
+   switch (len) {
+   case 1:
+   *value = mmio_config_readb(addr + reg);
+   break;
+   case 2:
+   *value = mmio_config_readw(addr + reg);
+   break;
+   case 4:
+   *value = mmio_config_readl(addr + reg);
+   break;
+   }
+   rcu_read_unlock();
+
+   return 0;
+}
+
+static int pci_mmcfg_write_numachip(unsigned int seg, unsigned int bus,
+  unsigned int devfn, int reg, int len, u32 value)
+{
+   char __iomem *addr;
+
+   /* Why do we have this when nobody checks it. How about a BUG()!? -AK */
+   if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095)))
+   return -EINVAL;
+
+   /* Ensure AMD Northbridges don't decode writes to other devices */
+   if (unlikely(bus == 0 && devfn >= limit))
+   return 0;
+
+   rcu_read_lock();
+   addr = pci_dev_base(seg, bus,

[PATCH] Fix nouveau hang after switcheroo

2012-11-01 Thread Daniel J Blueman
After switcherooing to integrated and starting X, when X fails to start
and causes a console switch, we get hit with a hanger (below). Fix
by checking if we're already in D3.

BUG: soft lockup - CPU#0 stuck for 22s! [Xorg:1703]
 [] nv04_timer_read+0x28/0x70 [nouveau]
 [] nouveau_timer_wait_eq+0x7c/0xe0 [nouveau]
 [] nvd0_sor_dpms+0xde/0x1a0 [nouveau]
 [] ? fb_set_var+0xe9/0x3a0
 [] ? __pte_alloc+0xa9/0x160
 [] ? nvd0_sor_dp_link_set+0x2c0/0x2c0 [nouveau]
 [] drm_helper_connector_dpms+0xbc/0x100 [drm_kms_helper]
 [] drm_fb_helper_dpms.isra.13+0xa5/0xf0 [drm_kms_helper]
 [] drm_fb_helper_blank+0x49/0x80 [drm_kms_helper]
 [] fb_blank+0x56/0xc0
 [] do_fb_ioctl+0x59b/0x5f0
 [] ? vma_interval_tree_insert+0x83/0x90
 [] fb_ioctl+0x45/0x50
 [] do_vfs_ioctl+0x8a/0x340
 [] sys_ioctl+0x91/0xb0

Signed-off-by: Daniel J Blueman 
---
 drivers/gpu/drm/nouveau/nvd0_display.c |4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nvd0_display.c 
b/drivers/gpu/drm/nouveau/nvd0_display.c
index c402fca..c3285bf 100644
--- a/drivers/gpu/drm/nouveau/nvd0_display.c
+++ b/drivers/gpu/drm/nouveau/nvd0_display.c
@@ -1364,6 +1364,10 @@ nvd0_sor_dpms(struct drm_encoder *encoder, int mode)
int or = nv_encoder->or;
u32 dpms_ctrl;
 
+   /* prevent hanging after hardware is in D3 */
+   if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+   return;
+
nv_encoder->last_dpms = mode;
 
list_for_each_entry(partner, &dev->mode_config.encoder_list, head) {
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] HDA: Fix digital microphone on CS420x

2012-11-03 Thread Daniel J Blueman
Correctly enable the digital microphones with the right bits in the right 
coefficient
registers on Cirrus CS4206/7 codecs. It also prevents misconfiguring ADC1/2.

This fixes the digital mic on the Macbook Pro 10,1/Retina.

Based-on-patch-by: Alexander Stein 
Signed-off-by: Daniel J Blueman 
---
 sound/pci/hda/patch_cirrus.c |   16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c
index 61a7113..859a119 100644
--- a/sound/pci/hda/patch_cirrus.c
+++ b/sound/pci/hda/patch_cirrus.c
@@ -101,8 +101,8 @@ enum {
 #define CS420X_VENDOR_NID  0x11
 #define CS_DIG_OUT1_PIN_NID0x10
 #define CS_DIG_OUT2_PIN_NID0x15
-#define CS_DMIC1_PIN_NID   0x12
-#define CS_DMIC2_PIN_NID   0x0e
+#define CS_DMIC1_PIN_NID   0x0e
+#define CS_DMIC2_PIN_NID   0x12
 
 /* coef indices */
 #define IDX_SPDIF_STAT 0x
@@ -1079,14 +1079,18 @@ static void init_input(struct hda_codec *codec)
cs_automic(codec, NULL);
 
coef = 0x000a; /* ADC1/2 - Digital and Analog Soft Ramp */
+   cs_vendor_coef_set(codec, IDX_ADC_CFG, coef);
+
+   coef = cs_vendor_coef_get(codec, IDX_BEEP_CFG);
if (is_active_pin(codec, CS_DMIC2_PIN_NID))
-   coef |= 0x0500; /* DMIC2 2 chan on, GPIO1 off */
+   coef |= 1 << 4; /* DMIC2 2 chan on, GPIO1 off */
if (is_active_pin(codec, CS_DMIC1_PIN_NID))
-   coef |= 0x1800; /* DMIC1 2 chan on, GPIO0 off
+   coef |= 1 << 3; /* DMIC1 2 chan on, GPIO0 off
 * No effect if SPDIF_OUT2 is
 * selected in IDX_SPDIF_CTL.
*/
-   cs_vendor_coef_set(codec, IDX_ADC_CFG, coef);
+
+   cs_vendor_coef_set(codec, IDX_BEEP_CFG, coef);
} else {
if (spec->mic_detect)
cs_automic(codec, NULL);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] HDA: Mark CS260x immutable structures const

2012-11-03 Thread Daniel J Blueman
Mark structures that won't change const.

Signed-off-by: Daniel J Blueman 
---
 sound/pci/hda/patch_cirrus.c |5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c
index 859a119..d5f3a26 100644
--- a/sound/pci/hda/patch_cirrus.c
+++ b/sound/pci/hda/patch_cirrus.c
@@ -1732,8 +1732,7 @@ static int cs421x_mux_enum_put(struct snd_kcontrol 
*kcontrol,
 
 }
 
-static struct snd_kcontrol_new cs421x_capture_source = {
-
+static const struct snd_kcontrol_new cs421x_capture_source = {
.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
.name = "Capture Source",
.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
@@ -1950,7 +1949,7 @@ static int cs421x_suspend(struct hda_codec *codec)
 }
 #endif
 
-static struct hda_codec_ops cs421x_patch_ops = {
+static const struct hda_codec_ops cs421x_patch_ops = {
.build_controls = cs421x_build_controls,
.build_pcms = cs_build_pcms,
.init = cs421x_init,
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/3, v5] AMD64 EDAC: Add multi-domain support

2012-11-04 Thread Daniel J Blueman
Fix the handling of memory controller detection to index the array
of detected Northbridges, allowing memory controllers over multiple
PCI domains in federated systems eg using Numascale's NumaConnect/
NumaChip.

v4: Generate linear Northbridge ID by indexing detected Northbridges
v5: Reorder functions to prevent extra function declaration; merge 4th
patch; simplify Fam15h code; add detail to warning

Signed-off-by: Daniel J Blueman 
---
 arch/x86/include/asm/amd_nb.h |   13 +++
 drivers/edac/amd64_edac.c |   49 ++---
 drivers/edac/amd64_edac.h |6 -
 3 files changed, 39 insertions(+), 29 deletions(-)

diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index b3341e9..9f5532a 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -81,6 +81,19 @@ static inline struct amd_northbridge *node_to_amd_nb(int 
node)
return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : 
NULL;
 }
 
+static inline u16 amd_get_node_id(struct pci_dev *pdev)
+{
+   int i;
+
+   for (i = 0; i != amd_nb_num(); i++)
+   if (pci_domain_nr(node_to_amd_nb(i)->misc->bus) == 
pci_domain_nr(pdev->bus) &&
+   PCI_SLOT(node_to_amd_nb(i)->misc->devfn) == 
PCI_SLOT(pdev->devfn))
+   return i;
+
+   WARN(1, "Unable to find AMD Northbridge identifier for %s\n", 
pci_name(pdev));
+   return 0;
+}
+
 #else
 
 #define amd_nb_num(x)  0
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index cc8e7c7..852f1cd 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -982,6 +982,24 @@ static u64 get_error_address(struct mce *m)
return addr;
 }
 
+static struct pci_dev *pci_get_related_function(unsigned int vendor,
+   unsigned int device,
+   struct pci_dev *related)
+{
+   struct pci_dev *dev = NULL;
+
+   dev = pci_get_device(vendor, device, dev);
+   while (dev) {
+   if (pci_domain_nr(dev->bus) == pci_domain_nr(related->bus) &&
+   (dev->bus->number == related->bus->number) &&
+   (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
+   break;
+   dev = pci_get_device(vendor, device, dev);
+   }
+
+   return dev;
+}
+
 static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
 {
struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -1001,11 +1019,13 @@ static void read_dram_base_limit_regs(struct amd64_pvt 
*pvt, unsigned range)
 
/* Factor in CC6 save area by reading dst node's limit reg */
if (c->x86 == 0x15) {
-   struct pci_dev *f1 = NULL;
-   u8 nid = dram_dst_node(pvt, range);
+   struct pci_dev *misc, *f1 = NULL;
+   struct amd64_family_type *fam_type;
+   u16 nid = dram_dst_node(pvt, range);
u32 llim;
 
-   f1 = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x18 + nid, 
1));
+   misc = node_to_amd_nb(nid)->misc;
+   f1 = pci_get_related_function(misc->vendor, 
PCI_DEVICE_ID_AMD_15H_NB_F1, misc);
if (WARN_ON(!f1))
return;
 
@@ -1712,23 +1732,6 @@ static struct amd64_family_type amd64_family_types[] = {
},
 };
 
-static struct pci_dev *pci_get_related_function(unsigned int vendor,
-   unsigned int device,
-   struct pci_dev *related)
-{
-   struct pci_dev *dev = NULL;
-
-   dev = pci_get_device(vendor, device, dev);
-   while (dev) {
-   if ((dev->bus->number == related->bus->number) &&
-   (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
-   break;
-   dev = pci_get_device(vendor, device, dev);
-   }
-
-   return dev;
-}
-
 /*
  * These are tables of eigenvectors (one per line) which can be used for the
  * construction of the syndrome tables. The modified syndrome search algorithm
@@ -2546,7 +2549,7 @@ static int amd64_init_one_instance(struct pci_dev *F2)
struct mem_ctl_info *mci = NULL;
struct edac_mc_layer layers[2];
int err = 0, ret;
-   u8 nid = get_node_id(F2);
+   u8 nid = amd_get_node_id(F2);
 
ret = -ENOMEM;
pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
@@ -2637,7 +2640,7 @@ err_ret:
 static int __devinit amd64_probe_one_instance(struct pci_dev *pdev,
 const struct pci_device_id 
*mc_type)
 {
-   u8 nid = get_node_id(pdev);
+   u8 nid = amd_get_node_id(pdev);
struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
   

[PATCH 2/3, v3] AMD64 EDAC: Support >255 memory controllers

2012-11-04 Thread Daniel J Blueman
As the AMD64 last-level-cache ID is 16-bits and federated systems
eg using Numascale's NumaConnect/NumaChip can have more than 255 memory
controllers, use 16-bits to store the ID.

v2: Avoid change to intlv_en variable
v3: Drop unneeded change to index

Signed-off-by: Daniel J Blueman 
---
 drivers/edac/amd64_edac.c |   19 ++-
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 852f1cd..5dfe452 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -942,7 +942,8 @@ static u64 get_error_address(struct mce *m)
struct amd64_pvt *pvt;
u64 cc6_base, tmp_addr;
u32 tmp;
-   u8 mce_nid, intlv_en;
+   u16 mce_nid;
+   u8 intlv_en;
 
if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7)
return addr;
@@ -2299,7 +2300,7 @@ out:
return ret;
 }
 
-static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on)
+static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on)
 {
cpumask_var_t cmask;
int cpu;
@@ -2337,7 +2338,7 @@ static int toggle_ecc_err_reporting(struct ecc_settings 
*s, u8 nid, bool on)
return 0;
 }
 
-static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid,
+static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid,
   struct pci_dev *F3)
 {
bool ret = true;
@@ -2389,7 +2390,7 @@ static bool enable_ecc_error_reporting(struct 
ecc_settings *s, u8 nid,
return ret;
 }
 
-static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid,
+static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid,
struct pci_dev *F3)
 {
u32 value, mask = 0x3;  /* UECC/CECC enable */
@@ -2428,7 +2429,7 @@ static const char *ecc_msg =
"'ecc_enable_override'.\n"
" (Note that use of the override may cause unknown side effects.)\n";
 
-static bool ecc_enabled(struct pci_dev *F3, u8 nid)
+static bool ecc_enabled(struct pci_dev *F3, u16 nid)
 {
u32 value;
u8 ecc_en = 0;
@@ -2549,7 +2550,7 @@ static int amd64_init_one_instance(struct pci_dev *F2)
struct mem_ctl_info *mci = NULL;
struct edac_mc_layer layers[2];
int err = 0, ret;
-   u8 nid = amd_get_node_id(F2);
+   u16 nid = amd_get_node_id(F2);
 
ret = -ENOMEM;
pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
@@ -2640,7 +2641,7 @@ err_ret:
 static int __devinit amd64_probe_one_instance(struct pci_dev *pdev,
 const struct pci_device_id 
*mc_type)
 {
-   u8 nid = amd_get_node_id(pdev);
+   u16 nid = amd_get_node_id(pdev);
struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
struct ecc_settings *s;
int ret = 0;
@@ -2690,7 +2691,7 @@ static void __devexit amd64_remove_one_instance(struct 
pci_dev *pdev)
 {
struct mem_ctl_info *mci;
struct amd64_pvt *pvt;
-   u8 nid = amd_get_node_id(pdev);
+   u16 nid = amd_get_node_id(pdev);
struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
struct ecc_settings *s = ecc_stngs[nid];
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/3, v2] AMD64 EDAC: Cleanup type usage to be consistent

2012-11-04 Thread Daniel J Blueman
As the Northbridge IDs are at most 16-bits, use the same type
consistently and cleanup some indexes to use smaller types.

v2: Drop unneeded changes and changes Boris will cleanup later

Signed-off-by: Daniel J Blueman 
---
 arch/x86/include/asm/amd_nb.h|2 +-
 arch/x86/include/asm/processor.h |2 +-
 arch/x86/kernel/cpu/amd.c|4 ++--
 drivers/edac/amd64_edac.c|   14 +++---
 drivers/edac/amd64_edac.h|6 +++---
 5 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 9f5532a..b0815a0 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -76,7 +76,7 @@ static inline bool amd_nb_has_feature(unsigned feature)
return ((amd_northbridges.flags & feature) == feature);
 }
 
-static inline struct amd_northbridge *node_to_amd_nb(int node)
+static inline struct amd_northbridge *node_to_amd_nb(u16 node)
 {
return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : 
NULL;
 }
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index ad1fc85..eb3ba58 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -934,7 +934,7 @@ extern void start_thread(struct pt_regs *regs, unsigned 
long new_ip,
 extern int get_tsc_mode(unsigned long adr);
 extern int set_tsc_mode(unsigned int val);
 
-extern int amd_get_nb_id(int cpu);
+extern u16 amd_get_nb_id(int cpu);
 
 struct aperfmperf {
u64 aperf, mperf;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f7e98a2..52cab1f 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -364,9 +364,9 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
 #endif
 }
 
-int amd_get_nb_id(int cpu)
+u16 amd_get_nb_id(int cpu)
 {
-   int id = 0;
+   u16 id = 0;
 #ifdef CONFIG_SMP
id = per_cpu(cpu_llc_id, cpu);
 #endif
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 5dfe452..a3e297a 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -239,7 +239,7 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci)
  * DRAM base/limit associated with node_id
  */
 static bool amd64_base_limit_match(struct amd64_pvt *pvt, u64 sys_addr,
-  unsigned nid)
+  u8 nid)
 {
u64 addr;
 
@@ -265,7 +265,7 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct 
mem_ctl_info *mci,
u64 sys_addr)
 {
struct amd64_pvt *pvt;
-   unsigned node_id;
+   u8 node_id;
u32 intlv_en, bits;
 
/*
@@ -1349,7 +1349,7 @@ static u8 f1x_determine_channel(struct amd64_pvt *pvt, 
u64 sys_addr,
 }
 
 /* Convert the sys_addr to the normalized DCT address */
-static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, unsigned range,
+static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range,
 u64 sys_addr, bool hi_rng,
 u32 dct_sel_base_addr)
 {
@@ -1400,7 +1400,7 @@ static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, 
unsigned range,
  * checks if the csrow passed in is marked as SPARED, if so returns the new
  * spare row
  */
-static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow)
+static int f10_process_possible_spare(struct amd64_pvt *pvt, u16 dct, int 
csrow)
 {
int tmp_cs;
 
@@ -1425,7 +1425,7 @@ static int f10_process_possible_spare(struct amd64_pvt 
*pvt, u8 dct, int csrow)
  * -EINVAL:  NOT FOUND
  * 0..csrow = Chip-Select Row
  */
-static int f1x_lookup_addr_in_dct(u64 in_addr, u32 nid, u8 dct)
+static int f1x_lookup_addr_in_dct(u64 in_addr, u16 nid, u8 dct)
 {
struct mem_ctl_info *mci;
struct amd64_pvt *pvt;
@@ -2257,7 +2257,7 @@ static int init_csrows(struct mem_ctl_info *mci)
 }
 
 /* get all cores on this DCT */
-static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, unsigned nid)
+static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid)
 {
int cpu;
 
@@ -2267,7 +2267,7 @@ static void get_cpus_on_this_dct_cpumask(struct cpumask 
*mask, unsigned nid)
 }
 
 /* check MCG_CTL on all the cpus on this node */
-static bool amd64_nb_mce_bank_enabled_on_node(unsigned nid)
+static bool amd64_nb_mce_bank_enabled_on_node(u16 nid)
 {
cpumask_var_t mask;
int cpu, nbe;
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 90cae61..a2ea6a4 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -332,7 +332,7 @@ struct amd64_pvt {
/* pci_device handles which we utilize */
struct pci_dev *F1, *F2, *F3;
 
-   unsigned mc_node_id;/* MC index of this MC node */
+   u16 mc_node_id; /* MC index of this MC node */
int ext_model;  /* extended model value of this node */

[PATCH] Add Etron XHCI quirk to avoid warning spam

2012-07-26 Thread Daniel J Blueman
When using various USB3 devices with Etron XHCI controllers, we see a bunch of
warnings:
xhci_hcd 0000:02:00.0: WARN Successful completion on short TX: needs
XHCI_TRUST_TX_LENGTH quirk?

Acknowledge the issue by adding the quirk.

Signed-off-by: Daniel J Blueman 
---
 drivers/usb/host/xhci-pci.c |   11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 18b231b..715ad11 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -95,10 +95,13 @@ static void xhci_pci_quirks(struct device *dev, struct 
xhci_hcd *xhci)
xhci->limit_active_eps = 64;
xhci->quirks |= XHCI_SW_BW_CHECKING;
}
-   if (pdev->vendor == PCI_VENDOR_ID_ETRON &&
-   pdev->device == PCI_DEVICE_ID_ASROCK_P67) {
-   xhci->quirks |= XHCI_RESET_ON_RESUME;
-   xhci_dbg(xhci, "QUIRK: Resetting on resume\n");
+   if (pdev->vendor == PCI_VENDOR_ID_ETRON) {
+   xhci->quirks |= XHCI_TRUST_TX_LENGTH;
+
+   if (pdev->device == PCI_DEVICE_ID_ASROCK_P67) {
+   xhci->quirks |= XHCI_RESET_ON_RESUME;
+   xhci_dbg(xhci, "QUIRK: Resetting on resume\n");
+   }
}
if (pdev->vendor == PCI_VENDOR_ID_VIA)
xhci->quirks |= XHCI_RESET_ON_RESUME;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Add Etron XHCI quirk to avoid warning spam

2012-07-26 Thread Daniel J Blueman
On 27 July 2012 14:14, Sarah Sharp  wrote:
> On Fri, Jul 27, 2012 at 12:03:44PM +0800, Daniel J Blueman wrote:
>> When various USB3 devices with Etron XHCI controllers, we see a bunch of
>> warnings:
>> xhci_hcd :02:00.0: WARN Successful completion on short TX: needs
>> XHCI_TRUST_TX_LENGTH quirk?
>>
>> Acknowledge the issue by adding the quirk.
>>
>> Signed-off-by: Daniel J Blueman 
[]
> I already have a patch in my queue for this.  However, it keys off the
> PCI_DEVICE_ID_ASROCK_P67 PCI device ID.  Do you have another Etron
> device with a different device ID that needs this quirk?

Yes, the subsystem ID is different [1] (but Zotac program it the same
as the vendor and device IDs here), however what you say suggests the
problem is general to this Etron XHCI controller (1b6f:7023), as we'd
suspect anyway.

Thus the more general patch I posted makes better sense perhaps?

Thanks,
  Daniel

--- [1]

# lspci -vs 02:00.0
02:00.0 USB controller: Etron Technology, Inc. EJ168 USB 3.0 Host
Controller (rev 01) (prog-if 30 [XHCI])
Subsystem: Etron Technology, Inc. EJ168 USB 3.0 Host Controller
[]
# lspci -vns 02:00.0
02:00.0 0c03: 1b6f:7023 (rev 01) (prog-if 30 [XHCI])
Subsystem: 1b6f:7023
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Add Etron XHCI quirk to avoid warning spam

2012-07-30 Thread Daniel J Blueman
On 28 July 2012 01:10, Sarah Sharp  wrote:
> On Fri, Jul 27, 2012 at 02:40:56PM +0800, Daniel J Blueman wrote:
>> On 27 July 2012 14:14, Sarah Sharp  wrote:
>> > On Fri, Jul 27, 2012 at 12:03:44PM +0800, Daniel J Blueman wrote:
>> >> When various USB3 devices with Etron XHCI controllers, we see a bunch of
>> >> warnings:
>> >> xhci_hcd :02:00.0: WARN Successful completion on short TX: needs
>> >> XHCI_TRUST_TX_LENGTH quirk?
>> >>
>> >> Acknowledge the issue by adding the quirk.
>> >>
>> >> Signed-off-by: Daniel J Blueman 
>> []
>> > I already have a patch in my queue for this.  However, it keys off the
>> > PCI_DEVICE_ID_ASROCK_P67 PCI device ID.  Do you have another Etron
>> > device with a different device ID that needs this quirk?
>>
>> Yes, the subsystem ID is different [1] (but Zotac program it the same
>> as the vendor and device IDs here), however what you say suggests the
>> problem is general to this Etron XHCI controller (1b6f:7023), as we'd
>> suspect anyway.
>>
>> Thus the more general patch I posted makes better sense perhaps?
>
> I'd really like to keep this quirk specific to the particular PCI vendor
> and device ID.  It's possible that their next chip version will have the
> opposite issue (short TX completion code and bad untransferred length).
>
> Your patch turned it on for all Etron hosts, so I would rather keep my
> version:
>
> http://git.kernel.org/?p=linux/kernel/git/sarah/xhci.git;a=commit;h=12751f75720391bb2b607acdb2537f02e313251e
[]

Ok, the patch is correct since PCI_DEVICE_ID_ASROCK_P67 evaluates to
0x7023, which is the Etron EJ168 device ID.

Board-specific IDs (as the definition name suggests) are used for the
subsystem IDs, so this name is misleading (and misled me at least).

Can you fix this up with a patch to change PCI_DEVICE_ID_ASROCK_P67 to
PCI_DEVICE_ID_ETRON_EJ168, else I can cook and test a patch?

Thanks,
  Daniel
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[3.6-rc7] switcheroo race with Intel HDA...

2012-09-24 Thread Daniel J Blueman
On my Macbook with a discrete Nvidia GPU, there is a race between
selecting the integrated GPU and putting the discrete GPU into D3 [1],
reliably causing a kernel oops [2].

Introducing a delay of ~1s between the calls prevents this. When the
second 'OFF' write path executes, it looks like struct azx at
card->private_data hasn't been allocated yet [3], so there is
likely some locking missing.

I'm happy to perform further testing and debug of course...

Thanks,
  Daniel

--- [1]

echo IGD > /sys/kernel/debug/vgaswitcheroo/switch
echo OFF > /sys/kernel/debug/vgaswitcheroo/switch

--- [2]

BUG: unable to handle kernel NULL pointer dereference at 0170
IP: [] azx_vs_set_state+0x26/0x178 [snd_hda_intel]
PGD 259c26067 PUD 25a0fd067 PMD 0
Oops:  [#1] SMP DEBUG_PAGEALLOC
Modules linked in: snd_hda_codec_hdmi bnep rfcomm b43 joydev nfsd ssb
nfs_acl auth_rpcgss binfmt_misc nfs lockd sunrpc uvcvideo bcm5974
videobuf2_core videobuf2_vmalloc videobuf2_memops coretemp kvm_intel
snd_hda_codec_cirrus kvm applesmc input_polldev microcode bcma lpc_ich
mfd_core mei snd_hda_intel(+) snd_hda_codec snd_hwdep snd_pcm
snd_timer snd snd_page_alloc nls_iso8859_1 apple_gmux mac_hid apple_bl
btrfs hid_apple sdhci_pci ghash_clmulni_intel tg3 sdhci i915 nouveau
ttm drm_kms_helper hwmon mxm_wmi video
CPU 2
Pid: 961, comm: sh Not tainted 3.6.0-rc7 #2 Apple Inc.
MacBookPro10,1/Mac-C3EC7CD22292981F
RIP: 0010:[] []
azx_vs_set_state+0x26/0x178 [snd_hda_intel]
RSP: 0018:880264271e48 EFLAGS: 00010286
RAX:  RBX: 88025a2f5280 RCX: 
RDX: 0006 RSI:  RDI: 880265479098
RBP: 880264271e68 R08:  R09: 
R10:  R11:  R12: 880265479098
R13:  R14: 880264271f50 R15: 
FS: 7fa4fe183700() GS:88026f28() knlGS:
CS: 0010 DS:  ES:  CR0: 80050033
CR2: 0170 CR3: 0002641a7000 CR4: 001407e0
DR0:  DR1:  DR2: 
DR3:  DR6: 0ff0 DR7: 0400
Process sh (pid: 961, threadinfo 88026427, task 880264503a00)
Stack:
  88025a2f5280  880264271e98
 880264271e88 812e83a7 8802622835c0 0004
 880264271ef8 812e89ac 88020a46464f 880264503a00
Call Trace:
 [] set_audio_state+0x67/0x70
 [] vga_switcheroo_debugfs_write+0xbc/0x380
 [] vfs_write+0xa3/0x160
 [] sys_write+0x45/0xa0
 [] system_call_fastpath+0x1a/0x1f
Code: 00 00 00 00 00 55 48 89 e5 48 83 ec 20 4c 89 65 f0 4c 8d a7 98
00 00 00 4c 89 e7 48 89 5d e8 4c 89 6d f8 41 89 f5 e8 2a 35 13 e1 <48>
8b 98 70 01 00 00 0f b6 83 55 02 00 00 a8 08 75 34 45 85 ed
RIP [] azx_vs_set_state+0x26/0x178 [snd_hda_intel]
 RSP 
CR2: 0170

--- [3]

(gdb) list *(azx_vs_set_state+0x26)
0x2936 is in azx_vs_set_state (sound/pci/hda/hda_intel.c:2505).
2500
2501static void azx_vs_set_state(struct pci_dev *pci,
2502   enum vga_switcheroo_state state)
2503{
2504struct snd_card *card = pci_get_drvdata(pci);
2505struct azx *chip = card->private_data;
2506bool disabled;
2507
2508if (chip->init_failed)
2509    return;
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v3] Add support for AMD64 EDAC on multiple PCI domains

2012-10-28 Thread Daniel J Blueman

On 25/10/2012 19:03, Borislav Petkov wrote:

On Thu, Oct 25, 2012 at 04:32:52PM +0800, Daniel J Blueman wrote:

The AMD Northbridge initialisation code and EDAC assume the Northbridge IDs
are contiguous, which no longer holds on federated systems with multiple
HyperTransport fabrics and multiple PCI domains, eg on Numascale's
Numaconnect systems with NumaChip.

Address this assumption by searching the Northbridge ID array, rather than
directly indexing it, using the upper bits for the PCI domain.

RFC->v2: Correct array initialisation
v2->v3: Add Boris's neater linked list approach

Todo:
1. fix kobject/sysfs oops (see http://quora.org/2012/16-server-boot.txt later)
2. reorder amd64_edac.c or add amd64_per_family_init/pci_get_related_function
forward declarations, based on feedback

Signed-off-by: Daniel J Blueman 


This patch contains code from both of us and thus needs both our SOBs:

Signed-off-by: Borislav Petkov 


I'll use "Based-on-patch-from: Borislav Petkov ", great.


---
  arch/x86/include/asm/amd_nb.h|   63 +++-
  arch/x86/include/asm/numachip/numachip.h |   22 ++
  arch/x86/kernel/amd_gart_64.c|8 +-
  arch/x86/kernel/amd_nb.c |   85 -
  arch/x86/pci/numachip.c  |  121 ++
  drivers/char/agp/amd64-agp.c |   12 +--
  drivers/edac/amd64_edac.c|   34 +
  drivers/edac/amd64_edac.h|6 --
  8 files changed, 283 insertions(+), 68 deletions(-)
  create mode 100644 arch/x86/include/asm/numachip/numachip.h
  create mode 100644 arch/x86/pci/numachip.c

diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index b3341e9..6a27226 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -4,6 +4,8 @@
  #include 
  #include 

+#define NUM_POSSIBLE_NBS   8
+
  struct amd_nb_bus_dev_range {
u8 bus;
u8 dev_base;
@@ -51,12 +53,22 @@ struct amd_northbridge {
struct pci_dev *link;
struct amd_l3_cache l3_cache;
struct threshold_bank *bank4;
+   u16 node;
+   struct list_head nbl;
  };

  struct amd_northbridge_info {
u16 num;
u64 flags;
-   struct amd_northbridge *nb;
+
+   /*
+* The first 8 elems are for fast lookup of NB descriptors on single-
+* system setups, i.e. "normal" boxes. The nb_list, OTOH, is list of
+* additional NB descriptors which exist on confederate systems
+* like using Numascale's Numaconnect/NumaChip.
+*/
+   struct amd_northbridge *nbs[NUM_POSSIBLE_NBS];
+   struct list_head nb_list;
  };
  extern struct amd_northbridge_info amd_northbridges;

@@ -78,7 +90,54 @@ static inline bool amd_nb_has_feature(unsigned feature)

  static inline struct amd_northbridge *node_to_amd_nb(int node)
  {
-   return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : 
NULL;
+   struct amd_northbridge_info *nbi = &amd_northbridges;
+   struct amd_northbridge *nb;
+   int i;
+
+   /* Quick search for first domain */
+   if (node < NUM_POSSIBLE_NBS) {
+   if (node < nbi->num)
+   return nbi->nbs[node];
+   else
+   return NULL;
+   }


Why change that here from what I had before?

nbi->nbs[node] will either return a valid descriptor or NULL because it
is statically allocated in amd_northbridge_info.

So why add a conditional where you clearly don't need it?


True; fixed up.


+   /* Search for NBs from later domains in array */
+   for (i = 0; i < NUM_POSSIBLE_NBS; i++)
+   if (nbi->nbs[i]->node == node)
+   return nbi->nbs[i];


And then this is not needed.


Eg with two servers with two Northbridges per server, interconnected, 
Linux sees two PCI domains (bits 15:3) and the nbs array would have node 
IDs:


[0x00]
[0x01]
[0x08]
[0x09]

Without that check, searching for node 0x08 would only hit the linked 
list, though this doesn't affect the fast-path (id < 0x8) of course.


We can use the static array for only the first PCI domain by changing 
_alloc_nb_desc to use the list when nbi->node > NUM_POSSIBLE_NBS, rather 
than nbi->num; we'd then need to introduce a variable to struct 
amd_northbridge_info to keep track of how many static array entries are 
used, for a linear lookup in index_to_amd_nb.



+
+   list_for_each_entry(nb, &nbi->nb_list, nbl)
+   if (node == nb->node)
+   return nb;


And why change the list_for_each_entry_safe variant? It is not needed
now but who knows what code changes where in the future.


Changed also.


+
+   return NULL;
+}
+
+static inline struct amd_northbridge *index_to_amd_nb(int index)
+{
+   struct amd_northbridge_inf

Re: [PATCH v3] Add support for AMD64 EDAC on multiple PCI domains

2012-10-29 Thread Daniel J Blueman

On 29/10/2012 14:17, Daniel J Blueman wrote:

On 25/10/2012 19:03, Borislav Petkov wrote:

On Thu, Oct 25, 2012 at 04:32:52PM +0800, Daniel J Blueman wrote:

The AMD Northbridge initialisation code and EDAC assume the
Northbridge IDs
are contiguous, which no longer holds on federated systems with multiple
HyperTransport fabrics and multiple PCI domains, eg on Numascale's
Numaconnect systems with NumaChip.

Address this assumption by searching the Northbridge ID array, rather
than
directly indexing it, using the upper bits for the PCI domain.

RFC->v2: Correct array initialisation
v2->v3: Add Boris's neater linked list approach

Todo:
1. fix kobject/sysfs oops (see
http://quora.org/2012/16-server-boot.txt later)
2. reorder amd64_edac.c or add
amd64_per_family_init/pci_get_related_function
forward declarations, based on feedback

Signed-off-by: Daniel J Blueman 


This patch contains code from both of us and thus needs both our SOBs:

Signed-off-by: Borislav Petkov 


I'll use "Based-on-patch-from: Borislav Petkov ", great.


---
  arch/x86/include/asm/amd_nb.h|   63 +++-
  arch/x86/include/asm/numachip/numachip.h |   22 ++
  arch/x86/kernel/amd_gart_64.c|8 +-
  arch/x86/kernel/amd_nb.c |   85 -
  arch/x86/pci/numachip.c  |  121
++
  drivers/char/agp/amd64-agp.c |   12 +--
  drivers/edac/amd64_edac.c|   34 +
  drivers/edac/amd64_edac.h|6 --
  8 files changed, 283 insertions(+), 68 deletions(-)
  create mode 100644 arch/x86/include/asm/numachip/numachip.h
  create mode 100644 arch/x86/pci/numachip.c

diff --git a/arch/x86/include/asm/amd_nb.h
b/arch/x86/include/asm/amd_nb.h
index b3341e9..6a27226 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -4,6 +4,8 @@
  #include 
  #include 

+#define NUM_POSSIBLE_NBS8
+
  struct amd_nb_bus_dev_range {
  u8 bus;
  u8 dev_base;
@@ -51,12 +53,22 @@ struct amd_northbridge {
  struct pci_dev *link;
  struct amd_l3_cache l3_cache;
  struct threshold_bank *bank4;
+u16 node;
+struct list_head nbl;
  };

  struct amd_northbridge_info {
  u16 num;
  u64 flags;
-struct amd_northbridge *nb;
+
+/*
+ * The first 8 elems are for fast lookup of NB descriptors on
single-
+ * system setups, i.e. "normal" boxes. The nb_list, OTOH, is
list of
+ * additional NB descriptors which exist on confederate systems
+ * like using Numascale's Numaconnect/NumaChip.
+ */
+struct amd_northbridge *nbs[NUM_POSSIBLE_NBS];
+struct list_head nb_list;
  };
  extern struct amd_northbridge_info amd_northbridges;

@@ -78,7 +90,54 @@ static inline bool amd_nb_has_feature(unsigned
feature)

  static inline struct amd_northbridge *node_to_amd_nb(int node)
  {
-return (node < amd_northbridges.num) ?
&amd_northbridges.nb[node] : NULL;
+struct amd_northbridge_info *nbi = &amd_northbridges;
+struct amd_northbridge *nb;
+int i;
+
+/* Quick search for first domain */
+if (node < NUM_POSSIBLE_NBS) {
+if (node < nbi->num)
+return nbi->nbs[node];
+else
+return NULL;
+}


Why change that here from what I had before?

nbi->nbs[node] will either return a valid descriptor or NULL because it
is statically allocated in amd_northbridge_info.

So why add a conditional where you clearly don't need it?


True; fixed up.


+/* Search for NBs from later domains in array */
+for (i = 0; i < NUM_POSSIBLE_NBS; i++)
+if (nbi->nbs[i]->node == node)
+return nbi->nbs[i];


And then this is not needed.


Eg with two servers with two Northbridges per server, interconnected,
Linux sees two PCI domains (bits 15:3) and the nbs array would have node
IDs:

[0x00]
[0x01]
[0x08]
[0x09]

Without that check, searching for node 0x08 would only hit the linked
list, though this doesn't affect the fast-path (id < 0x8) of course.

We can use the static array for only the first PCI domain by changing
_alloc_nb_desc to use the list when nbi->node > NUM_POSSIBLE_NBS, rather
than nbi->num; we'd then need to introduce a variable to struct
amd_northbridge_info to keep track of how many static array entries are
used, for a linear lookup in index_to_amd_nb.


+
+list_for_each_entry(nb, &nbi->nb_list, nbl)
+if (node == nb->node)
+return nb;


And why change the list_for_each_entry_safe variant? It is not needed
now but who knows what code changes where in the future.


Changed also.


+
+return NULL;
+}
+
+static inline struct amd_northbridge *index_to_amd_nb(int index)
+{
+struct amd_northbridge_info *nbi = &amd_northbridges;
+struct amd_northbridge *nb;
+int count = NUM_POSSIBLE_NBS;
+
+if (index <

Re: [PATCH v3] Add support for AMD64 EDAC on multiple PCI domains

2012-10-30 Thread Daniel J Blueman

On 29/10/2012 18:32, Borislav Petkov wrote:

+ Andreas.

Dude, look at this boot log below:

http://quora.org/2012/16-server-boot-2.txt

That's 192 F10h's!


We were booting 384 a while back, but I'll let you know when we reach 4096!


On Mon, Oct 29, 2012 at 04:54:59PM +0800, Daniel J Blueman wrote:

A number of other callers look up the PCI device based on index
0..amd_nb_num(), but we can't easily allocate contiguous northbridge IDs
from the PCI device in the first place.


OTOH we can simplify this code by changing amd_get_node_id to generate a
linear northbridge ID from the index of the matching entry in the
northbridge array.

I'll get a patch together to see if there are any snags.


I suspected that after we have this nice approach, you guys would come
with non-contiguous node numbers. Maan, can't you build your systems so
that software people can have it easy at least for once??!


It depends on the definition of node, of course. The only change we're
considering is compliance with the Intel x2apic spec by using the
upper 16 bits of the APIC ID as the server ("cluster") ID, since there
are optimisations in Linux for this.



This really is a lot less intrusive [1] and boots well on top of
3.7-rc3 on one of our 16-server/192-core/512GB systems [2].

If you're happy with this simpler approach for now, I'll present
this and a separate patch cleaning up the inconsistent use of
unsigned and u8 node ID variables to u16?


Sure, bring it on.


Yes, I've prepared a patch series and it tests out well.


diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index b3341e9..b88fc7a 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -81,6 +81,18 @@ static inline struct amd_northbridge
*node_to_amd_nb(int node)
 return (node < amd_northbridges.num) ?
&amd_northbridges.nb[node] : NULL;
  }

+static inline u8 get_node_id(struct pci_dev *pdev)
+{
+   int i;
+
+   for (i = 0; i != amd_nb_num(); i++)
+   if (pci_domain_nr(node_to_amd_nb(i)->misc->bus) ==
pci_domain_nr(pdev->bus) &&
+   PCI_SLOT(node_to_amd_nb(i)->misc->devfn) ==
PCI_SLOT(pdev->devfn))
+   return i;


Looks ok, can you send the whole patch please?


+   BUG();


I'm not sure about this - maybe WARN()? Are we absolutely sure we
unconditionally should panic after not finding an NB descriptor?


It looks like the only way we could be looking up a non-existent NB
descriptor is if the array or variable in hand was corrupted. Maybe
better to panic immediately than to leave the failure elusive to debug later.


I've tweaked this to warn and return the first Northbridge ID to avoid 
further issues, but even that isn't ideal.



Btw, this shouldn't happen on those CPUs:

[   39.279131] TSC synchronization [CPU#0 -> CPU#12]:
[   39.287223] Measured 22750019569 cycles TSC warp between CPUs, turning off 
TSC clock.
[0.03] tsc: Marking TSC unstable due to check_tsc_sync_source failed

I guess TSCs are not starting at the same moment on all boards.


As these are physically separate servers (off-the-shelf servers in fact, 
a key benefit of NumaConnect), the TSC clocks diverge. Later, I'll be 
cooking up a patch series to keep them in sync, allowing fast TSC use.



You definitely need ucode on those too:

[  113.392460] microcode: CPU0: patch_level=0x


Good tip!

Thanks,
  Daniel
--
Daniel J Blueman
Principal Software Engineer, Numascale Asia
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/4, v4] AMD64 EDAC: Add multi-domain support to AMD EDAC

2012-10-30 Thread Daniel J Blueman
Fix the handling of memory controller detection to index the array
of detected Northbridges, allowing memory controllers over multiple
PCI domains in federated systems eg using Numascale's NumaConnect/
NumaChip.

v4: Generate linear Northbridge ID by indexing detected Northbridges

Signed-off-by: Daniel J Blueman 
---
 arch/x86/include/asm/amd_nb.h |   12 
 drivers/edac/amd64_edac.c |   18 ++
 drivers/edac/amd64_edac.h |6 --
 3 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index b3341e9..b88fc7a 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -81,6 +81,19 @@ static inline struct amd_northbridge *node_to_amd_nb(int 
node)
return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : 
NULL;
 }
 
+static inline u16 get_node_id(struct pci_dev *pdev)
+{
+   int i;
+
+   for (i = 0; i != amd_nb_num(); i++)
+   if (pci_domain_nr(node_to_amd_nb(i)->misc->bus) == 
pci_domain_nr(pdev->bus) &&
+   PCI_SLOT(node_to_amd_nb(i)->misc->devfn) == 
PCI_SLOT(pdev->devfn))
+   return i;
+
+   WARN(1, "Unable to find AMD Northbridge identifier\n");
+   return 0;
+}
+
 #else
 
 #define amd_nb_num(x)  0
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index cc8e7c7..18d404a 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -982,6 +982,9 @@ static u64 get_error_address(struct mce *m)
return addr;
 }
 
+static struct amd64_family_type *amd64_per_family_init(struct amd64_pvt *pvt);
+static struct pci_dev *pci_get_related_function(unsigned int vendor, unsigned 
int device, struct pci_dev *related);
+
 static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
 {
struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -1001,11 +1004,17 @@ static void read_dram_base_limit_regs(struct amd64_pvt 
*pvt, unsigned range)
 
/* Factor in CC6 save area by reading dst node's limit reg */
if (c->x86 == 0x15) {
-   struct pci_dev *f1 = NULL;
-   u8 nid = dram_dst_node(pvt, range);
+   struct pci_dev *misc, *f1 = NULL;
+   struct amd64_family_type *fam_type;
+   u16 nid = dram_dst_node(pvt, range);
u32 llim;
 
-   f1 = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x18 + nid, 
1));
+   misc = node_to_amd_nb(nid)->misc;
+   fam_type = amd64_per_family_init(pvt);
+   if (WARN_ON(!f1))
+   return;
+
+   f1 = pci_get_related_function(misc->vendor, fam_type->f1_id, 
misc);
if (WARN_ON(!f1))
return;
 
@@ -1720,7 +1729,8 @@ static struct pci_dev *pci_get_related_function(unsigned 
int vendor,
 
dev = pci_get_device(vendor, device, dev);
while (dev) {
-   if ((dev->bus->number == related->bus->number) &&
+   if (pci_domain_nr(dev->bus) == pci_domain_nr(related->bus) &&
+   (dev->bus->number == related->bus->number) &&
(PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
break;
dev = pci_get_device(vendor, device, dev);
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 8d48047..90cae61 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -290,12 +290,6 @@
 /* MSRs */
 #define MSR_MCGCTL_NBE BIT(4)
 
-/* AMD sets the first MC device at device ID 0x18. */
-static inline u8 get_node_id(struct pci_dev *pdev)
-{
-   return PCI_SLOT(pdev->devfn) - 0x18;
-}
-
 enum amd_families {
K8_CPUS = 0,
F10_CPUS,
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/4] AMD64 EDAC: Add support for >255 memory controllers

2012-10-30 Thread Daniel J Blueman
As the AMD64 last-level-cache ID is 16-bits and federated systems
eg using Numascale's NumaConnect/NumaChip can have more than 255 memory
controllers, use 16-bits to store the ID.

Signed-off-by: Daniel J Blueman 
---
 drivers/edac/amd64_edac.c |   18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 18d404a..9920dfd 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -942,7 +942,7 @@ static u64 get_error_address(struct mce *m)
struct amd64_pvt *pvt;
u64 cc6_base, tmp_addr;
u32 tmp;
-   u8 mce_nid, intlv_en;
+   u16 mce_nid, intlv_en;
 
if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7)
return addr;
@@ -1499,7 +1499,7 @@ static int f1x_match_to_this_node(struct amd64_pvt *pvt, 
unsigned range,
u8 channel;
bool high_range = false;
 
-   u8 node_id= dram_dst_node(pvt, range);
+   u16 node_id   = dram_dst_node(pvt, range);
u8 intlv_en   = dram_intlv_en(pvt, range);
u32 intlv_sel = dram_intlv_sel(pvt, range);
 
@@ -2306,7 +2306,7 @@ out:
return ret;
 }
 
-static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on)
+static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on)
 {
cpumask_var_t cmask;
int cpu;
@@ -2344,7 +2344,7 @@ static int toggle_ecc_err_reporting(struct ecc_settings 
*s, u8 nid, bool on)
return 0;
 }
 
-static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid,
+static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid,
   struct pci_dev *F3)
 {
bool ret = true;
@@ -2396,7 +2396,7 @@ static bool enable_ecc_error_reporting(struct 
ecc_settings *s, u8 nid,
return ret;
 }
 
-static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid,
+static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid,
struct pci_dev *F3)
 {
u32 value, mask = 0x3;  /* UECC/CECC enable */
@@ -2435,7 +2435,7 @@ static const char *ecc_msg =
"'ecc_enable_override'.\n"
" (Note that use of the override may cause unknown side effects.)\n";
 
-static bool ecc_enabled(struct pci_dev *F3, u8 nid)
+static bool ecc_enabled(struct pci_dev *F3, u16 nid)
 {
u32 value;
u8 ecc_en = 0;
@@ -2556,7 +2556,7 @@ static int amd64_init_one_instance(struct pci_dev *F2)
struct mem_ctl_info *mci = NULL;
struct edac_mc_layer layers[2];
int err = 0, ret;
-   u8 nid = get_node_id(F2);
+   u16 nid = get_node_id(F2);
 
ret = -ENOMEM;
pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
@@ -2647,7 +2647,7 @@ err_ret:
 static int __devinit amd64_probe_one_instance(struct pci_dev *pdev,
 const struct pci_device_id 
*mc_type)
 {
-   u8 nid = get_node_id(pdev);
+   u16 nid = get_node_id(pdev);
struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
struct ecc_settings *s;
int ret = 0;
@@ -2697,7 +2697,7 @@ static void __devexit amd64_remove_one_instance(struct 
pci_dev *pdev)
 {
struct mem_ctl_info *mci;
struct amd64_pvt *pvt;
-   u8 nid = get_node_id(pdev);
+   u16 nid = get_node_id(pdev);
struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
struct ecc_settings *s = ecc_stngs[nid];
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/4] AMD64 EDAC: Cleanup type usage to be consistent

2012-10-30 Thread Daniel J Blueman
As the Northbridge IDs are at most 16-bits, use the same type
consistently.

Signed-off-by: Daniel J Blueman 
---
 arch/x86/include/asm/amd_nb.h|2 +-
 arch/x86/include/asm/processor.h |2 +-
 arch/x86/kernel/cpu/amd.c|4 ++--
 drivers/edac/amd64_edac.c|   26 ++
 drivers/edac/amd64_edac.h|2 +-
 5 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index b88fc7a..0cc1045 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -76,7 +76,7 @@ static inline bool amd_nb_has_feature(unsigned feature)
return ((amd_northbridges.flags & feature) == feature);
 }
 
-static inline struct amd_northbridge *node_to_amd_nb(int node)
+static inline struct amd_northbridge *node_to_amd_nb(u16 node)
 {
return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : 
NULL;
 }
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index ad1fc85..eb3ba58 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -934,7 +934,7 @@ extern void start_thread(struct pt_regs *regs, unsigned 
long new_ip,
 extern int get_tsc_mode(unsigned long adr);
 extern int set_tsc_mode(unsigned int val);
 
-extern int amd_get_nb_id(int cpu);
+extern u16 amd_get_nb_id(int cpu);
 
 struct aperfmperf {
u64 aperf, mperf;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f7e98a2..52cab1f 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -364,9 +364,9 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
 #endif
 }
 
-int amd_get_nb_id(int cpu)
+u16 amd_get_nb_id(int cpu)
 {
-   int id = 0;
+   u16 id = 0;
 #ifdef CONFIG_SMP
id = per_cpu(cpu_llc_id, cpu);
 #endif
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 9920dfd..12cd675 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -239,7 +239,7 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci)
  * DRAM base/limit associated with node_id
  */
 static bool amd64_base_limit_match(struct amd64_pvt *pvt, u64 sys_addr,
-  unsigned nid)
+  u16 nid)
 {
u64 addr;
 
@@ -265,7 +265,7 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct 
mem_ctl_info *mci,
u64 sys_addr)
 {
struct amd64_pvt *pvt;
-   unsigned node_id;
+   u16 node_id;
u32 intlv_en, bits;
 
/*
@@ -613,7 +613,8 @@ static u64 sys_addr_to_input_addr(struct mem_ctl_info *mci, 
u64 sys_addr)
 static u64 input_addr_to_dram_addr(struct mem_ctl_info *mci, u64 input_addr)
 {
struct amd64_pvt *pvt;
-   unsigned node_id, intlv_shift;
+   u16 node_id;
+   unsigned intlv_shift;
u64 bits, dram_addr;
u32 intlv_sel;
 
@@ -1337,7 +1338,7 @@ static u8 f1x_determine_channel(struct amd64_pvt *pvt, 
u64 sys_addr,
 }
 
 /* Convert the sys_addr to the normalized DCT address */
-static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, unsigned range,
+static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u16 range,
 u64 sys_addr, bool hi_rng,
 u32 dct_sel_base_addr)
 {
@@ -1413,7 +1414,7 @@ static int f10_process_possible_spare(struct amd64_pvt 
*pvt, u8 dct, int csrow)
  * -EINVAL:  NOT FOUND
  * 0..csrow = Chip-Select Row
  */
-static int f1x_lookup_addr_in_dct(u64 in_addr, u32 nid, u8 dct)
+static int f1x_lookup_addr_in_dct(u64 in_addr, u16 nid, u8 dct)
 {
struct mem_ctl_info *mci;
struct amd64_pvt *pvt;
@@ -1491,7 +1492,7 @@ static u64 f1x_swap_interleaved_region(struct amd64_pvt 
*pvt, u64 sys_addr)
 
 /* For a given @dram_range, check if @sys_addr falls within it. */
 static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range,
- u64 sys_addr, int *nid, int *chan_sel)
+ u64 sys_addr, u16 *nid, int *chan_sel)
 {
int cs_found = -EINVAL;
u64 chan_addr;
@@ -1572,10 +1573,10 @@ static int f1x_match_to_this_node(struct amd64_pvt 
*pvt, unsigned range,
 }
 
 static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
-  int *node, int *chan_sel)
+  u16 *node, int *chan_sel)
 {
int cs_found = -EINVAL;
-   unsigned range;
+   u16 range;
 
for (range = 0; range < DRAM_RANGES; range++) {
 
@@ -1607,7 +1608,8 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info 
*mci, u64 sys_addr,
 {
struct amd64_pvt *pvt = mci->pvt_info;
u32 page, offset;
-   int nid, csrow, chan = 0;
+   int csrow, chan = 0;
+   u16 nid;
 
error_address_to_page_and_offset(sys_addr, &page, &offs

[PATCH 4/4] AMD64 EDAC: Use appropriate name for NB indexing

2012-10-30 Thread Daniel J Blueman
Use the same 'amd' prefix as related functions for clarity.

Signed-off-by: Daniel J Blueman 
---
 arch/x86/include/asm/amd_nb.h |2 +-
 drivers/edac/amd64_edac.c |6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 0cc1045..39b5ddd 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -81,7 +81,7 @@ static inline struct amd_northbridge *node_to_amd_nb(u16 node)
return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : 
NULL;
 }
 
-static inline u16 get_node_id(struct pci_dev *pdev)
+static inline u16 amd_get_node_id(struct pci_dev *pdev)
 {
int i;
 
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 12cd675..59658b9 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2558,7 +2558,7 @@ static int amd64_init_one_instance(struct pci_dev *F2)
struct mem_ctl_info *mci = NULL;
struct edac_mc_layer layers[2];
int err = 0, ret;
-   u16 nid = get_node_id(F2);
+   u16 nid = amd_get_node_id(F2);
 
ret = -ENOMEM;
pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
@@ -2649,7 +2649,7 @@ err_ret:
 static int __devinit amd64_probe_one_instance(struct pci_dev *pdev,
 const struct pci_device_id 
*mc_type)
 {
-   u16 nid = get_node_id(pdev);
+   u16 nid = amd_get_node_id(pdev);
struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
struct ecc_settings *s;
int ret = 0;
@@ -2699,7 +2699,7 @@ static void __devexit amd64_remove_one_instance(struct 
pci_dev *pdev)
 {
struct mem_ctl_info *mci;
struct amd64_pvt *pvt;
-   u16 nid = get_node_id(pdev);
+   u16 nid = amd_get_node_id(pdev);
struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
struct ecc_settings *s = ecc_stngs[nid];
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/4] AMD64 EDAC: Add support for >255 memory controllers

2012-10-31 Thread Daniel J Blueman

On 31/10/2012 16:18, Torsten Kaiser wrote:

On Wed, Oct 31, 2012 at 6:55 AM, Daniel J Blueman
 wrote:

As the AMD64 last-level-cache ID is 16-bits and federated systems
eg using Numascale's NumaConnect/NumaChip can have more than 255 memory
controllers, use 16-bits to store the ID.

Signed-off-by: Daniel J Blueman 
---
  drivers/edac/amd64_edac.c |   18 +-
  1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 18d404a..9920dfd 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -942,7 +942,7 @@ static u64 get_error_address(struct mce *m)
 struct amd64_pvt *pvt;
 u64 cc6_base, tmp_addr;
 u32 tmp;
-   u8 mce_nid, intlv_en;
+   u16 mce_nid, intlv_en;


Is the change of intlv_en to u16 intentional?
I assume its not, because...


It's unintentional. Elsewhere, intlv_en is declared as unsigned, so 
perhaps that should be cleaned up later too.


I'll issue an updated patch.


 if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7)
 return addr;
@@ -1499,7 +1499,7 @@ static int f1x_match_to_this_node(struct amd64_pvt *pvt, 
unsigned range,
 u8 channel;
 bool high_range = false;

-   u8 node_id= dram_dst_node(pvt, range);
+   u16 node_id   = dram_dst_node(pvt, range);
 u8 intlv_en   = dram_intlv_en(pvt, range);


... here you keep it at u8.


 u32 intlv_sel = dram_intlv_sel(pvt, range);

@@ -2306,7 +2306,7 @@ out:
 return ret;
  }

-static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on)
+static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on)
  {
 cpumask_var_t cmask;
 int cpu;
@@ -2344,7 +2344,7 @@ static int toggle_ecc_err_reporting(struct ecc_settings 
*s, u8 nid, bool on)
 return 0;
  }

-static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid,
+static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid,
struct pci_dev *F3)
  {
 bool ret = true;
@@ -2396,7 +2396,7 @@ static bool enable_ecc_error_reporting(struct 
ecc_settings *s, u8 nid,
 return ret;
  }

-static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid,
+static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid,
 struct pci_dev *F3)
  {
 u32 value, mask = 0x3;  /* UECC/CECC enable */
@@ -2435,7 +2435,7 @@ static const char *ecc_msg =
 "'ecc_enable_override'.\n"
 " (Note that use of the override may cause unknown side effects.)\n";

-static bool ecc_enabled(struct pci_dev *F3, u8 nid)
+static bool ecc_enabled(struct pci_dev *F3, u16 nid)
  {
 u32 value;
 u8 ecc_en = 0;
@@ -2556,7 +2556,7 @@ static int amd64_init_one_instance(struct pci_dev *F2)
 struct mem_ctl_info *mci = NULL;
 struct edac_mc_layer layers[2];
 int err = 0, ret;
-   u8 nid = get_node_id(F2);
+   u16 nid = get_node_id(F2);

 ret = -ENOMEM;
 pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
@@ -2647,7 +2647,7 @@ err_ret:
  static int __devinit amd64_probe_one_instance(struct pci_dev *pdev,
  const struct pci_device_id 
*mc_type)
  {
-   u8 nid = get_node_id(pdev);
+   u16 nid = get_node_id(pdev);
 struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
 struct ecc_settings *s;
 int ret = 0;
@@ -2697,7 +2697,7 @@ static void __devexit amd64_remove_one_instance(struct 
pci_dev *pdev)
  {
 struct mem_ctl_info *mci;
 struct amd64_pvt *pvt;
-   u8 nid = get_node_id(pdev);
+   u16 nid = get_node_id(pdev);
 struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
 struct ecc_settings *s = ecc_stngs[nid];

--
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



--
Daniel J Blueman
Principal Software Engineer, Numascale Asia
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/4, v2] AMD64 EDAC: Add support for >255 memory controllers

2012-10-31 Thread Daniel J Blueman
As the AMD64 last-level-cache ID is 16-bits and federated systems
eg using Numascale's NumaConnect/NumaChip can have more than 255 memory
controllers, use 16-bits to store the ID.

v2: Avoid change to intlv_en variable

Signed-off-by: Daniel J Blueman 
---
 drivers/edac/amd64_edac.c |   19 ++-
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 18d404a..28b2005 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -942,7 +942,8 @@ static u64 get_error_address(struct mce *m)
struct amd64_pvt *pvt;
u64 cc6_base, tmp_addr;
u32 tmp;
-   u8 mce_nid, intlv_en;
+   u16 mce_nid;
+   u8 intlv_en;
 
if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7)
return addr;
@@ -1499,7 +1500,7 @@ static int f1x_match_to_this_node(struct amd64_pvt *pvt, 
unsigned range,
u8 channel;
bool high_range = false;
 
-   u8 node_id= dram_dst_node(pvt, range);
+   u16 node_id   = dram_dst_node(pvt, range);
u8 intlv_en   = dram_intlv_en(pvt, range);
u32 intlv_sel = dram_intlv_sel(pvt, range);
 
@@ -2306,7 +2307,7 @@ out:
return ret;
 }
 
-static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on)
+static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on)
 {
cpumask_var_t cmask;
int cpu;
@@ -2344,7 +2345,7 @@ static int toggle_ecc_err_reporting(struct ecc_settings 
*s, u8 nid, bool on)
return 0;
 }
 
-static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid,
+static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid,
   struct pci_dev *F3)
 {
bool ret = true;
@@ -2396,7 +2397,7 @@ static bool enable_ecc_error_reporting(struct 
ecc_settings *s, u8 nid,
return ret;
 }
 
-static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid,
+static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid,
struct pci_dev *F3)
 {
u32 value, mask = 0x3;  /* UECC/CECC enable */
@@ -2435,7 +2436,7 @@ static const char *ecc_msg =
"'ecc_enable_override'.\n"
" (Note that use of the override may cause unknown side effects.)\n";
 
-static bool ecc_enabled(struct pci_dev *F3, u8 nid)
+static bool ecc_enabled(struct pci_dev *F3, u16 nid)
 {
u32 value;
u8 ecc_en = 0;
@@ -2556,7 +2557,7 @@ static int amd64_init_one_instance(struct pci_dev *F2)
struct mem_ctl_info *mci = NULL;
struct edac_mc_layer layers[2];
int err = 0, ret;
-   u8 nid = get_node_id(F2);
+   u16 nid = get_node_id(F2);
 
ret = -ENOMEM;
pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
@@ -2647,7 +2648,7 @@ err_ret:
 static int __devinit amd64_probe_one_instance(struct pci_dev *pdev,
 const struct pci_device_id 
*mc_type)
 {
-   u8 nid = get_node_id(pdev);
+   u16 nid = get_node_id(pdev);
struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
struct ecc_settings *s;
int ret = 0;
@@ -2697,7 +2698,7 @@ static void __devexit amd64_remove_one_instance(struct 
pci_dev *pdev)
 {
struct mem_ctl_info *mci;
struct amd64_pvt *pvt;
-   u8 nid = get_node_id(pdev);
+   u16 nid = get_node_id(pdev);
struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
struct ecc_settings *s = ecc_stngs[nid];
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Fix printing when no interrupt is allocated

2012-10-31 Thread Daniel J Blueman

Hi Len,

On 19/10/2012 09:21, Joe Perches wrote:

On Fri, 2012-10-19 at 08:33 +0800, Daniel J Blueman wrote:

Previously a new line is implicitly added in the no GSI case:

[7.185182] pci 0001:00:12.0: can't derive routing for PCI INT A
[7.191352] pci 0001:00:12.0: PCI INT A: no GSI
[7.195956]  - using ISA IRQ 10

The code thus prints a blank line where no legacy IRQ is available:

[1.650124] pci :00:14.0: can't derive routing for PCI INT A
[1.650126] pci :00:14.0: PCI INT A: no GSI
[1.650126]
[1.650180] pci :00:14.0: can't derive routing for PCI INT A

Fix this by making the newline explicit and removing the superfluous
one.


I think this is a better fix:

  drivers/acpi/pci_irq.c | 11 ++-
  1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index 0eefa12..9b98f9f 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -459,19 +459,20 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
 */
if (gsi < 0) {
u32 dev_gsi;
-   dev_warn(&dev->dev, "PCI INT %c: no GSI", pin_name(pin));
/* Interrupt Line values above 0xF are forbidden */
if (dev->irq > 0 && (dev->irq <= 0xF) &&
(acpi_isa_irq_to_gsi(dev->irq, &dev_gsi) == 0)) {
-   printk(" - using ISA IRQ %d\n", dev->irq);
+   dev_warn(&dev->dev,
+"PCI INT %c: no GSI - using ISA IRQ %d\n",
+pin_name(pin), dev->irq);
acpi_register_gsi(&dev->dev, dev_gsi,
  ACPI_LEVEL_SENSITIVE,
  ACPI_ACTIVE_LOW);
-   return 0;
} else {
-   printk("\n");
-   return 0;
+   dev_warn(&dev->dev, "PCI INT %c: no GSI\n",
+pin_name(pin));
}
+   return 0;
}

rc = acpi_register_gsi(&dev->dev, gsi, triggering, polarity);


We're still seeing this in 3.7-rc3. Any preference on the approach here?

Thanks,
  Daniel
--
Daniel J Blueman
Principal Software Engineer, Numascale Asia
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


BCM57765 card reader: irq nobody cared

2013-07-12 Thread Daniel J Blueman
With the Broadcom BCM57765 card reader [1] in my Macbook Pro 10,1 (Mid
2012), we see the sdhci-pci interrupt handler not claim the interrupt
generated when the module initialises [2].

Beyond the MMC subsystem output, what other data may be useful in
diagnosing this?

Many thanks,
  Daniel

--- [1]

$ sudo lspci -s 03:00.1 -v
03:00.1 SD Host controller: Broadcom Corporation NetXtreme BCM57765
Memory Card Reader (rev 10) (prog-if 01)
Subsystem: Broadcom Corporation Device 96bc
Flags: bus master, fast devsel, latency 0, IRQ 17
Memory at c182 (64-bit, prefetchable) [size=64K]
Capabilities: [48] Power Management version 3
Capabilities: [58] MSI: Enable- Count=1/1 Maskable- 64bit+
Capabilities: [ac] Express Endpoint, MSI 00
Capabilities: [100] Advanced Error Reporting
Capabilities: [150] Power Budgeting 
Capabilities: [160] Virtual Channel
Kernel driver in use: sdhci-pci

--- [2]

sdhci: Secure Digital Host Controller Interface driver
sdhci: Copyright(c) Pierre Ossman
sdhci-pci :03:00.1: SDHCI controller found [14e4:16bc] (rev 10)
mmc0: no vqmmc regulator found
mmc0: no vmmc regulator found
mmc0: SDHCI controller on PCI [:03:00.1] using ADMA
[...]
irq 17: nobody cared (try booting with the "irqpoll" option)
CPU: 0 PID: 0 Comm: swapper/0 Tainted: GW  3.10.0-031000-generic
#201306301935
Hardware name: Apple Inc. MacBookPro10,1/Mac-C3EC7CD22292981F, BIOS
MBP101.88Z.00EE.B02.1208081132 08/08/2012
 8802635cd89c 88026f203e48 81703460 88026f203e78
 810f0d6d 00029d3daf00 8802635cd800 0011
  88026f203ea8 810f1195 0011
Call Trace:
  [] dump_stack+0x19/0x1b
 [] __report_bad_irq+0x3d/0xe0
 [] note_interrupt+0x135/0x190
 [] handle_irq_event_percpu+0xa9/0x210
 [] ? sched_clock+0x9/0x10
 [] handle_irq_event+0x4e/0x80
 [] handle_fasteoi_irq+0x64/0x120
 [] handle_irq+0x22/0x40
 [] do_IRQ+0x5a/0xe0
 [] common_interrupt+0x6d/0x6d
  [] ? rcu_eqs_enter_common.isra.48+0x43/0x100
 [] ? cpuidle_enter_state+0x61/0xe0
 [] ? cpuidle_enter_state+0x57/0xe0
 [] cpuidle_idle_call+0xc0/0x220
 [] arch_cpu_idle+0xe/0x30
 [] cpu_idle_loop+0x7e/0x250
 [] cpu_startup_entry+0x6b/0x70
 [] rest_init+0x77/0x80
 [] start_kernel+0x40c/0x419
 [] ? do_early_param+0x87/0x87
 [] ? early_idt_handlers+0x120/0x120
 [] x86_64_start_reservations+0x2a/0x2c
 [] x86_64_start_kernel+0xf3/0x102
handlers:
[] sdhci_irq [sdhci]
Disabling IRQ #17
--
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] thp: support split page table lock

2013-09-08 Thread Daniel J Blueman

On Saturday, 7 September 2013 02:10:02 UTC+8, Naoya Horiguchi  wrote:

Hi Alex,

On Fri, Sep 06, 2013 at 11:04:23AM -0500, Alex Thorlton wrote:
> On Thu, Sep 05, 2013 at 05:27:46PM -0400, Naoya Horiguchi wrote:
> > Thp related code also uses per process mm->page_table_lock now.
> > So making it fine-grained can provide better performance.
> >
> > This patch makes thp support split page table lock by using page->ptl
> > of the pages storing "pmd_trans_huge" pmds.
> >
> > Some functions like pmd_trans_huge_lock() and page_check_address_pmd()
> > are expected by their caller to pass back the pointer of ptl, so this
> > patch adds to those functions new arguments for that. Rather than that,
> > this patch gives only straightforward replacement.
> >
> > ChangeLog v3:
> >  - fixed argument of huge_pmd_lockptr() in copy_huge_pmd()
> >  - added missing declaration of ptl in do_huge_pmd_anonymous_page()
>
> I've applied these and tested them using the same test program that I
> used when I was working on the same issue, and I'm running into some
> bugs.  Here's a stack trace:

Thank you for helping testing. This bug is new to me.


With 3.11, this patch series and CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS, I 
consistently hit the same failure when exiting one of my stress-testers 
[1] when using eg 24 cores.


Doesn't happen with 8 cores, so likely needs enough virtual memory to 
use multiple split locks. Otherwise, this is very promising work!


[1] http://quora.org/2013/fft3d.c
--
Daniel J Blueman
Principal Software Engineer, Numascale
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: BCM57765: timeout waiting for hardware interrupt

2013-09-03 Thread Daniel J Blueman
On 3 September 2013 19:04, Chris Ball  wrote:
> On Tue, Sep 03 2013, Daniel J Blueman wrote:
>> Please let me know if there's a better vector for reporting and
>> looking into this issue, if you can.
>
> Do you know whether it's ever worked on this hardware?  If so, could
> you try bisecting to find the first bad commit?

Yes; the card reader works when an ethernet cable is plugged into the
first PCI function on the chip, the Broadcom NIC (see comment #3):
https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1067222

Google also use this in one of their Chromebook models:
https://groups.google.com/a/chromium.org/forum/#!msg/chromium-os-reviews/nwFj3KVQy_Y/FOckx1trSkUJ

> If not, I suspect it's going to be very difficult to debug/fix
> remotely, and we need to wait for someone who has the hardware and
> wants to create the fix.  (Or get the hardware into the hands of
> someone who's willing to take a look.)

Got hardware and willing to take a look; just was wanting some tips on
where to look with your experience of SDHCI/MMC.

Doing some further debugging, the card reader's parent device (the
NIC) is in D3 [1] while the card reader is in D0. I'll add the missing
device IDs to the tg3 driver and force it to bring the NIC into D0 and
see what happens.

Daniel

--- [1]

$ sudo lspci -s 3:0 -vv
03:00.0 Ethernet controller: Broadcom Corporation Device 16a3 (rev 10)
Subsystem: Broadcom Corporation Device 16b4
Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop-
ParErr- Stepping- SERR- FastB2B- DisINTx-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort-
SERR- 
Capabilities: [160 v1] Virtual Channel
Caps:LPEVC=0 RefClk=100ns PATEntryBits=1
Arb:Fixed- WRR32- WRR64- WRR128-
Ctrl:ArbSelect=Fixed
Status:InProgress-
VC0:Caps:PATOffset=00 MaxTimeSlots=1 RejSnoopTrans-
Arb:Fixed- WRR32- WRR64- WRR128- TWRR128- WRR256-
Ctrl:Enable+ ID=0 ArbSelect=Fixed TC/VC=ff
Status:NegoPending- InProgress-

03:00.1 SD Host controller: Broadcom Corporation NetXtreme BCM57765
Memory Card Reader (rev 10) (prog-if 01)
Subsystem: Broadcom Corporation Device 96bc
Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop-
ParErr- Stepping- SERR- FastB2B- DisINTx-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort-
SERR- 
Capabilities: [160 v1] Virtual Channel
Caps:LPEVC=0 RefClk=100ns PATEntryBits=1
Arb:Fixed- WRR32- WRR64- WRR128-
Ctrl:ArbSelect=Fixed
Status:InProgress-
VC0:Caps:PATOffset=00 MaxTimeSlots=1 RejSnoopTrans-
Arb:Fixed- WRR32- WRR64- WRR128- TWRR128- WRR256-
Ctrl:Enable+ ID=0 ArbSelect=Fixed TC/VC=ff
Status:NegoPending- InProgress-
Kernel driver in use: sdhci-pci
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [3.6-rc7] switcheroo race with Intel HDA...

2012-10-08 Thread Daniel J Blueman
On 8 October 2012 20:58, Takashi Iwai  wrote:
> At Tue, 25 Sep 2012 13:20:05 +0800,
> Daniel J Blueman wrote:
>> On my Macbook with a discrete Nvidia GPU, there is a race between
>> selecting the integrated GPU and putting the discrete GPU into D3 [1],
>> reliably causing a kernel oops [2].
>>
>> Introducing a delay of ~1s between the calls prevents this. When the
>> second 'OFF' write path executes, it looks like struct azx at
>> card->private_data hasn't been allocated yet [3], so there is
>> likely some locking missing.
>
> It's rather pci_get_drvdata() returning NULL (i.e. card is NULL, thus
> card->private_data causes Oops).  Could you check the patch like below
> and see whether you get a kernel warning (but no Oops) or the problem
> gets fixed by shifting the assignment of pci drvdata?
[...]

Good patching. Calling pci_set_drvdata later prevents the oops in HDA,
though we see unexpected 0x0 responses in the response ring buffer
[1], which we don't see when there's a >~1.5s delay between IGD and
OFF.

Thanks,
  Daniel

--- [1]

snd_hda_intel 0000:00:1b.0: enabling device (0000 -> 0002)
snd_hda_intel 0000:00:1b.0: irq 55 for MSI/MSI-X
vga_switcheroo: enabled
input: HDA Intel PCH Headphone as
/devices/pci0000:00/0000:00:1b.0/sound/card0/input11
snd_hda_intel 0000:01:00.1: enabling device (0000 -> 0002)
{echo IGD >/sys/kernel/debug/vgaswitcheroo/switch}
{echo OFF >/sys/kernel/debug/vgaswitcheroo/switch}
hda_intel: Disabling MSI
hda-intel: 0000:01:00.1: Handle VGA-switcheroo audio client
hda-intel: Disabling 0000:01:00.1 via VGA-switcheroo
VGA switcheroo: switched nouveau off
[drm] nouveau 0000:01:00.0: Disabling display...
[drm] nouveau 0000:01:00.0: Disabling fbcon...
[drm] nouveau 0000:01:00.0: Unpinning framebuffer(s)...
[drm] nouveau 0000:01:00.0: Evicting buffers...
[drm] nouveau 0000:01:00.0: Idling channels...
[drm] nouveau 0000:01:00.0: Suspending GPU objects...
[drm] nouveau 0000:01:00.0: And we're gone!
hda-intel: spurious response 0x0:0x0, last cmd=0x1f0004
{repeats 220 times}
hda-intel: spurious response 0x0:0x0, last cmd=0x1f0004
HDMI: failed to get afg sub nodes
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [3.6-rc7] switcheroo race with Intel HDA...

2012-10-09 Thread Daniel J Blueman
On 9 October 2012 18:07, Takashi Iwai  wrote:
> At Tue, 09 Oct 2012 12:04:08 +0200,
> Takashi Iwai wrote:
>>
>> At Tue, 9 Oct 2012 00:34:09 +0800,
>> Daniel J Blueman wrote:
>> >
>> > On 8 October 2012 20:58, Takashi Iwai  wrote:
>> > > At Tue, 25 Sep 2012 13:20:05 +0800,
>> > > Daniel J Blueman wrote:
>> > >> On my Macbook with a discrete Nvidia GPU, there is a race between
>> > >> selecting the integrated GPU and putting the discrete GPU into D3 [1],
>> > >> reliably causing a kernel oops [2].
>> > >>
>> > >> Introducing a delay of ~1s between the calls prevents this. When the
>> > >> second 'OFF' write path executes, it looks like struct azx at
>> > >> card->private_data hasn't yet been allocated yet [3], so there is
>> > >> likely some locking missing.
>> > >
>> > > It's rather pci_get_drvdata() returning NULL (i.e. card is NULL, thus
>> > > card->private_data causes Oops).  Could you check the patch like below
>> > > and see whether you get a kernel warning (but no Oops) or the problem
>> > > gets fixed by shifting the assignment of pci drvdata?
>> > [...]
>> >
>> > Good patching. Calling pci_set_drvdata later prevents the oops in HDA,
>> > though we see unexpected 0x0 responses in the response ring buffer
>> > [1], which we don't see when there's a >~1.5s delay between IGD and
>> > OFF.
>>
>> If the previous patch fixed it, it means that the switching occurred
>> while the device was being probed.  Maybe a better approach is to
>> register the VGA switcheroo after the proper initialization.
>>
>> The patch below is a revised one.  Please give it a try.
>
> Also, it's not clear which card spews the spurious response.
> Apply the patch below in addition.
[...]

hda-intel: :01:00.1: spurious response 0x0:0x0, last cmd=0x1f0004
$ lspci -s :1:0.1
01:00.1 Audio device: NVIDIA Corporation Device 0e1b (rev ff)

It's the NVIDIA device which presumably hasn't completed its
transition to D3 at the time the OFF is executed.

Thanks,
  Daniel
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [3.6-rc7] switcheroo race with Intel HDA...

2012-10-09 Thread Daniel J Blueman
On 9 October 2012 21:04, Takashi Iwai  wrote:
> At Tue, 9 Oct 2012 19:23:56 +0800,
> Daniel J Blueman wrote:
>> On 9 October 2012 18:07, Takashi Iwai  wrote:
>> > At Tue, 09 Oct 2012 12:04:08 +0200,
>> > Takashi Iwai wrote:
>> >> At Tue, 9 Oct 2012 00:34:09 +0800,
>> >> Daniel J Blueman wrote:
>> >> > On 8 October 2012 20:58, Takashi Iwai  wrote:
>> >> > > At Tue, 25 Sep 2012 13:20:05 +0800,
>> >> > > Daniel J Blueman wrote:
>> >> > >> On my Macbook with a discrete Nvidia GPU, there is a race between
>> >> > >> selecting the integrated GPU and putting the discrete GPU into D3 
>> >> > >> [1],
>> >> > >> reliably causing a kernel oops [2].
>> >> > >>
>> >> > >> Introducing a delay of ~1s between the calls prevents this. When the
>> >> > >> second 'OFF' write path executes, it looks like struct azx at
>> >> > >> card->private_data hasn't yet been allocated yet [3], so there is
>> >> > >> likely some locking missing.
>> >> > >
>> >> > > It's rather pci_get_drvdata() returning NULL (i.e. card is NULL, thus
>> >> > > card->private_data causes Oops).  Could you check the patch like below
>> >> > > and see whether you get a kernel warning (but no Oops) or the problem
>> >> > > gets fixed by shifting the assignment of pci drvdata?
>> >> > [...]
>> >> >
>> >> > Good patching. Calling pci_set_drvdata later prevents the oops in HDA,
>> >> > though we see unexpected 0x0 responses in the response ring buffer
>> >> > [1], which we don't see when there's a >~1.5s delay between IGD and
>> >> > OFF.
>> >>
>> >> If the previous patch fixed, it means that the switching occurred
>> >> during the device was being probed.  Maybe a better approach to
>> >> register the VGA switcheroo after the proper initialization.
>> >>
>> >> The patch below is a revised one.  Please give it a try.
>> >
>> > Also, it's not clear which card spews the spurious response.
>> > Apply the patch below in addition.
>> [...]
>>
>> hda-intel: :01:00.1: spurious response 0x0:0x0, last cmd=0x1f0004
>> $ lspci -s :1:0.1
>> 01:00.1 Audio device: NVIDIA Corporation Device 0e1b (rev ff)
>>
>> It's the NVIDIA device which presumably hasn't completed it's
>> transition to D3 at the time the OFF is executed.
>
> OK, then could you try the patch below on the top of previous two
> patches?

The first IGD switcheroo command fails to switch to the integrated GPU:

# cat /sys/kernel/debug/vgaswitcheroo/switch
0:DIS:+:Pwr::01:00.0
1:IGD: :Pwr::00:02.0
2:DIS-Audio: :Pwr::01:00.1
# echo IGD >/sys/kernel/debug/vgaswitcheroo/switch
vga_switcheroo: client 1 refused switch

I also instrumented snd_hda_lock_devices, but none of the failure
paths are being taken, which would leave inconsistent state, as the
return value isn't checked.

Thanks,
  Daniel
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: switcheroo registration vs switching race...

2012-12-03 Thread Daniel J Blueman
On 3 December 2012 19:17, Takashi Iwai  wrote:
> At Wed, 28 Nov 2012 09:45:39 +0100,
> Takashi Iwai wrote:
>>
>> At Wed, 28 Nov 2012 11:45:07 +0800,
>> Daniel J Blueman wrote:
>> >
>> > Hi Seth, Dave, Takashi,
>> >
>> > If I power down the unused discrete GPU before lightdm starts by
>> > fiddling with the sysfs file [1] in the upstart script, I see a race
>> > manifesting as the discrete GPU's HDA controller timing out to
>> > commands [2].
>> >
>> > Adding some debug, I see that the registered audio devices are put
>> > into D3 before the GPU is, but it turns out that the discrete (and
>> > internal) GPU's HDA controller gets registered a bit later, so the
>> > list is empty. The symptom arises since the HDA driver is talking to
>> > hardware which is now in D3.
>> >
>> > We could add a mutex to nouveau to allow us to wait for the DGPU HDA
>> > controller, but perhaps this should be solved at a higher level in the
>> > vgaswitcheroo code; what do you think?
>>
>> Maybe it's a side effect for the recent effort to fix another race in
>> the probe.  Part of the problem is that the registration is done at
>> the very end of probing.
>>
>> Instead of delaying the registration, how about the patch below?
>
> Ping.  If this really works, I'd like to queue it for 3.8 merge, at
> least...

Ping ack; I was trying to find time to understand another race that
occurs with GPU probing after switching, but is separate from the
situation before switching, here.

In the context of writing the switch, it looks like struct azx isn't
allocated by the time azx_vs_set_state accesses it [1,2]; racing with
azx_codec_create?

The full dmesg output is at: http://quora.org/2012/hda-switch-oops.txt

Thanks,
  Daniel

--- [1]

BUG: unable to handle kernel NULL pointer dereference at 0170
IP: [] azx_vs_set_state+0x26/0x1a0 [snd_hda_intel]
PGD 26323d067 PUD 264f58067 PMD 0
Oops:  [#1] SMP
Modules linked in: snd_hda_codec_hdmi snd_hda_codec_cirrus rfcomm bnep
nls_iso8859_1 joydev hid_apple bcm5974 nouveau coretemp kvm_intel b43
kvm uvcvideo videobuf2_core videobuf2_vmalloc videobuf2_memops
ghash_clmulni_intel smsc75xx usbnet mii ttm snd_hda_intel(+)
snd_hda_codec snd_hwdep ssb i915 snd_pcm mxm_wmi snd_timer apple_gmux
applesmc mei lpc_ich microcode hwmon mfd_core input_polldev bcma snd
drm_kms_helper snd_page_alloc video apple_bl sdhci_pci sdhci mmc_core
CPU 1
Pid: 967, comm: sh Not tainted 3.7.0-rc7-expert+ #8 Apple Inc.
MacBookPro10,1/Mac-C3EC7CD22292981F
RIP: 0010:[] []
azx_vs_set_state+0x26/0x1a0 [snd_hda_intel]
RSP: 0018:88025198de48 EFLAGS: 00010286
RAX:  RBX: 880251960a00 RCX: 
RDX:  RSI:  RDI: 880265b41098
RBP: 88025198de68 R08: 0003 R09: 1000
R10: 7fffe481b730 R11: 0246 R12: 880265b41098
R13:  R14: 88025198df50 R15: 
FS: 7f4961480700() GS:88026f24() knlGS:
CS: 0010 DS:  ES:  CR0: 80050033
CR2: 0170 CR3: 000263cd3000 CR4: 001407e0
DR0:  DR1:  DR2: 
DR3:  DR6: 0ff0 DR7: 0400
Process sh (pid: 967, threadinfo 88025198c000, task 88025d635820)
Stack:
 88025d635820 880251960a00  88025198de98
 88025198de88 812b8e77 880263ef1740 0004
 88025198def8 812b947c 88020a46464f 81107982
Call Trace:
 [] set_audio_state+0x67/0x70
 [] vga_switcheroo_debugfs_write+0xbc/0x380
 [] ? __alloc_fd+0x42/0x110
 [] ? __fd_install+0x29/0x60
 [] vfs_write+0xa3/0x160
 [] sys_write+0x4d/0xa0
 [] ? do_page_fault+0x9/0x10
 [] system_call_fastpath+0x1a/0x1f
Code: 00 00 00 00 00 55 48 89 e5 48 83 ec 20 4c 89 65 f0 4c 8d a7 98
00 00 00 4c 89 e7 48 89 5d e8 4c 89 6d f8 41 89 f5 e8 fa a4 0d e1 <48>
8b 98 70 01 00 00 0f b6 83 dd 01 00 00 a8 10 75 34 45 85 ed
RIP [] azx_vs_set_state+0x26/0x1a0 [snd_hda_intel]
 RSP 
CR2: 0170

--- [2]

$ gdb ./sound/pci/hda/snd-hda-intel.ko
(gdb) list *(azx_vs_set_state+0x26)
0x3036 is in azx_vs_set_state (sound/pci/hda/hda_intel.c:2628).
2623
2624static void azx_vs_set_state(struct pci_dev *pci,
2625         enum vga_switcheroo_state state)
2626{
2627struct snd_card *card = pci_get_drvdata(pci);
2628struct azx *chip = card->private_data;
2629bool disabled;
2630
2631if (chip->init_failed)
2632return;
--
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: switcheroo registration vs switching race...

2012-12-03 Thread Daniel J Blueman
On 3 December 2012 22:40, Takashi Iwai  wrote:
> At Mon, 3 Dec 2012 22:25:52 +0800,
> Daniel J Blueman wrote:
>>
>> On 3 December 2012 19:17, Takashi Iwai  wrote:
>> > At Wed, 28 Nov 2012 09:45:39 +0100,
>> > Takashi Iwai wrote:
>> >>
>> >> At Wed, 28 Nov 2012 11:45:07 +0800,
>> >> Daniel J Blueman wrote:
>> >> >
>> >> > Hi Seth, Dave, Takashi,
>> >> >
>> >> > If I power down the unused discrete GPU before lightdm starts by
>> >> > fiddling with the sysfs file [1] in the upstart script, I see a race
>> >> > manifesting as the discrete GPU's HDA controller timing out to
>> >> > commands [2].
>> >> >
>> >> > Adding some debug, I see that the registered audio devices are put
>> >> > into D3 before the GPU is, but it turns out that the discrete (and
>> >> > internal) GPU's HDA controller gets registered a bit later, so the
>> >> > list is empty. The symptom is since the HDA driver it's talking to
>> >> > hardware which is now in D3.
>> >> >
>> >> > We could add a mutex to nouveau to allow us to wait for the DGPU HDA
>> >> > controller, but perhaps this should be solved at a higher level in the
>> >> > vgaswitcheroo code; what do you think?
>> >>
>> >> Maybe it's a side effect for the recent effort to fix another race in
>> >> the probe.  A part of them problem is that the registration is done at
>> >> the very last of probing.
>> >>
>> >> Instead of delaying the registration, how about the patch below?
>> >
>> > Ping.  If this really works, I'd like to queue it for 3.8 merge, at
>> > least...
>>
>> Ping ack; I was trying to find time to understand another race that
>> occurs with GPU probing after switching, but is separate from the
>> situation before switching, here.
>>
>> In the context of writing the switch, it looks like struct azx isn't
>> allocated by the time azx_vs_set_state accesses it [1,2]; racing with
>> azx_codec_create?
>
> It was allocated, but it wasn't assigned properly in pci drvdata.
>
> Below is the revised patch.  Just moved pci_set_drvdata() before
> register_vga_switcheroo().  Could you retest with it?

Superb; this addresses the oops.

~1 second after the DGPU is put into D3, I still often see "hda-intel:
spurious response 0x0:0x0, last cmd=0x170500":
http://quora.org/2012/hda-switch-spurious.txt

Presumably this implies the read of the ring-buffer pointer returned
0x, so the HDA driver understands the pointer to have wrapped
and processes the 191 unwritten entries?

Daniel
--
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: switcheroo registration vs switching race...

2012-12-03 Thread Daniel J Blueman
On 4 December 2012 00:23, Takashi Iwai  wrote:
> At Mon, 3 Dec 2012 23:08:28 +0800,
> Daniel J Blueman wrote:
>>
>> On 3 December 2012 22:40, Takashi Iwai  wrote:
>> > At Mon, 3 Dec 2012 22:25:52 +0800,
>> > Daniel J Blueman wrote:
>> >>
>> >> On 3 December 2012 19:17, Takashi Iwai  wrote:
>> >> > At Wed, 28 Nov 2012 09:45:39 +0100,
>> >> > Takashi Iwai wrote:
>> >> >>
>> >> >> At Wed, 28 Nov 2012 11:45:07 +0800,
>> >> >> Daniel J Blueman wrote:
>> >> >> >
>> >> >> > Hi Seth, Dave, Takashi,
>> >> >> >
>> >> >> > If I power down the unused discrete GPU before lightdm starts by
>> >> >> > fiddling with the sysfs file [1] in the upstart script, I see a race
>> >> >> > manifesting as the discrete GPU's HDA controller timing out to
>> >> >> > commands [2].
>> >> >> >
>> >> >> > Adding some debug, I see that the registered audio devices are put
>> >> >> > into D3 before the GPU is, but it turns out that the discrete (and
>> >> >> > internal) GPU's HDA controller gets registered a bit later, so the
>> >> >> > list is empty. The symptom is since the HDA driver it's talking to
>> >> >> > hardware which is now in D3.
>> >> >> >
>> >> >> > We could add a mutex to nouveau to allow us to wait for the DGPU HDA
>> >> >> > controller, but perhaps this should be solved at a higher level in 
>> >> >> > the
>> >> >> > vgaswitcheroo code; what do you think?
>> >> >>
>> >> >> Maybe it's a side effect for the recent effort to fix another race in
>> >> >> the probe.  A part of them problem is that the registration is done at
>> >> >> the very last of probing.
>> >> >>
>> >> >> Instead of delaying the registration, how about the patch below?
>> >> >
>> >> > Ping.  If this really works, I'd like to queue it for 3.8 merge, at
>> >> > least...
>> >>
>> >> Ping ack; I was trying to find time to understand another race that
>> >> occurs with GPU probing after switching, but is separate from the
>> >> situation before switching, here.
>> >>
>> >> In the context of writing the switch, it looks like struct azx isn't
>> >> allocated by the time azx_vs_set_state accesses it [1,2]; racing with
>> >> azx_codec_create?
>> >
>> > It was allocated, but it wasn't assigned properly in pci drvdata.
>> >
>> > Below is the revised patch.  Just moved pci_set_drvdata() before
>> > register_vga_switcheroo().  Could you retest with it?
>>
>> Superb; this addresses the oops.
>
> OK, I'll queue it to sound tree for 3.8 kernel with Cc to stable.
>
>> ~1 second after the DGPU is put into D3, I still often see "hda-intel:
>> spurious response 0x0:0x0, last cmd=0x170500":
>> http://quora.org/2012/hda-switch-spurious.txt
>
> Hm, it's not clear who triggers these messages.  I'll try to check the
> code paths.
>
>> Presumably this implies the read of the ring-buffer pointer returned
>> 0x, so the HDA driver understands the pointer to have wrapped
>> and processes the 191 unwritten entries?
>
> Good point.  Actually there is one bug that looks obviously wrong
> (writing 32bit value to CORBWP).  Maybe it has been working just
> because writing CORBRP doesn't influence except for the reset bit.
>
> Reading CORBWP as a byte is OK, but this could be better in a word so
> that we can check 0x as invalid.
>
> A test patch is below.  Hopefully this improves the situation...

I'll check this out tomorrow and also instrument the code to get a
backtrace, since there may still be an underlying race with the
previous patches:

[8.203827] snd_hda_intel :00:1b.0: enabling device ( -> 0002)
[8.203936] snd_hda_intel :00:1b.0: irq 51 for MSI/MSI-X
[   10.981297] VGA switcheroo: switched nouveau off
[   10.981383] nouveau  [ DRM] suspending fbcon...
[   10.981388] nouveau  [ DRM] suspending display...
[   10.981687] nouveau  [ DRM] unpinning framebuffer(s)...
[   10.981825] nouveau  [ DRM] evicting buffers...
[   10.992948] nouveau  [ DRM] suspending client object trees...
[   11.310697] hda-intel: azx_get_response timeout, switching to
polling mode: l

Re: [PATCH 4/4 v8] AMD64 EDAC: Fix type usage in NB IDs and memory ranges

2012-12-04 Thread Daniel J Blueman

On 01/12/2012 01:17, Borislav Petkov wrote:

On Fri, Nov 30, 2012 at 04:44:20PM +0800, Daniel J Blueman wrote:

Use appropriate types for northbridge IDs and memory ranges. Mark immutable
data const and keep within compilation unit on related structures. Tested on
multi-socket server and multi-server, multi-socket NumaConnect setup.

v7: Refactor patches grouping changes
v8: Drop unneeded change; use const and static where appropriate

Signed-off-by: Daniel J Blueman 
---
  arch/x86/include/asm/amd_nb.h |2 +-
  drivers/edac/amd64_edac.c |   26 +-
  drivers/edac/amd64_edac.h |6 +++---
  3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 417eb24..d2e703b 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -76,7 +76,7 @@ static inline bool amd_nb_has_feature(unsigned feature)
return ((amd_northbridges.flags & feature) == feature);
  }

-static inline struct amd_northbridge *node_to_amd_nb(int node)
+static inline struct amd_northbridge *node_to_amd_nb(u16 node)


I'm dropping this change from this patch because if we go with it,
we'll have to change all callsites of node_to_amd_nb which would cause
unnecessary churn.



So, I've applied the final patchset and uploaded a branch here:

git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git numascale

Please give it a run on both configurations and let me know if something
is still amiss.


It works well on fam10h and fam15h boxes, with and without Numaconnect.

Thanks,
  Daniel
--
Daniel J Blueman
Principal Software Engineer, Numascale Asia
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v3] Add NumaChip remote PCI support

2012-12-04 Thread Daniel J Blueman
Add a NumaChip-specific PCI access mechanism via MMCONFIG cycles, while
preventing access to AMD Northbridges, which shouldn't respond.

v2: Use PCI_DEVFN in precomputed constant limit; drop unneeded includes
v3: Express dependency on MMCONFIG

Signed-off-by: Daniel J Blueman 
---
 arch/x86/Kconfig |2 +
 arch/x86/include/asm/numachip/numachip.h |   20 +
 arch/x86/kernel/apic/apic_numachip.c |2 +
 arch/x86/pci/Makefile|1 +
 arch/x86/pci/numachip.c  |  129 ++
 5 files changed, 154 insertions(+)
 create mode 100644 arch/x86/include/asm/numachip/numachip.h
 create mode 100644 arch/x86/pci/numachip.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 46c3bff..50e8700 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -374,6 +374,7 @@ config X86_NUMACHIP
depends on NUMA
depends on SMP
depends on X86_X2APIC
+   depends on PCI_MMCONFIG
---help---
  Adds support for Numascale NumaChip large-SMP systems. Needed to
  enable more than ~168 cores.
diff --git a/arch/x86/include/asm/numachip/numachip.h 
b/arch/x86/include/asm/numachip/numachip.h
new file mode 100644
index 000..fe7f60c
--- /dev/null
+++ b/arch/x86/include/asm/numachip/numachip.h
@@ -0,0 +1,20 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Numascale NumaConnect-specific header file
+ *
+ * Copyright (C) 2012 Numascale AS. All rights reserved.
+ *
+ * Send feedback to 
+ *
+ */
+
+#ifndef _ASM_X86_NUMACHIP_NUMACHIP_H
+#define _ASM_X86_NUMACHIP_NUMACHIP_H
+
+extern int __init pci_numachip_init(void);
+
+#endif /* _ASM_X86_NUMACHIP_NUMACHIP_H */
+
diff --git a/arch/x86/kernel/apic/apic_numachip.c 
b/arch/x86/kernel/apic/apic_numachip.c
index a65829a..9c2aa89 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 #include 
@@ -179,6 +180,7 @@ static int __init numachip_system_init(void)
return 0;
 
x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
+   x86_init.pci.arch_init = pci_numachip_init;
 
map_csrs();
 
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index 3af5a1e..ee0af58 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_STA2X11)   += sta2x11-fixup.o
 obj-$(CONFIG_X86_VISWS)+= visws.o
 
 obj-$(CONFIG_X86_NUMAQ)+= numaq_32.o
+obj-$(CONFIG_X86_NUMACHIP) += numachip.o
 
 obj-$(CONFIG_X86_INTEL_MID)+= mrst.o
 
diff --git a/arch/x86/pci/numachip.c b/arch/x86/pci/numachip.c
new file mode 100644
index 000..7307d9d
--- /dev/null
+++ b/arch/x86/pci/numachip.c
@@ -0,0 +1,129 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Numascale NumaConnect-specific PCI code
+ *
+ * Copyright (C) 2012 Numascale AS. All rights reserved.
+ *
+ * Send feedback to 
+ *
+ * PCI accessor functions derived from mmconfig_64.c
+ *
+ */
+
+#include 
+#include 
+
+static u8 limit __read_mostly;
+
+static inline char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, 
unsigned int devfn)
+{
+   struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus);
+
+   if (cfg && cfg->virt)
+   return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12));
+   return NULL;
+}
+
+static int pci_mmcfg_read_numachip(unsigned int seg, unsigned int bus,
+ unsigned int devfn, int reg, int len, u32 *value)
+{
+   char __iomem *addr;
+
+   /* Why do we have this when nobody checks it. How about a BUG()!? -AK */
+   if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) {
+err:   *value = -1;
+   return -EINVAL;
+   }
+
+   /* Ensure AMD Northbridges don't decode reads to other devices */
+   if (unlikely(bus == 0 && devfn >= limit)) {
+   *value = -1;
+   return 0;
+   }
+
+   rcu_read_lock();
+   addr = pci_dev_base(seg, bus, devfn);
+   if (!addr) {
+   rcu_read_unlock();
+   goto err;
+   }
+
+   switch (len) {
+   case 1:
+   *value = mmio_config_readb(addr + reg);
+   break;
+   case 2:
+   *value = mmio_config_readw(addr + reg);
+   break;
+   case 4:
+   *value = mmio_config_readl(addr + reg);
+   break;
+   }
+   rcu_read_unlock();
+
+   return 0;
+}
+
+static int pci_mmcfg_write_numachip(unsigned int seg, unsigned int bus,
+  unsigned

Re: switcheroo registration vs switching race...

2012-12-04 Thread Daniel J Blueman
On 4 December 2012 01:10, Takashi Iwai  wrote:
> At Tue, 4 Dec 2012 00:50:56 +0800,
> Daniel J Blueman wrote:
>>
>> On 4 December 2012 00:23, Takashi Iwai  wrote:
>> > At Mon, 3 Dec 2012 23:08:28 +0800,
>> > Daniel J Blueman wrote:
>> >>
>> >> On 3 December 2012 22:40, Takashi Iwai  wrote:
>> >> > At Mon, 3 Dec 2012 22:25:52 +0800,
>> >> > Daniel J Blueman wrote:
>> >> >>
>> >> >> On 3 December 2012 19:17, Takashi Iwai  wrote:
>> >> >> > At Wed, 28 Nov 2012 09:45:39 +0100,
>> >> >> > Takashi Iwai wrote:
>> >> >> >>
>> >> >> >> At Wed, 28 Nov 2012 11:45:07 +0800,
>> >> >> >> Daniel J Blueman wrote:
>> >> >> >> >
>> >> >> >> > Hi Seth, Dave, Takashi,
>> >> >> >> >
>> >> >> >> > If I power down the unused discrete GPU before lightdm starts by
>> >> >> >> > fiddling with the sysfs file [1] in the upstart script, I see a 
>> >> >> >> > race
>> >> >> >> > manifesting as the discrete GPU's HDA controller timing out to
>> >> >> >> > commands [2].
>> >> >> >> >
>> >> >> >> > Adding some debug, I see that the registered audio devices are put
>> >> >> >> > into D3 before the GPU is, but it turns out that the discrete (and
>> >> >> >> > internal) GPU's HDA controller gets registered a bit later, so the
>> >> >> >> > list is empty. The symptom is since the HDA driver it's talking to
>> >> >> >> > hardware which is now in D3.
>> >> >> >> >
>> >> >> >> > We could add a mutex to nouveau to allow us to wait for the DGPU 
>> >> >> >> > HDA
>> >> >> >> > controller, but perhaps this should be solved at a higher level 
>> >> >> >> > in the
>> >> >> >> > vgaswitcheroo code; what do you think?
>> >> >> >>
>> >> >> >> Maybe it's a side effect for the recent effort to fix another race 
>> >> >> >> in
>> >> >> >> the probe.  A part of them problem is that the registration is done 
>> >> >> >> at
>> >> >> >> the very last of probing.
>> >> >> >>
>> >> >> >> Instead of delaying the registration, how about the patch below?
>> >> >> >
>> >> >> > Ping.  If this really works, I'd like to queue it for 3.8 merge, at
>> >> >> > least...
>> >> >>
>> >> >> Ping ack; I was trying to find time to understand another race that
>> >> >> occurs with GPU probing after switching, but is separate from the
>> >> >> situation before switching, here.
>> >> >>
>> >> >> In the context of writing the switch, it looks like struct azx isn't
>> >> >> allocated by the time azx_vs_set_state accesses it [1,2]; racing with
>> >> >> azx_codec_create?
>> >> >
>> >> > It was allocated, but it wasn't assigned properly in pci drvdata.
>> >> >
>> >> > Below is the revised patch.  Just moved pci_set_drvdata() before
>> >> > register_vga_switcheroo().  Could you retest with it?
>> >>
>> >> Superb; this addresses the oops.
>> >
>> > OK, I'll queue it to sound tree for 3.8 kernel with Cc to stable.
>> >
>> >> ~1 second after the DGPU is put into D3, I still often see "hda-intel:
>> >> spurious response 0x0:0x0, last cmd=0x170500":
>> >> http://quora.org/2012/hda-switch-spurious.txt
>> >
>> > Hm, it's not clear who triggers these messages.  I'll try to check the
>> > code paths.
>> >
>> >> Presumably this implies the read of the ring-buffer pointer returned
>> >> 0x, so the HDA driver understands the pointer to have wrapped
>> >> and processes the 191 unwritten entries?
>> >
>> > Good point.  Actually there is one bug that looks obviously wrong
>> > (writing 32bit value to CORBWP).  Maybe it has been working just
>> > because writing CORBRP doesn't influence 

[PATCH] HDA: Add PCI device prefix for clarity

2012-12-04 Thread Daniel J Blueman
When printing, use a prefix of the PCI domain, bus, device and function
as in other drivers, to differentiate multiple devices.

Important for reporting and debugging.

Signed-off-by: Daniel J Blueman 
---
 sound/pci/hda/hda_intel.c |  110 +++--
 1 file changed, 57 insertions(+), 53 deletions(-)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index f9d870e..cdfebbd 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -185,7 +185,7 @@ MODULE_DESCRIPTION("Intel HDA driver");
 #ifdef CONFIG_SND_VERBOSE_PRINTK
 #define SFX/* nop */
 #else
-#define SFX"hda-intel: "
+#define SFX"hda-intel %s: "
 #endif
 
 #if defined(CONFIG_PM) && defined(CONFIG_VGA_SWITCHEROO)
@@ -703,7 +703,7 @@ static int azx_alloc_cmd_io(struct azx *chip)
  snd_dma_pci_data(chip->pci),
  PAGE_SIZE, &chip->rb);
if (err < 0) {
-   snd_printk(KERN_ERR SFX "cannot allocate CORB/RIRB\n");
+   snd_printk(KERN_ERR SFX "cannot allocate CORB/RIRB\n", 
pci_name(chip->pci));
return err;
}
mark_pages_wc(chip, &chip->rb, true);
@@ -836,7 +836,7 @@ static void azx_update_rirb(struct azx *chip)
chip->rirb.cmds[addr]--;
} else
snd_printk(KERN_ERR SFX "spurious response %#x:%#x, "
-  "last cmd=%#08x\n",
+  "last cmd=%#08x\n", pci_name(chip->pci),
   res, res_ex,
   chip->last_cmd[addr]);
}
@@ -881,7 +881,7 @@ static unsigned int azx_rirb_get_response(struct hda_bus 
*bus,
if (!chip->polling_mode && chip->poll_count < 2) {
snd_printdd(SFX "azx_get_response timeout, "
   "polling the codec once: last cmd=0x%08x\n",
-  chip->last_cmd[addr]);
+  pci_name(chip->pci), chip->last_cmd[addr]);
do_poll = 1;
chip->poll_count++;
goto again;
@@ -891,7 +891,7 @@ static unsigned int azx_rirb_get_response(struct hda_bus 
*bus,
if (!chip->polling_mode) {
snd_printk(KERN_WARNING SFX "azx_get_response timeout, "
   "switching to polling mode: last cmd=0x%08x\n",
-  chip->last_cmd[addr]);
+  pci_name(chip->pci), chip->last_cmd[addr]);
chip->polling_mode = 1;
goto again;
}
@@ -899,7 +899,7 @@ static unsigned int azx_rirb_get_response(struct hda_bus 
*bus,
if (chip->msi) {
snd_printk(KERN_WARNING SFX "No response from codec, "
   "disabling MSI: last cmd=0x%08x\n",
-  chip->last_cmd[addr]);
+  pci_name(chip->pci), chip->last_cmd[addr]);
free_irq(chip->irq, chip);
chip->irq = -1;
pci_disable_msi(chip->pci);
@@ -966,7 +966,7 @@ static int azx_single_wait_for_response(struct azx *chip, 
unsigned int addr)
}
if (printk_ratelimit())
snd_printd(SFX "get_response timeout: IRS=0x%x\n",
-  azx_readw(chip, IRS));
+  pci_name(chip->pci), azx_readw(chip, IRS));
chip->rirb.res[addr] = -1;
return -EIO;
 }
@@ -994,7 +994,7 @@ static int azx_single_send_cmd(struct hda_bus *bus, u32 val)
}
if (printk_ratelimit())
snd_printd(SFX "send_cmd timeout: IRS=0x%x, val=0x%x\n",
-  azx_readw(chip, IRS), val);
+  pci_name(chip->pci), azx_readw(chip, IRS), val);
return -EIO;
 }
 
@@ -1080,7 +1080,7 @@ static int azx_reset(struct azx *chip, int full_reset)
   __skip:
/* check to see if controller is ready */
if (!azx_readb(chip, GCTL)) {
-   snd_printd(SFX "azx_reset: controller not ready!\n");
+   snd_printd(SFX "azx_reset: controller not ready!\n", 
pci_name(chip->pci));
return -EBUSY;
}
 
@@ -1092,7 +1092,7 @@ static int azx_reset(struct azx *chip, int full_reset)
/* detect codecs */
if (!chip->codec_mask) {
chip->codec_mask = azx_readw(chip, STATESTS);
-   snd_printdd(SFX "codec_mask = 0x%x\n", chip->codec_mask);
+   snd_printdd(SFX "codec_mask = 0x%x\n", pci_name(chip->pci), 
chip->codec_mask);
}
 
re

Re: switcheroo registration vs switching race...

2012-12-04 Thread Daniel J Blueman
On 4 December 2012 21:55, Takashi Iwai  wrote:
> At Tue, 04 Dec 2012 14:23:05 +0100,
> Takashi Iwai wrote:
>>
>> At Tue, 4 Dec 2012 20:58:55 +0800,
>> Daniel J Blueman wrote:
>> >
>> > On 4 December 2012 01:10, Takashi Iwai  wrote:
>> > > At Tue, 4 Dec 2012 00:50:56 +0800,
>> > > Daniel J Blueman wrote:
>> > >>
>> > >> On 4 December 2012 00:23, Takashi Iwai  wrote:
>> > >> > At Mon, 3 Dec 2012 23:08:28 +0800,
>> > >> > Daniel J Blueman wrote:
>> > >> >>
>> > >> >> On 3 December 2012 22:40, Takashi Iwai  wrote:
>> > >> >> > At Mon, 3 Dec 2012 22:25:52 +0800,
>> > >> >> > Daniel J Blueman wrote:
>> > >> >> >>
>> > >> >> >> On 3 December 2012 19:17, Takashi Iwai  wrote:
>> > >> >> >> > At Wed, 28 Nov 2012 09:45:39 +0100,
>> > >> >> >> > Takashi Iwai wrote:
>> > >> >> >> >>
>> > >> >> >> >> At Wed, 28 Nov 2012 11:45:07 +0800,
>> > >> >> >> >> Daniel J Blueman wrote:
>> > >> >> >> >> >
>> > >> >> >> >> > Hi Seth, Dave, Takashi,
>> > >> >> >> >> >
>> > >> >> >> >> > If I power down the unused discrete GPU before lightdm 
>> > >> >> >> >> > starts by
>> > >> >> >> >> > fiddling with the sysfs file [1] in the upstart script, I 
>> > >> >> >> >> > see a race
>> > >> >> >> >> > manifesting as the discrete GPU's HDA controller timing out 
>> > >> >> >> >> > to
>> > >> >> >> >> > commands [2].
>> > >> >> >> >> >
>> > >> >> >> >> > Adding some debug, I see that the registered audio devices 
>> > >> >> >> >> > are put
>> > >> >> >> >> > into D3 before the GPU is, but it turns out that the 
>> > >> >> >> >> > discrete (and
>> > >> >> >> >> > internal) GPU's HDA controller gets registered a bit later, 
>> > >> >> >> >> > so the
>> > >> >> >> >> > list is empty. The symptom arises since the HDA driver is 
>> > >> >> >> >> > talking to
>> > >> >> >> >> > hardware which is now in D3.
>> > >> >> >> >> >
>> > >> >> >> >> > We could add a mutex to nouveau to allow us to wait for the 
>> > >> >> >> >> > DGPU HDA
>> > >> >> >> >> > controller, but perhaps this should be solved at a higher 
>> > >> >> >> >> > level in the
>> > >> >> >> >> > vgaswitcheroo code; what do you think?
>> > >> >> >> >>
>> > >> >> >> >> Maybe it's a side effect for the recent effort to fix another 
>> > >> >> >> >> race in
>> > >> >> >> >> the probe.  A part of the problem is that the registration is 
>> > >> >> >> >> done at
>> > >> >> >> >> the very last of probing.
>> > >> >> >> >>
>> > >> >> >> >> Instead of delaying the registration, how about the patch 
>> > >> >> >> >> below?
>> > >> >> >> >
>> > >> >> >> > Ping.  If this really works, I'd like to queue it for 3.8 
>> > >> >> >> > merge, at
>> > >> >> >> > least...
>> > >> >> >>
>> > >> >> >> Ping ack; I was trying to find time to understand another race 
>> > >> >> >> that
>> > >> >> >> occurs with GPU probing after switching, but is separate from the
>> > >> >> >> situation before switching, here.
>> > >> >> >>
>> > >> >> >> In the context of writing the switch, it looks like struct azx 
>> > >> >> >

[PATCH v2] HDA: Add PCI device prefix for clarity

2012-12-04 Thread Daniel J Blueman
When printing, use a prefix of the PCI domain, bus, device and function
as in other drivers, to differentiate multiple devices.

Important for reporting and debugging. A future step is to tidy this up with
dev_printk et al.

v2: Move conversion specifier into call site, preventing build issues

Signed-off-by: Daniel J Blueman 
---
 sound/pci/hda/hda_intel.c |  134 +++--
 1 file changed, 69 insertions(+), 65 deletions(-)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index f9d870e..eb92ab4 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -185,7 +185,7 @@ MODULE_DESCRIPTION("Intel HDA driver");
 #ifdef CONFIG_SND_VERBOSE_PRINTK
 #define SFX/* nop */
 #else
-#define SFX"hda-intel: "
+#define SFX"hda-intel "
 #endif
 
 #if defined(CONFIG_PM) && defined(CONFIG_VGA_SWITCHEROO)
@@ -703,7 +703,7 @@ static int azx_alloc_cmd_io(struct azx *chip)
  snd_dma_pci_data(chip->pci),
  PAGE_SIZE, &chip->rb);
if (err < 0) {
-   snd_printk(KERN_ERR SFX "cannot allocate CORB/RIRB\n");
+   snd_printk(KERN_ERR SFX "%s: cannot allocate CORB/RIRB\n", 
pci_name(chip->pci));
return err;
}
mark_pages_wc(chip, &chip->rb, true);
@@ -835,8 +835,8 @@ static void azx_update_rirb(struct azx *chip)
smp_wmb();
chip->rirb.cmds[addr]--;
} else
-   snd_printk(KERN_ERR SFX "spurious response %#x:%#x, "
-  "last cmd=%#08x\n",
+   snd_printk(KERN_ERR SFX "%s: spurious response %#x:%#x, 
"
+  "last cmd=%#08x\n", pci_name(chip->pci),
   res, res_ex,
   chip->last_cmd[addr]);
}
@@ -879,9 +879,9 @@ static unsigned int azx_rirb_get_response(struct hda_bus 
*bus,
}
 
if (!chip->polling_mode && chip->poll_count < 2) {
-   snd_printdd(SFX "azx_get_response timeout, "
+   snd_printdd(SFX "%s: azx_get_response timeout, "
   "polling the codec once: last cmd=0x%08x\n",
-  chip->last_cmd[addr]);
+  pci_name(chip->pci), chip->last_cmd[addr]);
do_poll = 1;
chip->poll_count++;
goto again;
@@ -889,17 +889,17 @@ static unsigned int azx_rirb_get_response(struct hda_bus 
*bus,
 
 
if (!chip->polling_mode) {
-   snd_printk(KERN_WARNING SFX "azx_get_response timeout, "
+   snd_printk(KERN_WARNING SFX "%s: azx_get_response timeout, "
   "switching to polling mode: last cmd=0x%08x\n",
-  chip->last_cmd[addr]);
+  pci_name(chip->pci), chip->last_cmd[addr]);
chip->polling_mode = 1;
goto again;
}
 
if (chip->msi) {
-   snd_printk(KERN_WARNING SFX "No response from codec, "
+   snd_printk(KERN_WARNING SFX "%s: No response from codec, "
   "disabling MSI: last cmd=0x%08x\n",
-  chip->last_cmd[addr]);
+  pci_name(chip->pci), chip->last_cmd[addr]);
free_irq(chip->irq, chip);
chip->irq = -1;
pci_disable_msi(chip->pci);
@@ -965,8 +965,8 @@ static int azx_single_wait_for_response(struct azx *chip, 
unsigned int addr)
udelay(1);
}
if (printk_ratelimit())
-   snd_printd(SFX "get_response timeout: IRS=0x%x\n",
-  azx_readw(chip, IRS));
+   snd_printd(SFX "%s: get_response timeout: IRS=0x%x\n",
+  pci_name(chip->pci), azx_readw(chip, IRS));
chip->rirb.res[addr] = -1;
return -EIO;
 }
@@ -993,8 +993,8 @@ static int azx_single_send_cmd(struct hda_bus *bus, u32 val)
udelay(1);
}
if (printk_ratelimit())
-   snd_printd(SFX "send_cmd timeout: IRS=0x%x, val=0x%x\n",
-  azx_readw(chip, IRS), val);
+   snd_printd(SFX "%s: send_cmd timeout: IRS=0x%x, val=0x%x\n",
+  pci_name(chip->pci), azx_readw(chip, IRS), val);
return -EIO;
 }
 
@@ -1080,7 +1080,7 @@ static int azx_reset(struct azx *chip, int full_reset)
   __skip:
/* check to see if controller is ready */
if (!azx_readb(chip, GCTL)) {
-

Re: switcheroo registration vs switching race...

2012-12-04 Thread Daniel J Blueman
On 4 December 2012 23:03, Takashi Iwai  wrote:
> At Tue, 4 Dec 2012 22:46:47 +0800,
> Daniel J Blueman wrote:
>>
>> On 4 December 2012 21:55, Takashi Iwai  wrote:
>> > At Tue, 04 Dec 2012 14:23:05 +0100,
>> > Takashi Iwai wrote:
>> >>
>> >> At Tue, 4 Dec 2012 20:58:55 +0800,
>> >> Daniel J Blueman wrote:
>> >> >
>> >> > On 4 December 2012 01:10, Takashi Iwai  wrote:
>> >> > > At Tue, 4 Dec 2012 00:50:56 +0800,
>> >> > > Daniel J Blueman wrote:
>> >> > >>
>> >> > >> On 4 December 2012 00:23, Takashi Iwai  wrote:
>> >> > >> > At Mon, 3 Dec 2012 23:08:28 +0800,
>> >> > >> > Daniel J Blueman wrote:
>> >> > >> >>
>> >> > >> >> On 3 December 2012 22:40, Takashi Iwai  wrote:
>> >> > >> >> > At Mon, 3 Dec 2012 22:25:52 +0800,
>> >> > >> >> > Daniel J Blueman wrote:
>> >> > >> >> >>
>> >> > >> >> >> On 3 December 2012 19:17, Takashi Iwai  wrote:
>> >> > >> >> >> > At Wed, 28 Nov 2012 09:45:39 +0100,
>> >> > >> >> >> > Takashi Iwai wrote:
>> >> > >> >> >> >>
>> >> > >> >> >> >> At Wed, 28 Nov 2012 11:45:07 +0800,
>> >> > >> >> >> >> Daniel J Blueman wrote:
>> >> > >> >> >> >> >
>> >> > >> >> >> >> > Hi Seth, Dave, Takashi,
>> >> > >> >> >> >> >
>> >> > >> >> >> >> > If I power down the unused discrete GPU before lightdm 
>> >> > >> >> >> >> > starts by
>> >> > >> >> >> >> > fiddling with the sysfs file [1] in the upstart script, I 
>> >> > >> >> >> >> > see a race
>> >> > >> >> >> >> > manifesting as the discrete GPU's HDA controller timing 
>> >> > >> >> >> >> > out to
>> >> > >> >> >> >> > commands [2].
>> >> > >> >> >> >> >
>> >> > >> >> >> >> > Adding some debug, I see that the registered audio 
>> >> > >> >> >> >> > devices are put
>> >> > >> >> >> >> > into D3 before the GPU is, but it turns out that the 
>> >> > >> >> >> >> > discrete (and
>> >> > >> >> >> >> > internal) GPU's HDA controller gets registered a bit 
>> >> > >> >> >> >> > later, so the
>> >> > >> >> >> >> > list is empty. The symptom arises since the HDA driver is 
>> >> > >> >> >> >> > talking to
>> >> > >> >> >> >> > hardware which is now in D3.
>> >> > >> >> >> >> >
>> >> > >> >> >> >> > We could add a mutex to nouveau to allow us to wait for 
>> >> > >> >> >> >> > the DGPU HDA
>> >> > >> >> >> >> > controller, but perhaps this should be solved at a higher 
>> >> > >> >> >> >> > level in the
>> >> > >> >> >> >> > vgaswitcheroo code; what do you think?
>> >> > >> >> >> >>
>> >> > >> >> >> >> Maybe it's a side effect for the recent effort to fix 
>> >> > >> >> >> >> another race in
>> >> > >> >> >> >> the probe.  A part of the problem is that the registration 
>> >> > >> >> >> >> is done at
>> >> > >> >> >> >> the very last of probing.
>> >> > >> >> >> >>
>> >> > >> >> >> >> Instead of delaying the registration, how about the patch 
>> >> > >> >> >> >> below?
>> >> > >> >> >> >
>> >> > >> >> >> > Ping.  If this really works, I'd like to queue it f

Re: switcheroo registration vs switching race...

2012-12-05 Thread Daniel J Blueman
On 5 December 2012 00:04, Takashi Iwai  wrote:
> At Tue, 4 Dec 2012 23:54:39 +0800,
> Daniel J Blueman wrote:
>>
>> On 4 December 2012 23:03, Takashi Iwai  wrote:
>> > At Tue, 4 Dec 2012 22:46:47 +0800,
>> > Daniel J Blueman wrote:
>> >>
>> >> On 4 December 2012 21:55, Takashi Iwai  wrote:
>> >> > At Tue, 04 Dec 2012 14:23:05 +0100,
>> >> > Takashi Iwai wrote:
>> >> >>
>> >> >> At Tue, 4 Dec 2012 20:58:55 +0800,
>> >> >> Daniel J Blueman wrote:
>> >> >> >
>> >> >> > On 4 December 2012 01:10, Takashi Iwai  wrote:
>> >> >> > > At Tue, 4 Dec 2012 00:50:56 +0800,
>> >> >> > > Daniel J Blueman wrote:
>> >> >> > >>
>> >> >> > >> On 4 December 2012 00:23, Takashi Iwai  wrote:
>> >> >> > >> > At Mon, 3 Dec 2012 23:08:28 +0800,
>> >> >> > >> > Daniel J Blueman wrote:
>> >> >> > >> >>
>> >> >> > >> >> On 3 December 2012 22:40, Takashi Iwai  wrote:
>> >> >> > >> >> > At Mon, 3 Dec 2012 22:25:52 +0800,
>> >> >> > >> >> > Daniel J Blueman wrote:
>> >> >> > >> >> >>
>> >> >> > >> >> >> On 3 December 2012 19:17, Takashi Iwai  
>> >> >> > >> >> >> wrote:
>> >> >> > >> >> >> > At Wed, 28 Nov 2012 09:45:39 +0100,
>> >> >> > >> >> >> > Takashi Iwai wrote:
>> >> >> > >> >> >> >>
>> >> >> > >> >> >> >> At Wed, 28 Nov 2012 11:45:07 +0800,
>> >> >> > >> >> >> >> Daniel J Blueman wrote:
>> >> >> > >> >> >> >> >
>> >> >> > >> >> >> >> > Hi Seth, Dave, Takashi,
>> >> >> > >> >> >> >> >
>> >> >> > >> >> >> >> > If I power down the unused discrete GPU before lightdm 
>> >> >> > >> >> >> >> > starts by
>> >> >> > >> >> >> >> > fiddling with the sysfs file [1] in the upstart 
>> >> >> > >> >> >> >> > script, I see a race
>> >> >> > >> >> >> >> > manifesting as the discrete GPU's HDA controller 
>> >> >> > >> >> >> >> > timing out to
>> >> >> > >> >> >> >> > commands [2].
>> >> >> > >> >> >> >> >
>> >> >> > >> >> >> >> > Adding some debug, I see that the registered audio 
>> >> >> > >> >> >> >> > devices are put
>> >> >> > >> >> >> >> > into D3 before the GPU is, but it turns out that the 
>> >> >> > >> >> >> >> > discrete (and
>> >> >> > >> >> >> >> > internal) GPU's HDA controller gets registered a bit 
>> >> >> > >> >> >> >> > later, so the
>> >> >> > >> >> >> >> > list is empty. The symptom arises since the HDA driver 
>> >> >> > >> >> >> >> > is talking to
>> >> >> > >> >> >> >> > hardware which is now in D3.
>> >> >> > >> >> >> >> >
>> >> >> > >> >> >> >> > We could add a mutex to nouveau to allow us to wait 
>> >> >> > >> >> >> >> > for the DGPU HDA
>> >> >> > >> >> >> >> > controller, but perhaps this should be solved at a 
>> >> >> > >> >> >> >> > higher level in the
>> >> >> > >> >> >> >> > vgaswitcheroo code; what do you think?
>> >> >> > >> >> >> >>
>> >> >> > >> >> >> >> Maybe it's a side effect for the recent effort to fix 
>> >> >> > >> >> >>

[PATCH v3] HDA: Add PCI device prefix for clarity

2012-12-05 Thread Daniel J Blueman
When printing, use a prefix of the PCI domain, bus, device and function
as in other drivers, to differentiate multiple devices.

Important for reporting and debugging. A future step is to tidy this up with
dev_printk et al.

v2: Move conversion specifier into call site, preventing build issues
v3: Refactor for Takashi's for-next branch

Signed-off-by: Daniel J Blueman 
---
 sound/pci/hda/hda_intel.c |  125 +++--
 1 file changed, 64 insertions(+), 61 deletions(-)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 22ecadc..eb48109 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -192,7 +192,7 @@ MODULE_DESCRIPTION("Intel HDA driver");
 #ifdef CONFIG_SND_VERBOSE_PRINTK
 #define SFX/* nop */
 #else
-#define SFX"hda-intel: "
+#define SFX"hda-intel "
 #endif
 
 #if defined(CONFIG_PM) && defined(CONFIG_VGA_SWITCHEROO)
@@ -717,7 +717,7 @@ static int azx_alloc_cmd_io(struct azx *chip)
  snd_dma_pci_data(chip->pci),
  PAGE_SIZE, &chip->rb);
if (err < 0) {
-   snd_printk(KERN_ERR SFX "cannot allocate CORB/RIRB\n");
+   snd_printk(KERN_ERR SFX "%s: cannot allocate CORB/RIRB\n", 
pci_name(chip->pci));
return err;
}
mark_pages_wc(chip, &chip->rb, true);
@@ -894,9 +894,9 @@ static unsigned int azx_rirb_get_response(struct hda_bus 
*bus,
}
 
if (!chip->polling_mode && chip->poll_count < 2) {
-   snd_printdd(SFX "azx_get_response timeout, "
+   snd_printdd(SFX "%s: azx_get_response timeout, "
   "polling the codec once: last cmd=0x%08x\n",
-  chip->last_cmd[addr]);
+  pci_name(chip->pci), chip->last_cmd[addr]);
do_poll = 1;
chip->poll_count++;
goto again;
@@ -904,17 +904,17 @@ static unsigned int azx_rirb_get_response(struct hda_bus 
*bus,
 
 
if (!chip->polling_mode) {
-   snd_printk(KERN_WARNING SFX "azx_get_response timeout, "
+   snd_printk(KERN_WARNING SFX "%s: azx_get_response timeout, "
   "switching to polling mode: last cmd=0x%08x\n",
-  chip->last_cmd[addr]);
+  pci_name(chip->pci), chip->last_cmd[addr]);
chip->polling_mode = 1;
goto again;
}
 
if (chip->msi) {
-   snd_printk(KERN_WARNING SFX "No response from codec, "
+   snd_printk(KERN_WARNING SFX "%s: No response from codec, "
   "disabling MSI: last cmd=0x%08x\n",
-  chip->last_cmd[addr]);
+  pci_name(chip->pci), chip->last_cmd[addr]);
free_irq(chip->irq, chip);
chip->irq = -1;
pci_disable_msi(chip->pci);
@@ -980,8 +980,8 @@ static int azx_single_wait_for_response(struct azx *chip, 
unsigned int addr)
udelay(1);
}
if (printk_ratelimit())
-   snd_printd(SFX "get_response timeout: IRS=0x%x\n",
-  azx_readw(chip, IRS));
+   snd_printd(SFX "%s: get_response timeout: IRS=0x%x\n",
+  pci_name(chip->pci), azx_readw(chip, IRS));
chip->rirb.res[addr] = -1;
return -EIO;
 }
@@ -1008,8 +1008,8 @@ static int azx_single_send_cmd(struct hda_bus *bus, u32 
val)
udelay(1);
}
if (printk_ratelimit())
-   snd_printd(SFX "send_cmd timeout: IRS=0x%x, val=0x%x\n",
-  azx_readw(chip, IRS), val);
+   snd_printd(SFX "%s: send_cmd timeout: IRS=0x%x, val=0x%x\n",
+  pci_name(chip->pci), azx_readw(chip, IRS), val);
return -EIO;
 }
 
@@ -1095,7 +1095,7 @@ static int azx_reset(struct azx *chip, int full_reset)
   __skip:
/* check to see if controller is ready */
if (!azx_readb(chip, GCTL)) {
-   snd_printd(SFX "azx_reset: controller not ready!\n");
+   snd_printd(SFX "%s: azx_reset: controller not ready!\n", 
pci_name(chip->pci));
return -EBUSY;
}
 
@@ -1107,7 +1107,7 @@ static int azx_reset(struct azx *chip, int full_reset)
/* detect codecs */
if (!chip->codec_mask) {
chip->codec_mask = azx_readw(chip, STATESTS);
-   snd_printdd(SFX "codec_mask = 0x%x\n", chip->codec_mask);
+   snd_printdd(SFX "%s: codec_mask = 0x%x\n", pci_name(c

[PATCH] Fix printing when no interrupt is allocated

2012-10-18 Thread Daniel J Blueman
Previously a new line is implicitly added in the no GSI case:

[7.185182] pci 0001:00:12.0: can't derive routing for PCI INT A
[7.191352] pci 0001:00:12.0: PCI INT A: no GSI
[7.195956]  - using ISA IRQ 10

The code thus prints a blank line where no legacy IRQ is available:

[1.650124] pci :00:14.0: can't derive routing for PCI INT A
[1.650126] pci :00:14.0: PCI INT A: no GSI
[1.650126] 
[1.650180] pci :00:14.0: can't derive routing for PCI INT A

Fix this by making the newline explicit and removing the superfluous
one.

Signed-off-by: Daniel J Blueman 
---
 drivers/acpi/pci_irq.c |8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index 0eefa12..2c37996 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -459,7 +459,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
 */
if (gsi < 0) {
u32 dev_gsi;
-   dev_warn(&dev->dev, "PCI INT %c: no GSI", pin_name(pin));
+   dev_warn(&dev->dev, "PCI INT %c: no GSI\n", pin_name(pin));
/* Interrupt Line values above 0xF are forbidden */
if (dev->irq > 0 && (dev->irq <= 0xF) &&
(acpi_isa_irq_to_gsi(dev->irq, &dev_gsi) == 0)) {
@@ -467,11 +467,9 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
acpi_register_gsi(&dev->dev, dev_gsi,
  ACPI_LEVEL_SENSITIVE,
  ACPI_ACTIVE_LOW);
-   return 0;
-   } else {
-   printk("\n");
-   return 0;
}
+
+   return 0;
}
 
rc = acpi_register_gsi(&dev->dev, gsi, triggering, polarity);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 RESEND] Add NumaChip remote PCI support

2012-12-06 Thread Daniel J Blueman

On 01/12/2012 00:45, Bjorn Helgaas wrote:

On Thu, Nov 29, 2012 at 10:28 PM, Daniel J Blueman

On 29/11/2012 07:08, Bjorn Helgaas wrote:

On Wed, Nov 21, 2012 at 1:39 AM, Daniel J Blueman
 wrote:


Add NumaChip-specific PCI access mechanism via MMCONFIG cycles, but
preventing access to AMD Northbridges which shouldn't respond.

v2: Use PCI_DEVFN in precomputed constant limit; drop unneeded includes

Signed-off-by: Daniel J Blueman 
---
   arch/x86/include/asm/numachip/numachip.h |   20 +
   arch/x86/kernel/apic/apic_numachip.c |2 +
   arch/x86/pci/Makefile|1 +
   arch/x86/pci/numachip.c  |  134
++
   4 files changed, 157 insertions(+)
   create mode 100644 arch/x86/include/asm/numachip/numachip.h
   create mode 100644 arch/x86/pci/numachip.c

diff --git a/arch/x86/include/asm/numachip/numachip.h
b/arch/x86/include/asm/numachip/numachip.h
new file mode 100644
index 000..d35e71a
--- /dev/null
+++ b/arch/x86/include/asm/numachip/numachip.h
@@ -0,0 +1,20 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General
Public
+ * License.  See the file "COPYING" in the main directory of this
archive
+ * for more details.
+ *
+ * Numascale NumaConnect-specific header file
+ *
+ * Copyright (C) 2012 Numascale AS. All rights reserved.
+ *
+ * Send feedback to 
+ *
+ */
+
+#ifndef _ASM_X86_NUMACHIP_NUMACHIP_H
+#define _ASM_X86_NUMACHIP_NUMACHIP_H
+
+extern int __init pci_numachip_init(void);
+
+#endif /* _ASM_X86_NUMACHIP_NUMACHIP_H */
+
diff --git a/arch/x86/kernel/apic/apic_numachip.c
b/arch/x86/kernel/apic/apic_numachip.c
index a65829a..9c2aa89 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -22,6 +22,7 @@
   #include 
   #include 

+#include 
   #include 
   #include 
   #include 
@@ -179,6 +180,7 @@ static int __init numachip_system_init(void)
  return 0;

  x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
+   x86_init.pci.arch_init = pci_numachip_init;

  map_csrs();

diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index 3af5a1e..ee0af58 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_STA2X11)   += sta2x11-fixup.o
   obj-$(CONFIG_X86_VISWS)+= visws.o

   obj-$(CONFIG_X86_NUMAQ)+= numaq_32.o
+obj-$(CONFIG_X86_NUMACHIP) += numachip.o



It looks like this depends on CONFIG_PCI_MMCONFIG for
pci_mmconfig_lookup().  Are there config constraints that force
CONFIG_PCI_MMCONFIG=y when CONFIG_X86_NUMACHIP=y?



I'll revise the patch with this constraint after we work out the best
approach for below.



   obj-$(CONFIG_X86_INTEL_MID)+= mrst.o

diff --git a/arch/x86/pci/numachip.c b/arch/x86/pci/numachip.c
new file mode 100644
index 000..3773e05
--- /dev/null
+++ b/arch/x86/pci/numachip.c
@@ -0,0 +1,129 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General
Public
+ * License.  See the file "COPYING" in the main directory of this
archive
+ * for more details.
+ *
+ * Numascale NumaConnect-specific PCI code
+ *
+ * Copyright (C) 2012 Numascale AS. All rights reserved.
+ *
+ * Send feedback to 
+ *
+ * PCI accessor functions derived from mmconfig_64.c
+ *
+ */
+
+#include 
+#include 
+
+static u8 limit __read_mostly;
+
+static inline char __iomem *pci_dev_base(unsigned int seg, unsigned int
bus, unsigned int devfn)
+{
+   struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus);
+
+   if (cfg && cfg->virt)
+   return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn <<
12));
+   return NULL;
+}



Most of this file is copied directly from mmconfig_64.c (as you
mentioned above).  I wonder if we could avoid the code duplication by
making the pci_dev_base() implementation in mmconfig_64.c a weak
definition.  Then you could just supply a non-weak pci_dev_base() here
that would override that default version.  Your version would look
something like:

char __iomem *pci_dev_base(unsigned int seg, unsigned int bus,
unsigned int devfn)
{
struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus);

if (cfg && cfg->virt && devfn < limit)
return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12));
return NULL;
}

That would be different from what you have in this patch because reads
& writes to devices above "limit" would return -EINVAL rather than 0
as you do here.  Would that be a problem?



That would work nicely (pointer lookup and inlining etc aside) if there was
the runtime ability to override pci_dev_base only if the NumaChip signature
was detected.

We could expose pci_dev_base via struct x86_init_pci; the extra complexity
and performance tradeoff may not be worth it for a single case perhaps?


Oh, right, I forgot that you can't decide this at 

[PATCH] nouveau: Fix crash after D3

2012-11-06 Thread Daniel J Blueman
In 3.7-rc4, when starting X with the integrated GPU and suspending the discrete 
GPU,
after one or more 32-bit applications are used (eg Skype) and X is stopped,
we hit a panic.

Prevent this by testing if the fini function is valid.

Full panic bootlog is at: http://quora.org/2012/nouveau/dmesg-crash.txt
Xorg.log is at: http://quora.org/2012/nouveau/Xorg.0.log-crash.txt
Kernel log after fix is at: http://quora.org/2012/nouveau/dmesg-fix.txt

Signed-off-by: Daniel J Blueman 
---
 drivers/gpu/drm/nouveau/core/core/object.c |   10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/core/core/object.c 
b/drivers/gpu/drm/nouveau/core/core/object.c
index 0daab62..3da3525 100644
--- a/drivers/gpu/drm/nouveau/core/core/object.c
+++ b/drivers/gpu/drm/nouveau/core/core/object.c
@@ -354,12 +354,16 @@ static int
 nouveau_object_decf(struct nouveau_object *object)
 {
int ret;
+   struct nouveau_ofuncs *pfuncs;
 
nv_trace(object, "stopping...\n");
 
-   ret = nv_ofuncs(object)->fini(object, false);
-   if (ret)
-   nv_warn(object, "failed fini, %d\n", ret);
+   pfuncs = nv_ofuncs(object);
+   if (pfuncs->fini) {
+   ret = nv_ofuncs(object)->fini(object, false);
+   if (ret)
+   nv_warn(object, "failed fini, %d\n", ret);
+   }
 
if (object->engine) {
mutex_lock(&nv_subdev(object->engine)->mutex);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH, RESEND] nouveau: Prevent kernel log mangling

2012-11-07 Thread Daniel J Blueman
On 3.7-rc4, add missing newline to prevent the following kernel log
line getting appended to the current one after switching the integrated
GPU and suspending the discrete GPU.

Signed-off-by: Daniel J Blueman 
---
 drivers/gpu/drm/nouveau/core/subdev/i2c/aux.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/core/subdev/i2c/aux.c 
b/drivers/gpu/drm/nouveau/core/subdev/i2c/aux.c
index fe1ebf1..dc27e79 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/i2c/aux.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/i2c/aux.c
@@ -50,7 +50,7 @@ auxch_init(struct nouveau_i2c *aux, int ch)
ctrl = nv_rd32(aux, 0x00e4e4 + (ch * 0x50));
udelay(1);
if (!timeout--) {
-   AUX_ERR("begin idle timeout 0x%08x", ctrl);
+   AUX_ERR("begin idle timeout 0x%08x\n", ctrl);
return -EBUSY;
}
} while (ctrl & 0x0301);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


3.8-rc2: EFI framebuffer lock inversion...

2013-01-03 Thread Daniel J Blueman
obe_device+0x76/0x240
[] __driver_attach+0xa3/0xb0
[] bus_for_each_dev+0x4d/0x90
[] driver_attach+0x19/0x20
[] bus_add_driver+0x1a0/0x270
[] driver_register+0x72/0x170
[] platform_driver_register+0x41/0x50
[] platform_driver_probe+0x16/0xa0
[] efifb_init+0x273/0x292
[] do_one_initcall+0x11a/0x170
[] kernel_init+0x11c/0x290
[] ret_from_fork+0x7c/0xb0

-> #0 ((fb_notifier_list).rwsem){.+}:
[] validate_chain.isra.33+0x1000/0x10d0
[] __lock_acquire+0x3a1/0xb60
[] lock_acquire+0x5a/0x70
[] down_read+0x47/0x5c
[] __blocking_notifier_call_chain+0x51/0xc0
[] blocking_notifier_call_chain+0x11/0x20
[] fb_notifier_call_chain+0x16/0x20
[] fb_set_suspend+0x46/0x60
[] nouveau_fbcon_set_suspend+0x92/0xc0 [nouveau]
[] nouveau_do_suspend+0x51/0x200 [nouveau]
[] nouveau_pmops_suspend+0x2f/0x80 [nouveau]
[] nouveau_switcheroo_set_state+0x5c/0xc0 [nouveau]
[] vga_switchoff+0x17/0x40
[] vga_switcheroo_debugfs_write+0xca/0x380
[] vfs_write+0xa3/0x160
[] sys_write+0x4d/0xa0
[] system_call_fastpath+0x1a/0x1f

other info that might help us debug this:

 Possible unsafe locking scenario:

CPU0  CPU1
  
 lock(console_lock);
lock((fb_notifier_list).rwsem);
lock(console_lock);
 lock((fb_notifier_list).rwsem);

 *** DEADLOCK ***

2 locks held by sh/1017:
 #0: (vgasr_mutex){+.+.+.}, at: []
vga_switcheroo_debugfs_write+0x57/0x380
 #1: (console_lock){+.+.+.}, at: []
nouveau_fbcon_set_suspend+0x25/0xc0 [nouveau]

stack backtrace:
Pid: 1017, comm: sh Not tainted 3.8.0-rc2-expert #1
Call Trace:
 [] print_circular_bug+0x28e/0x29f
 [] validate_chain.isra.33+0x1000/0x10d0
 [] __lock_acquire+0x3a1/0xb60
 [] ? __lock_is_held+0x54/0x80
 [] lock_acquire+0x5a/0x70
 [] ? __blocking_notifier_call_chain+0x51/0xc0
 [] down_read+0x47/0x5c
 [] ? __blocking_notifier_call_chain+0x51/0xc0
 [] __blocking_notifier_call_chain+0x51/0xc0
 [] blocking_notifier_call_chain+0x11/0x20
 [] fb_notifier_call_chain+0x16/0x20
 [] fb_set_suspend+0x46/0x60
 [] ? console_lock+0x77/0x80
 [] ? nouveau_fbcon_set_suspend+0x25/0xc0 [nouveau]
 [] nouveau_fbcon_set_suspend+0x92/0xc0 [nouveau]
 [] nouveau_do_suspend+0x51/0x200 [nouveau]
 [] nouveau_pmops_suspend+0x2f/0x80 [nouveau]
 [] nouveau_switcheroo_set_state+0x5c/0xc0 [nouveau]
 [] vga_switchoff+0x17/0x40
 [] vga_switcheroo_debugfs_write+0xca/0x380
 [] vfs_write+0xa3/0x160
 [] sys_write+0x4d/0xa0
 [] system_call_fastpath+0x1a/0x1f
nouveau [   DRM] suspending display...
nouveau [   DRM] unpinning framebuffer(s)...
nouveau [   DRM] evicting buffers...
nouveau [   DRM] suspending client object trees...
tg3 :0a:00.0 eth0: Link is up at 1000 Mbps, full duplex
tg3 :0a:00.0 eth0: Flow control is on for TX and on for RX
nouveau E[   I2C][:01:00.0] AUXCH(3): begin idle timeout 0x
nouveau E[   I2C][:01:00.0] AUXCH(2): begin idle timeout 0x
nouveau E[   I2C][:01:00.0] AUXCH(1): begin idle timeout 0xffff
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 3.8-rc2: EFI framebuffer lock inversion...

2013-01-03 Thread Daniel J Blueman
On 3 January 2013 21:11, Alan Cox  wrote:
> On Thu, 3 Jan 2013 20:56:30 +0800
> Daniel J Blueman  wrote:
>
>> On 3.8-rc2 with lockdep enabled and dual-GPU setup (Macbook Pro
> >> Retina), I see two related lock inversion issues with the EFI
>> framebuffer, leading to possible deadlock: when X takes over from the
>> EFI framebuffer [1] and when nouveau releases the framebuffer when
>> being vgaswitcherood [2].
>>
>> Let me know if you'd like any testing or analysis when I can get the time.
>
> The fb layer locking was broken. I posted patches early December which
> should have fixed the ones we know about. ('fb: Rework locking to fix
> lock ordering on takeover').

Superb work, Alan!

The only patch I could find [1] (mid Nov) looks like it needs other
sites updating, since we now see an i915 vs efifb lock ordering issue
[2].

I can get some time next week to take a look if it helps.

Thanks,
  Daniel

--- [1] https://patchwork.kernel.org/patch/1757061/

--- [2]

[drm] Memory usable by graphics device = 2048M
checking generic (b000 144) vs hw (b000 1000)
fb: conflicting fb hw usage inteldrmfb vs EFI VGA - removing generic driver

==
[ INFO: possible circular locking dependency detected ]
3.8.0-rc2-expert+ #2 Not tainted
---
modprobe/603 is trying to acquire lock:
 (console_lock){+.+.+.}, at: [] unbind_con_driver+0x3f/0x200

but task is already holding lock:
 ((fb_notifier_list).rwsem){.+}, at: []
__blocking_notifier_call_chain+0x51/0xc0

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #1 ((fb_notifier_list).rwsem){.+}:
[] __lock_acquire+0x3a1/0xb60
[] lock_acquire+0x5a/0x70
[] down_read+0x47/0x5c
[] __blocking_notifier_call_chain+0x51/0xc0
[] blocking_notifier_call_chain+0x11/0x20
[] fb_notifier_call_chain+0x16/0x20
[] register_framebuffer+0x1c0/0x300
[] efifb_probe+0x40f/0x496
[] platform_drv_probe+0x3e/0x70
[] driver_probe_device+0x76/0x240
[] __driver_attach+0xa3/0xb0
[] bus_for_each_dev+0x4d/0x90
[] driver_attach+0x19/0x20
[] bus_add_driver+0x1a0/0x270
[] driver_register+0x72/0x170
[] platform_driver_register+0x41/0x50
[] platform_driver_probe+0x16/0xa0
[] efifb_init+0x273/0x292
[] do_one_initcall+0x11a/0x170
[] kernel_init+0x11c/0x290
[] ret_from_fork+0x7c/0xb0

-> #0 (console_lock){+.+.+.}:
[] validate_chain.isra.33+0x1000/0x10d0
[] __lock_acquire+0x3a1/0xb60
[] lock_acquire+0x5a/0x70
[] console_lock+0x77/0x80
[] unbind_con_driver+0x3f/0x200
[] fbcon_event_notify+0x447/0x8b0
[] notifier_call_chain+0x55/0x110
[] __blocking_notifier_call_chain+0x67/0xc0
[] blocking_notifier_call_chain+0x11/0x20
[] fb_notifier_call_chain+0x16/0x20
[] do_unregister_framebuffer+0x5b/0x110
[] do_remove_conflicting_framebuffers+0x158/0x190
[] remove_conflicting_framebuffers+0x3a/0x60
[] i915_driver_load+0x7d4/0xe70 [i915]
[] drm_get_pci_dev+0x17e/0x2b0
[] i915_pci_probe+0x36/0x90 [i915]
[] local_pci_probe+0x46/0x80
[] pci_device_probe+0x101/0x110
[] driver_probe_device+0x76/0x240
[] __driver_attach+0xa3/0xb0
[] bus_for_each_dev+0x4d/0x90
[] driver_attach+0x19/0x20
[] bus_add_driver+0x1a0/0x270
[] driver_register+0x72/0x170
[] __pci_register_driver+0x5f/0x70
[] drm_pci_init+0x115/0x130
[] i915_init+0x66/0x68 [i915]
[] do_one_initcall+0x11a/0x170
[] load_module+0xfd4/0x13c0
[] sys_init_module+0xb7/0xe0
[] system_call_fastpath+0x1a/0x1f

other info that might help us debug this:

 Possible unsafe locking scenario:

CPU0  CPU1
  
 lock((fb_notifier_list).rwsem);
lock(console_lock);
lock((fb_notifier_list).rwsem);
 lock(console_lock);

 *** DEADLOCK ***

6 locks held by modprobe/603:
 #0: (&__lockdep_no_validate__){..}, at: []
__driver_attach+0x53/0xb0
 #1: (&__lockdep_no_validate__){..}, at: []
__driver_attach+0x61/0xb0
 #2: (drm_global_mutex){+.+.+.}, at: []
drm_get_pci_dev+0xbc/0x2b0
 #3: (registration_lock){+.+.+.}, at: []
remove_conflicting_framebuffers+0x2b/0x60
 #4: (&fb_info->lock){+.+.+.}, at: [] lock_fb_info+0x21/0x60
 #5: ((fb_notifier_list).rwsem){.+}, at: []
__blocking_notifier_call_chain+0x51/0xc0

stack backtrace:
Pid: 603, comm: modprobe Not tainted 3.8.0-rc2-expert+ #2
Call Trace:
 [] print_circular_bug+0x28e/0x29f
 [] validate_chain.isra.33+0x1000/0x10d0
 [] __lock_acquire+0x3a1/0xb60
 [] ? _raw_spin_unlock_irqrestore+0x3a/0x70
 [] ? trace_hardirqs_on_caller+0x10d/0x1a0
 [] lock_acquire+0x5a/0x70
 [] ? unbind_con_driver+0x3f/0x200
 [] console_lock+0x77/0x80
 [] ? unbind_con_driver+0x3f/0x200
 [] unbind_con_driver+0x3f/0x200
 []

Re: 3.8-rc2: EFI framebuffer lock inversion...

2013-01-03 Thread Daniel J Blueman
On 3 January 2013 22:11, Sedat Dilek  wrote:
> Hi Daniel,
>
> just wanted to test the fb-fix [2] from Alan and followed the thread in [1].
> Me is also working with i915 KMS.
>
> I looked at nouveau KMS driver and adapted the part for i915:
>
> drivers/gpu/drm/nouveau/nouveau_drm.c-200-  /* remove conflicting
> drivers (vesafb, efifb etc) */
> drivers/gpu/drm/nouveau/nouveau_drm.c:201:  aper = alloc_apertures(3);
> drivers/gpu/drm/nouveau/nouveau_drm.c-202-  if (!aper)
> drivers/gpu/drm/nouveau/nouveau_drm.c-203-  return -ENOMEM;
>
> Untested by me, feel free to test.
>
> Maybe some of the i915 and/or fb driver experts can comment on the problem.

The structure array from alloc_apertures is just used for the PCI base
address registers, so it's important here.

I'll take a look at the efifb locking later.

Thanks,
  Daniel
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


3.8-rc2: pciehp waitqueue hang...

2013-01-03 Thread Daniel J Blueman
When the Apple thunderbolt ethernet adapter comes loose on my Macbook
Pro Retina (Intel DSL3510), we see pci_slot_name return
non-deterministic data (ie varying each boot), and we see pciehp_wp
remain armed with events causing the kthread to get stuck:

tg3 :0a:00.0 eth0: Link is up at 1000 Mbps, full duplex
tg3 :0a:00.0 eth0: Flow control is on for TX and on for RX

pciehp :06:03.0:pcie24: Card not present on Slot(3)
tg3 :0a:00.0: tg3_abort_hw timed out, TX_MODE_ENABLE will not
clear MAC_TX_MODE=
tg3 :0a:00.0 eth0: No firmware running
tg3 :0a:00.0 eth0: Link is down
pcieport :00:01.1: System wakeup enabled by ACPI
pciehp :09:00.0:pcie24: unloading service driver pciehp
pciehp :09:00.0:pcie24: Latch open on
Slot(\xfff89\xffbbe\x02\xff88\x\x\xffe09\xffbbe\x02\xff88\x\xfbcon)
pciehp :09:00.0:pcie24: Button pressed on
Slot(\xfff89\xffbbe\x02\xff88\x\x\xffe09\xffbbe\x02\xff88\x\xfbcon)
pciehp :09:00.0:pcie24: Card present on
Slot(\xfff89\xffbbe\x02\xff88\x\x\xffe09\xffbbe\x02\xff88\x\xfbcon)
pciehp :09:00.0:pcie24: Power fault on slot
\xfff89\xffbbe\x02\xff88\x\x\xffe09\xffbbe\x02\xff88\x\xfbcon
pciehp :09:00.0:pcie24: Power fault bit 0 set
pciehp :09:00.0:pcie24: PCI slot
#\xfff89\xffbbe\x02\xff88\x\x\xffe09\xffbbe\x02\xff88\x\xfbcon
- powering on due to button press.
pciehp :09:00.0:pcie24: Link Training Error occurs
pciehp :09:00.0:pcie24: Failed to check link status
INFO: task kworker/0:1:52 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
kworker/0:1   D 880265893090   0  52   2 0x
 8802655456f8 0046 81a21a60 880265545fd8
 4000 880265545fd8 880265892bb0 880265adc8d0
 059e 0082 880265545668 810415aa
Call Trace:
 [] ? console_unlock+0x1fa/0x4a0
 [] ? trace_hardirqs_off+0xd/0x10
 [] ? vprintk_emit+0x1c9/0x510
 [] schedule+0x24/0x70
 [] schedule_timeout+0x19c/0x1e0
 [] wait_for_common+0xe3/0x180
 [] ? flush_workqueue+0x111/0x4d0
 [] ? try_to_wake_up+0x2d0/0x2d0
 [] wait_for_completion+0x18/0x20
 [] flush_workqueue+0x1d6/0x4d0
 [] ? flush_workqueue_prep_cwqs+0x200/0x200
 [] pciehp_release_ctrl+0x39/0x90
 [] pciehp_remove+0x25/0x30
 [] pcie_port_remove_service+0x52/0x70
 [] __device_release_driver+0x77/0xe0
 [] device_release_driver+0x29/0x40
 [] bus_remove_device+0xf1/0x140
 [] device_del+0x127/0x1c0
 [] ? resume_iter+0x40/0x40
 [] device_unregister+0x11/0x20
 [] remove_iter+0x35/0x40
 [] device_for_each_child+0x36/0x70
 [] pcie_port_device_remove+0x21/0x40
 [] pcie_portdrv_remove+0x28/0x50
 [] pci_device_remove+0x41/0xc0
 [] __device_release_driver+0x77/0xe0
 [] device_release_driver+0x29/0x40
 [] bus_remove_device+0xf1/0x140
 [] device_del+0x127/0x1c0
 [] device_unregister+0x11/0x20
 [] pci_stop_bus_device+0x8c/0xa0
 [] pci_stop_bus_device+0x35/0xa0
 [] pci_stop_and_remove_bus_device+0x11/0x20
 [] pciehp_unconfigure_device+0x91/0x190
 [] ? pciehp_power_thread+0x2d/0x110
 [] pciehp_disable_slot+0x71/0x220
 [] pciehp_power_thread+0xe6/0x110
 [] process_one_work+0x193/0x550
 [] ? process_one_work+0x131/0x550
 [] ? pciehp_disable_slot+0x220/0x220
 [] worker_thread+0x15d/0x400
 [] ? trace_hardirqs_on+0xd/0x10
 [] ? rescuer_thread+0x210/0x210
 [] kthread+0xd6/0xe0
 [] ? _raw_spin_unlock_irq+0x2b/0x50
 [] ? __init_kthread_worker+0x70/0x70
 [] ret_from_fork+0x7c/0xb0
 [] ? __init_kthread_worker+0x70/0x70
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 3.8-rc2: pciehp waitqueue hang...

2013-01-03 Thread Daniel J Blueman
On 3 January 2013 23:41, Jiang Liu  wrote:
> On 01/03/2013 11:11 PM, Daniel J Blueman wrote:
>> When the Apple thunderbolt ethernet adapter comes loose on my Macbook
>> Pro Retina (Intel DSL3510), we see pci_slot_name return
>> non-deterministic data (ie varying each boot), and we see pciehp_wp
>> remain armed with events causing the kthread to get stuck:
>>
>> tg3 :0a:00.0 eth0: Link is up at 1000 Mbps, full duplex
>> tg3 :0a:00.0 eth0: Flow control is on for TX and on for RX
>> 
>> pciehp :06:03.0:pcie24: Card not present on Slot(3)
>> tg3 :0a:00.0: tg3_abort_hw timed out, TX_MODE_ENABLE will not
>> clear MAC_TX_MODE=
>> tg3 :0a:00.0 eth0: No firmware running
>> tg3 :0a:00.0 eth0: Link is down
>> pcieport :00:01.1: System wakeup enabled by ACPI
>> pciehp :09:00.0:pcie24: unloading service driver pciehp
>> pciehp :09:00.0:pcie24: Latch open on
>> Slot(\xfff89\xffbbe\x02\xff88\x\x\xffe09\xffbbe\x02\xff88\x\xfbcon)
>> pciehp :09:00.0:pcie24: Button pressed on
>> Slot(\xfff89\xffbbe\x02\xff88\x\x\xffe09\xffbbe\x02\xff88\x\xfbcon)
>> pciehp :09:00.0:pcie24: Card present on
>> Slot(\xfff89\xffbbe\x02\xff88\x\x\xffe09\xffbbe\x02\xff88\x\xfbcon)
>> pciehp :09:00.0:pcie24: Power fault on slot
>> \xfff89\xffbbe\x02\xff88\x\x\xffe09\xffbbe\x02\xff88\x\xfbcon
>> pciehp :09:00.0:pcie24: Power fault bit 0 set
>> pciehp :09:00.0:pcie24: PCI slot
>> #\xfff89\xffbbe\x02\xff88\x\x\xffe09\xffbbe\x02\xff88\x\xfbcon
>> - powering on due to button press.
>> pciehp :09:00.0:pcie24: Link Training Error occurs
>> pciehp :09:00.0:pcie24: Failed to check link status
>> INFO: task kworker/0:1:52 blocked for more than 120 seconds.
[...]

> Hi Daniel,
> It seems like an issue caused by recursive PCIe HPC.
> Could you please help to try the patch from:
> http://www.spinics.net/lists/linux-pci/msg18625.html
> Thanks!
> Gerry

(adding Yijing)

Splendid; this fixes this failure nicely [1], finally releasing the bus.

If nothing else, I feel this should be queued for 3.8-rc3.

Many thanks,
  Daniel

--- [1]


pciehp :06:03.0:pcie24: Card not present on Slot(3)
tg3 :0a:00.0: tg3_abort_hw timed out, TX_MODE_ENABLE will not
clear MAC_TX_MODE=
tg3 :0a:00.0 eth0: No firmware running
tg3 :0a:00.0 eth0: Link is down
[sched_delayed] sched: RT throttling activated
pcieport :00:01.1: System wakeup enabled by ACPI
pciehp :09:00.0:pcie24: unloading service driver pciehp
pciehp :09:00.0:pcie24: Latch open on
Slot(\xffb0\x04Pd\x02\xff88\x\x\xff98\x04Pd\x02\xff88\x\xfbcon)
pciehp :09:00.0:pcie24: Button pressed on
Slot(\xffb0\x04Pd\x02\xff88\x\x\xff98\x04Pd\x02\xff88\x\xfbcon)
pciehp :09:00.0:pcie24: Card present on
Slot(\xffb0\x04Pd\x02\xff88\x\x\xff98\x04Pd\x02\xff88\x\xfbcon)
pciehp :09:00.0:pcie24: Power fault on slot
\xffb0\x04Pd\x02\xff88\x\x\xff98\x04Pd\x02\xff88\x\xfbcon
pciehp :09:00.0:pcie24: Power fault bit 0 set
pciehp :09:00.0:pcie24: PCI slot
#\xffb0\x04Pd\x02\xff88\x\x\xff98\x04Pd\x02\xff88\x\xfbcon
- powering on due to button press.
pciehp :09:00.0:pcie24: Link Training Error occurs
pciehp :09:00.0:pcie24: Failed to check link status
pci_bus :0a: busn_res: [bus 0a] is released
pci_bus :09: busn_res: [bus 09-0a] is released
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] HDA: Fix sound resume hang

2012-12-18 Thread Daniel J Blueman
Resuming a switcheroo'd HDA controller hangs since the completion
is one-shot (thus works the first time). Fix by using completions
that explicitly need rearming, so they remain fired until rearmed.

Signed-off-by: Daniel J Blueman 
---
 sound/pci/hda/hda_intel.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 22ecadc..e12b939 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2856,7 +2856,7 @@ static int azx_free(struct azx *chip)
azx_notifier_unregister(chip);
 
chip->init_failed = 1; /* to be sure */
-   complete(&chip->probe_wait);
+   complete_all(&chip->probe_wait);
 
if (use_vga_switcheroo(chip)) {
if (chip->disabled && chip->bus)
@@ -3482,7 +3482,7 @@ static int __devinit azx_probe(struct pci_dev *pci,
pm_runtime_put_noidle(&pci->dev);
 
dev++;
-   complete(&chip->probe_wait);
+   complete_all(&chip->probe_wait);
return 0;
 
 out_free:
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3, v5] AMD64 EDAC: Add multi-domain support

2012-11-16 Thread Daniel J Blueman

On 12/11/2012 21:24, Borislav Petkov wrote:

On Mon, Nov 05, 2012 at 02:05:24PM +0800, Daniel J Blueman wrote:

Fix the handling of memory controller detection to index the array
of detected Northbridges, allowing memory controllers over multiple
PCI domains in federated systems eg using Numascale's NumaConnect/
NumaChip.

v4: Generate linear Northbridge ID by indexing detected Northbridges
v5: Reorder functions to prevent extra function declaration; merge 4th
 patch; simplify Fam15h code; add detail to warning

Signed-off-by: Daniel J Blueman 


Acked-by: Borislav Petkov 

Btw, I don't have access to a multi-socket single-board AMD system right
now so would you please test the patchset on such a system too, if you
haven't done so yet?

Thanks a lot.


Yep, the expected memory controller indexes, population, column-strobe 
rows, banks and sysfs paths are detected on my hex-northbridge fam10h 
box with 3.7-rc5 with these patches:


EDAC MC: Ver: 3.0.0
AMD64 EDAC driver v3.4.0
EDAC amd64: DRAM ECC enabled.
EDAC amd64: F10h detected (node 0).
EDAC MC: DCT0 chip selects:
EDAC amd64: MC: 0:   0MB 1:   0MB
EDAC amd64: MC: 2: 4096MB 3: 4096MB
EDAC amd64: MC: 4:   0MB 5:   0MB
EDAC amd64: MC: 6:   0MB 7:   0MB
EDAC MC: DCT1 chip selects:
EDAC amd64: MC: 0:   0MB 1:   0MB
EDAC amd64: MC: 2: 4096MB 3: 4096MB
EDAC amd64: MC: 4:   0MB 5:   0MB
EDAC amd64: MC: 6:   0MB 7:   0MB
EDAC amd64: using x8 syndromes.
EDAC amd64: MCT channel count: 2
EDAC amd64: CS2: Registered DDR3 RAM
EDAC amd64: CS3: Registered DDR3 RAM
EDAC MC0: Giving out device to 'amd64_edac' 'F10h': DEV :00:18.2
EDAC amd64: DRAM ECC enabled.
EDAC amd64: F10h detected (node 1).
EDAC MC: DCT0 chip selects:
EDAC amd64: MC: 0:   0MB 1:   0MB
EDAC amd64: MC: 2: 4096MB 3: 4096MB
EDAC amd64: MC: 4:   0MB 5:   0MB
EDAC amd64: MC: 6:   0MB 7:   0MB
EDAC MC: DCT1 chip selects:
EDAC amd64: MC: 0:   0MB 1:   0MB
EDAC amd64: MC: 2: 4096MB 3: 4096MB
EDAC amd64: MC: 4:   0MB 5:   0MB
EDAC amd64: MC: 6:   0MB 7:   0MB
EDAC amd64: using x8 syndromes.
EDAC amd64: MCT channel count: 2
EDAC amd64: CS2: Registered DDR3 RAM
EDAC amd64: CS3: Registered DDR3 RAM
EDAC MC1: Giving out device to 'amd64_edac' 'F10h': DEV :00:19.2
EDAC amd64: DRAM ECC enabled.
EDAC amd64: F10h detected (node 2).
EDAC MC: DCT0 chip selects:
EDAC amd64: MC: 0:   0MB 1:   0MB
EDAC amd64: MC: 2: 4096MB 3: 4096MB
EDAC amd64: MC: 4:   0MB 5:   0MB
EDAC amd64: MC: 6:   0MB 7:   0MB
EDAC MC: DCT1 chip selects:
EDAC amd64: MC: 0:   0MB 1:   0MB
EDAC amd64: MC: 2: 4096MB 3: 4096MB
EDAC amd64: MC: 4:   0MB 5:   0MB
EDAC amd64: MC: 6:   0MB 7:   0MB
EDAC amd64: using x8 syndromes.
EDAC amd64: MCT channel count: 2
EDAC amd64: CS2: Registered DDR3 RAM
EDAC amd64: CS3: Registered DDR3 RAM
EDAC MC2: Giving out device to 'amd64_edac' 'F10h': DEV :00:1a.2
EDAC amd64: DRAM ECC enabled.
EDAC amd64: F10h detected (node 3).
EDAC MC: DCT0 chip selects:
EDAC amd64: MC: 0:   0MB 1:   0MB
EDAC amd64: MC: 2: 4096MB 3: 4096MB
EDAC amd64: MC: 4:   0MB 5:   0MB
EDAC amd64: MC: 6:   0MB 7:   0MB
EDAC MC: DCT1 chip selects:
EDAC amd64: MC: 0:   0MB 1:   0MB
EDAC amd64: MC: 2: 4096MB 3: 4096MB
EDAC amd64: MC: 4:   0MB 5:   0MB
EDAC amd64: MC: 6:   0MB 7:   0MB
EDAC amd64: using x8 syndromes.
EDAC amd64: MCT channel count: 2
EDAC amd64: CS2: Registered DDR3 RAM
EDAC amd64: CS3: Registered DDR3 RAM
EDAC MC3: Giving out device to 'amd64_edac' 'F10h': DEV :00:1b.2
EDAC amd64: DRAM ECC enabled.
EDAC amd64: F10h detected (node 4).
EDAC MC: DCT0 chip selects:
EDAC amd64: MC: 0:   0MB 1:   0MB
EDAC amd64: MC: 2: 4096MB 3: 4096MB
EDAC amd64: MC: 4:   0MB 5:   0MB
EDAC amd64: MC: 6:   0MB 7:   0MB
EDAC MC: DCT1 chip selects:
EDAC amd64: MC: 0:   0MB 1:   0MB
EDAC amd64: MC: 2: 4096MB 3: 4096MB
EDAC amd64: MC: 4:   0MB 5:   0MB
EDAC amd64: MC: 6:   0MB 7:   0MB
EDAC amd64: using x8 syndromes.
EDAC amd64: MCT channel count: 2
EDAC amd64: CS2: Registered DDR3 RAM
EDAC amd64: CS3: Registered DDR3 RAM
EDAC MC4: Giving out device to 'amd64_edac' 'F10h': DEV :00:1c.2
EDAC amd64: DRAM ECC enabled.
EDAC amd64: F10h detected (node 5).
EDAC MC: DCT0 chip selects:
EDAC amd64: MC: 0:   0MB 1:   0MB
EDAC amd64: MC: 2: 4096MB 3: 4096MB
EDAC amd64: MC: 4:   0MB 5:   0MB
EDAC amd64: MC: 6:   0MB 7:   0MB
EDAC MC: DCT1 chip selects:
EDAC amd64: MC: 0:   0MB 1:   0MB
EDAC amd64: MC: 2: 4096MB 3: 4096MB
EDAC amd64: MC: 4:   0MB 5:   0MB
EDAC amd64: MC: 6:   0MB 7:   0MB
EDAC amd64: using x8 syndromes.
EDAC amd64: MCT channel count: 2
EDAC amd64: CS2: Registered DDR3 RAM
EDAC amd64: CS3: Registered DDR3 RAM
EDAC MC5: Giving out device to 'amd64_edac' 'F10h': DEV :00:1d.2
EDAC PCI0: Giving out device to module 'amd64_edac' controller 'EDAC PCI 
controller': DEV ':00:18.2' (POLLED)


root@ibm-x3755-01:/sys/devices/system/edac# ls -d mc/mc*/{rank*,csrow*}
mc/mc0/csrow

[PATCH] AHCI: fix build warning when PM && !PM_SLEEP

2012-11-19 Thread Daniel J Blueman
Change the conditional around ahci_suspend/resume to the same as the
SIMPLE_DEV_PM_OPS macro that uses these functions, fixing an unused build
warning.

Signed-off-by: Daniel J Blueman 
---
 drivers/ata/ahci_platform.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c
index b1ae480..b7078af 100644
--- a/drivers/ata/ahci_platform.c
+++ b/drivers/ata/ahci_platform.c
@@ -238,7 +238,7 @@ static int __devexit ahci_remove(struct platform_device 
*pdev)
return 0;
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int ahci_suspend(struct device *dev)
 {
struct ahci_platform_data *pdata = dev_get_platdata(dev);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/3, v6] AMD64 EDAC: Add multi-domain support

2012-11-19 Thread Daniel J Blueman
Fix the handling of memory controller detection to index the array
of detected Northbridges, allowing memory controllers over multiple
PCI domains in federated systems eg using Numascale's NumaConnect/
NumaChip.

v4: Generate linear Northbridge ID by indexing detected Northbridges
v5: Reorder functions to prevent extra function declaration; merge 4th
patch; simplify Fam15h code; add detail to warning
v6: Remove unused variable after simplification

Signed-off-by: Daniel J Blueman 
---
 arch/x86/include/asm/amd_nb.h |   13 +++
 drivers/edac/amd64_edac.c |   48 +
 drivers/edac/amd64_edac.h |6 --
 3 files changed, 38 insertions(+), 29 deletions(-)

diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index b3341e9..9f5532a 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -81,6 +81,19 @@ static inline struct amd_northbridge *node_to_amd_nb(int 
node)
return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : 
NULL;
 }
 
+static inline u16 amd_get_node_id(struct pci_dev *pdev)
+{
+   int i;
+
+   for (i = 0; i != amd_nb_num(); i++)
+   if (pci_domain_nr(node_to_amd_nb(i)->misc->bus) == 
pci_domain_nr(pdev->bus) &&
+   PCI_SLOT(node_to_amd_nb(i)->misc->devfn) == 
PCI_SLOT(pdev->devfn))
+   return i;
+
+   WARN(1, "Unable to find AMD Northbridge identifier for %s\n", 
pci_name(pdev));
+   return 0;
+}
+
 #else
 
 #define amd_nb_num(x)  0
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index cc8e7c7..8de8873 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -982,6 +982,24 @@ static u64 get_error_address(struct mce *m)
return addr;
 }
 
+static struct pci_dev *pci_get_related_function(unsigned int vendor,
+   unsigned int device,
+   struct pci_dev *related)
+{
+   struct pci_dev *dev = NULL;
+
+   dev = pci_get_device(vendor, device, dev);
+   while (dev) {
+   if (pci_domain_nr(dev->bus) == pci_domain_nr(related->bus) &&
+   (dev->bus->number == related->bus->number) &&
+   (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
+   break;
+   dev = pci_get_device(vendor, device, dev);
+   }
+
+   return dev;
+}
+
 static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
 {
struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -1001,11 +1019,12 @@ static void read_dram_base_limit_regs(struct amd64_pvt 
*pvt, unsigned range)
 
/* Factor in CC6 save area by reading dst node's limit reg */
if (c->x86 == 0x15) {
-   struct pci_dev *f1 = NULL;
-   u8 nid = dram_dst_node(pvt, range);
+   struct pci_dev *misc, *f1 = NULL;
+   u16 nid = dram_dst_node(pvt, range);
u32 llim;
 
-   f1 = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x18 + nid, 
1));
+   misc = node_to_amd_nb(nid)->misc;
+   f1 = pci_get_related_function(misc->vendor, 
PCI_DEVICE_ID_AMD_15H_NB_F1, misc);
if (WARN_ON(!f1))
return;
 
@@ -1712,23 +1731,6 @@ static struct amd64_family_type amd64_family_types[] = {
},
 };
 
-static struct pci_dev *pci_get_related_function(unsigned int vendor,
-   unsigned int device,
-   struct pci_dev *related)
-{
-   struct pci_dev *dev = NULL;
-
-   dev = pci_get_device(vendor, device, dev);
-   while (dev) {
-   if ((dev->bus->number == related->bus->number) &&
-   (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
-   break;
-   dev = pci_get_device(vendor, device, dev);
-   }
-
-   return dev;
-}
-
 /*
  * These are tables of eigenvectors (one per line) which can be used for the
  * construction of the syndrome tables. The modified syndrome search algorithm
@@ -2546,7 +2548,7 @@ static int amd64_init_one_instance(struct pci_dev *F2)
struct mem_ctl_info *mci = NULL;
struct edac_mc_layer layers[2];
int err = 0, ret;
-   u8 nid = get_node_id(F2);
+   u8 nid = amd_get_node_id(F2);
 
ret = -ENOMEM;
pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
@@ -2637,7 +2639,7 @@ err_ret:
 static int __devinit amd64_probe_one_instance(struct pci_dev *pdev,
 const struct pci_device_id 
*mc_type)
 {
-   u8 nid = get_node_id(pdev);
+   u8 nid = amd_get_node_id(pdev);
struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
s

[PATCH 2/3, v3] AMD64 EDAC: Support >255 memory controllers

2012-11-19 Thread Daniel J Blueman
As the AMD64 last-level-cache ID is 16-bits and federated systems
eg using Numascale's NumaConnect/NumaChip can have more than 255 memory
controllers, use 16-bits to store the ID.

v2: Avoid change to intlv_en variable
v3: Drop unneeded change to index

Signed-off-by: Daniel J Blueman 
---
 drivers/edac/amd64_edac.c |   17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 8de8873..6e3f002 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -942,7 +942,8 @@ static u64 get_error_address(struct mce *m)
struct amd64_pvt *pvt;
u64 cc6_base, tmp_addr;
u32 tmp;
-   u8 mce_nid, intlv_en;
+   u16 mce_nid;
+   u8 intlv_en;
 
if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7)
return addr;
@@ -2298,7 +2299,7 @@ out:
return ret;
 }
 
-static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on)
+static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on)
 {
cpumask_var_t cmask;
int cpu;
@@ -2336,7 +2337,7 @@ static int toggle_ecc_err_reporting(struct ecc_settings 
*s, u8 nid, bool on)
return 0;
 }
 
-static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid,
+static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid,
   struct pci_dev *F3)
 {
bool ret = true;
@@ -2388,7 +2389,7 @@ static bool enable_ecc_error_reporting(struct 
ecc_settings *s, u8 nid,
return ret;
 }
 
-static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid,
+static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid,
struct pci_dev *F3)
 {
u32 value, mask = 0x3;  /* UECC/CECC enable */
@@ -2427,7 +2428,7 @@ static const char *ecc_msg =
"'ecc_enable_override'.\n"
" (Note that use of the override may cause unknown side effects.)\n";
 
-static bool ecc_enabled(struct pci_dev *F3, u8 nid)
+static bool ecc_enabled(struct pci_dev *F3, u16 nid)
 {
u32 value;
u8 ecc_en = 0;
@@ -2548,7 +2549,7 @@ static int amd64_init_one_instance(struct pci_dev *F2)
struct mem_ctl_info *mci = NULL;
struct edac_mc_layer layers[2];
int err = 0, ret;
-   u8 nid = amd_get_node_id(F2);
+   u16 nid = amd_get_node_id(F2);
 
ret = -ENOMEM;
pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
@@ -2639,7 +2640,7 @@ err_ret:
 static int __devinit amd64_probe_one_instance(struct pci_dev *pdev,
 const struct pci_device_id 
*mc_type)
 {
-   u8 nid = amd_get_node_id(pdev);
+   u16 nid = amd_get_node_id(pdev);
struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
struct ecc_settings *s;
int ret = 0;
@@ -2689,7 +2690,7 @@ static void __devexit amd64_remove_one_instance(struct 
pci_dev *pdev)
 {
struct mem_ctl_info *mci;
struct amd64_pvt *pvt;
-   u8 nid = amd_get_node_id(pdev);
+   u16 nid = amd_get_node_id(pdev);
struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
struct ecc_settings *s = ecc_stngs[nid];
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/3, v3] AMD64 EDAC: Cleanup type usage to be consistent

2012-11-19 Thread Daniel J Blueman
As the Northbridge IDs are at most 16-bits, use the same type
consistently and cleanup some indexes to use smaller types.

v2: Drop changes for later cleanups
v3: Further changes suggested by Boris

Signed-off-by: Daniel J Blueman 
---
 arch/x86/include/asm/amd_nb.h|2 +-
 arch/x86/include/asm/processor.h |2 +-
 arch/x86/kernel/cpu/amd.c|4 ++--
 drivers/edac/amd64_edac.c|   16 
 drivers/edac/amd64_edac.h|6 +++---
 5 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 9f5532a..b0815a0 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -76,7 +76,7 @@ static inline bool amd_nb_has_feature(unsigned feature)
return ((amd_northbridges.flags & feature) == feature);
 }
 
-static inline struct amd_northbridge *node_to_amd_nb(int node)
+static inline struct amd_northbridge *node_to_amd_nb(u16 node)
 {
return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : 
NULL;
 }
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index ad1fc85..eb3ba58 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -934,7 +934,7 @@ extern void start_thread(struct pt_regs *regs, unsigned 
long new_ip,
 extern int get_tsc_mode(unsigned long adr);
 extern int set_tsc_mode(unsigned int val);
 
-extern int amd_get_nb_id(int cpu);
+extern u16 amd_get_nb_id(int cpu);
 
 struct aperfmperf {
u64 aperf, mperf;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f7e98a2..52cab1f 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -364,9 +364,9 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
 #endif
 }
 
-int amd_get_nb_id(int cpu)
+u16 amd_get_nb_id(int cpu)
 {
-   int id = 0;
+   u16 id = 0;
 #ifdef CONFIG_SMP
id = per_cpu(cpu_llc_id, cpu);
 #endif
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 6e3f002..b27412a 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -239,7 +239,7 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci)
  * DRAM base/limit associated with node_id
  */
 static bool amd64_base_limit_match(struct amd64_pvt *pvt, u64 sys_addr,
-  unsigned nid)
+  u8 nid)
 {
u64 addr;
 
@@ -265,7 +265,7 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct 
mem_ctl_info *mci,
u64 sys_addr)
 {
struct amd64_pvt *pvt;
-   unsigned node_id;
+   u8 node_id;
u32 intlv_en, bits;
 
/*
@@ -1021,7 +1021,7 @@ static void read_dram_base_limit_regs(struct amd64_pvt 
*pvt, unsigned range)
/* Factor in CC6 save area by reading dst node's limit reg */
if (c->x86 == 0x15) {
struct pci_dev *misc, *f1 = NULL;
-   u16 nid = dram_dst_node(pvt, range);
+   u8 nid = dram_dst_node(pvt, range);
u32 llim;
 
misc = node_to_amd_nb(nid)->misc;
@@ -1348,7 +1348,7 @@ static u8 f1x_determine_channel(struct amd64_pvt *pvt, 
u64 sys_addr,
 }
 
 /* Convert the sys_addr to the normalized DCT address */
-static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, unsigned range,
+static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range,
 u64 sys_addr, bool hi_rng,
 u32 dct_sel_base_addr)
 {
@@ -1399,7 +1399,7 @@ static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, 
unsigned range,
  * checks if the csrow passed in is marked as SPARED, if so returns the new
  * spare row
  */
-static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow)
+static int f10_process_possible_spare(struct amd64_pvt *pvt, u16 dct, int 
csrow)
 {
int tmp_cs;
 
@@ -1424,7 +1424,7 @@ static int f10_process_possible_spare(struct amd64_pvt 
*pvt, u8 dct, int csrow)
  * -EINVAL:  NOT FOUND
  * 0..csrow = Chip-Select Row
  */
-static int f1x_lookup_addr_in_dct(u64 in_addr, u32 nid, u8 dct)
+static int f1x_lookup_addr_in_dct(u64 in_addr, u8 nid, u8 dct)
 {
struct mem_ctl_info *mci;
struct amd64_pvt *pvt;
@@ -2256,7 +2256,7 @@ static int init_csrows(struct mem_ctl_info *mci)
 }
 
 /* get all cores on this DCT */
-static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, unsigned nid)
+static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid)
 {
int cpu;
 
@@ -2266,7 +2266,7 @@ static void get_cpus_on_this_dct_cpumask(struct cpumask 
*mask, unsigned nid)
 }
 
 /* check MCG_CTL on all the cpus on this node */
-static bool amd64_nb_mce_bank_enabled_on_node(unsigned nid)
+static bool amd64_nb_mce_bank_enabled_on_node(u16 nid)
 {
cpumask_var_t mask;
int cpu, nbe;
diff --git a/drivers/edac/amd64_edac.h b/dri

Re: 3.9-rc6 ext4: free_rb_tree_fname oops

2013-06-23 Thread Daniel J Blueman
On 16 April 2013 15:37, Daniel J Blueman  wrote:
> When using e4defrag on an ext4 filesystem created a month ago, I ran
> into this fatal page fault [1]
>  while running e4defrag on 3.9-rc6 (Ubuntu mainline).
>
> e2fsdump output is at http://quora.org/2012/e2fsdump.txt ; let me know
> if you need any more info.

With 3.9.6 mainline, I got the exact same protection fault at
free_rb_tree_fname() from ext4_htree_free_dir_info() [1]. This
suggests use-after-free, as there's no pagetable mapping.

There is nothing special about my setups, so there is a fair chance it's
reproducible there with e4defrag on a few-month-old filesystem and
recent kernels.

Thanks,
  Daniel

> --- [1]
>
> general protection fault:  [#1] SMP
> Modules linked in: btrfs raid6_pq zlib_deflate xor ufs qnx4 hfsplus
> hfs minix ntfs msdos jfs xfs libcrc32c reiserfs ext2 8021q garp
> parport_pc ppdev rfcomm bnep nfsd auth_rpcgss nfs_acl nfs lockd sunrpc
> fscache snd_hda_codec_hdmi snd_hda_codec_realtek coretemp kvm_intel
> kvm snd_hda_intel snd_hda_codec snd_hwdep ghash_clmulni_intel arc4
> bridge iwldvm joydev i915 cryptd snd_pcm mac80211 stp llc
> snd_page_alloc drm_kms_helper drm snd_seq_midi snd_seq_midi_event
> snd_rawmidi snd_seq psmouse snd_seq_device btusb ir_sony_decoder
> ir_rc5_decoder ir_lirc_codec lirc_dev ir_sanyo_decoder
> ir_mce_kbd_decoder ir_jvc_decoder serio_raw ir_rc6_decoder iwlwifi
> ir_nec_decoder snd_timer i2c_algo_bit rc_rc6_mce microcode nuvoton_cir
> snd rc_core bluetooth soundcore mac_hid cfg80211 mei lpc_ich video lp
> parport hid_generic usbhid hid r8169 ahci libahci
> CPU 0
> Pid: 18139, comm: e4defrag Not tainted 3.9.0-030900rc6-generic
> #201304080035 ZOTAC XX/XX
> RIP: 0010:[] [] 
> free_rb_tree_fname+0x28/0xb0
> RSP: 0018:8801134a9e28 EFLAGS: 00010202
> RAX: 0036b44b8001 RBX: 880080e09018 RCX: 000180400028
> RDX: 0036b44b8001 RSI: 0001 RDI: 88013b001700
> RBP: 8801134a9e48 R08:  R09: eadbe380
> R10: 812381bc R11: 0206 R12: 
> R13: 880036f8ec80 R14: 880036f8ebc8 R15: 8800ade074c0
> FS: 7fd1923d7740() GS:88013fa0() knlGS:
> CS: 0010 DS:  ES:  CR0: 80050033
> CR2: 013974d8 CR3: 0001352f2000 CR4: 000407f0
> DR0:  DR1:  DR2: 
> DR3:  DR6: 0ff0 DR7: 0400
> Process e4defrag (pid: 18139, threadinfo 8801134a8000, task
> 880138d9c5f0)
> Stack:
>  880036f8ec80 4010 880021a2f900 8800ade074c0
>  8801134a9e68 81238f36 4010 88013890f000
>  8801134a9e78 81238f6a 8801134a9ec8 8119f57a
> Call Trace:
>  [] ext4_htree_free_dir_info+0x16/0x30
>  [] ext4_release_dir+0x1a/0x20
>  [] __fput+0xba/0x240
>  [] fput+0xe/0x10
>  [] task_work_run+0xc8/0xf0
>  [] do_notify_resume+0xaa/0xc0
>  [] int_signal+0x12/0x17
> Code: 90 90 90 66 66 66 66 90 55 48 89 e5 41 56 41 55 49 89 fd 41 54
> 53 48 8b 1f 48 85 db 74 67 48 8b 43 10 eb 11 0f 1f 80 00 00 00 00 <48>
> 8b 50 10 48 89 c3 48 89 d0 48 85 c0 75 f1 48 8b 43 08 48 85
> RIP [] free_rb_tree_fname+0x28/0xb0
>  RSP 
> ---[ end trace 02741f61e6b3c24b ]---
> general protection fault:  [#2] SMP
> Modules linked in: btrfs raid6_pq zlib_deflate xor ufs qnx4 hfsplus
> hfs minix ntfs msdos jfs xfs libcrc32c reiserfs ext2 8021q garp
> parport_pc ppdev rfcomm bnep nfsd auth_rpcgss nfs_acl nfs lockd sunrpc
> fscache snd_hda_codec_hdmi snd_hda_codec_realtek coretemp kvm_intel
> kvm snd_hda_intel snd_hda_codec snd_hwdep ghash_clmulni_intel arc4
> bridge iwldvm joydev i915 cryptd snd_pcm mac80211 stp llc
> snd_page_alloc drm_kms_helper drm snd_seq_midi snd_seq_midi_event
> snd_rawmidi snd_seq psmouse snd_seq_device btusb ir_sony_decoder
> ir_rc5_decoder ir_lirc_codec lirc_dev ir_sanyo_decoder
> ir_mce_kbd_decoder ir_jvc_decoder serio_raw ir_rc6_decoder iwlwifi
> ir_nec_decoder snd_timer i2c_algo_bit rc_rc6_mce microcode nuvoton_cir
> snd rc_core bluetooth soundcore mac_hid cfg80211 mei lpc_ich video lp
> parport hid_generic usbhid hid r8169 ahci libahci
> CPU 0
> Pid: 18139, comm: e4defrag Tainted: G   D   3.9.0-030900rc6-generic
> #201304080035 ZOTAC XX/XX
> RIP: 0010:[] [] 
> free_rb_tree_fname+0x28/0xb0
> RSP: 0018:8801134a9b78 EFLAGS: 00010202
> RAX: 0036b44b8001 RBX: 880080e09018 RCX: 0001
> RDX: 0036b44b8001 RSI: 88013890fb00 RDI: 880036f8ef80
> RBP: 8801134a9b98 R08:  R09: 
> R10: 88013890fb10 R11:  R12: 4010
> R13: 880036f8ef80 R14: 8800ade07108 R15

13GB dcache+inode cache hash tables

2013-06-25 Thread Daniel J Blueman
As memory capacity increases, we see the dentry and inode cache hash 
tables grow to wild sizes [1], eg 13GB is consumed on a 4.5TB system.


Perhaps a better approach adds a linear component to an exponent to give 
tuned scaling, given that spatial locality is an advantage in hash table 
and careful use of resources.


The same approach would fit to other hash tables (mount-cache, TCP 
established, TCP bind, UDP, UDP-Lite, Dquot-cache) with different 
coefficients, so perhaps we could generalise.


If so what are reasonable reference points and assumptions?

Thanks,
  Daniel

--- [1]

1GB:
Dentry cache hash table entries: 131072 (order: 7, 524288 bytes)
Inode-cache hash table entries: 65536 (order: 6, 262144 bytes)

8GB:
Dentry cache hash table entries: 1048576 (order: 11, 8388608 bytes)
Inode-cache hash table entries: 524288 (order: 10, 4194304 bytes)

1TB:
Dentry cache hash table entries: 134217728 (order: 18, 1073741824 bytes)
Inode-cache hash table entries: 67108864 (order: 17, 536870912 bytes)

4.5TB
Dentry cache hash table entries: 1073741824 (order: 21, 8589934592 bytes)
Inode-cache hash table entries: 536870912 (order: 20, 4294967296 bytes)
--
Daniel J Blueman
Principal Software Engineer, Numascale Asia
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Transparent on-demand memory setup initialization embedded in the (GFP) buddy allocator

2013-06-26 Thread Daniel J Blueman

On Wednesday, June 26, 2013 9:30:02 PM UTC+8, Andrew Morton wrote:
>
> On Wed, 26 Jun 2013 11:22:48 +0200 Ingo Molnar  wrote:
>
> > except that on 32 TB
> > systems we don't spend ~2 hours initializing 8,589,934,592 page heads.
>
> That's about a million a second which is crazy slow - even my
> prehistoric desktop is 100x faster than that.
>
> Where's all this time actually being spent?

The complexity of a directory-lookup architecture to make the 
(intrinsically unscalable) cache-coherency protocol scalable gives you a 
~1us roundtrip to remote NUMA nodes.


Probably a lot of time is spent in some memsets, and RMW cycles which 
are setting page bits, which are intrinsically synchronous, so the 
initialising core can't get to 12 or so outstanding memory transactions.


Since EFI memory ranges have a flag to state if they are zeroed (which 
may be a fair assumption for memory on non-bootstrap processor NUMA 
nodes), we can probably collapse the RMWs to just writes.


A normal write will require a coherency cycle, then a fetch and a 
writeback when it's evicted from the cache. For this purpose, 
non-temporal writes would eliminate the cache line fetch and give a 
massive increase in bandwidth. We wouldn't even need a store-fence as 
the initialising core is the only one online.


Daniel
--
Daniel J Blueman
Principal Software Engineer, Numascale Asia
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 13GB dcache+inode cache hash tables

2013-06-27 Thread Daniel J Blueman

On 25/06/2013 17:48, Eric Dumazet wrote:

On Tue, 2013-06-25 at 16:56 +0800, Daniel J Blueman wrote:

As memory capacity increases, we see the dentry and inode cache hash
tables grow to wild sizes [1], eg 13GB is consumed on a 4.5TB system.

Perhaps a better approach adds a linear component to an exponent to give
tuned scaling, given that spatial locality is an advantage in hash table
and careful use of resources.

The same approach would fit to other hash tables (mount-cache, TCP
established, TCP bind, UDP, UDP-Lite, Dquot-cache) with different
coefficients, so perhaps we could generalise.



TCP hash table is limited to 512K slots, unless overridden.
TCP bind limited to 64K slots.
UDP limited to 64K slots.


If so what are reasonable reference points and assumptions?


I do not know what you have in mind, please show us a patch ;)

[...]

Alright, I'll see what I can get together in the next week or so when I 
can fit it in.


Dan
--
Daniel J Blueman
Principal Software Engineer, Numascale Asia
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


BCM57765: timeout waiting for hardware interrupt

2013-08-23 Thread Daniel J Blueman
Chris et al,

with 3.11-rc6 and the Broadcom BCM57765 card reader [1] in my Macbook
Retina, interrupts are not seen during card insertion:

sdhci: Secure Digital Host Controller Interface driver
sdhci: Copyright(c) Pierre Ossman
sdhci-pci :03:00.1: SDHCI controller found [14e4:16bc] (rev 10)
mmc0: no vqmmc regulator found
mmc0: no vmmc regulator found
mmc0: SDHCI controller on PCI [:03:00.1] using ADMA
[...]
mmc0: Timeout waiting for hardware interrupt.

However /proc/interrupts shows 4 interrupts occurring at insertion time:

 17:  6  0  0  0  0  0
 0  0   IO-APIC-fasteoi   mmc0

Debugging shows the interrupt handler inspecting the state of the
hardware and finding no work to do; are the missing regulators
unexpected?

--- [1]

$ sudo lspci -s 03:00.1 -v
03:00.1 SD Host controller: Broadcom Corporation NetXtreme BCM57765
Memory Card Reader (rev 10) (prog-if 01)
Subsystem: Broadcom Corporation Device 96bc
Flags: bus master, fast devsel, latency 0, IRQ 17
Memory at c182 (64-bit, prefetchable) [size=64K]
Capabilities: [48] Power Management version 3
Capabilities: [58] MSI: Enable- Count=1/1 Maskable- 64bit+
Capabilities: [ac] Express Endpoint, MSI 00
Capabilities: [100] Advanced Error Reporting
Capabilities: [150] Power Budgeting 
Capabilities: [160] Virtual Channel
Kernel driver in use: sdhci-pci
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: False-positive RCU stall warnings on large systems...

2013-03-05 Thread Daniel J Blueman

On 02/26/2013 12:32 AM, Paul E. McKenney wrote:

On Wed, Feb 20, 2013 at 11:35:57AM +0800, Daniel J Blueman wrote:

On 20/02/2013 02:16, Paul E. McKenney wrote:

On Wed, Feb 20, 2013 at 12:34:12AM +0800, Daniel J Blueman wrote:

Hi Paul,

On some of our larger servers with many hundreds of cores and when
under high duress, we can see scheduler RCU stall warnings [1], so
find we have to increase the hardcoded RCU_STALL_RAT_DELAY up from 2
and RCU_JIFFIES_TILL_FORCE_QS up from 3.


Disabling RCU_FAST_NO_HZ will likely remove the need to adjust
RCU_JIFFIES_TILL_FORCE_QS.  Changes in my -rcu tree will likely remove the
need to adjust these two in 3.10 or 3.11, depending on how testing goes.


Is there a more sustainable way to account for this to avoid it
being hard-coded, such as making it and dependent timeouts a
fraction of CONFIG_RCU_CPU_STALL_TIMEOUT?


Maybe...  But what this means is that your system is so heavily loaded
that the CPU in question is failing to make it to RCU's softirq handler
in two jiffies worth of time.  This is a function of workload rather
than of the number of CPUs.


On the other hand, perhaps this is just caused by clock jitter (eg
due to distance from a contended clock source)? So increasing these
a bit may just be adequate in general...


Hmmm...  What version of the kernel are you running?


The example below occurs with v3.8, but we see the same with
previous kernels eg v3.5.


There is always the rcutree.rcu_cpu_stall_timeout parameter that sets
the stall timeout in seconds.  This may be specified at boot time or
via sysfs at runtime.  The default is now 21 seconds.


Of course, when using the local TSC, you'd see no jitter relative to
coherent transactions (eg memory writes), but when the HPET is used
across a large system, coherent transactions to distant cores are
just so much faster, as there's massive congestion to the shared
HPET behind various HT and PCIe bridges. This could be where the
jitter arises from, if I'm guessing jitter is the problem here.


Agreed, timing jitter could cause problems.  That said, the code uses
the jiffies counter to compute these timings.  Are you seeing similar
memory contention on the jiffies counter itself?


The contention we see in general is when cores contend for a spinlock 
and when there are lots of concurrent HPET reads (Opterons allow only 4 
outstanding reads to the IO hub).


It's probably possible to reproduce rcu_sched stalls on a quad-socket 
box with 64 cores and the right workload with the TSC disabled.


In 3.9-rc1 with RCU_FAST_NO_HZ disabled, we've seen stalls of 4 jiffies 
[2], but without the "Stall ended" message. This is with a workload 
which allocates ~256GB of memory over 192 cores.


Thanks,
  Daniel


--- [1]

[ 3939.010085] INFO: rcu_sched detected stalls on CPUs/tasks: {}
(detected by 1, t=29662 jiffies, g=3053, c=3052, q=598)
[ 3939.020008] INFO: Stall ended before state dump start


--- [2]

[10660.110620] INFO: rcu_sched self-detected stall on CPU { 39}  (t=4 
jiffies g=1169 c=1168 q=8)

[10660.110620] Pid: 11747, comm: sp.B Not tainted 3.9.0-rc1-advanced #6
[10660.110620] Call Trace:
[10660.110620][] ? 
rcu_check_callbacks+0x2d2/0x5f0

[10660.110620]  [] ? run_posix_cpu_timers+0x3a/0x790
[10660.110620]  [] ? update_process_times+0x3f/0x80
[10660.110620]  [] ? tick_sched_handle.isra.8+0x30/0x40
[10660.110620]  [] ? tick_sched_timer+0x42/0x70
[10660.110620]  [] ? __run_hrtimer.isra.30+0x4a/0xe0
[10660.110620]  [] ? hrtimer_interrupt+0xe5/0x220
[10660.110620]  [] ? smp_apic_timer_interrupt+0x63/0xa0
[10660.110620]  [] ? apic_timer_interrupt+0x67/0x70
--
Daniel J Blueman
Principal Software Engineer, Numascale Asia
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


3.9-rc6 ext4: free_rb_tree_fname oops

2013-04-16 Thread Daniel J Blueman
/0x480
 [] oops_end+0xb9/0x100
 [] die+0x58/0x90
 [] do_general_protection+0xdc/0x160
 [] general_protection+0x28/0x30
 [] ? free_rb_tree_fname+0x5c/0xb0
 [] ? free_rb_tree_fname+0x28/0xb0
 [] ? free_rb_tree_fname+0x5c/0xb0
 [] ext4_htree_free_dir_info+0x16/0x30
 [] ext4_release_dir+0x1a/0x20
 [] __fput+0xba/0x240
 [] fput+0xe/0x10
 [] task_work_run+0xc8/0xf0
 [] do_notify_resume+0xaa/0xc0
 [] int_signal+0x12/0x17
Code: 90 90 90 66 66 66 66 90 55 48 89 e5 41 56 41 55 49 89 fd 41 54
53 48 8b 1f 48 85 db 74 67 48 8b 43 10 eb 11 0f 1f 80 00 00 00 00 <48>
8b 50 10 48 89 c3 48 89 d0 48 85 c0 75 f1 48 8b 43 08 48 85
RIP [] free_rb_tree_fname+0x28/0xb0
 RSP 
---[ end trace 02741f61e6b3c24c ]---
Fixing recursive fault but reboot is needed!
--
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


rts5139 polling overhead...

2012-08-06 Thread Daniel J Blueman
Hi Edwin,

The Realsil/Realtek rts5139 card reader driver polls its hardware at
20Hz and has racked up 25 minutes of processor time over a few days on
this Sandy Bridge media centre box, without any cards inserted. This
is more than the sum of all the other processes (including X):

$ ps -ef | grep rts5139
root   691 2  0 Aug03 ?00:00:36 [rts5139-control]
root   693 2  0 Aug03 ?00:25:36 [rts5139-polling]

The kernel is stock 3.5.0 without debug; would it help if I log a bug
report or eg collect further detail?

Thanks,
  Daniel
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[3.5.1] tg3 waitqueue hang on hotplug remove...

2012-08-09 Thread Daniel J Blueman
Hi Matt, Michael,

On my Macbook Retina with 3.5.1, I see the tg3 external adapter (via
Thunderbolt) get logically disconnected after a while despite
remaining connected (Thunderbolt issues).

The problem, though, is that the pciehp_wq workqueue fails to complete
flushing from the call to pcie_cleanup_slot (inlined in
pciehp_release_ctrl) [1]; looks like tg3_tx or so is missing a
finish_wait(), no?

Daniel

--- [1]

pcieport :00:01.0: irq 42 for MSI/MSI-X
pcieport :00:01.1: irq 43 for MSI/MSI-X
pcieport :00:01.2: irq 44 for MSI/MSI-X
pcieport :05:00.0: irq 45 for MSI/MSI-X
pcieport :06:00.0: irq 46 for MSI/MSI-X
pcieport :06:03.0: irq 47 for MSI/MSI-X
pcieport :06:04.0: irq 48 for MSI/MSI-X
pcieport :06:05.0: irq 49 for MSI/MSI-X
pcieport :06:06.0: irq 50 for MSI/MSI-X
pcieport :08:00.0: irq 51 for MSI/MSI-X
pcieport :09:00.0: irq 52 for MSI/MSI-X
pci_hotplug: PCI Hot Plug PCI Core version: 0.5
pciehp :06:00.0:pcie24: HPC vendor_id 8086 device_id 1547 ss_vid
 ss_did 
pciehp :06:00.0:pcie24: service driver pciehp loaded
pciehp :06:03.0:pcie24: HPC vendor_id 8086 device_id 1547 ss_vid
 ss_did 
pciehp :06:03.0:pcie24: service driver pciehp loaded
pciehp :06:04.0:pcie24: HPC vendor_id 8086 device_id 1547 ss_vid
 ss_did 
pciehp :06:04.0:pcie24: service driver pciehp loaded
pciehp :06:05.0:pcie24: HPC vendor_id 8086 device_id 1547 ss_vid
 ss_did 
pciehp :06:05.0:pcie24: service driver pciehp loaded
pciehp :06:06.0:pcie24: HPC vendor_id 8086 device_id 1547 ss_vid
 ss_did 
pciehp :06:06.0:pcie24: service driver pciehp loaded
pciehp :09:00.0:pcie24: HPC vendor_id 8086 device_id 1549 ss_vid 0 ss_did 0
pciehp :09:00.0:pcie24: service driver pciehp loaded
pciehp: PCI Express Hot Plug Controller Driver version: 0.4
tg3 :0a:00.0: eth0: Tigon3 [partno(BCM957762) rev 57766000] (PCI
Express) MAC address 40:6c:8f:36:1a:67
tg3 :0a:00.0: eth0: attached PHY is 57765 (10/100/1000Base-T
Ethernet) (WireSpeed[1], EEE[0])
tg3 :0a:00.0: eth0: RXcsums[1] LinkChgREG[0] MIirq[0] ASF[0] TSOcap[1]
tg3 :0a:00.0: eth0: dma_rwctrl[0001] dma_mask[64-bit]
...
pciehp :06:03.0:pcie24: Card not present on Slot(3)
tg3 :0a:00.0: tg3_abort_hw timed out, TX_MODE_ENABLE will not
clear MAC_TX_MODE=
tg3 :0a:00.0: eth1: No firmware running
tg3 :0a:00.0: eth1: Link is down
[sched_delayed] sched: RT throttling activated
pciehp :09:00.0:pcie24: unloading service driver pciehp
INFO: task kworker/0:2:3072 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
kworker/0:2   D 8180cc20   0 3072   2 0x
 880237f75800 0046 0001 880237f757b0
 880237f75fd8 880237f75fd8 880237f75fd8 00013940
 88025f3b5c00 880237eb5c00  7fff
Call Trace:
 [] schedule+0x29/0x70
 [] schedule_timeout+0x2a5/0x320
 [] ? default_spin_lock_flags+0x9/0x10
 [] ? pde_put+0x79/0xa0
 [] wait_for_common+0xdf/0x180
 [] ? pde_put+0x79/0xa0
 [] ? try_to_wake_up+0x200/0x200
 [] wait_for_completion+0x1d/0x20
 [] flush_workqueue+0x143/0x400
 [] ? pciehp_disable_slot+0x1f0/0x1f0
 [] pciehp_release_ctrl+0x46/0xa0
 [] pciehp_remove+0x27/0x30
 [] pcie_port_remove_service+0x57/0x70
 [] __device_release_driver+0x7c/0xe0
 [] device_release_driver+0x2c/0x40
 [] bus_remove_device+0xe1/0x120
 [] ? resume_iter+0x40/0x40
 [] device_del+0x120/0x1b0
 [] ? resume_iter+0x40/0x40
 [] device_unregister+0x16/0x30
 [] remove_iter+0x3d/0x50
 [] device_for_each_child+0x44/0x70
 [] pcie_port_device_remove+0x26/0x40
 [] pcie_portdrv_remove+0x16/0x30
 [] pci_device_remove+0x46/0x110
 [] __device_release_driver+0x7c/0xe0
 [] device_release_driver+0x2c/0x40
 [] bus_remove_device+0xe1/0x120
 [] device_del+0x120/0x1b0
 [] device_unregister+0x16/0x30
 [] pci_stop_bus_device+0x94/0xa0
 [] pci_stop_bus_device+0x43/0xa0
 [] pci_stop_and_remove_bus_device+0x16/0x30
 [] pciehp_unconfigure_device+0x91/0x190
 [] pciehp_disable_slot+0x75/0x1f0
 [] pciehp_power_thread+0xe3/0x110
 [] process_one_work+0x11a/0x480
 [] worker_thread+0x165/0x370
 [] ? manage_workers.isra.29+0x130/0x130
 [] kthread+0x93/0xa0
 [] kernel_thread_helper+0x4/0x10
 [] ? kthread_freezable_should_stop+0x70/0x70
 [] ? gs_change+0x13/0x13
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [2.6.25-rc2, 2.6.24-rc8] page allocation failure...

2008-02-17 Thread Daniel J Blueman
I'm still hitting this with e1000e on 2.6.25-rc2, 10 times again.

It's clearly non-fatal, but then do we expect it to occur?

Daniel

--- [dmesg]

[ 1250.822786] swapper: page allocation failure. order:3, mode:0x4020
[ 1250.822786] Pid: 0, comm: swapper Not tainted 2.6.25-rc2-119 #2
[ 1250.822786]
[ 1250.822786] Call Trace:
[ 1250.822786][] __alloc_pages+0x34e/0x3a0
[ 1250.822786]  [] ? __netdev_alloc_skb+0x1f/0x40
[ 1250.822786]  [] __slab_alloc+0x102/0x3d0
[ 1250.822786]  [] ? __netdev_alloc_skb+0x1f/0x40
[ 1250.822786]  [] __kmalloc_track_caller+0x7b/0xc0
[ 1250.822786]  [] __alloc_skb+0x6f/0x160
[ 1250.822786]  [] __netdev_alloc_skb+0x1f/0x40
[ 1250.822786]  [] e1000_alloc_rx_buffers+0x1ed/0x260
[ 1250.822786]  [] e1000_clean_rx_irq+0x22a/0x330
[ 1250.822786]  [] e1000_clean+0x1e1/0x540
[ 1250.822786]  [] ? tick_program_event+0x45/0x70
[ 1250.822786]  [] net_rx_action+0x9a/0x150
[ 1250.822786]  [] __do_softirq+0x74/0xf0
[ 1250.822786]  [] call_softirq+0x1c/0x30
[ 1250.822786]  [] do_softirq+0x3d/0x80
[ 1250.822786]  [] irq_exit+0x85/0x90
[ 1250.822786]  [] do_IRQ+0x85/0x100
[ 1250.822786]  [] ? mwait_idle+0x0/0x50
[ 1250.822786]  [] ret_from_intr+0x0/0xa
[ 1250.822786][] ? mwait_idle+0x45/0x50
[ 1250.822786]  [] ? enter_idle+0x22/0x30
[ 1250.822786]  [] ? cpu_idle+0x74/0xa0
[ 1250.822786]  [] ? rest_init+0x55/0x60
[ 1250.822786]
[ 1250.822786] Mem-info:
[ 1250.822786] DMA per-cpu:
[ 1250.822786] CPU0: hi:0, btch:   1 usd:   0
[ 1250.822786] CPU1: hi:0, btch:   1 usd:   0
[ 1250.822786] DMA32 per-cpu:
[ 1250.822786] CPU0: hi:  186, btch:  31 usd: 179
[ 1250.822786] CPU1: hi:  186, btch:  31 usd: 159
[ 1250.822786] Active:59792 inactive:67236 dirty:4775 writeback:4779 unstable:0
[ 1250.822786]  free:2232 slab:122927 mapped:3846 pagetables:715 bounce:0
[ 1250.822786] DMA free:3984kB min:36kB low:44kB high:52kB active:4kB
inactive:560kB present:10076kB pages_scanned:0 all_unreclaimable? no
[ 1250.822786] lowmem_reserve[]: 0 992 992 992
[ 1250.822786] DMA32 free:4944kB min:4008kB low:5008kB high:6012kB
active:239164kB inactive:268384kB present:1015936kB pages_scanned:0
all_unreclaimable? no
[ 1250.822786] lowmem_reserve[]: 0 0 0 0
[ 1250.822786] DMA: 6*4kB 1*8kB 1*16kB 1*32kB 1*64kB 0*128kB 1*256kB
1*512kB 1*1024kB 1*2048kB 0*4096kB = 3984kB
[ 1250.822786] DMA32: 836*4kB 148*8kB 18*16kB 0*32kB 1*64kB 1*128kB
0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB = 5008kB
[ 1250.822786] 88530 total pagecache pages
[ 1250.822786] Swap cache: add 33, delete 32, find 0/0
[ 1250.822786] Free swap  = 248872kB
[ 1250.822786] Total swap = 248996kB
[ 1250.822786] Free swap:   248872kB
[ 1250.822786] 261600 pages of RAM
[ 1250.822786] 5448 reserved pages
[ 1250.822786] 55715 pages shared
[ 1250.822786] 1 pages swap cached

On Feb 14, 2008 8:40 PM, Daniel J Blueman <[EMAIL PROTECTED]> wrote:
> One of my x86-64 (1GB) systems experienced order 1 page alloc failure
> after ~4 days of uptime and 9 times again in the hours since.
>
> I understand the behaviour is that the reclaim kthread should have had
> a couple of free pages around for this type of allocation, but obviously
> didn't (a number of times).
>
> What other information may help with this?
>
> Daniel
>
> --- dmesg
> swapper: page allocation failure. order:1, mode:0x4020
> Pid: 0, comm: swapper Not tainted 2.6.24-rc8-117 #1
>
> Call Trace:
>   [] __alloc_pages+0x336/0x390
> [] __netdev_alloc_skb+0x17/0x40
> [] __slab_alloc+0x145/0x3d0
> [] __netdev_alloc_skb+0x17/0x40
> [] __kmalloc_track_caller+0xf6/0x100
> [] __alloc_skb+0x6f/0x150
> [] __netdev_alloc_skb+0x17/0x40
> [] e1000_alloc_rx_buffers+0x17a/0x3a0
> [] ip_local_deliver_finish+0x83/0x1a0
> [] e1000_clean_rx_irq+0x34d/0x550
> [] e1000_intr_msi+0x81/0x110
> [] handle_IRQ_event+0x34/0x70
> [] handle_edge_irq+0xc9/0x150
> [] do_IRQ+0x7b/0x100
> [] mwait_idle+0x0/0x50
> [] ret_from_intr+0x0/0xa
>   [] lapic_next_event+0x0/0x10
> [] mwait_idle+0x42/0x50
> [] cpu_idle+0x75/0xa0
> [] start_kernel+0x25a/0x2e0
> [] _sinittext+0x117/0x120
>
> Mem-info:
> DMA per-cpu:
> CPU0: Hot: hi:0, btch:   1 usd:   0   Cold: hi:0, btch:   1 usd:  
>  0
> CPU1: Hot: hi:0, btch:   1 usd:   0   Cold: hi:0, btch:   1 usd:  
>  0
> DMA32 per-cpu:
> CPU0: Hot: hi:  186, btch:  31 usd: 155   Cold: hi:   62, btch:  15 usd:  
> 60
> CPU1: Hot: hi:  186, btch:  31 usd:  14   Cold: hi:   62, btch:  15 usd:  
> 42
> Active:114794 inactive:53917 dirty:8137 writeback:5025 unstable:0
>  free:3549 slab:79629 mapped:4643 pagetables:1017 bounce:0
> DMA free:3968kB min:40kB low:48kB high:60kB active:72kB inactive:120kB
> present:10236kB pages_scanned:0 all_unreclaimable? no
> lowmem_reserve[]: 0 992 992 992
> DMA32 free:10228kB min:4008kB low:5008kB high:6012kB active:459104kB
> inactive:215548kB present:

3.5.0 iwlagn AP crash...

2012-07-22 Thread Daniel J Blueman
Hi Johannes et al,

When running my Centrino Wireless-N 130 BGN (rev 0xb0) card in nl80211
AP mode with hostapd on linux 3.5.0, I immediately hit this fatal
pagefault [1].

I can cook a debug kernel, reproduce, disassemble the code and do some
quick analysis, if that helps get the ball rolling?

Thanks!
  Daniel

--- [1]

BUG: unable to handle kernel NULL pointer dereference at  (null)
IP: [] ieee80211_ave_rssi+0xd/0x50 [mac80211]
PGD 116616067 PUD 115c22067 PMD 0
Oops:  [#1] SMP
CPU 0
Modules linked in:
 netconsole configfs snd_hda_codec_hdmi snd_hda_codec_realtek xt_hl
ip6t_rt nf_conntrack_ipv6 nf_defrag_ipv6 ipt_REJECT snd_hda_intel
snd_hda_codec snd_hwdep xt_limit xt_tcpudp xt_addrtype snd_pcm
ir_lirc_codec lirc_dev ir_mce_kbd_decoder ir_sanyo_decoder
ir_sony_decoder xt_state ir_jvc_decoder snd_seq_midi snd_rawmidi
ip6table_filter ip6_tables joydev ir_rc6_decoder snd_seq_midi_event
nf_conntrack_netbios_ns nf_conntrack_broadcast snd_seq hid_generic
arc4 ir_rc5_decoder nf_nat_ftp nf_nat snd_timer nf_conntrack_ipv4
snd_seq_device nf_defrag_ipv4 usbhid i915 hid coretemp drm_kms_helper
iwlwifi mac80211 nf_conntrack_ftp ir_nec_decoder drm i2c_algo_bit
rts5139(C) kvm_intel btusb snd nf_conntrack kvm psmouse bluetooth
cfg80211 mac_hid ghash_clmulni_intel rc_rc6_mce lpc_ich soundcore
iptable_filter snd_page_alloc mei ip_tables x_tables nuvoton_cir
rc_core serio_raw cryptd microcode video r8169
Pid: 0, comm: swapper/0 Tainted: G C  3.5.0-030500-generic
#201207211835 ZOTAC XX
/XX
RIP: 0010:[] []
ieee80211_ave_rssi+0xd/0x50 [mac80211]
RSP: 0018:88011fa03c60 EFLAGS: 00010286
RDX:  RSI: 880115b26008 RDI: 
RBP: 88011fa03c70 R08: a03b82e8 R09: 
R10:  R11: 0001 R12: 880115b26008
R13: 880115b26008 R14: 880117bd1f50 R15: 880115b26000
FS: () GS:88011fa0() knlGS:
CS: 0010 DS:  ES:  CR0: 8005003b
CR2:  CR3: 000116371000 CR4: 000407f0
DR0:  DR1:  DR2: 
DR3:  DR6: 0ff0 DR7: 0400
Process swapper/0 (pid: 0, threadinfo 81c0, task 81c13440)
Stack:
 8801 8801 88011fa03c90 a037d997
 880117bd1f50 880115b26000 88011fa03cc0 a037de09
 880117bd1f40  880117bd1f40 88011fa03d98
Call Trace:
 
 [] iwlagn_fill_txpower_mode+0x27/0x100 [iwlwifi]
 [] iwlagn_bt_coex_profile_notif+0x189/0x250 [iwlwifi]
 [] iwl_rx_dispatch+0xbc/0x120 [iwlwifi]
 [] iwl_rx_handle+0xcf/0x190 [iwlwifi]
 [] iwl_irq_tasklet+0x353/0x9b0 [iwlwifi]
 [] tasklet_action+0x64/0xe0
 [] __do_softirq+0xa8/0x210
 [] ? _raw_spin_lock+0xe/0x20
 [] call_softirq+0x1c/0x30
 [] do_softirq+0x65/0xa0
 [] irq_exit+0x8e/0xb0
 [] do_IRQ+0x63/0xe0
 [] common_interrupt+0x6a/0x6a
 
 [] ? default_spin_lock_flags+0x9/0x10
 [] ? intel_idle+0xea/0x150
 [] ? intel_idle+0xcc/0x150
 [] cpuidle_enter+0x19/0x20
 [] cpuidle_idle_call+0xac/0x2a0
 [] cpu_idle+0xcf/0x120
 [] rest_init+0x72/0x74
 [] start_kernel+0x3b7/0x3c4
 [] ? repair_env_string+0x5a/0x5a
 [] x86_64_start_reservations+0x131/0x135
 [] ? early_idt_handlers+0x120/0x120
 [] x86_64_start_kernel+0xcd/0xdc
Code: 48 89 45 d8 48 8b 5d d8 4c 39 e3 75 c1 90 48 83 c4 10 5b 41 5c
41 5d 41 5e 5d c3 0f 1f 00 55 48 89 e5 48 83 ec 10 66 66 90 3f 02 75
05 8b 47 8c c9 c3 31 c0 80 3d 85 52 04 00 01 74 f3
RIP [] ieee80211_ave_rssi+0xd/0x50 [mac80211]
 RSP 
CR2: 
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 3.5.0 iwlagn AP crash...

2012-07-22 Thread Daniel J Blueman
On 22 July 2012 17:15, Daniel J Blueman  wrote:
> Hi Johannes et al,
>
> When running my Centrino Wireless-N 130 BGN (rev 0xb0) card in nl80211
> AP mode with hostapd on linux 3.5.0, I immediately hit this fatal
> pagefault [1].
>
> I can cook a debug kernel, reproduce, disassemble the code and do some
> quick analysis, if that helps get the ball rolling?
>
> Thanks!
>   Daniel
>
> --- [1]
>
> BUG: unable to handle kernel NULL pointer dereference at  (null)
> IP: [] ieee80211_ave_rssi+0xd/0x50 [mac80211]

From my debug kernel, sdata is clearly NULL:

(gdb) list *0x815b74f8
0x815b74f8 is in ieee80211_ave_rssi (net/mac80211/util.c:1801).
1796    int ieee80211_ave_rssi(struct ieee80211_vif *vif)
1797    {
1798            struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
1799            struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1800
1801            if (WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION)) {
1802                    /* non-managed type inferfaces */
1803                    return 0;
1804            }
1805            return ifmgd->ave_beacon_signal;
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH, repost] Prevent interface errors with Seagate FreeAgent GoFlex

2012-07-22 Thread Daniel J Blueman
When using my Seagate FreeAgent GoFlex eSATAp external disk enclosure,
interface errors are always seen until 1.5Gbps is negotiated [1]. This
occurs using any disk in the enclosure, and when the disk is connected
directly with a generic passive eSATAp cable, we see stable 3Gbps
operation as expected.

Blacklist 3Gbps mode to avoid data loss and the ~30s delay that the bus
reset and renegotiation incur.

Signed-off-by: Daniel J Blueman 
---
 drivers/ata/libata-core.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 28db50b..0781510 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4119,6 +4119,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {

/* Devices which aren't very happy with higher link speeds */
{ "WD My Book", NULL,   ATA_HORKAGE_1_5_GBPS, },
+   { "Seagate FreeAgent GoFlex",   NULL,   ATA_HORKAGE_1_5_GBPS, },

/*
 * Devices which choke on SETXFER.  Applies only if both the

--- [1]


ata5: exception Emask 0x10 SAct 0x0 SErr 0x405 action 0xe frozen
ata5: irq_stat 0x00400040, connection status changed
ata5: SError: { PHYRdyChg CommWake DevExch }
ata5: hard resetting link
ata5: SATA link up 3.0 Gbps (SStatus 123 SControl 300)
ata5.00: ATA-8: Seagate FreeAgent GoFlex, 0110, max UDMA/133
ata5.00: 2930277168 sectors, multi 0: LBA48
ata5.00: configured for UDMA/133
ata5: EH complete
scsi 4:0:0:0: Direct-Access   ATA   Seagate FreeAgen 0110 PQ: 0 ANSI: 5
sd 4:0:0:0: [sdb] 2930277168 512-byte logical blocks: (1.50 TB/1.36 TiB)
sd 4:0:0:0: [sdb] 4096-byte physical blocks
sd 4:0:0:0: [sdb] Write Protect is off
sd 4:0:0:0: [sdb] Mode Sense: 00 3a 00 00
sd 4:0:0:0: [sdb] Write cache: enabled, read cache: enabled, doesn't
support DPO or FUA
sd 4:0:0:0: Attached scsi generic sg1 type 0
 sdb: unknown partition table
sd 4:0:0:0: [sdb] Attached SCSI disk
EXT4-fs (dm-0): mounted filesystem with ordered data mode. Opts: (null)
ata5.00: exception Emask 0x10 SAct 0x0 SErr 0x400100 action 0x6 frozen
ata5.00: irq_stat 0x0800, interface fatal error
ata5: SError: { UnrecovData Handshk }
ata5.00: failed command: WRITE DMA EXT
ata5.00: cmd 35/00:00:38:db:61/00:04:6d:00:00/e0 tag 0 dma 524288 out
 res 50/00:00:37:db:61/00:00:6d:00:00/e0 Emask 0x10 (ATA bus error)
ata5.00: status: { DRDY }
ata5: hard resetting link
ata5: link is slow to respond, please be patient (ready=0)
ata5: COMRESET failed (errno=-16)
ata5: hard resetting link
ata5: link is slow to respond, please be patient (ready=0)
ata5: COMRESET failed (errno=-16)
ata5: hard resetting link
ata5: link is slow to respond, please be patient (ready=0)
ata5: COMRESET failed (errno=-16)
ata5: limiting SATA link speed to 1.5 Gbps
ata5: hard resetting link
ata5: SATA link up 1.5 Gbps (SStatus 113 SControl 310)
ata5.00: configured for UDMA/133
sd 4:0:0:0: [sdb]
Result: hostbyte=0x00 driverbyte=0x08
sd 4:0:0:0: [sdb]
Sense Key : 0xb [current] [descriptor]
Descriptor sense data with sense descriptors (in hex):
72 0b 00 00 00 00 00 0c 00 0a 80 00 00 00 00 00
6d 61 db 37
sd 4:0:0:0: [sdb]
ASC=0x0 ASCQ=0x0
sd 4:0:0:0: [sdb] CDB:
cdb[0]=0x2a: 2a 00 6d 61 db 38 00 04 00 00
end_request: I/O error, dev sdb, sector 1835129656
Buffer I/O error on device dm-0, logical block 229390950
Buffer I/O error on device dm-0, logical block 229390951
Buffer I/O error on device dm-0, logical block 229390952
Buffer I/O error on device dm-0, logical block 229390953
Buffer I/O error on device dm-0, logical block 229390954
Buffer I/O error on device dm-0, logical block 229390955
Buffer I/O error on device dm-0, logical block 229390956
Buffer I/O error on device dm-0, logical block 229390957
Buffer I/O error on device dm-0, logical block 229390958
Buffer I/O error on device dm-0, logical block 229390959
Buffer I/O error on device dm-0, logical block 229390960
Buffer I/O error on device dm-0, logical block 229390961
Buffer I/O error on device dm-0, logical block 229390962
Buffer I/O error on device dm-0, logical block 229390963
Buffer I/O error on device dm-0, logical block 229390964
Buffer I/O error on device dm-0, logical block 229390965
Buffer I/O error on device dm-0, logical block 229390966
Buffer I/O error on device dm-0, logical block 229390967
Buffer I/O error on device dm-0, logical block 229390968
Buffer I/O error on device dm-0, logical block 229390969
Buffer I/O error on device dm-0, logical block 229390970
Buffer I/O error on device dm-0, logical block 229390971
Buffer I/O error on device dm-0, logical block 229390972
Buffer I/O error on device dm-0, logical block 229390973
Buffer I/O error on device dm-0, logical block 229390974
Buffer I/O error on device dm-0, logical block 229390975
Buffer I/O error on device dm-0, logical block 229390976
Buffer I/O error on device dm-0, logical block 229390977
Buffer I/O error on device dm-0, logical block 229390978
Buffer I/O error on device dm-0, lo

[PATCH v3] Add device ID for Bluetooth on Macbook Pro 2012

2012-08-16 Thread Daniel J Blueman
Add the device ID for supporting the Macbook Pro 2012 'MacBookPro10,1'.
The bluetooth device presents itself as:

T:  Bus=02 Lev=04 Prnt=04 Port=02 Cnt=03 Dev#=  8 Spd=12  MxCh= 0
D:  Ver= 2.00 Cls=ff(vend.) Sub=01 Prot=01 MxPS=64 #Cfgs=  1
P:  Vendor=05ac ProdID=8286 Rev=00.86
S:  Manufacturer=Apple Inc.
S:  Product=Bluetooth USB Host Controller
C:  #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=0mA
I:  If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=01 Prot=01 Driver=(none)
I:  If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=(none)
I:  If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none)
I:  If#= 3 Alt= 0 #EPs= 0 Cls=fe(app. ) Sub=01 Prot=01 Driver=(none)

Patch originally written and tested by clipcarl (forums.opensuse.org).

Signed-off-by: Daniel J Blueman 
Signed-off-by: Henrik Rydberg 
---
 drivers/bluetooth/btusb.c |3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index e272214..61f4eb7 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -76,6 +76,9 @@ static struct usb_device_id btusb_table[] = {
/* Apple MacMini5,1 */
{ USB_DEVICE(0x05ac, 0x8281) },
 
+   /* Apple MacBookPro10,1 */
+   { USB_DEVICE(0x05ac, 0x8286) },
+
/* AVM BlueFRITZ! USB v2.0 */
{ USB_DEVICE(0x057c, 0x3800) },
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


nouveau Macbook EDID fix upstreaming...

2012-08-17 Thread Daniel J Blueman
It looks like Ryan's Nouveau patch [1,2] to get the I2C working for
EDID (thus modelines) is crucial for avoiding the Nvidia binary
drivers on MacbookPro 2012s.

Are there any plans, or any chance, for it to be upstreamed, e.g. for 3.6-rc3?

Many thanks,
  Daniel

--- [1]

diff -urNp a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c    2012-07-08 22:48:05.589828510 -0500
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c    2012-07-15 00:28:18.578864693 -0500
@@ -6461,6 +6461,9 @@ nouveau_run_vbios_init(struct drm_device
}
}

+   if (!bios->execute)
+   nouveau_gpio_reset(dev);
+
return ret;
 }

[2] https://bugs.freedesktop.org/show_bug.cgi?id=51971
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] Add MSI support for SDHCI PCI hosts

2012-08-17 Thread Daniel J Blueman
Allow module parameter 'enable_msi' to request an MSI interrupt for
hosts where available (presently PCI). Useful as a workaround on
platforms where the legacy interrupt is broken.

Signed-off-by: Daniel J Blueman 
---
 drivers/mmc/host/sdhci-pci.c |   30 ++
 drivers/mmc/host/sdhci.c |   23 +++
 drivers/mmc/host/sdhci.h |2 ++
 3 files changed, 55 insertions(+)

diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index 504da71..fbde589 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -934,6 +934,34 @@ static int sdhci_pci_enable_dma(struct sdhci_host *host)
return 0;
 }
 
+static int sdhci_pci_enable_msi(struct sdhci_host *host)
+{
+   struct sdhci_pci_slot *slot;
+   struct pci_dev *pdev;
+   int ret;
+
+   slot = sdhci_priv(host);
+   pdev = slot->chip->pdev;
+
+   ret = pci_enable_msi(pdev);
+   if (ret)
+   return ret;
+
+   host->irq = pdev->irq;
+   return 0;
+}
+
+static void sdhci_pci_disable_msi(struct sdhci_host *host)
+{
+   struct sdhci_pci_slot *slot;
+   struct pci_dev *pdev;
+
+   slot = sdhci_priv(host);
+   pdev = slot->chip->pdev;
+
+   pci_disable_msi(pdev);
+}
+
 static int sdhci_pci_8bit_width(struct sdhci_host *host, int width)
 {
u8 ctrl;
@@ -976,6 +1004,8 @@ static void sdhci_pci_hw_reset(struct sdhci_host *host)
 
 static struct sdhci_ops sdhci_pci_ops = {
.enable_dma = sdhci_pci_enable_dma,
+   .enable_msi = sdhci_pci_enable_msi,
+   .disable_msi= sdhci_pci_disable_msi,
.platform_8bit_width= sdhci_pci_8bit_width,
.hw_reset   = sdhci_pci_hw_reset,
 };
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 9a11dc3..9fa2180 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -45,6 +45,7 @@
 
 static unsigned int debug_quirks = 0;
 static unsigned int debug_quirks2;
+static bool enable_msi;
 
 static void sdhci_finish_data(struct sdhci_host *);
 
@@ -2433,6 +2434,9 @@ int sdhci_suspend_host(struct sdhci_host *host)
 
free_irq(host->irq, host);
 
+   if (host->ops->disable_msi && enable_msi)
+   host->ops->disable_msi(host);
+
return ret;
 }
 
@@ -2447,6 +2451,12 @@ int sdhci_resume_host(struct sdhci_host *host)
host->ops->enable_dma(host);
}
 
+   if (host->ops->enable_msi && enable_msi) {
+   ret = host->ops->enable_msi(host);
+   if (ret)
+   return ret;
+   }
+
ret = request_irq(host->irq, sdhci_irq, IRQF_SHARED,
  mmc_hostname(host->mmc), host);
if (ret)
@@ -3024,6 +3034,12 @@ int sdhci_add_host(struct sdhci_host *host)
host->tuning_timer.function = sdhci_tuning_timer;
}
 
+   if (host->ops->enable_msi && enable_msi) {
+   ret = host->ops->enable_msi(host);
+   if (ret)
+   return ret;
+   }
+
ret = request_irq(host->irq, sdhci_irq, IRQF_SHARED,
mmc_hostname(mmc), host);
if (ret) {
@@ -3071,6 +3087,8 @@ int sdhci_add_host(struct sdhci_host *host)
 reset:
sdhci_reset(host, SDHCI_RESET_ALL);
free_irq(host->irq, host);
+   if (host->ops->disable_msi && enable_msi)
+   host->ops->disable_msi(host);
 #endif
 untasklet:
tasklet_kill(&host->card_tasklet);
@@ -3114,6 +3132,9 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
 
free_irq(host->irq, host);
 
+   if (host->ops->disable_msi && enable_msi)
+   host->ops->disable_msi(host);
+
del_timer_sync(&host->timer);
 
tasklet_kill(&host->card_tasklet);
@@ -3162,6 +3183,7 @@ module_exit(sdhci_drv_exit);
 
 module_param(debug_quirks, uint, 0444);
 module_param(debug_quirks2, uint, 0444);
+module_param(enable_msi, bool, 0444);
 
 MODULE_AUTHOR("Pierre Ossman ");
 MODULE_DESCRIPTION("Secure Digital Host Controller Interface core driver");
@@ -3169,3 +3191,4 @@ MODULE_LICENSE("GPL");
 
 MODULE_PARM_DESC(debug_quirks, "Force certain quirks.");
 MODULE_PARM_DESC(debug_quirks2, "Force certain other quirks.");
+MODULE_PARM_DESC(enable_msi, "Enable MSI interrupt support where possible.");
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 97653ea..df4e003 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -264,6 +264,8 @@ struct sdhci_ops {
void(*set_clock)(struct sdhci_host *host, unsigned int clock);
 
int (*enable_dma)(struct sdhci_host *host);
+   int (*enable_msi)(struct sd

  1   2   3   4   >