Author: jhb
Date: Tue Mar 16 16:01:19 2010
New Revision: 205214
URL: http://svn.freebsd.org/changeset/base/205214

Log:
  - Extend the machine check record structure to include several fields useful
    for parsing model-specific and other fields in machine check events
    including the global machine check capabilities and status registers,
    CPU identification, and the FreeBSD CPU ID.
  - Report these added fields in the console log of a machine check so that
    a record structure can be reconstituted from the console messages.
  - Parse new architectural errors including memory controller errors.
  
  MFC after:    1 week

Modified:
  head/sys/amd64/amd64/mca.c
  head/sys/amd64/include/mca.h
  head/sys/amd64/include/specialreg.h
  head/sys/i386/i386/mca.c
  head/sys/i386/include/mca.h
  head/sys/i386/include/specialreg.h

Modified: head/sys/amd64/amd64/mca.c
==============================================================================
--- head/sys/amd64/amd64/mca.c  Tue Mar 16 15:55:41 2010        (r205213)
+++ head/sys/amd64/amd64/mca.c  Tue Mar 16 16:01:19 2010        (r205214)
@@ -186,19 +186,46 @@ mca_error_request(uint16_t mca_error)
        return ("???");
 }
 
+static const char *
+mca_error_mmtype(uint16_t mca_error)
+{
+
+       switch ((mca_error & 0x70) >> 4) {
+       case 0x0:
+               return ("GEN");
+       case 0x1:
+               return ("RD");
+       case 0x2:
+               return ("WR");
+       case 0x3:
+               return ("AC");
+       case 0x4:
+               return ("MS");
+       }
+       return ("???");
+}
+
 /* Dump details about a single machine check. */
 static void __nonnull(1)
 mca_log(const struct mca_record *rec)
 {
        uint16_t mca_error;
 
-       printf("MCA: bank %d, status 0x%016llx\n", rec->mr_bank,
+       printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank,
            (long long)rec->mr_status);
-       printf("MCA: CPU %d ", rec->mr_apic_id);
+       printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n",
+           (long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status);
+       printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor,
+           rec->mr_cpu_id, rec->mr_apic_id);
+       printf("MCA: CPU %d ", rec->mr_cpu);
        if (rec->mr_status & MC_STATUS_UC)
                printf("UNCOR ");
-       else
+       else {
                printf("COR ");
+               if (rec->mr_mcg_cap & MCG_CAP_TES_P)
+                       printf("(%lld) ", ((long long)rec->mr_status &
+                           MC_STATUS_COR_COUNT) >> 38);
+       }
        if (rec->mr_status & MC_STATUS_PCC)
                printf("PCC ");
        if (rec->mr_status & MC_STATUS_OVER)
@@ -221,6 +248,9 @@ mca_log(const struct mca_record *rec)
        case 0x0004:
                printf("FRC error");
                break;
+       case 0x0005:
+               printf("internal parity error");
+               break;
        case 0x0400:
                printf("internal timer error");
                break;
@@ -245,6 +275,17 @@ mca_log(const struct mca_record *rec)
                        break;
                }
 
+               /* Memory controller error. */
+               if ((mca_error & 0xef80) == 0x0080) {
+                       printf("%s channel ", mca_error_mmtype(mca_error));
+                       if ((mca_error & 0x000f) != 0x000f)
+                               printf("%d", mca_error & 0x000f);
+                       else
+                               printf("??");
+                       printf(" memory error");
+                       break;
+               }
+               
                /* Cache error. */
                if ((mca_error & 0xef00) == 0x0100) {
                        printf("%sCACHE %s %s error",
@@ -322,6 +363,11 @@ mca_check_status(int bank, struct mca_re
                rec->mr_misc = rdmsr(MSR_MC_MISC(bank));
        rec->mr_tsc = rdtsc();
        rec->mr_apic_id = PCPU_GET(apic_id);
+       rec->mr_mcg_cap = rdmsr(MSR_MCG_CAP);
+       rec->mr_mcg_status = rdmsr(MSR_MCG_STATUS);
+       rec->mr_cpu_id = cpu_id;
+       rec->mr_cpu_vendor_id = cpu_vendor_id;
+       rec->mr_cpu = PCPU_GET(cpuid);
 
        /*
         * Clear machine check.  Don't do this for uncorrectable

Modified: head/sys/amd64/include/mca.h
==============================================================================
--- head/sys/amd64/include/mca.h        Tue Mar 16 15:55:41 2010        (r205213)
+++ head/sys/amd64/include/mca.h        Tue Mar 16 16:01:19 2010        (r205214)
@@ -37,6 +37,11 @@ struct mca_record {
        uint64_t        mr_tsc;
        int             mr_apic_id;
        int             mr_bank;
+       uint64_t        mr_mcg_cap;
+       uint64_t        mr_mcg_status;
+       int             mr_cpu_id;
+       int             mr_cpu_vendor_id;
+       int             mr_cpu;
 };
 
 #ifdef _KERNEL

Modified: head/sys/amd64/include/specialreg.h
==============================================================================
--- head/sys/amd64/include/specialreg.h Tue Mar 16 15:55:41 2010        (r205213)
+++ head/sys/amd64/include/specialreg.h Tue Mar 16 16:01:19 2010        (r205214)
@@ -267,6 +267,7 @@
 #define        MSR_MTRR16kBase         0x258
 #define        MSR_MTRR4kBase          0x268
 #define        MSR_PAT                 0x277
+#define        MSR_MC0_CTL2            0x280
 #define        MSR_MTRRdefType         0x2ff
 #define        MSR_MC0_CTL             0x400
 #define        MSR_MC0_STATUS          0x401
@@ -352,8 +353,10 @@
 #define        MCG_CAP_COUNT           0x000000ff
 #define        MCG_CAP_CTL_P           0x00000100
 #define        MCG_CAP_EXT_P           0x00000200
+#define        MCG_CAP_CMCI_P          0x00000400
 #define        MCG_CAP_TES_P           0x00000800
 #define        MCG_CAP_EXT_CNT         0x00ff0000
+#define        MCG_CAP_SER_P           0x01000000
 #define        MCG_STATUS_RIPV         0x00000001
 #define        MCG_STATUS_EIPV         0x00000002
 #define        MCG_STATUS_MCIP         0x00000004
@@ -363,9 +366,14 @@
 #define        MSR_MC_STATUS(x)        (MSR_MC0_STATUS + (x) * 4)
 #define        MSR_MC_ADDR(x)          (MSR_MC0_ADDR + (x) * 4)
 #define        MSR_MC_MISC(x)          (MSR_MC0_MISC + (x) * 4)
+#define        MSR_MC_CTL2(x)          (MSR_MC0_CTL2 + (x))    /* If MCG_CAP_CMCI_P */
 #define        MC_STATUS_MCA_ERROR     0x000000000000ffffUL
 #define        MC_STATUS_MODEL_ERROR   0x00000000ffff0000UL
 #define        MC_STATUS_OTHER_INFO    0x01ffffff00000000UL
+#define        MC_STATUS_COR_COUNT     0x001fffc000000000UL    /* If MCG_CAP_TES_P */
+#define        MC_STATUS_TES_STATUS    0x0060000000000000UL    /* If MCG_CAP_TES_P */
+#define        MC_STATUS_AR            0x0080000000000000UL    /* If MCG_CAP_CMCI_P */
+#define        MC_STATUS_S             0x0100000000000000UL    /* If MCG_CAP_CMCI_P */
 #define        MC_STATUS_PCC           0x0200000000000000UL
 #define        MC_STATUS_ADDRV         0x0400000000000000UL
 #define        MC_STATUS_MISCV         0x0800000000000000UL
@@ -373,6 +381,10 @@
 #define        MC_STATUS_UC            0x2000000000000000UL
 #define        MC_STATUS_OVER          0x4000000000000000UL
 #define        MC_STATUS_VAL           0x8000000000000000UL
+#define        MC_MISC_RA_LSB          0x000000000000003fUL    /* If MCG_CAP_SER_P */
+#define        MC_MISC_ADDRESS_MODE    0x00000000000001c0UL    /* If MCG_CAP_SER_P */
+#define        MC_CTL2_THRESHOLD       0x0000000000003fffUL
+#define        MC_CTL2_CMCI_EN         0x0000000040000000UL
 
 /*
  * The following four 3-byte registers control the non-cacheable regions.

Modified: head/sys/i386/i386/mca.c
==============================================================================
--- head/sys/i386/i386/mca.c    Tue Mar 16 15:55:41 2010        (r205213)
+++ head/sys/i386/i386/mca.c    Tue Mar 16 16:01:19 2010        (r205214)
@@ -177,19 +177,46 @@ mca_error_request(uint16_t mca_error)
        return ("???");
 }
 
+static const char *
+mca_error_mmtype(uint16_t mca_error)
+{
+
+       switch ((mca_error & 0x70) >> 4) {
+       case 0x0:
+               return ("GEN");
+       case 0x1:
+               return ("RD");
+       case 0x2:
+               return ("WR");
+       case 0x3:
+               return ("AC");
+       case 0x4:
+               return ("MS");
+       }
+       return ("???");
+}
+
 /* Dump details about a single machine check. */
 static void __nonnull(1)
 mca_log(const struct mca_record *rec)
 {
        uint16_t mca_error;
 
-       printf("MCA: bank %d, status 0x%016llx\n", rec->mr_bank,
+       printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank,
            (long long)rec->mr_status);
-       printf("MCA: CPU %d ", rec->mr_apic_id);
+       printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n",
+           (long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status);
+       printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor,
+           rec->mr_cpu_id, rec->mr_apic_id);
+       printf("MCA: CPU %d ", rec->mr_cpu);
        if (rec->mr_status & MC_STATUS_UC)
                printf("UNCOR ");
-       else
+       else {
                printf("COR ");
+               if (rec->mr_mcg_cap & MCG_CAP_TES_P)
+                       printf("(%lld) ", ((long long)rec->mr_status &
+                           MC_STATUS_COR_COUNT) >> 38);
+       }
        if (rec->mr_status & MC_STATUS_PCC)
                printf("PCC ");
        if (rec->mr_status & MC_STATUS_OVER)
@@ -212,6 +239,9 @@ mca_log(const struct mca_record *rec)
        case 0x0004:
                printf("FRC error");
                break;
+       case 0x0005:
+               printf("internal parity error");
+               break;
        case 0x0400:
                printf("internal timer error");
                break;
@@ -236,6 +266,17 @@ mca_log(const struct mca_record *rec)
                        break;
                }
 
+               /* Memory controller error. */
+               if ((mca_error & 0xef80) == 0x0080) {
+                       printf("%s channel ", mca_error_mmtype(mca_error));
+                       if ((mca_error & 0x000f) != 0x000f)
+                               printf("%d", mca_error & 0x000f);
+                       else
+                               printf("??");
+                       printf(" memory error");
+                       break;
+               }
+               
                /* Cache error. */
                if ((mca_error & 0xef00) == 0x0100) {
                        printf("%sCACHE %s %s error",
@@ -313,6 +354,11 @@ mca_check_status(int bank, struct mca_re
                rec->mr_misc = rdmsr(MSR_MC_MISC(bank));
        rec->mr_tsc = rdtsc();
        rec->mr_apic_id = PCPU_GET(apic_id);
+       rec->mr_mcg_cap = rdmsr(MSR_MCG_CAP);
+       rec->mr_mcg_status = rdmsr(MSR_MCG_STATUS);
+       rec->mr_cpu_id = cpu_id;
+       rec->mr_cpu_vendor_id = cpu_vendor_id;
+       rec->mr_cpu = PCPU_GET(cpuid);
 
        /*
         * Clear machine check.  Don't do this for uncorrectable

Modified: head/sys/i386/include/mca.h
==============================================================================
--- head/sys/i386/include/mca.h Tue Mar 16 15:55:41 2010        (r205213)
+++ head/sys/i386/include/mca.h Tue Mar 16 16:01:19 2010        (r205214)
@@ -37,6 +37,11 @@ struct mca_record {
        uint64_t        mr_tsc;
        int             mr_apic_id;
        int             mr_bank;
+       uint64_t        mr_mcg_cap;
+       uint64_t        mr_mcg_status;
+       int             mr_cpu_id;
+       int             mr_cpu_vendor_id;
+       int             mr_cpu;
 };
 
 #ifdef _KERNEL

Modified: head/sys/i386/include/specialreg.h
==============================================================================
--- head/sys/i386/include/specialreg.h  Tue Mar 16 15:55:41 2010        (r205213)
+++ head/sys/i386/include/specialreg.h  Tue Mar 16 16:01:19 2010        (r205214)
@@ -273,6 +273,7 @@
 #define        MSR_MTRR16kBase         0x258
 #define        MSR_MTRR4kBase          0x268
 #define        MSR_PAT                 0x277
+#define        MSR_MC0_CTL2            0x280
 #define        MSR_MTRRdefType         0x2ff
 #define        MSR_MC0_CTL             0x400
 #define        MSR_MC0_STATUS          0x401
@@ -421,8 +422,10 @@
 #define        MCG_CAP_COUNT           0x000000ff
 #define        MCG_CAP_CTL_P           0x00000100
 #define        MCG_CAP_EXT_P           0x00000200
+#define        MCG_CAP_CMCI_P          0x00000400
 #define        MCG_CAP_TES_P           0x00000800
 #define        MCG_CAP_EXT_CNT         0x00ff0000
+#define        MCG_CAP_SER_P           0x01000000
 #define        MCG_STATUS_RIPV         0x00000001
 #define        MCG_STATUS_EIPV         0x00000002
 #define        MCG_STATUS_MCIP         0x00000004
@@ -432,9 +435,14 @@
 #define        MSR_MC_STATUS(x)        (MSR_MC0_STATUS + (x) * 4)
 #define        MSR_MC_ADDR(x)          (MSR_MC0_ADDR + (x) * 4)
 #define        MSR_MC_MISC(x)          (MSR_MC0_MISC + (x) * 4)
+#define        MSR_MC_CTL2(x)          (MSR_MC0_CTL2 + (x))    /* If MCG_CAP_CMCI_P */
 #define        MC_STATUS_MCA_ERROR     0x000000000000ffffULL
 #define        MC_STATUS_MODEL_ERROR   0x00000000ffff0000ULL
 #define        MC_STATUS_OTHER_INFO    0x01ffffff00000000ULL
+#define        MC_STATUS_COR_COUNT     0x001fffc000000000ULL   /* If MCG_CAP_TES_P */
+#define        MC_STATUS_TES_STATUS    0x0060000000000000ULL   /* If MCG_CAP_TES_P */
+#define        MC_STATUS_AR            0x0080000000000000ULL   /* If MCG_CAP_CMCI_P */
+#define        MC_STATUS_S             0x0100000000000000ULL   /* If MCG_CAP_CMCI_P */
 #define        MC_STATUS_PCC           0x0200000000000000ULL
 #define        MC_STATUS_ADDRV         0x0400000000000000ULL
 #define        MC_STATUS_MISCV         0x0800000000000000ULL
@@ -442,6 +450,10 @@
 #define        MC_STATUS_UC            0x2000000000000000ULL
 #define        MC_STATUS_OVER          0x4000000000000000ULL
 #define        MC_STATUS_VAL           0x8000000000000000ULL
+#define        MC_MISC_RA_LSB          0x000000000000003fULL   /* If MCG_CAP_SER_P */
+#define        MC_MISC_ADDRESS_MODE    0x00000000000001c0ULL   /* If MCG_CAP_SER_P */
+#define        MC_CTL2_THRESHOLD       0x0000000000003fffULL
+#define        MC_CTL2_CMCI_EN         0x0000000040000000ULL
 
 /*
  * The following four 3-byte registers control the non-cacheable regions.
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscribe@freebsd.org"

Reply via email to