On Tue, 27 Mar 2018, Li-Wen Hsu wrote:

On Mon, Mar 26, 2018 at 20:35:12 -1000, Jeff Roberson wrote:
The patch has been on my branch for weeks and has been tested by a half
dozen people.  I'm sorry it does not work for you.  If you reverted 331605
the change that followed should not have built properly.  Did you build
cleanly?  Can you share your kernel config?

I tried with and without EARLY_AP_STARTUP and with and without NUMA.  I'm
not having any trouble booting.  Did you make cleandepend && make depend?

It also hangs in our testing system:

https://ci.freebsd.org/job/FreeBSD-head-amd64-test/6817/console
https://ci.freebsd.org/job/FreeBSD-head-i386-test/1000/console

It is built cleanly and uses unmodified GENERIC config.

The artifacts used are available here:

https://artifact.ci.freebsd.org/snapshot/head/r331606/amd64/amd64/
https://artifact.ci.freebsd.org/snapshot/head/r331606/i386/i386/

Hope these information help.

Could someone who was experiencing the hang try the enclosed patch? I can only verify that it continues to boot for me but I believe this fixes the bug on other systems.

I believe the issue was that cpuset_domain[0] was initialized too late on some systems. It depended on what kind of hardware was present and which sysinit with SI_ORDER_ANY ran first.

Thanks,
Jeff


Li-Wen

--
Li-Wen Hsu <lw...@freebsd.org>
https://lwhsu.org
Index: amd64/include/intr_machdep.h
===================================================================
--- amd64/include/intr_machdep.h        (revision 331610)
+++ amd64/include/intr_machdep.h        (working copy)
@@ -132,6 +132,7 @@ struct intsrc {
        u_long *is_straycount;
        u_int is_index;
        u_int is_handlers;
+       u_int is_domain;
        u_int is_cpu;
 };
 
@@ -168,7 +169,7 @@ void        intr_add_cpu(u_int cpu);
 #endif
 int    intr_add_handler(const char *name, int vector, driver_filter_t filter, 
                         driver_intr_t handler, void *arg, enum intr_type 
flags, 
-                        void **cookiep);    
+                        void **cookiep, int domain);    
 #ifdef SMP
 int    intr_bind(u_int vector, u_char cpu);
 #endif
@@ -176,7 +177,7 @@ int intr_config_intr(int vector, enum intr_trigger
     enum intr_polarity pol);
 int    intr_describe(u_int vector, void *ih, const char *descr);
 void   intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame);
-u_int  intr_next_cpu(void);
+u_int  intr_next_cpu(int domain);
 struct intsrc *intr_lookup_source(int vector);
 int    intr_register_pic(struct pic *pic);
 int    intr_register_source(struct intsrc *isrc);
Index: i386/include/intr_machdep.h
===================================================================
--- i386/include/intr_machdep.h (revision 331610)
+++ i386/include/intr_machdep.h (working copy)
@@ -132,6 +132,7 @@ struct intsrc {
        u_long *is_straycount;
        u_int is_index;
        u_int is_handlers;
+       u_int is_domain;
        u_int is_cpu;
 };
 
@@ -158,7 +159,8 @@ void        elcr_write_trigger(u_int irq, enum intr_trigg
 void   intr_add_cpu(u_int cpu);
 #endif
 int    intr_add_handler(const char *name, int vector, driver_filter_t filter,
-    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep);
+    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep,
+    int domain);
 #ifdef SMP
 int    intr_bind(u_int vector, u_char cpu);
 #endif
@@ -166,7 +168,7 @@ int intr_config_intr(int vector, enum intr_trigger
     enum intr_polarity pol);
 int    intr_describe(u_int vector, void *ih, const char *descr);
 void   intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame);
-u_int  intr_next_cpu(void);
+u_int  intr_next_cpu(int domain);
 struct intsrc *intr_lookup_source(int vector);
 int    intr_register_pic(struct pic *pic);
 int    intr_register_source(struct intsrc *isrc);
Index: kern/kern_cpuset.c
===================================================================
--- kern/kern_cpuset.c  (revision 331604)
+++ kern/kern_cpuset.c  (working copy)
@@ -1363,6 +1363,7 @@ cpuset_thread0(void)
 {
        struct cpuset *set;
        int error;
+       int i;
 
        cpuset_zone = uma_zcreate("cpuset", sizeof(struct cpuset), NULL, NULL,
            NULL, NULL, UMA_ALIGN_PTR, 0);
@@ -1374,11 +1375,11 @@ cpuset_thread0(void)
         * cpuset_create() due to NULL parent.
         */
        set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO);
-       CPU_FILL(&set->cs_mask);
+       CPU_COPY(&all_cpus, &set->cs_mask);
        LIST_INIT(&set->cs_children);
        LIST_INSERT_HEAD(&cpuset_ids, set, cs_link);
        set->cs_ref = 1;
-       set->cs_flags = CPU_SET_ROOT;
+       set->cs_flags = CPU_SET_ROOT | CPU_SET_RDONLY;
        set->cs_domain = &domainset0;
        cpuset_zero = set;
        cpuset_root = &set->cs_mask;
@@ -1396,6 +1397,16 @@ cpuset_thread0(void)
         */
        cpuset_unr = new_unrhdr(2, INT_MAX, NULL);
 
+       /*
+        * If MD code has not initialized per-domain cpusets, place all
+        * CPUs in domain 0.
+        */
+       for (i = 0; i < MAXMEMDOM; i++)
+               if (!CPU_EMPTY(&cpuset_domain[i]))
+                       goto domains_set;
+       CPU_COPY(&all_cpus, &cpuset_domain[0]);
+domains_set:
+
        return (set);
 }
 
@@ -1447,34 +1458,6 @@ cpuset_setproc_update_set(struct proc *p, struct c
        return (0);
 }
 
-/*
- * This is called once the final set of system cpus is known.  Modifies
- * the root set and all children and mark the root read-only.  
- */
-static void
-cpuset_init(void *arg)
-{
-       cpuset_t mask;
-       int i;
-
-       mask = all_cpus;
-       if (cpuset_modify(cpuset_zero, &mask))
-               panic("Can't set initial cpuset mask.\n");
-       cpuset_zero->cs_flags |= CPU_SET_RDONLY;
-
-       /*
-        * If MD code has not initialized per-domain cpusets, place all
-        * CPUs in domain 0.
-        */
-       for (i = 0; i < MAXMEMDOM; i++)
-               if (!CPU_EMPTY(&cpuset_domain[i]))
-                       goto domains_set;
-       CPU_COPY(&all_cpus, &cpuset_domain[0]);
-domains_set:
-       return;
-}
-SYSINIT(cpuset, SI_SUB_SMP, SI_ORDER_ANY, cpuset_init, NULL);
-
 #ifndef _SYS_SYSPROTO_H_
 struct cpuset_args {
        cpusetid_t      *setid;
Index: x86/x86/intr_machdep.c
===================================================================
--- x86/x86/intr_machdep.c      (revision 331610)
+++ x86/x86/intr_machdep.c      (working copy)
@@ -71,6 +71,8 @@
 #include <isa/isareg.h>
 #endif
 
+#include <vm/vm.h>
+
 #define        MAX_STRAY_LOG   5
 
 typedef void (*mask_fn)(void *);
@@ -185,7 +187,8 @@ intr_lookup_source(int vector)
 
 int
 intr_add_handler(const char *name, int vector, driver_filter_t filter,
-    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep)
+    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep,
+    int domain)
 {
        struct intsrc *isrc;
        int error;
@@ -200,6 +203,7 @@ intr_add_handler(const char *name, int vector, dri
                intrcnt_updatename(isrc);
                isrc->is_handlers++;
                if (isrc->is_handlers == 1) {
+                       isrc->is_domain = domain;
                        isrc->is_pic->pic_enable_intr(isrc);
                        isrc->is_pic->pic_enable_source(isrc);
                }
@@ -507,14 +511,27 @@ DB_SHOW_COMMAND(irqs, db_show_irqs)
  */
 
 cpuset_t intr_cpus = CPUSET_T_INITIALIZER(0x1);
-static int current_cpu;
+static int current_cpu[MAXMEMDOM];
 
+static void
+intr_init_cpus(void)
+{
+       int i;
+
+       for (i = 0; i < vm_ndomains; i++) {
+               current_cpu[i] = 0;
+               if (!CPU_ISSET(current_cpu[i], &intr_cpus) ||
+                   !CPU_ISSET(current_cpu[i], &cpuset_domain[i]))
+                       intr_next_cpu(i);
+       }
+}
+
 /*
  * Return the CPU that the next interrupt source should use.  For now
  * this just returns the next local APIC according to round-robin.
  */
 u_int
-intr_next_cpu(void)
+intr_next_cpu(int domain)
 {
        u_int apic_id;
 
@@ -529,12 +546,13 @@ u_int
 #endif
 
        mtx_lock_spin(&icu_lock);
-       apic_id = cpu_apic_ids[current_cpu];
+       apic_id = cpu_apic_ids[current_cpu[domain]];
        do {
-               current_cpu++;
-               if (current_cpu > mp_maxid)
-                       current_cpu = 0;
-       } while (!CPU_ISSET(current_cpu, &intr_cpus));
+               current_cpu[domain]++;
+               if (current_cpu[domain] > mp_maxid)
+                       current_cpu[domain] = 0;
+       } while (!CPU_ISSET(current_cpu[domain], &intr_cpus) ||
+           !CPU_ISSET(current_cpu[domain], &cpuset_domain[domain]));
        mtx_unlock_spin(&icu_lock);
        return (apic_id);
 }
@@ -568,7 +586,18 @@ intr_add_cpu(u_int cpu)
        CPU_SET(cpu, &intr_cpus);
 }
 
-#ifndef EARLY_AP_STARTUP
+#ifdef EARLY_AP_STARTUP
+static void
+intr_smp_startup(void *arg __unused)
+{
+
+       intr_init_cpus();
+       return;
+}
+SYSINIT(intr_smp_startup, SI_SUB_SMP, SI_ORDER_SECOND, intr_smp_startup,
+    NULL);
+
+#else
 /*
  * Distribute all the interrupt sources among the available CPUs once the
  * AP's have been launched.
@@ -580,6 +609,7 @@ intr_shuffle_irqs(void *arg __unused)
        u_int cpu;
        int i;
 
+       intr_init_cpus();
        /* Don't bother on UP. */
        if (mp_ncpus == 1)
                return;
@@ -599,12 +629,12 @@ intr_shuffle_irqs(void *arg __unused)
                         */
                        cpu = isrc->is_event->ie_cpu;
                        if (cpu == NOCPU)
-                               cpu = current_cpu;
+                               cpu = current_cpu[isrc->is_domain];
                        if (isrc->is_pic->pic_assign_cpu(isrc,
                            cpu_apic_ids[cpu]) == 0) {
                                isrc->is_cpu = cpu;
                                if (isrc->is_event->ie_cpu == NOCPU)
-                                       intr_next_cpu();
+                                       intr_next_cpu(isrc->is_domain);
                        }
                }
        }
@@ -635,10 +665,11 @@ sysctl_hw_intrs(SYSCTL_HANDLER_ARGS)
                isrc = interrupt_sources[i];
                if (isrc == NULL)
                        continue;
-               sbuf_printf(&sbuf, "%s:%d @%d: %ld\n",
+               sbuf_printf(&sbuf, "%s:%d @cpu%d(domain%d): %ld\n",
                    isrc->is_event->ie_fullname,
                    isrc->is_index,
                    isrc->is_cpu,
+                   isrc->is_domain,
                    *isrc->is_count);
        }
 
@@ -697,7 +728,7 @@ intr_balance(void *dummy __unused, int pending __u
         * Restart the scan from the same location to avoid moving in the
         * common case.
         */
-       current_cpu = 0;
+       intr_init_cpus();
 
        /*
         * Assign round-robin from most loaded to least.
@@ -706,8 +737,8 @@ intr_balance(void *dummy __unused, int pending __u
                isrc = interrupt_sorted[i];
                if (isrc == NULL  || isrc->is_event->ie_cpu != NOCPU)
                        continue;
-               cpu = current_cpu;
-               intr_next_cpu();
+               cpu = current_cpu[isrc->is_domain];
+               intr_next_cpu(isrc->is_domain);
                if (isrc->is_cpu != cpu &&
                    isrc->is_pic->pic_assign_cpu(isrc,
                    cpu_apic_ids[cpu]) == 0)
@@ -735,7 +766,7 @@ SYSINIT(intr_balance_init, SI_SUB_SMP, SI_ORDER_AN
  * Always route interrupts to the current processor in the UP case.
  */
 u_int
-intr_next_cpu(void)
+intr_next_cpu(int domain)
 {
 
        return (PCPU_GET(apic_id));
Index: x86/x86/io_apic.c
===================================================================
--- x86/x86/io_apic.c   (revision 331610)
+++ x86/x86/io_apic.c   (working copy)
@@ -499,7 +499,7 @@ ioapic_enable_intr(struct intsrc *isrc)
        struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
 
        if (intpin->io_vector == 0)
-               if (ioapic_assign_cpu(isrc, intr_next_cpu()) != 0)
+               if (ioapic_assign_cpu(isrc, intr_next_cpu(isrc->is_domain)) != 
0)
                        panic("Couldn't find an APIC vector for IRQ %d",
                            intpin->io_irq);
        apic_enable_vector(intpin->io_cpu, intpin->io_vector);
Index: x86/x86/msi.c
===================================================================
--- x86/x86/msi.c       (revision 331610)
+++ x86/x86/msi.c       (working copy)
@@ -363,7 +363,7 @@ int
 msi_alloc(device_t dev, int count, int maxcount, int *irqs)
 {
        struct msi_intsrc *msi, *fsrc;
-       u_int cpu;
+       u_int cpu, domain;
        int cnt, i, *mirqs, vector;
 #ifdef ACPI_DMAR
        u_int cookies[count];
@@ -373,6 +373,9 @@ msi_alloc(device_t dev, int count, int maxcount, i
        if (!msi_enabled)
                return (ENXIO);
 
+       if (bus_get_domain(dev, &domain) != 0)
+               domain = 0;
+
        if (count > 1)
                mirqs = malloc(count * sizeof(*mirqs), M_MSI, M_WAITOK);
        else
@@ -420,7 +423,7 @@ again:
        KASSERT(cnt == count, ("count mismatch"));
 
        /* Allocate 'count' IDT vectors. */
-       cpu = intr_next_cpu();
+       cpu = intr_next_cpu(domain);
        vector = apic_alloc_vectors(cpu, irqs, count, maxcount);
        if (vector == 0) {
                mtx_unlock(&msi_lock);
@@ -610,7 +613,7 @@ int
 msix_alloc(device_t dev, int *irq)
 {
        struct msi_intsrc *msi;
-       u_int cpu;
+       u_int cpu, domain;
        int i, vector;
 #ifdef ACPI_DMAR
        u_int cookie;
@@ -620,6 +623,9 @@ msix_alloc(device_t dev, int *irq)
        if (!msi_enabled)
                return (ENXIO);
 
+       if (bus_get_domain(dev, &domain) != 0)
+               domain = 0;
+
 again:
        mtx_lock(&msi_lock);
 
@@ -651,7 +657,7 @@ again:
        }
 
        /* Allocate an IDT vector. */
-       cpu = intr_next_cpu();
+       cpu = intr_next_cpu(domain);
        vector = apic_alloc_vector(cpu, i);
        if (vector == 0) {
                mtx_unlock(&msi_lock);
Index: x86/x86/nexus.c
===================================================================
--- x86/x86/nexus.c     (revision 331610)
+++ x86/x86/nexus.c     (working copy)
@@ -573,7 +573,7 @@ nexus_setup_intr(device_t bus, device_t child, str
                 int flags, driver_filter_t filter, void (*ihand)(void *),
                 void *arg, void **cookiep)
 {
-       int             error;
+       int             error, domain;
 
        /* somebody tried to setup an irq that failed to allocate! */
        if (irq == NULL)
@@ -589,9 +589,11 @@ nexus_setup_intr(device_t bus, device_t child, str
        error = rman_activate_resource(irq);
        if (error)
                return (error);
+       if (bus_get_domain(child, &domain) != 0)
+               domain = 0;
 
        error = intr_add_handler(device_get_nameunit(child),
-           rman_get_start(irq), filter, ihand, arg, flags, cookiep);
+           rman_get_start(irq), filter, ihand, arg, flags, cookiep, domain);
 
        return (error);
 }
Index: x86/xen/xen_intr.c
===================================================================
--- x86/xen/xen_intr.c  (revision 331610)
+++ x86/xen/xen_intr.c  (working copy)
@@ -430,7 +430,7 @@ xen_intr_bind_isrc(struct xenisrc **isrcp, evtchn_
                 * unless specified otherwise, so shuffle them to balance
                 * the interrupt load.
                 */
-               xen_intr_assign_cpu(&isrc->xi_intsrc, intr_next_cpu());
+               xen_intr_assign_cpu(&isrc->xi_intsrc, intr_next_cpu(0));
        }
 #endif
 
@@ -1562,7 +1562,7 @@ xen_intr_add_handler(const char *name, driver_filt
                return (EINVAL);
 
        error = intr_add_handler(name, isrc->xi_vector,filter, handler, arg,
-           flags|INTR_EXCL, &isrc->xi_cookie);
+           flags|INTR_EXCL, &isrc->xi_cookie, 0);
        if (error != 0) {
                printf(
                    "%s: xen_intr_add_handler: intr_add_handler failed: %d\n",
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to