Hi, can you boot a GENERIC.MP kernel with this diff, then show your
dmesg?  Your bsd kernel was failing to start cpu1.

On Sun, 09 Apr 2023 19:35:25 +0000
Eduardo Pires <eduardo.pi...@protonmail.com> wrote:

> cpu1 at mainbus0 pir c: IBM POWER9 2.3, 2700 MHz
> cpu1: 32KB 128b/line 8-way L1 I-cache, 32KB 128b/line 8-way L1 D-cache
> cpu1: 512KB 128b/line 8-way L2 cache
> cpu1: 10MB 128b/line 8-way L3 cache[ 8556.053449291,3] OPAL: CPU not active 
> in OPAL !
>  failed to identify
> cpu2 at mainbus0 pir 14: IBM POWER9 2.3, 2700 MHz
> ...
> opal0: idle psscr 300332
> [ 8566.060016341,3] OPAL: CPU not active in OPAL !
> [ 8566.060065723,3] OPAL: CPU not active in OPAL !
> [ 8566.060096675,3] OPAL: CPU not active in OPAL !
> opalcons0 at opal0

From the "failed to identify", I deduce that bsd failed to start cpu1,
then got stuck at "root on sd0a...", because it was waiting for cpu1
to finish starting.  This diff checks for CPUF_PRESENT in
cpu_boot_secondary_processors(), so it might skip waiting for cpu1.

To run this diff, you need to build a kernel, or ask me to send my
7.3-current kernel to you.  If you can boot the single-core bsd.sp,
you can use it to install a different kernel.  After petitboot,
at OpenBSD's "boot>", type "boot bsd.sp".

--George

Index: arch/powerpc64/dev/opal.c
===================================================================
RCS file: /cvs/src/sys/arch/powerpc64/dev/opal.c,v
retrieving revision 1.14
diff -u -p -r1.14 opal.c
--- arch/powerpc64/dev/opal.c   12 Oct 2022 13:39:50 -0000      1.14
+++ arch/powerpc64/dev/opal.c   10 Apr 2023 02:50:29 -0000
@@ -385,6 +385,7 @@ void
 opal_found_stop_state(struct opal_softc *sc, uint64_t state)
 {
 #ifdef MULTIPROCESSOR
+       int64_t error;
        uint32_t pirs[8];
        int i, len, node;
        char buf[32];
@@ -411,9 +412,17 @@ opal_found_stop_state(struct opal_softc 
                    pirs, sizeof(pirs));
                if (len > 0 && len % 4 == 0) {
                        /* Skip i = 0, the first hardware thread. */
-                       for (i = 1; i < len / 4; i++)
-                               opal_start_cpu(pirs[i],
+                       for (i = 1; i < len / 4; i++) {
+                               error = opal_start_cpu(pirs[i],
                                    (vaddr_t)cpu_hatch_and_stop);
+                               if (error != OPAL_SUCCESS) {
+                                       printf("%s: failed to idle "
+                                           "pir %u, error %lld\n",
+                                           sc->sc_dev.dv_xname,
+                                           (unsigned int)pirs[i],
+                                           (long long)error);
+                               }
+                       }
                }
        }
 #endif
Index: arch/powerpc64/powerpc64/cpu.c
===================================================================
RCS file: /cvs/src/sys/arch/powerpc64/powerpc64/cpu.c,v
retrieving revision 1.25
diff -u -p -r1.25 cpu.c
--- arch/powerpc64/powerpc64/cpu.c      25 Jan 2023 09:53:53 -0000      1.25
+++ arch/powerpc64/powerpc64/cpu.c      10 Apr 2023 02:50:29 -0000
@@ -187,6 +187,7 @@ cpu_attach(struct device *parent, struct
 
 #ifdef MULTIPROCESSOR
        if (dev->dv_unit != 0) {
+               int64_t error;
                int timeout = 10000;
 
                sched_init_cpu(ci);
@@ -195,16 +196,22 @@ cpu_attach(struct device *parent, struct
                ci->ci_initstack_end = km_alloc(PAGE_SIZE, &kv_any, &kp_zero,
                    &kd_waitok) + PAGE_SIZE;
 
-               opal_start_cpu(ci->ci_pir, (vaddr_t)cpu_hatch);
+               error = opal_start_cpu(ci->ci_pir, (vaddr_t)cpu_hatch);
 
-               atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFY);
-               membar_sync();
-
-               while ((ci->ci_flags & CPUF_IDENTIFIED) == 0 &&
-                   --timeout)
-                       delay(1000);
-               if (timeout == 0) {
-                       printf(" failed to identify");
+               if (error == OPAL_SUCCESS) {
+                       atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFY);
+                       membar_sync();
+
+                       while ((ci->ci_flags & CPUF_IDENTIFIED) == 0 &&
+                           --timeout)
+                               delay(1000);
+                       if (timeout == 0) {
+                               printf(" failed to identify");
+                               ci->ci_flags = 0;
+                       }
+               } else {
+                       printf(" failed to start, error %lld",
+                           (long long)error);
                        ci->ci_flags = 0;
                }
        }
@@ -383,6 +390,8 @@ cpu_boot_secondary_processors(void)
                    IPL_IPI, ci, cpu_intr, ci, ci->ci_dev->dv_xname);
 
                if (CPU_IS_PRIMARY(ci))
+                       continue;
+               if ((ci->ci_flags & CPUF_PRESENT) == 0)
                        continue;
 
                ci->ci_randseed = (arc4random() & 0x7fffffff) + 1;

Reply via email to