[PATCH v2] powerpc/xive: Do not skip CPU-less nodes when creating the IPIs

2021-08-07 Thread Cédric Le Goater
On PowerVM, CPU-less nodes can be populated with hot-plugged CPUs at
runtime. Today, the IPI is not created for such nodes, and hot-plugged
CPUs use a bogus IPI, which leads to soft lockups.

We cannot directly allocate and request the IPI on demand because
bringup_cpu() is called under the IRQ sparse lock. The alternative is
to allocate the IPIs for all possible nodes at startup and to request
the mapping on demand when the first CPU of a node is brought up.

Fixes: 7dcc37b3eff9 ("powerpc/xive: Map one IPI interrupt per node")
Cc: sta...@vger.kernel.org # v5.13
Reported-by: Geetika Moolchandani 
Cc: Srikar Dronamraju 
Cc: Laurent Vivier 
Signed-off-by: Cédric Le Goater 
Message-Id: <20210629131542.743888-1-...@kaod.org>
Signed-off-by: Cédric Le Goater 
---
 arch/powerpc/sysdev/xive/common.c | 35 +--
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/sysdev/xive/common.c 
b/arch/powerpc/sysdev/xive/common.c
index dbdbbc2f1dc5..943fd30095af 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -67,6 +67,7 @@ static struct irq_domain *xive_irq_domain;
 static struct xive_ipi_desc {
unsigned int irq;
char name[16];
+   atomic_t started;
 } *xive_ipis;
 
 /*
@@ -1120,7 +1121,7 @@ static const struct irq_domain_ops 
xive_ipi_irq_domain_ops = {
.alloc  = xive_ipi_irq_domain_alloc,
 };
 
-static int __init xive_request_ipi(void)
+static int __init xive_init_ipis(void)
 {
struct fwnode_handle *fwnode;
struct irq_domain *ipi_domain;
@@ -1144,10 +1145,6 @@ static int __init xive_request_ipi(void)
struct xive_ipi_desc *xid = &xive_ipis[node];
struct xive_ipi_alloc_info info = { node };
 
-   /* Skip nodes without CPUs */
-   if (cpumask_empty(cpumask_of_node(node)))
-   continue;
-
/*
 * Map one IPI interrupt per node for all cpus of that node.
 * Since the HW interrupt number doesn't have any meaning,
@@ -1159,11 +1156,6 @@ static int __init xive_request_ipi(void)
xid->irq = ret;
 
snprintf(xid->name, sizeof(xid->name), "IPI-%d", node);
-
-   ret = request_irq(xid->irq, xive_muxed_ipi_action,
- IRQF_PERCPU | IRQF_NO_THREAD, xid->name, 
NULL);
-
-   WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret);
}
 
return ret;
@@ -1178,6 +1170,22 @@ static int __init xive_request_ipi(void)
return ret;
 }
 
+static int __init xive_request_ipi(unsigned int cpu)
+{
+   struct xive_ipi_desc *xid = &xive_ipis[early_cpu_to_node(cpu)];
+   int ret;
+
+   if (atomic_inc_return(&xid->started) > 1)
+   return 0;
+
+   ret = request_irq(xid->irq, xive_muxed_ipi_action,
+ IRQF_PERCPU | IRQF_NO_THREAD,
+ xid->name, NULL);
+
+   WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret);
+   return ret;
+}
+
 static int xive_setup_cpu_ipi(unsigned int cpu)
 {
unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu);
@@ -1192,6 +1200,9 @@ static int xive_setup_cpu_ipi(unsigned int cpu)
if (xc->hw_ipi != XIVE_BAD_IRQ)
return 0;
 
+   /* Register the IPI */
+   xive_request_ipi(cpu);
+
/* Grab an IPI from the backend, this will populate xc->hw_ipi */
if (xive_ops->get_ipi(cpu, xc))
return -EIO;
@@ -1231,6 +1242,8 @@ static void xive_cleanup_cpu_ipi(unsigned int cpu, struct 
xive_cpu *xc)
if (xc->hw_ipi == XIVE_BAD_IRQ)
return;
 
+   /* TODO: clear IPI mapping */
+
/* Mask the IPI */
xive_do_source_set_mask(&xc->ipi_data, true);
 
@@ -1253,7 +1266,7 @@ void __init xive_smp_probe(void)
smp_ops->cause_ipi = xive_cause_ipi;
 
/* Register the IPI */
-   xive_request_ipi();
+   xive_init_ipis();
 
/* Allocate and setup IPI for the boot CPU */
xive_setup_cpu_ipi(smp_processor_id());
-- 
2.31.1



[PATCH] powerpc: use strscpy to replace strlcpy

2021-08-07 Thread Jason Wang
strlcpy() should not be used because it doesn't limit the source
length. As Linus says, it's a completely useless function if you
can't implicitly trust the source string - but that is almost always
why people think they should use it! All in all, the BSD function
can lead to potential bugs.

In contrast, strscpy() doesn't require reading memory from the src string
beyond the specified "count" bytes, and its return value is
easier to error-check than strlcpy()'s. In addition, the implementation
is robust to the string changing out from underneath it, unlike the
current strlcpy() implementation.

Thus, we prefer using strscpy() instead of strlcpy().

Signed-off-by: Jason Wang 
---
 arch/powerpc/platforms/powermac/bootx_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powermac/bootx_init.c 
b/arch/powerpc/platforms/powermac/bootx_init.c
index d20ef35e6d9d..741aa5b89e55 100644
--- a/arch/powerpc/platforms/powermac/bootx_init.c
+++ b/arch/powerpc/platforms/powermac/bootx_init.c
@@ -243,7 +243,7 @@ static void __init bootx_scan_dt_build_strings(unsigned 
long base,
DBG(" detected display ! adding properties names !\n");
bootx_dt_add_string("linux,boot-display", mem_end);
bootx_dt_add_string("linux,opened", mem_end);
-   strlcpy(bootx_disp_path, namep, sizeof(bootx_disp_path));
+   strscpy(bootx_disp_path, namep, sizeof(bootx_disp_path));
}
 
/* get and store all property names */
-- 
2.32.0



Re: [PATCH v2 0/6] PCI: Drop duplicated tracking of a pci_dev's bound driver

2021-08-07 Thread Uwe Kleine-König
On Fri, Aug 06, 2021 at 04:24:52PM -0500, Bjorn Helgaas wrote:
> On Fri, Aug 06, 2021 at 08:46:23AM +0200, Uwe Kleine-König wrote:
> > On Thu, Aug 05, 2021 at 06:42:34PM -0500, Bjorn Helgaas wrote:
> 
> > > I looked at all the bus_type.probe() methods, it looks like pci_dev is
> > > not the only offender here.  At least the following also have a driver
> > > pointer in the device struct:
> > > 
> > >   parisc_device.driver
> > >   acpi_device.driver
> > >   dio_dev.driver
> > >   hid_device.driver
> > >   pci_dev.driver
> > >   pnp_dev.driver
> > >   rio_dev.driver
> > >   zorro_dev.driver
> > 
> > Right, when I converted zorro_dev it was pointed out that the code was
> > copied from pci and the latter has the same construct. :-)
> > See
> > https://lore.kernel.org/r/20210730191035.1455248-5-u.kleine-koe...@pengutronix.de
> > for the patch, I don't find where pci was pointed out, maybe it was on
> > irc only.
> 
> Oh, thanks!  I looked to see if you'd done something similar
> elsewhere, but I missed this one.
> 
> > > Looking through the places that care about pci_dev.driver (the ones
> > > updated by patch 5/6), many of them are ... a little dubious to begin
> > > with.  A few need the "struct pci_error_handlers *err_handler"
> > > pointer, so that's probably legitimate.  But many just need a name,
> > > and should probably be using dev_driver_string() instead.
> > 
> > Yeah, I considered adding a function to get the driver name from a
> > pci_dev and a function to get the error handlers. Maybe it's an idea to
> > introduce these two and then use to_pci_driver(pdev->dev.driver) for the
> > few remaining users? Maybe doing that on top of my current series makes
> > sense to have a clean switch from pdev->driver to pdev->dev.driver?!
> 
> I'd propose using dev_driver_string() for these places:
> 
>   eeh_driver_name() (could change callers to use dev_driver_string())
>   bcma_host_pci_probe()
>   qm_alloc_uacce()
>   hns3_get_drvinfo()
>   prestera_pci_probe()
>   mlxsw_pci_probe()
>   nfp_get_drvinfo()
>   ssb_pcihost_probe()

So the idea is:

PCI: Simplify pci_device_remove()
PCI: Drop useless check from pci_device_probe()
xen/pci: Drop some checks that are always true

are kept as is as preparation. (Do you want to take them from this v2,
or should I include them again in v3?)

Then convert the list of functions above to use dev_driver_string() in a
4th patch.

> The use in mpt_device_driver_register() looks unnecessary: it's only
> to get a struct pci_device_id *, which is passed to ->probe()
> functions that don't need it.

This is patch #5.

> The use in adf_enable_aer() looks wrong: it sets the err_handler
> pointer in one of the adf_driver structs.  I think those structs
> should be basically immutable, and the drivers that call
> adf_enable_aer() from their .probe() methods should set
> ".err_handler = &adf_err_handler" in their static adf_driver
> definitions instead.

I don't understand that one without some research, probably this yields
at least one patch.

> I think that basically leaves these:
> 
>   uncore_pci_probe() # .id_table, custom driver "registration"
>   match_id() # .id_table, arch/x86/kernel/probe_roms.c
>   xhci_pci_quirks()  # .id_table
>   pci_error_handlers()   # roll-your-own AER handling, 
> drivers/misc/cxl/guest.c
> 
> I think it would be fine to use to_pci_driver(pdev->dev.driver) for
> these few.

Converting these will be patch 7 then and patch 8 can then drop the
duplicated handling.

Sounds reasonable?

Best regards
Uwe

-- 
Pengutronix e.K.   | Uwe Kleine-König|
Industrial Linux Solutions | https://www.pengutronix.de/ |


signature.asc
Description: PGP signature


Re: Debian SID kernel doesn't boot on PowerBook 3400c

2021-08-07 Thread Christophe Leroy




Le 07/08/2021 à 15:09, Stan Johnson a écrit :

On 8/6/21 10:08 PM, Finn Thain wrote:


On Fri, 6 Aug 2021, Stan Johnson wrote:


$ egrep '(CONFIG_PPC_KUAP|CONFIG_VMAP_STACK)' .config
CONFIG_PPC_KUAP=y
CONFIG_PPC_KUAP_DEBUG=y
CONFIG_VMAP_STACK=y
$ strings vmlinux | fgrep "Linux version"
Linux version 5.13.0-pmac-4-g63e3756d1bd ...
$ cp vmlinux ../vmlinux-5.13.0-pmac-4-g63e3756d1bd-1

1) PB 3400c
vmlinux-5.13.0-pmac-4-g63e3756d1bd-1
Boots, no errors logging in at (text) fb console. Logging in via ssh and
running "ls -Rail /usr/include" generated errors (and a hung ssh
session). Once errors started, they repeated for almost every command.
See pb3400c-63e3756d1bdf-1.txt.

2) Wallstreet
vmlinux-5.13.0-pmac-4-g63e3756d1bd-1
X login failed, there were errors ("Oops: Kernel access of bad area",
"Oops: Exception in kernel mode"). Logging in via SSH, there were no
additional errors after running "ls -Rail /usr/include" -- the errors
did not escalate as they did on the PB 3400.
See Wallstreet-63e3756d1bdf-1.txt.


...

$ egrep '(CONFIG_PPC_KUAP|CONFIG_VMAP_STACK)' .config
CONFIG_PPC_KUAP=y
CONFIG_PPC_KUAP_DEBUG=y
# CONFIG_VMAP_STACK is not set
$ strings vmlinux | fgrep "Linux version"
Linux version 5.13.0-pmac-4-g63e3756d1bd ...
$ cp vmlinux ../vmlinux-5.13.0-pmac-4-g63e3756d1bd-2

3) PB 3400c
vmlinux-5.13.0-pmac-4-g63e3756d1bd-2
Filesystem was corrupt from the previous test (probably from all the
errors during shutdown). After fixing the filesystem:
Boots, no errors logging in at (text) fb console. Logging in via ssh and
running "ls -Rail /usr/include" generated a few errors. There didn't
seem to be as many errors as in the previous test, there were a few
errors during shutdown but the shutdown was otherwise normal.
See pb3400c-63e3756d1bdf-2.txt.

4) Wallstreet
vmlinux-5.13.0-pmac-4-g63e3756d1bd-2
X login worked, and there were no errors. There were no errors during
ssh access.
See Wallstreet-63e3756d1bdf-2.txt.



Thanks for collecting these results, Stan. Do you think that the
successful result from test 4) could have been just chance?


No. I repeated Test 4 above two more times on the Wallstreet. After
stomping on it as hard as I could, I didn't see any errors. I ran the
following tests simultaneously, with no errors:

a) Ping flood the Wallstreet
862132 packets transmitted, 862117 packets received, 0.0% packet loss
round-trip min/avg/max/stddev = 0.316/0.418/12.163/0.143 ms

b) "ls -Rail /usr" in an ssh window.

c) "find /usr/include -type f -exec sha1sum {} \;" in a second ssh window.

d) With a, b and c running, I logged in at the X console (slow but it
worked). Load average was 7.0 as reported by uptime.

So the success seems to be repeatable (or at least the errors are so
unlikely to happen that I'm not seeing anything).



It appears that the bug affecting the Powerbook 3400 is unaffected by
CONFIG_VMAP_STACK.

Whereas the bug affecting the Powerbook G3 disappears when
CONFIG_VMAP_STACK is disabled (assuming the result from 4 is reliable).

Either way, these results reiterate that "Oops: Kernel access of bad area,
sig: 11" was not entirely resolved by "powerpc/32s: Fix napping restore in
data storage interrupt (DSI)".



That sounds right. Thanks for investigating this.




Thanks a lot for your patience and for the tests.

I'm still having a hard time understanding what the problem is.

Could you try the new change I pushed into the git repo ? It shouldn't have any effect, but I prefer 
to eliminate all possibilities. The documentation says that SRR1 upper bits are 0 on DSI and the code 
relies on that. But if the doc is wrong then that can explain the problem. So now I'm forcing them to 
0 regardless.


To get the change, you just have to do 'git pull -r' inside the directory where you checked out the 
sources and build.


Thanks again
Christophe


Re: [PATCH v4 1/2] tty: hvc: pass DMA capable memory to put_chars()

2021-08-07 Thread Xianting Tian



在 2021/8/6 下午10:51, Arnd Bergmann 写道:

On Fri, Aug 6, 2021 at 5:01 AM Xianting Tian
 wrote:

@@ -163,6 +155,13 @@ static void hvc_console_print(struct console *co, const 
char *b,
 if (vtermnos[index] == -1)
 return;

+   list_for_each_entry(hp, &hvc_structs, next)
+   if (hp->vtermno == vtermnos[index])
+   break;
+
+   c = hp->c;
+
+   spin_lock_irqsave(&hp->c_lock, flags);

The loop looks like it might race against changes to the list. It seems strange
that the print function has to actually search for the structure here.

It may be better to have yet another array for the buffer pointers next to
the cons_ops[] and vtermnos[] arrays.

I will make the change in v5, thanks.



+/*
+ * These sizes are most efficient for vio, because they are the
+ * native transfer size. We could make them selectable in the
+ * future to better deal with backends that want other buffer sizes.
+ */
+#define N_OUTBUF   16
+#define N_INBUF    16
+
+#define __ALIGNED__ __attribute__((__aligned__(sizeof(long))))

I think you need a higher alignment for DMA buffers, instead of sizeof(long),
I would suggest ARCH_DMA_MINALIGN.


thanks, I will fix it in v5:

#define __ALIGNED__ __attribute__((__aligned__(ARCH_DMA_MINALIGN)))



Arnd


Re: Debian SID kernel doesn't boot on PowerBook 3400c

2021-08-07 Thread Christophe Leroy




Le 07/08/2021 à 18:26, Stan Johnson a écrit :

On 8/7/21 8:35 AM, Christophe Leroy wrote:



Le 07/08/2021 à 15:09, Stan Johnson a écrit :

On 8/6/21 10:08 PM, Finn Thain wrote:


On Fri, 6 Aug 2021, Stan Johnson wrote:


$ egrep '(CONFIG_PPC_KUAP|CONFIG_VMAP_STACK)' .config
CONFIG_PPC_KUAP=y
CONFIG_PPC_KUAP_DEBUG=y
CONFIG_VMAP_STACK=y
$ strings vmlinux | fgrep "Linux version"
Linux version 5.13.0-pmac-4-g63e3756d1bd ...
$ cp vmlinux ../vmlinux-5.13.0-pmac-4-g63e3756d1bd-1

1) PB 3400c
vmlinux-5.13.0-pmac-4-g63e3756d1bd-1
Boots, no errors logging in at (text) fb console. Logging in via ssh
and
running "ls -Rail /usr/include" generated errors (and a hung ssh
session). Once errors started, they repeated for almost every command.
See pb3400c-63e3756d1bdf-1.txt.

2) Wallstreet
vmlinux-5.13.0-pmac-4-g63e3756d1bd-1
X login failed, there were errors ("Oops: Kernel access of bad area",
"Oops: Exception in kernel mode"). Logging in via SSH, there were no
additional errors after running "ls -Rail /usr/include" -- the errors
did not escalate as they did on the PB 3400.
See Wallstreet-63e3756d1bdf-1.txt.


...

$ egrep '(CONFIG_PPC_KUAP|CONFIG_VMAP_STACK)' .config
CONFIG_PPC_KUAP=y
CONFIG_PPC_KUAP_DEBUG=y
# CONFIG_VMAP_STACK is not set
$ strings vmlinux | fgrep "Linux version"
Linux version 5.13.0-pmac-4-g63e3756d1bd ...
$ cp vmlinux ../vmlinux-5.13.0-pmac-4-g63e3756d1bd-2

3) PB 3400c
vmlinux-5.13.0-pmac-4-g63e3756d1bd-2
Filesystem was corrupt from the previous test (probably from all the
errors during shutdown). After fixing the filesystem:
Boots, no errors logging in at (text) fb console. Logging in via ssh
and
running "ls -Rail /usr/include" generated a few errors. There didn't
seem to be as many errors as in the previous test, there were a few
errors during shutdown but the shutdown was otherwise normal.
See pb3400c-63e3756d1bdf-2.txt.

4) Wallstreet
vmlinux-5.13.0-pmac-4-g63e3756d1bd-2
X login worked, and there were no errors. There were no errors during
ssh access.
See Wallstreet-63e3756d1bdf-2.txt.



Thanks for collecting these results, Stan. Do you think that the
successful result from test 4) could have been just chance?


No. I repeated Test 4 above two more times on the Wallstreet. After
stomping on it as hard as I could, I didn't see any errors. I ran the
following tests simultaneously, with no errors:

a) Ping flood the Wallstreet
862132 packets transmitted, 862117 packets received, 0.0% packet loss
round-trip min/avg/max/stddev = 0.316/0.418/12.163/0.143 ms

b) "ls -Rail /usr" in an ssh window.

c) "find /usr/include -type f -exec sha1sum {} \;" in a second ssh
window.

d) With a, b and c running, I logged in at the X console (slow but it
worked). Load average was 7.0 as reported by uptime.

So the success seems to be repeatable (or at least the errors are so
unlikely to happen that I'm not seeing anything).



It appears that the bug affecting the Powerbook 3400 is unaffected by
CONFIG_VMAP_STACK.

Whereas the bug affecting the Powerbook G3 disappears when
CONFIG_VMAP_STACK is disabled (assuming the result from 4 is reliable).

Either way, these results reiterate that "Oops: Kernel access of bad
area,
sig: 11" was not entirely resolved by "powerpc/32s: Fix napping
restore in
data storage interrupt (DSI)".



That sounds right. Thanks for investigating this.




Thanks a lot for your patience and for the tests.

I'm still having a hard time understanding what the problem is.

Could you try the new change I pushed into the git repo ? It shouldn't
have any effect, but I prefer to eliminate all possibilities. The
documentation says that SRR1 upper bits are 0 on DSI and the code relies
on that. But if the doc is wrong then that can explain the problem. So
now I'm forcing them to 0 regardless.

To get the change, you just have to do 'git pull -r' inside the
directory where you checked out the sources and build.

Thanks again
Christophe



Thanks, Christophe.

In the same directory as previous builds:

$ git checkout chleroy-linux/bugtest
HEAD is now at 63e3756d1bdf powerpc/interrupts: Also perform KUAP/KUEP
lock and usertime accounting on NMI
$ git pull -r
You are not currently on a branch.
Please specify which branch you want to rebase against.
...
$ git pull -r chleroy-linux
remote: Enumerating objects: 6, done.
remote: Counting objects: 100% (6/6), done.
remote: Compressing objects: 100% (6/6), done.
remote: Total 6 (delta 0), reused 6 (delta 0), pack-reused 0
Unpacking objects: 100% (6/6), done.
 From https://github.com/chleroy/linux
63e3756d1bdf..9023760b1361  bugtest-> chleroy-linux/bugtest
Updating 63e3756d1bdf..9023760b1361
Fast-forward
  arch/powerpc/kernel/head_book3s_32.S | 1 +
  1 file changed, 1 insertion(+)
HEAD is up to date.

Hopefully I did that right and ended up at the right spot.

For tests 5 and 6:

$ cp ../dot-config-powermac-5.13 .config
$ scripts/config -e CONFIG_PPC_KUAP -e CONFIG_PPC_KUAP_DEBUG -e
CONFIG_VMAP_STACK
$ make ARCH=powerpc CROSS_CO

Re: Debian SID kernel doesn't boot on PowerBook 3400c

2021-08-07 Thread Stan Johnson
On 8/6/21 10:08 PM, Finn Thain wrote:
> 
> On Fri, 6 Aug 2021, Stan Johnson wrote:
> 
>> $ egrep '(CONFIG_PPC_KUAP|CONFIG_VMAP_STACK)' .config
>> CONFIG_PPC_KUAP=y
>> CONFIG_PPC_KUAP_DEBUG=y
>> CONFIG_VMAP_STACK=y
>> $ strings vmlinux | fgrep "Linux version"
>> Linux version 5.13.0-pmac-4-g63e3756d1bd ...
>> $ cp vmlinux ../vmlinux-5.13.0-pmac-4-g63e3756d1bd-1
>>
>> 1) PB 3400c
>> vmlinux-5.13.0-pmac-4-g63e3756d1bd-1
>> Boots, no errors logging in at (text) fb console. Logging in via ssh and
>> running "ls -Rail /usr/include" generated errors (and a hung ssh
>> session). Once errors started, they repeated for almost every command.
>> See pb3400c-63e3756d1bdf-1.txt.
>>
>> 2) Wallstreet
>> vmlinux-5.13.0-pmac-4-g63e3756d1bd-1
>> X login failed, there were errors ("Oops: Kernel access of bad area",
>> "Oops: Exception in kernel mode"). Logging in via SSH, there were no
>> additional errors after running "ls -Rail /usr/include" -- the errors
>> did not escalate as they did on the PB 3400.
>> See Wallstreet-63e3756d1bdf-1.txt.
>>
> ...
>> $ egrep '(CONFIG_PPC_KUAP|CONFIG_VMAP_STACK)' .config
>> CONFIG_PPC_KUAP=y
>> CONFIG_PPC_KUAP_DEBUG=y
>> # CONFIG_VMAP_STACK is not set
>> $ strings vmlinux | fgrep "Linux version"
>> Linux version 5.13.0-pmac-4-g63e3756d1bd ...
>> $ cp vmlinux ../vmlinux-5.13.0-pmac-4-g63e3756d1bd-2
>>
>> 3) PB 3400c
>> vmlinux-5.13.0-pmac-4-g63e3756d1bd-2
>> Filesystem was corrupt from the previous test (probably from all the
>> errors during shutdown). After fixing the filesystem:
>> Boots, no errors logging in at (text) fb console. Logging in via ssh and
>> running "ls -Rail /usr/include" generated a few errors. There didn't
>> seem to be as many errors as in the previous test, there were a few
>> errors during shutdown but the shutdown was otherwise normal.
>> See pb3400c-63e3756d1bdf-2.txt.
>>
>> 4) Wallstreet
>> vmlinux-5.13.0-pmac-4-g63e3756d1bd-2
>> X login worked, and there were no errors. There were no errors during
>> ssh access.
>> See Wallstreet-63e3756d1bdf-2.txt.
>>
> 
> Thanks for collecting these results, Stan. Do you think that the 
> successful result from test 4) could have been just chance?

No. I repeated Test 4 above two more times on the Wallstreet. After
stomping on it as hard as I could, I didn't see any errors. I ran the
following tests simultaneously, with no errors:

a) Ping flood the Wallstreet
862132 packets transmitted, 862117 packets received, 0.0% packet loss
round-trip min/avg/max/stddev = 0.316/0.418/12.163/0.143 ms

b) "ls -Rail /usr" in an ssh window.

c) "find /usr/include -type f -exec sha1sum {} \;" in a second ssh window.

d) With a, b and c running, I logged in at the X console (slow but it
worked). Load average was 7.0 as reported by uptime.

So the success seems to be repeatable (or at least the errors are so
unlikely to happen that I'm not seeing anything).

> 
> It appears that the bug affecting the Powerbook 3400 is unaffected by 
> CONFIG_VMAP_STACK.
> 
> Whereas the bug affecting the Powerbook G3 disappears when 
> CONFIG_VMAP_STACK is disabled (assuming the result from 4 is reliable).
> 
> Either way, these results reiterate that "Oops: Kernel access of bad area, 
> sig: 11" was not entirely resolved by "powerpc/32s: Fix napping restore in 
> data storage interrupt (DSI)".
> 

That sounds right. Thanks for investigating this.


Re: Debian SID kernel doesn't boot on PowerBook 3400c

2021-08-07 Thread Stan Johnson
On 8/7/21 8:35 AM, Christophe Leroy wrote:
> 
> 
> Le 07/08/2021 à 15:09, Stan Johnson a écrit :
>> On 8/6/21 10:08 PM, Finn Thain wrote:
>>>
>>> On Fri, 6 Aug 2021, Stan Johnson wrote:
>>>
 $ egrep '(CONFIG_PPC_KUAP|CONFIG_VMAP_STACK)' .config
 CONFIG_PPC_KUAP=y
 CONFIG_PPC_KUAP_DEBUG=y
 CONFIG_VMAP_STACK=y
 $ strings vmlinux | fgrep "Linux version"
 Linux version 5.13.0-pmac-4-g63e3756d1bd ...
 $ cp vmlinux ../vmlinux-5.13.0-pmac-4-g63e3756d1bd-1

 1) PB 3400c
 vmlinux-5.13.0-pmac-4-g63e3756d1bd-1
 Boots, no errors logging in at (text) fb console. Logging in via ssh
 and
 running "ls -Rail /usr/include" generated errors (and a hung ssh
 session). Once errors started, they repeated for almost every command.
 See pb3400c-63e3756d1bdf-1.txt.

 2) Wallstreet
 vmlinux-5.13.0-pmac-4-g63e3756d1bd-1
 X login failed, there were errors ("Oops: Kernel access of bad area",
 "Oops: Exception in kernel mode"). Logging in via SSH, there were no
 additional errors after running "ls -Rail /usr/include" -- the errors
 did not escalate as they did on the PB 3400.
 See Wallstreet-63e3756d1bdf-1.txt.

>>> ...
 $ egrep '(CONFIG_PPC_KUAP|CONFIG_VMAP_STACK)' .config
 CONFIG_PPC_KUAP=y
 CONFIG_PPC_KUAP_DEBUG=y
 # CONFIG_VMAP_STACK is not set
 $ strings vmlinux | fgrep "Linux version"
 Linux version 5.13.0-pmac-4-g63e3756d1bd ...
 $ cp vmlinux ../vmlinux-5.13.0-pmac-4-g63e3756d1bd-2

 3) PB 3400c
 vmlinux-5.13.0-pmac-4-g63e3756d1bd-2
 Filesystem was corrupt from the previous test (probably from all the
 errors during shutdown). After fixing the filesystem:
 Boots, no errors logging in at (text) fb console. Logging in via ssh
 and
 running "ls -Rail /usr/include" generated a few errors. There didn't
 seem to be as many errors as in the previous test, there were a few
 errors during shutdown but the shutdown was otherwise normal.
 See pb3400c-63e3756d1bdf-2.txt.

 4) Wallstreet
 vmlinux-5.13.0-pmac-4-g63e3756d1bd-2
 X login worked, and there were no errors. There were no errors during
 ssh access.
 See Wallstreet-63e3756d1bdf-2.txt.

>>>
>>> Thanks for collecting these results, Stan. Do you think that the
>>> successful result from test 4) could have been just chance?
>>
>> No. I repeated Test 4 above two more times on the Wallstreet. After
>> stomping on it as hard as I could, I didn't see any errors. I ran the
>> following tests simultaneously, with no errors:
>>
>> a) Ping flood the Wallstreet
>> 862132 packets transmitted, 862117 packets received, 0.0% packet loss
>> round-trip min/avg/max/stddev = 0.316/0.418/12.163/0.143 ms
>>
>> b) "ls -Rail /usr" in an ssh window.
>>
>> c) "find /usr/include -type f -exec sha1sum {} \;" in a second ssh
>> window.
>>
>> d) With a, b and c running, I logged in at the X console (slow but it
>> worked). Load average was 7.0 as reported by uptime.
>>
>> So the success seems to be repeatable (or at least the errors are so
>> unlikely to happen that I'm not seeing anything).
>>
>>>
>>> It appears that the bug affecting the Powerbook 3400 is unaffected by
>>> CONFIG_VMAP_STACK.
>>>
>>> Whereas the bug affecting the Powerbook G3 disappears when
>>> CONFIG_VMAP_STACK is disabled (assuming the result from 4 is reliable).
>>>
>>> Either way, these results reiterate that "Oops: Kernel access of bad
>>> area,
>>> sig: 11" was not entirely resolved by "powerpc/32s: Fix napping
>>> restore in
>>> data storage interrupt (DSI)".
>>>
>>
>> That sounds right. Thanks for investigating this.
>>
> 
> 
> Thanks a lot for your patience and for the tests.
> 
> I'm still having a hard time understanding what the problem is.
> 
> Could you try the new change I pushed into the git repo ? It shouldn't
> have any effect, but I prefer to eliminate all possibilities. The
> documentation says that SRR1 upper bits are 0 on DSI and the code relies
> on that. But if the doc is wrong then that can explain the problem. So
> now I'm forcing them to 0 regardless.
> 
> To get the change, you just have to do 'git pull -r' inside the
> directory where you checked out the sources and build.
> 
> Thanks again
> Christophe
> 

Thanks, Christophe.

In the same directory as previous builds:

$ git checkout chleroy-linux/bugtest
HEAD is now at 63e3756d1bdf powerpc/interrupts: Also perform KUAP/KUEP
lock and usertime accounting on NMI
$ git pull -r
You are not currently on a branch.
Please specify which branch you want to rebase against.
...
$ git pull -r chleroy-linux
remote: Enumerating objects: 6, done.
remote: Counting objects: 100% (6/6), done.
remote: Compressing objects: 100% (6/6), done.
remote: Total 6 (delta 0), reused 6 (delta 0), pack-reused 0
Unpacking objects: 100% (6/6), done.
 From https://github.com/chleroy/linux
   63e3756d1bdf..9023760b1361  bugtest-> chleroy-linux/bugtest
Updating 63e3756d1bdf..902

Re: [PATCH v1 26/55] KVM: PPC: Book3S HV: Change dec_expires to be relative to guest timebase

2021-08-07 Thread Michael Ellerman
Nicholas Piggin  writes:
> Change dec_expires to be relative to the guest timebase, and allow
> it to be moved into low level P9 guest entry functions, to improve
> SPR access scheduling.
>
> Signed-off-by: Nicholas Piggin 
> ---
>  arch/powerpc/include/asm/kvm_book3s.h   |  6 +++
>  arch/powerpc/include/asm/kvm_host.h |  2 +-
>  arch/powerpc/kvm/book3s_hv.c| 58 +
>  arch/powerpc/kvm/book3s_hv_nested.c |  3 ++
>  arch/powerpc/kvm/book3s_hv_p9_entry.c   | 10 -
>  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 14 --
>  6 files changed, 49 insertions(+), 44 deletions(-)

My p8 is hitting an oops running guests, and bisect points to this. Not
obvious how the change relates to the oops, but maybe you can see it.

cheers


[  716.042962][T16989] Kernel attempted to read user page (0) - exploit 
attempt? (uid: 0)
[  716.043020][T16989] BUG: Kernel NULL pointer dereference on read at 
0x
[  716.043028][T16989] Faulting instruction address: 0xc001e1a8
[  716.043037][T16989] Oops: Kernel access of bad area, sig: 11 [#1]
[  716.043043][T16989] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
[  716.043052][T16989] Modules linked in: xt_MASQUERADE xt_conntrack ipt_REJECT 
nf_reject_ipv4 xt_tcpudp iptable_mangle iptable_nat nf_nat nf_conntrack 
nf_defrag_ipv6 nf_defrag_ipv4 nfnetlink ip6table_filter ip6_tables 
iptable_filter tun bridge stp llc fuse kvm_hv kvm binfmt_misc squashfs mlx4_ib 
ib_uverbs dm_multipath scsi_dh_rdac ib_core scsi_dh_alua mlx4_en sr_mod cdrom 
lpfc sg mlx4_core bnx2x crc_t10dif crct10dif_generic scsi_transport_fc mdio 
vmx_crypto gf128mul crct10dif_vpmsum crct10dif_common leds_powernv powernv_rng 
led_class crc32c_vpmsum rng_core powernv_op_panel sunrpc ip_tables x_tables 
autofs4
[  716.043128][T16989] CPU: 56 PID: 16989 Comm: qemu-system-ppc Not tainted 
5.14.0-rc4-02329-g9bdd37071243 #1
[  716.043137][T16989] NIP:  c001e1a8 LR: c001e154 CTR: 
c016ebb0
[  716.043144][T16989] REGS: c009f1a93480 TRAP: 0300   Not tainted  
(5.14.0-rc4-02329-g9bdd37071243)
[  716.043150][T16989] MSR:  92803033  
 CR: 42442444  XER: 2000
[  716.043167][T16989] CFAR: c000cd0c DAR:  DSISR: 
4000 IRQMASK: 3
[  716.043167][T16989] GPR00: c001eab8 c009f1a93720 
c2459f00 c009c0730270
[  716.043167][T16989] GPR04: 01f0  
22442448 c009c072ec80
[  716.043167][T16989] GPR08: 00c2 4400 
92803033 0001
[  716.043167][T16989] GPR12: 2200 c00ec600 
7fff955f4410 
[  716.043167][T16989] GPR16: 7fff9628 7fff955f0320 
7fff8ee8ebe0 7fff8e660028
[  716.043167][T16989] GPR20: c00803807400 c00858b243bc 
000a c2496eb8
[  716.043167][T16989] GPR24: c00801123650 c009c0730250 
c009c072ec80 02802000
[  716.043167][T16989] GPR28: 0080 02802000 
0080 c009f1a93e80
[  716.043236][T16989] NIP [c001e1a8] restore_math+0x208/0x310
[  716.043247][T16989] LR [c001e154] restore_math+0x1b4/0x310
[  716.043254][T16989] Call Trace:
[  716.043257][T16989] [c009f1a93720] [22442448] 0x22442448 
(unreliable)
[  716.043267][T16989] [c009f1a93780] [c001eab8] 
__switch_to+0x228/0x2f0
[  716.043274][T16989] [c009f1a937e0] [c0f7949c] 
__schedule+0x40c/0xf10
[  716.043283][T16989] [c009f1a938b0] [c0f7a034] schedule+0x94/0x170
[  716.043291][T16989] [c009f1a938e0] [c0080b8e4474] 
kvmppc_wait_for_exec+0xdc/0xf8 [kvm_hv]
[  716.043307][T16989] [c009f1a93960] [c0080b8eeb18] 
kvmppc_vcpu_run_hv+0x900/0x10f0 [kvm_hv]
[  716.043319][T16989] [c009f1a93a10] [c0080b76355c] 
kvmppc_vcpu_run+0x34/0x48 [kvm]
[  716.043340][T16989] [c009f1a93a30] [c0080b75f188] 
kvm_arch_vcpu_ioctl_run+0x340/0x450 [kvm]
[  716.043359][T16989] [c009f1a93ac0] [c0080b74d470] 
kvm_vcpu_ioctl+0x328/0x8f8 [kvm]
[  716.043378][T16989] [c009f1a93ca0] [c04fe9d4] 
sys_ioctl+0x6b4/0x13b0
[  716.043386][T16989] [c009f1a93db0] [c002f918] 
system_call_exception+0x168/0x290
[  716.043394][T16989] [c009f1a93e10] [c000c864] 
system_call_common+0xf4/0x258
[  716.043402][T16989] --- interrupt: c00 at 0x7fff954af010
[  716.043407][T16989] NIP:  7fff954af010 LR: 000116243430 CTR: 

[  716.043413][T16989] REGS: c009f1a93e80 TRAP: 0c00   Not tainted  
(5.14.0-rc4-02329-g9bdd37071243)
[  716.043419][T16989] MSR:  9000d033   CR: 
2242  XER: 
[  716.043434][T16989] IRQMASK: 0
[  716.043434][T16989] GPR00: 0036 7fff8ee8dc30 
7fff955a7100 000f
[  716.043434][T16989] GPR04: 2000ae80  
04fb 
[  716.043434][T16989] GPR08: 000f