[Bisected] [commit 2ad56efa80dba89162106c06ebc00b611325e584] [linux-next] WARNING while booting to kernel 6.6.0-rc3-next-20230929

2023-10-04 Thread Tasmiya Nalatwad

Greetings,

[linux-next] [6.6.0-rc3-next-20230929] [git bisect -> 
2ad56efa80dba89162106c06ebc00b611325e584]WARNING: CPU: 0 PID: 8 at 
arch/powerpc/kernel/iommu.c:407 __iommu_free+0x1e4/0x1f0


--- Traces ---

[   62.578338] WARNING: CPU: 0 PID: 8 at arch/powerpc/kernel/iommu.c:407 
__iommu_free+0x1e4/0x1f0
[   62.578345] Modules linked in: sd_mod t10_pi crc64_rocksoft crc64 sg 
ibmvfc mlx5_core(+) scsi_transport_fc ibmveth mlxfw psample dm_multipath 
dm_mirror dm_region_hash dm_log dm_mod fuse
[   62.578362] CPU: 0 PID: 8 Comm: kworker/0:0 Not tainted 
6.6.0-rc3-next-20230929-auto #1
[   62.578366] Hardware name: IBM,9080-HEX POWER10 (raw) 0x800200 
0xf06 of:IBM,FW1030.30 (NH1030_062) hv:phyp pSeries

[   62.578369] Workqueue: events work_for_cpu_fn
[   62.578374] NIP:  c005f6d4 LR: c005f6d0 CTR: 
005ca81c
[   62.578377] REGS: c3a27890 TRAP: 0700   Not tainted 
(6.6.0-rc3-next-20230929-auto)
[   62.578381] MSR:  8282b033   
CR: 48000824  XER: 0008

[   62.578391] CFAR: c020f738 IRQMASK: 0
[   62.578391] GPR00: c005f6d0 c3a27b30 c1481800 
0017
[   62.578391] GPR04: 7fff c3a27950 c3a27948 
0027
[   62.578391] GPR08: c00c18c07c10 0001 0027 
c2ac8a08
[   62.578391] GPR12:  c2ff c019cc88 
c3042300
[   62.578391] GPR16:    
c3071ab0
[   62.578391] GPR20: c349f80d c3215440 c3215480 
61c8864680b583eb
[   62.578391] GPR24:  7fff 08002000 
0010
[   62.578391] GPR28: 0002 8002 cc5dc800 
cc5dc880

[   62.578428] NIP [c005f6d4] __iommu_free+0x1e4/0x1f0
[   62.578432] LR [c005f6d0] __iommu_free+0x1e0/0x1f0
[   62.578435] Call Trace:
[   62.578437] [c3a27b30] [c005f6d0] 
__iommu_free+0x1e0/0x1f0 (unreliable)

[   62.578442] [c3a27bc0] [c005f848] iommu_free+0x28/0x70
[   62.578446] [c3a27bf0] [c0061518] 
iommu_free_coherent+0x68/0xa0
[   62.578450] [c3a27c20] [c005e8d4] 
dma_iommu_free_coherent+0x24/0x40
[   62.578455] [c3a27c40] [c024698c] 
dma_free_attrs+0x10c/0x140
[   62.578459] [c3a27c90] [c00800dcb8d4] 
mlx5_cmd_cleanup+0x5c/0x90 [mlx5_core]
[   62.578512] [c3a27cc0] [c00800dc45a0] 
mlx5_mdev_uninit+0xc8/0x100 [mlx5_core]
[   62.578558] [c3a27d00] [c00800dc4ac4] 
probe_one+0x3ec/0x530 [mlx5_core]
[   62.578602] [c3a27d90] [c08c5edc] 
local_pci_probe+0x6c/0x110
[   62.578607] [c3a27e10] [c0189c98] 
work_for_cpu_fn+0x38/0x60
[   62.578612] [c3a27e40] [c018d1d0] 
process_scheduled_works+0x230/0x4f0
[   62.578616] [c3a27f10] [c018ff14] 
worker_thread+0x1e4/0x500

[   62.578619] [c3a27f90] [c019cdb8] kthread+0x138/0x140
[   62.578624] [c3a27fe0] [c000df98] 
start_kernel_thread+0x14/0x18
[   62.578628] Code: 481b004d 6000 e89e0028 3c62ffe0 3863dd20 
481b0039 6000 e89e0038 3c62ffe0 3863dd38 481b0025 6000 
<0fe0> 4b20 6000 3c4c0142

[   62.578640] ---[ end trace  ]---
[   62.578646] iommu_free: invalid entry
[   62.578649]     entry = 0x800203d0
[   62.578650]     dma_addr  = 0x800203d
[   62.578652]     Table = 0xcc5dc800
[   62.578654]     bus#  = 0x1
[   62.578655]     size  = 0x2
[   62.578657]     startOff  = 0x8000
[   62.578658]     index = 0x70200016
[   62.578660] [ cut here ]

git bisect points to the commit below:

commit 2ad56efa80dba89162106c06ebc00b611325e584
Author: Jason Gunthorpe 
Date:   Wed Sep 13 10:43:36 2023 -0300

    powerpc/iommu: Setup a default domain and remove set_platform_dma_ops

--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center


[Bisected] [commit 2ad56efa80dba89162106c06ebc00b611325e584] [linux-next] WARNING while booting to kernel 6.6.0-rc3-next-20230929

2023-10-04 Thread Tasmiya Nalatwad

Greetings,

[linux-next] [6.6.0-rc3-next-20230929] [git bisect -> 
2ad56efa80dba89162106c06ebc00b611325e584]WARNING: CPU: 0 PID: 8 at 
arch/powerpc/kernel/iommu.c:407 __iommu_free+0x1e4/0x1f0


--- Traces ---

[   62.578338] WARNING: CPU: 0 PID: 8 at arch/powerpc/kernel/iommu.c:407 
__iommu_free+0x1e4/0x1f0
[   62.578345] Modules linked in: sd_mod t10_pi crc64_rocksoft crc64 sg 
ibmvfc mlx5_core(+) scsi_transport_fc ibmveth mlxfw psample dm_multipath 
dm_mirror dm_region_hash dm_log dm_mod fuse
[   62.578362] CPU: 0 PID: 8 Comm: kworker/0:0 Not tainted 
6.6.0-rc3-next-20230929-auto #1
[   62.578366] Hardware name: IBM,9080-HEX POWER10 (raw) 0x800200 
0xf06 of:IBM,FW1030.30 (NH1030_062) hv:phyp pSeries

[   62.578369] Workqueue: events work_for_cpu_fn
[   62.578374] NIP:  c005f6d4 LR: c005f6d0 CTR: 
005ca81c
[   62.578377] REGS: c3a27890 TRAP: 0700   Not tainted 
(6.6.0-rc3-next-20230929-auto)
[   62.578381] MSR:  8282b033   
CR: 48000824  XER: 0008

[   62.578391] CFAR: c020f738 IRQMASK: 0
[   62.578391] GPR00: c005f6d0 c3a27b30 c1481800 
0017
[   62.578391] GPR04: 7fff c3a27950 c3a27948 
0027
[   62.578391] GPR08: c00c18c07c10 0001 0027 
c2ac8a08
[   62.578391] GPR12:  c2ff c019cc88 
c3042300
[   62.578391] GPR16:    
c3071ab0
[   62.578391] GPR20: c349f80d c3215440 c3215480 
61c8864680b583eb
[   62.578391] GPR24:  7fff 08002000 
0010
[   62.578391] GPR28: 0002 8002 cc5dc800 
cc5dc880

[   62.578428] NIP [c005f6d4] __iommu_free+0x1e4/0x1f0
[   62.578432] LR [c005f6d0] __iommu_free+0x1e0/0x1f0
[   62.578435] Call Trace:
[   62.578437] [c3a27b30] [c005f6d0] 
__iommu_free+0x1e0/0x1f0 (unreliable)

[   62.578442] [c3a27bc0] [c005f848] iommu_free+0x28/0x70
[   62.578446] [c3a27bf0] [c0061518] 
iommu_free_coherent+0x68/0xa0
[   62.578450] [c3a27c20] [c005e8d4] 
dma_iommu_free_coherent+0x24/0x40
[   62.578455] [c3a27c40] [c024698c] 
dma_free_attrs+0x10c/0x140
[   62.578459] [c3a27c90] [c00800dcb8d4] 
mlx5_cmd_cleanup+0x5c/0x90 [mlx5_core]
[   62.578512] [c3a27cc0] [c00800dc45a0] 
mlx5_mdev_uninit+0xc8/0x100 [mlx5_core]
[   62.578558] [c3a27d00] [c00800dc4ac4] 
probe_one+0x3ec/0x530 [mlx5_core]
[   62.578602] [c3a27d90] [c08c5edc] 
local_pci_probe+0x6c/0x110
[   62.578607] [c3a27e10] [c0189c98] 
work_for_cpu_fn+0x38/0x60
[   62.578612] [c3a27e40] [c018d1d0] 
process_scheduled_works+0x230/0x4f0
[   62.578616] [c3a27f10] [c018ff14] 
worker_thread+0x1e4/0x500

[   62.578619] [c3a27f90] [c019cdb8] kthread+0x138/0x140
[   62.578624] [c3a27fe0] [c000df98] 
start_kernel_thread+0x14/0x18
[   62.578628] Code: 481b004d 6000 e89e0028 3c62ffe0 3863dd20 
481b0039 6000 e89e0038 3c62ffe0 3863dd38 481b0025 6000 
<0fe0> 4b20 6000 3c4c0142

[   62.578640] ---[ end trace  ]---
[   62.578646] iommu_free: invalid entry
[   62.578649]     entry = 0x800203d0
[   62.578650]     dma_addr  = 0x800203d
[   62.578652]     Table = 0xcc5dc800
[   62.578654]     bus#  = 0x1
[   62.578655]     size  = 0x2
[   62.578657]     startOff  = 0x8000
[   62.578658]     index = 0x70200016
[   62.578660] [ cut here ]

git bisect points to the commit below:

commit 2ad56efa80dba89162106c06ebc00b611325e584
    powerpc/iommu: Setup a default domain and remove set_platform_dma_ops

--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center


[Bisected] [commit 2ad56efa80dba89162106c06ebc00b611325e584] [linux-next] WARNING while booting to kernel 6.6.0-rc3-next-20230929

2023-10-04 Thread Tasmiya Nalatwad

Greetings,

[linux-next] [6.6.0-rc3-next-20230929] [git bisect -> 
2ad56efa80dba89162106c06ebc00b611325e584]WARNING: CPU: 0 PID: 8 at 
arch/powerpc/kernel/iommu.c:407 __iommu_free+0x1e4/0x1f0


--- Traces ---

[   62.578338] WARNING: CPU: 0 PID: 8 at arch/powerpc/kernel/iommu.c:407 
__iommu_free+0x1e4/0x1f0
[   62.578345] Modules linked in: sd_mod t10_pi crc64_rocksoft crc64 sg 
ibmvfc mlx5_core(+) scsi_transport_fc ibmveth mlxfw psample dm_multipath 
dm_mirror dm_region_hash dm_log dm_mod fuse
[   62.578362] CPU: 0 PID: 8 Comm: kworker/0:0 Not tainted 
6.6.0-rc3-next-20230929-auto #1
[   62.578366] Hardware name: IBM,9080-HEX POWER10 (raw) 0x800200 
0xf06 of:IBM,FW1030.30 (NH1030_062) hv:phyp pSeries

[   62.578369] Workqueue: events work_for_cpu_fn
[   62.578374] NIP:  c005f6d4 LR: c005f6d0 CTR: 
005ca81c
[   62.578377] REGS: c3a27890 TRAP: 0700   Not tainted 
(6.6.0-rc3-next-20230929-auto)
[   62.578381] MSR:  8282b033   
CR: 48000824  XER: 0008

[   62.578391] CFAR: c020f738 IRQMASK: 0
[   62.578391] GPR00: c005f6d0 c3a27b30 c1481800 
0017
[   62.578391] GPR04: 7fff c3a27950 c3a27948 
0027
[   62.578391] GPR08: c00c18c07c10 0001 0027 
c2ac8a08
[   62.578391] GPR12:  c2ff c019cc88 
c3042300
[   62.578391] GPR16:    
c3071ab0
[   62.578391] GPR20: c349f80d c3215440 c3215480 
61c8864680b583eb
[   62.578391] GPR24:  7fff 08002000 
0010
[   62.578391] GPR28: 0002 8002 cc5dc800 
cc5dc880

[   62.578428] NIP [c005f6d4] __iommu_free+0x1e4/0x1f0
[   62.578432] LR [c005f6d0] __iommu_free+0x1e0/0x1f0
[   62.578435] Call Trace:
[   62.578437] [c3a27b30] [c005f6d0] 
__iommu_free+0x1e0/0x1f0 (unreliable)

[   62.578442] [c3a27bc0] [c005f848] iommu_free+0x28/0x70
[   62.578446] [c3a27bf0] [c0061518] 
iommu_free_coherent+0x68/0xa0
[   62.578450] [c3a27c20] [c005e8d4] 
dma_iommu_free_coherent+0x24/0x40
[   62.578455] [c3a27c40] [c024698c] 
dma_free_attrs+0x10c/0x140
[   62.578459] [c3a27c90] [c00800dcb8d4] 
mlx5_cmd_cleanup+0x5c/0x90 [mlx5_core]
[   62.578512] [c3a27cc0] [c00800dc45a0] 
mlx5_mdev_uninit+0xc8/0x100 [mlx5_core]
[   62.578558] [c3a27d00] [c00800dc4ac4] 
probe_one+0x3ec/0x530 [mlx5_core]
[   62.578602] [c3a27d90] [c08c5edc] 
local_pci_probe+0x6c/0x110
[   62.578607] [c3a27e10] [c0189c98] 
work_for_cpu_fn+0x38/0x60
[   62.578612] [c3a27e40] [c018d1d0] 
process_scheduled_works+0x230/0x4f0
[   62.578616] [c3a27f10] [c018ff14] 
worker_thread+0x1e4/0x500

[   62.578619] [c3a27f90] [c019cdb8] kthread+0x138/0x140
[   62.578624] [c3a27fe0] [c000df98] 
start_kernel_thread+0x14/0x18
[   62.578628] Code: 481b004d 6000 e89e0028 3c62ffe0 3863dd20 
481b0039 6000 e89e0038 3c62ffe0 3863dd38 481b0025 6000 
<0fe0> 4b20 6000 3c4c0142

[   62.578640] ---[ end trace  ]---
[   62.578646] iommu_free: invalid entry
[   62.578649]     entry = 0x800203d0
[   62.578650]     dma_addr  = 0x800203d
[   62.578652]     Table = 0xcc5dc800
[   62.578654]     bus#  = 0x1
[   62.578655]     size  = 0x2
[   62.578657]     startOff  = 0x8000
[   62.578658]     index = 0x70200016
[   62.578660] [ cut here ]

git bisect points to the commit below:

commit 2ad56efa80dba89162106c06ebc00b611325e584
    powerpc/iommu: Setup a default domain and remove set_platform_dma_ops

--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center


[Bisected] [commit 2ad56efa80dba89162106c06ebc00b611325e584] [linux-next] WARNING while booting to kernel 6.6.0-rc3-next-20230929

2023-10-04 Thread Tasmiya Nalatwad

Greetings,

[linux-next] [6.6.0-rc3-next-20230929] [git bisect -> 
2ad56efa80dba89162106c06ebc00b611325e584]WARNING: CPU: 0 PID: 8 at 
arch/powerpc/kernel/iommu.c:407 __iommu_free+0x1e4/0x1f0


--- Traces ---

[   62.578338] WARNING: CPU: 0 PID: 8 at 
arch/powerpc/kernel/iommu.c:407 __iommu_free+0x1e4/0x1f0
[   62.578345] Modules linked in: sd_mod t10_pi crc64_rocksoft crc64 sg 
ibmvfc mlx5_core(+) scsi_transport_fc ibmveth mlxfw psample dm_multipath 
dm_mirror dm_region_hash dm_log dm_mod fuse
[   62.578362] CPU: 0 PID: 8 Comm: kworker/0:0 Not tainted 
6.6.0-rc3-next-20230929-auto #1
[   62.578366] Hardware name: IBM,9080-HEX POWER10 (raw) 0x800200 
0xf06 of:IBM,FW1030.30 (NH1030_062) hv:phyp pSeries

[   62.578369] Workqueue: events work_for_cpu_fn
[   62.578374] NIP:  c005f6d4 LR: c005f6d0 CTR: 
005ca81c
[   62.578377] REGS: c3a27890 TRAP: 0700   Not tainted 
(6.6.0-rc3-next-20230929-auto)
[   62.578381] MSR:  8282b033   
CR: 48000824  XER: 0008

[   62.578391] CFAR: c020f738 IRQMASK: 0
[   62.578391] GPR00: c005f6d0 c3a27b30 c1481800 
0017
[   62.578391] GPR04: 7fff c3a27950 c3a27948 
0027
[   62.578391] GPR08: c00c18c07c10 0001 0027 
c2ac8a08
[   62.578391] GPR12:  c2ff c019cc88 
c3042300
[   62.578391] GPR16:    
c3071ab0
[   62.578391] GPR20: c349f80d c3215440 c3215480 
61c8864680b583eb
[   62.578391] GPR24:  7fff 08002000 
0010
[   62.578391] GPR28: 0002 8002 cc5dc800 
cc5dc880

[   62.578428] NIP [c005f6d4] __iommu_free+0x1e4/0x1f0
[   62.578432] LR [c005f6d0] __iommu_free+0x1e0/0x1f0
[   62.578435] Call Trace:
[   62.578437] [c3a27b30] [c005f6d0] 
__iommu_free+0x1e0/0x1f0 (unreliable)

[   62.578442] [c3a27bc0] [c005f848] iommu_free+0x28/0x70
[   62.578446] [c3a27bf0] [c0061518] 
iommu_free_coherent+0x68/0xa0
[   62.578450] [c3a27c20] [c005e8d4] 
dma_iommu_free_coherent+0x24/0x40
[   62.578455] [c3a27c40] [c024698c] 
dma_free_attrs+0x10c/0x140
[   62.578459] [c3a27c90] [c00800dcb8d4] 
mlx5_cmd_cleanup+0x5c/0x90 [mlx5_core]
[   62.578512] [c3a27cc0] [c00800dc45a0] 
mlx5_mdev_uninit+0xc8/0x100 [mlx5_core]
[   62.578558] [c3a27d00] [c00800dc4ac4] 
probe_one+0x3ec/0x530 [mlx5_core]
[   62.578602] [c3a27d90] [c08c5edc] 
local_pci_probe+0x6c/0x110
[   62.578607] [c3a27e10] [c0189c98] 
work_for_cpu_fn+0x38/0x60
[   62.578612] [c3a27e40] [c018d1d0] 
process_scheduled_works+0x230/0x4f0
[   62.578616] [c3a27f10] [c018ff14] 
worker_thread+0x1e4/0x500

[   62.578619] [c3a27f90] [c019cdb8] kthread+0x138/0x140
[   62.578624] [c3a27fe0] [c000df98] 
start_kernel_thread+0x14/0x18
[   62.578628] Code: 481b004d 6000 e89e0028 3c62ffe0 3863dd20 
481b0039 6000 e89e0038 3c62ffe0 3863dd38 481b0025 6000 
<0fe0> 4b20 6000 3c4c0142

[   62.578640] ---[ end trace  ]---
[   62.578646] iommu_free: invalid entry
[   62.578649]     entry = 0x800203d0
[   62.578650]     dma_addr  = 0x800203d
[   62.578652]     Table = 0xcc5dc800
[   62.578654]     bus#  = 0x1
[   62.578655]     size  = 0x2
[   62.578657]     startOff  = 0x8000
[   62.578658]     index = 0x70200016
[   62.578660] [ cut here ]

git bisect points to the commit below:

commit 2ad56efa80dba89162106c06ebc00b611325e584
    powerpc/iommu: Setup a default domain and remove set_platform_dma_ops

--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center


Re: [Bisected] [commit 2ad56efa80dba89162106c06ebc00b611325e584] [linux-next] WARNING while booting to kernel 6.6.0-rc3-next-20230929

2023-10-04 Thread Tasmiya Nalatwad

Thanks, Jason. Yes, the suggested change works and the warnings are no longer seen.

On 10/4/23 17:08, Jason Gunthorpe wrote:

On Wed, Oct 04, 2023 at 04:37:10PM +0530, Tasmiya Nalatwad wrote:

Greetings,

[linux-next] [6.6.0-rc3-next-20230929] [git bisect ->
2ad56efa80dba89162106c06ebc00b611325e584]WARNING: CPU: 0 PID: 8 at
arch/powerpc/kernel/[1]iommu.c:407 __iommu_free+0x1e4/0x1f0
gitbisect is pointing to the below commit
commit 2ad56efa80dba89162106c06ebc00b611325e584
powerpc/iommu: Setup a default domain and remove set_platform_dma_ops

I assume this means there are still sequencing problems with power at
boot time, e.g. we turned on the dma ops in the wrong order or something
like that.

As far as I can see the only difference here is that we do the
operation to claim dma ops during the iommu driver probe. We can avoid that.

Does this work for you?

diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index d6ad3fde85a212..115b9031badac7 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1280,13 +1280,19 @@ struct iommu_table_group_ops spapr_tce_table_group_ops 
= {
  /*
   * A simple iommu_ops to allow less cruft in generic VFIO code.
   */
-static int spapr_tce_platform_iommu_attach_dev(struct iommu_domain *dom,
-  struct device *dev)
+static int
+spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain,
+   struct device *dev)
  {
+   struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
struct iommu_group *grp = iommu_group_get(dev);
struct iommu_table_group *table_group;
int ret = -EINVAL;

+   /* At first attach the ownership is already set */
+   if (!domain)
+   return 0;
+
if (!grp)
return -ENODEV;



--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center



[Bisected] [commit 2ad56efa80db] [Hotplug] WARNING while performing hotplug operation on 6.6-rc3-next

2023-10-06 Thread Tasmiya Nalatwad
 [7fff93637eb0] 0x7fff93637eb0
[ 6296.426351] LR [7fff93637e94] 0x7fff93637e94
[ 6296.426355] --- interrupt: c00
[ 6296.426358] Code: ebc1fff0 ebe1fff8 7c0803a6 4e800020 38a1 
7fc4f378 7f83e378 4bffe83d 7c690034 7c7a1b78 5529d97e 69290001 
<0b09> 2fa3 419efe4c 4b38

[ 6296.426378] ---[ end trace  ]---
[ 6296.427348] pci 0013:60:00.1: No hypervisor support for SR-IOV on 
this device, IOV BARs disabled.

[ 6296.430049] pci 0013:60:00.1: PME# supported from D0 D3hot D3cold
[ 6296.433403] pci 0013:60:00.1: Adding to iommu group 0
[ 6296.433408] iommu_tce: it_map is not empty
[ 6296.433414] [ cut here ]


git bisect points to the commit below:

commit 2ad56efa80dba89162106c06ebc00b611325e584
    powerpc/iommu: Setup a default domain and remove set_platform_dma_ops

--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center



Re: [Bisected] [commit 2ad56efa80db] [Hotplug] WARNING while performing hotplug operation on 6.6-rc3-next

2023-10-06 Thread Tasmiya Nalatwad

Thanks, Jason, for the confirmation and the fix.

On 10/6/23 19:09, Jason Gunthorpe wrote:

On Fri, Oct 06, 2023 at 06:50:00PM +0530, Tasmiya Nalatwad wrote:

Greetings,

Thanks Jason.

The fix you provided works. It no longer produces WARNINGs, but I am
seeing the logs below. Would you please confirm whether they are expected?

I don't know anything about your environment but those logs don't
appear to be related to this series?

Jason


--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center



Re: [Bisected] [commit 2ad56efa80db] [Hotplug] WARNING while performing hotplug operation on 6.6-rc3-next

2023-10-07 Thread Tasmiya Nalatwad

Greetings,

Thanks Jason.

The fix you provided works. It no longer produces WARNINGs, but I am
seeing the logs below. Would you please confirm whether they are expected?


https://lore.kernel.org/all/0-v1-2b52423411b9+164fc-iommu_ppc_defdomain_...@nvidia.com/

[  152.342125] pci 0013:60:00.0: No hypervisor support for SR-IOV on 
this device, IOV BARs disabled.

[  152.344896] pci 0013:60:00.0: PME# supported from D0 D3hot D3cold
[  152.348936] pci 0013:60:00.0: Adding to iommu group 0
[  152.349944] pci 0013:60:00.1: No hypervisor support for SR-IOV on 
this device, IOV BARs disabled.

[  152.352701] pci 0013:60:00.1: PME# supported from D0 D3hot D3cold
[  152.356520] pci 0013:60:00.1: Adding to iommu group 0
[  152.356527] iommu_tce: it_map is not empty
[  152.357501] pci 0013:60:00.2: No hypervisor support for SR-IOV on 
this device, IOV BARs disabled.

[  152.360247] pci 0013:60:00.2: PME# supported from D0 D3hot D3cold
[  152.364070] pci 0013:60:00.2: Adding to iommu group 0
[  152.364075] *iommu_tce: it_map is not empty*
[  152.365043] pci 0013:60:00.3: No hypervisor support for SR-IOV on 
this device, IOV BARs disabled.

[  152.367790] pci 0013:60:00.3: PME# supported from D0 D3hot D3cold
[  152.371635] pci 0013:60:00.3: Adding to iommu group 0
[  152.371641] iommu_tce: it_map is not empty
[  152.371720] pci 0013:60:00.0: of_irq_parse_pci: no interrupt-map 
found, INTx interrupts not available
[  152.408351] tg3 0013:60:00.0: iommu: 64-bit OK but direct DMA is 
limited by 820
[  152.408376] tg3 0013:60:00.0: iommu: 64-bit OK but direct DMA is 
limited by 820

*[  152.408404] tg3 0013:60:00.0: DMA engine test failed, aborting*
*[  152.408557] tg3: probe of 0013:60:00.0 failed with error -12*
[  152.408583] pci 0013:60:00.1: Adding to iommu group 0
[  152.408589] iommu_tce: it_map is not empty
[  152.408656] pci 0013:60:00.1: of_irq_parse_pci: no interrupt-map 
found, INTx interrupts not available
[  152.448321] tg3 0013:60:00.1: iommu: 64-bit OK but direct DMA is 
limited by 820
[  152.448339] tg3 0013:60:00.1: iommu: 64-bit OK but direct DMA is 
limited by 820

[  152.448360] tg3 0013:60:00.1: DMA engine test failed, aborting
[  152.448497] tg3: probe of 0013:60:00.1 failed with error -12
[  152.448519] pci 0013:60:00.2: Adding to iommu group 0
[  152.448525] iommu_tce: it_map is not empty
[  152.448586] pci 0013:60:00.2: of_irq_parse_pci: no interrupt-map 
found, INTx interrupts not available
[  152.488323] tg3 0013:60:00.2: iommu: 64-bit OK but direct DMA is 
limited by 820
[  152.488340] tg3 0013:60:00.2: iommu: 64-bit OK but direct DMA is 
limited by 820

[  152.488362] tg3 0013:60:00.2: DMA engine test failed, aborting
[  152.488495] tg3: probe of 0013:60:00.2 failed with error -12
[  152.488516] pci 0013:60:00.3: Adding to iommu group 0
[  152.488523] iommu_tce: it_map is not empty
[  152.488581] pci 0013:60:00.3: of_irq_parse_pci: no interrupt-map 
found, INTx interrupts not available
[  152.528342] tg3 0013:60:00.3: iommu: 64-bit OK but direct DMA is 
limited by 820
[  152.528359] tg3 0013:60:00.3: iommu: 64-bit OK but direct DMA is 
limited by 820

[  152.528381] tg3 0013:60:00.3: DMA engine test failed, aborting
[  152.528515] tg3: probe of 0013:60:00.3 failed with error -12

On 10/6/23 17:06, Jason Gunthorpe wrote:

On Fri, Oct 06, 2023 at 01:20:17PM +0530, Tasmiya Nalatwad wrote:

Greetings,

[linux-next] [6.6.0-rc3-next-20230929] WARNING: CPU: 5 PID: 185612 at
drivers/iommu/iommu.c:3049  iommu_setup_default_domain+0x410/0x680

--- Traces ---

[ 6296.425934] WARNING: CPU: 5 PID: 185612 at drivers/iommu/iommu.c:3049
iommu_setup_default_domain+0x410/0x680

Does this fix it too? I think it should?

https://lore.kernel.org/r/0-v1-2b52423411b9+164fc-iommu_ppc_defdomain_...@nvidia.com

Jason


--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center


[Bisected] [1b4fa28a8b07] Build failure "net/core/gso_test.c"

2023-10-12 Thread Tasmiya Nalatwad

Greetings,

[net-next] [6.6-rc4] Build failure "net/core/gso_test.c"

--- Traces ---

make -j 33 -s && make modules_install && make install
net/core/gso_test.c:58:48: error: initializer element is not constant
   58 | .segs = (const unsigned int[]) { gso_size },
  |    ^
net/core/gso_test.c:58:48: note: (near initialization for ‘cases[0]’)
net/core/gso_test.c:65:48: error: initializer element is not constant
   65 | .segs = (const unsigned int[]) { gso_size, 
gso_size, 1 },

  |    ^
net/core/gso_test.c:65:48: note: (near initialization for ‘cases[1]’)
net/core/gso_test.c:72:49: error: initializer element is not constant
   72 | .frags = (const unsigned int[]) { gso_size, 1 },
  | ^
net/core/gso_test.c:72:49: note: (near initialization for ‘cases[2]’)
net/core/gso_test.c:74:48: error: initializer element is not constant
   74 | .segs = (const unsigned int[]) { gso_size, 
gso_size, 1 },

  |    ^
net/core/gso_test.c:74:48: note: (near initialization for ‘cases[2]’)
net/core/gso_test.c:80:49: error: initializer element is not constant
   80 | .frags = (const unsigned int[]) { gso_size, 
gso_size, 2 },

  | ^
net/core/gso_test.c:80:49: note: (near initialization for ‘cases[3]’)
net/core/gso_test.c:82:48: error: initializer element is not constant
   82 | .segs = (const unsigned int[]) { gso_size, 
gso_size, 2 },

  |    ^
net/core/gso_test.c:82:48: note: (near initialization for ‘cases[3]’)
net/core/gso_test.c:89:49: error: initializer element is not constant
   89 | .frags = (const unsigned int[]) { gso_size, 3 },
  | ^
net/core/gso_test.c:89:49: note: (near initialization for ‘cases[4]’)
net/core/gso_test.c:91:48: error: initializer element is not constant
   91 | .segs = (const unsigned int[]) { 2 * gso_size, 3 },
  |    ^
net/core/gso_test.c:91:48: note: (near initialization for ‘cases[4]’)
net/core/gso_test.c:99:53: error: initializer element is not constant
   99 | .frag_skbs = (const unsigned int[]) { gso_size, 
gso_size },

  | ^
net/core/gso_test.c:99:53: note: (near initialization for ‘cases[5]’)
net/core/gso_test.c:101:48: error: initializer element is not constant
  101 | .segs = (const unsigned int[]) { gso_size, 
gso_size, gso_size },

  |    ^
net/core/gso_test.c:101:48: note: (near initialization for ‘cases[5]’)
net/core/gso_test.c:107:53: error: initializer element is not constant
  107 | .frag_skbs = (const unsigned int[]) { gso_size, 
gso_size },

  | ^
net/core/gso_test.c:107:53: note: (near initialization for ‘cases[6]’)
net/core/gso_test.c:109:48: error: initializer element is not constant
  109 | .segs = (const unsigned int[]) { gso_size, 
gso_size },

  |    ^
net/core/gso_test.c:109:48: note: (near initialization for ‘cases[6]’)
net/core/gso_test.c:117:53: error: initializer element is not constant
  117 | .frag_skbs = (const unsigned int[]) { gso_size, 
1, gso_size, 2 },

  | ^
net/core/gso_test.c:117:53: note: (near initialization for ‘cases[7]’)
net/core/gso_test.c:119:48: error: initializer element is not constant
  119 | .segs = (const unsigned int[]) { gso_size, 
gso_size, gso_size, 3 },

  |    ^
net/core/gso_test.c:119:48: note: (near initialization for ‘cases[7]’)
make[4]: *** [scripts/Makefile.build:243: net/core/gso_test.o] Error 1
make[4]: *** Waiting for unfinished jobs
make[3]: *** [scripts/Makefile.build:480: net/core] Error 2
make[3]: *** Waiting for unfinished jobs

make[2]: *** [scripts/Makefile.build:480: net] Error 2
make[2]: *** Waiting for unfinished jobs
make[1]: *** [/root/net-next/Makefile:1913: .] Error 2
make: *** [Makefile:234: __sub-make] Error 2

git bisect points to the commit below; reverting it resolves
the issue.


commit 1b4fa28a8b07eb331aeb7fbfc806c0d2e3dc3627
    net: parametrize skb_segment unit test to expand coverage

--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center



Re: [Bisected] [1b4fa28a8b07] Build failure "net/core/gso_test.c"

2023-10-12 Thread Tasmiya Nalatwad

Greetings,

Thank you, Florian. I have tried the changes you suggested and they
fix the issue. With the suggested changes the problem is no longer seen.


On 10/12/23 15:27, Florian Westphal wrote:

.linear_len = GSO_TEST_SIZE,


--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center



[Bisected] [efeda3bf912f] OOPS crash while performing Block device module parameter test [qla2xxx / FC]

2023-10-18 Thread Tasmiya Nalatwad

Greetings,

OOPs Kernel crash while performing Block device module parameter test 
[qla2xxx / FC] on linux-next 6.6.0-rc5-next-20231010


--- Traces ---

[30876.431678] Kernel attempted to read user page (30) - exploit 
attempt? (uid: 0)

[30876.431687] BUG: Kernel NULL pointer dereference on read at 0x0030
[30876.431692] Faulting instruction address: 0xc008018e3180
[30876.431697] Oops: Kernel access of bad area, sig: 11 [#1]
[30876.431700] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=8192 NUMA pSeries
[30876.431705] Modules linked in: qla2xxx(+) nvme_fc nvme_fabrics 
nvme_core dm_round_robin dm_queue_length exfat vfat fat btrfs 
blake2b_generic zstd_compress loop raid10 raid456 async_raid6_recov 
async_memcpy async_pq async_xor async_tx xor raid6_pq raid1 linear xfs 
libcrc32c raid0 nvram rpadlpar_io rpaphp xsk_diag bonding tls rfkill 
vmx_crypto pseries_rng binfmt_misc ext4 mbcache jbd2 dm_service_time 
sd_mod sg ibmvfc ibmveth t10_pi crc64_rocksoft crc64 scsi_transport_fc 
dm_multipath dm_mirror dm_region_hash dm_log dm_mod fuse [last unloaded: 
nvme_core]
[30876.431767] CPU: 0 PID: 1289400 Comm: kworker/0:2 Kdump: loaded Not 
tainted 6.6.0-rc5-next-20231010-auto #1
[30876.431773] Hardware name: IBM,9080-HEX POWER10 (raw) 0x800200 
0xf06 of:IBM,FW1030.30 (NH1030_062) hv:phyp pSeries

[30876.431779] Workqueue: events work_for_cpu_fn
[30876.431788] NIP:  c008018e3180 LR: c008018e3128 CTR: 
c0513f80
[30876.431792] REGS: c00062a8b930 TRAP: 0300   Not tainted 
(6.6.0-rc5-next-20231010-auto)
[30876.431797] MSR:  8280b033   
CR: 28000482  XER: 2004000f
[30876.431811] CFAR: c008018e3138 DAR: 0030 DSISR: 
4000 IRQMASK: 0
[30876.431811] GPR00: c008018e3128 c00062a8bbd0 c00800eb8300 

[30876.431811] GPR04:    
0017bbac
[30876.431811] GPR08:  0030  
c008019a6d68
[30876.431811] GPR12:  c2ff c019cb98 
c00082a97980
[30876.431811] GPR16:    
c3071ab0
[30876.431811] GPR20: c3491c0d c00063bb9a00 c00063bb30c0 
c001d8b52928
[30876.431811] GPR24: c00800eb63a8 ffed c001d8b52000 
0102
[30876.431811] GPR28: c00800ebaf00 c001d8b52890  
c001d8b58000

[30876.431856] NIP [c008018e3180] qla2x00_mem_free+0x298/0x6b0 [qla2xxx]
[30876.431876] LR [c008018e3128] qla2x00_mem_free+0x240/0x6b0 [qla2xxx]
[30876.431895] Call Trace:
[30876.431897] [c00062a8bbd0] [c008018e2f1c] 
qla2x00_mem_free+0x34/0x6b0 [qla2xxx] (unreliable)
[30876.431917] [c00062a8bc20] [c008018eed30] 
qla2x00_probe_one+0x16d8/0x2640 [qla2xxx]
[30876.431937] [c00062a8bd90] [c08c589c] 
local_pci_probe+0x6c/0x110
[30876.431943] [c00062a8be10] [c0189ba8] 
work_for_cpu_fn+0x38/0x60
[30876.431948] [c00062a8be40] [c018d0d0] 
process_scheduled_works+0x230/0x4f0
[30876.431952] [c00062a8bf10] [c018fe14] 
worker_thread+0x1e4/0x500

[30876.431955] [c00062a8bf90] [c019ccc8] kthread+0x138/0x140
[30876.431960] [c00062a8bfe0] [c000df98] 
start_kernel_thread+0x14/0x18
[30876.431965] Code: 4082000c a09f0198 78841b68 e8df0278 38e0 
480c3b8d e8410018 3920 e91f0178 f93f0280 f93f0278 39280030 
 7fa95040 419e00b8 ebc80030

[30876.431977] ---[ end trace  ]---
[30876.480385] pstore: backend (nvram) writing error (-1)


Git bisect points to the commit below. Reverting this commit fixes the problem.
commit efeda3bf912f269bcae16816683f432f58d68075
    scsi: qla2xxx: Move resource to allow code reuse

--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center



Re: [EXT] [Bisected] [efeda3bf912f] OOPS crash while performing Block device module parameter test [qla2xxx / FC]

2023-10-18 Thread Tasmiya Nalatwad

Thanks Nilesh. The patch fixes the issue.

On 10/18/23 19:59, Nilesh Javali wrote:

Hi Tasmiya,


-Original Message-
From: Tasmiya Nalatwad 
Sent: Wednesday, October 18, 2023 6:51 PM
To: linux-s...@vger.kernel.org; linux-ker...@vger.kernel.org; linuxppc-
d...@lists.ozlabs.org; linux-bl...@vger.kernel.org; linux-n...@vger.kernel.org
Cc: Quinn Tran ; Nilesh Javali ;
himanshu.madh...@oracle.com; martin.peter...@oracle.com; GR-QLogic-
Storage-Upstream ;
j...@linux.ibm.com; abdha...@linux.vnet.ibm.com; mputt...@linux.vnet.com;
sach...@linux.vnet.com
Subject: [EXT] [Bisected] [efeda3bf912f] OOPS crash while performing Block
device module parameter test [qla2xxx / FC]

External Email

--
Greetings,

OOPs Kernel crash while performing Block device module parameter test
[qla2xxx / FC] on linux-next 6.6.0-rc5-next-20231010

--- Traces ---

[30876.431678] Kernel attempted to read user page (30) - exploit
attempt? (uid: 0)
[30876.431687] BUG: Kernel NULL pointer dereference on read at 0x0030
[30876.431692] Faulting instruction address: 0xc008018e3180
[30876.431697] Oops: Kernel access of bad area, sig: 11 [#1]
[30876.431700] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=8192 NUMA
pSeries
[30876.431705] Modules linked in: qla2xxx(+) nvme_fc nvme_fabrics
nvme_core dm_round_robin dm_queue_length exfat vfat fat btrfs
blake2b_generic zstd_compress loop raid10 raid456 async_raid6_recov
async_memcpy async_pq async_xor async_tx xor raid6_pq raid1 linear xfs
libcrc32c raid0 nvram rpadlpar_io rpaphp xsk_diag bonding tls rfkill
vmx_crypto pseries_rng binfmt_misc ext4 mbcache jbd2 dm_service_time
sd_mod sg ibmvfc ibmveth t10_pi crc64_rocksoft crc64 scsi_transport_fc
dm_multipath dm_mirror dm_region_hash dm_log dm_mod fuse [last unloaded:
nvme_core]
[30876.431767] CPU: 0 PID: 1289400 Comm: kworker/0:2 Kdump: loaded Not
tainted 6.6.0-rc5-next-20231010-auto #1
[30876.431773] Hardware name: IBM,9080-HEX POWER10 (raw) 0x800200
0xf06 of:IBM,FW1030.30 (NH1030_062) hv:phyp pSeries
[30876.431779] Workqueue: events work_for_cpu_fn
[30876.431788] NIP:  c008018e3180 LR: c008018e3128 CTR:
c0513f80
[30876.431792] REGS: c00062a8b930 TRAP: 0300   Not tainted
(6.6.0-rc5-next-20231010-auto)
[30876.431797] MSR:  8280b033 
CR: 28000482  XER: 2004000f
[30876.431811] CFAR: c008018e3138 DAR: 0030 DSISR:
4000 IRQMASK: 0
[30876.431811] GPR00: c008018e3128 c00062a8bbd0
c00800eb8300

[30876.431811] GPR04:  

0017bbac
[30876.431811] GPR08:  0030

c008019a6d68
[30876.431811] GPR12:  c2ff
c019cb98
c00082a97980
[30876.431811] GPR16:  

c3071ab0
[30876.431811] GPR20: c3491c0d c00063bb9a00
c00063bb30c0
c001d8b52928
[30876.431811] GPR24: c00800eb63a8 ffed c001d8b52000
0102
[30876.431811] GPR28: c00800ebaf00 c001d8b52890

c001d8b58000
[30876.431856] NIP [c008018e3180] qla2x00_mem_free+0x298/0x6b0
[qla2xxx]
[30876.431876] LR [c008018e3128] qla2x00_mem_free+0x240/0x6b0
[qla2xxx]
[30876.431895] Call Trace:
[30876.431897] [c00062a8bbd0] [c008018e2f1c]
qla2x00_mem_free+0x34/0x6b0 [qla2xxx] (unreliable)
[30876.431917] [c00062a8bc20] [c008018eed30]
qla2x00_probe_one+0x16d8/0x2640 [qla2xxx]
[30876.431937] [c00062a8bd90] [c08c589c]
local_pci_probe+0x6c/0x110
[30876.431943] [c00062a8be10] [c0189ba8]
work_for_cpu_fn+0x38/0x60
[30876.431948] [c00062a8be40] [c018d0d0]
process_scheduled_works+0x230/0x4f0
[30876.431952] [c00062a8bf10] [c018fe14]
worker_thread+0x1e4/0x500
[30876.431955] [c00062a8bf90] [c019ccc8]
kthread+0x138/0x140
[30876.431960] [c00062a8bfe0] [c000df98]
start_kernel_thread+0x14/0x18
[30876.431965] Code: 4082000c a09f0198 78841b68 e8df0278 38e0
480c3b8d e8410018 3920 e91f0178 f93f0280 f93f0278 39280030
 7fa95040 419e00b8 ebc80030
[30876.431977] ---[ end trace  ]---
[30876.480385] pstore: backend (nvram) writing error (-1)


Git bisect points to below commit. Reverting this commit fixes the problem.
commit efeda3bf912f269bcae16816683f432f58d68075
      scsi: qla2xxx: Move resource to allow code reuse

--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center

We have recently posted a fix for the commit that you pointed to here:
https://marc.info/?l=linux-scsi&m=169750508721982&w=2

Thanks,
Nilesh


--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center



[Mainline/linux-next-netdev/net-next/scsi]Dlpar remove, drmgr phb and pci remove operations are failing

2024-01-02 Thread Tasmiya Nalatwad

Greetings,

 [Mainline/linux-next-netdev/net-next/scsi] DLPAR remove, drmgr PHB and 
PCI remove operations are failing


command ---> chhwres -r io --rsubtype slot -m "managed system name" -o r 
--id 6 -l 21030014


output --->

HSCL2929 The dynamic removal of I/O resources failed: The I/O slot 
dynamic partitioning operation failed.  Here are the I/O slot IDs that 
failed and the reasons for failure:


Jan 02 02:20:22 caDlparCommand:execv to drmgr
Validating PHB DLPAR capability...yes.
Could not find drc index 0x2014 to add to phb list
There are no DR capable slots on this system
Could not find PHB PHB 20



The OS return code is 3.

--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center



[mainline] [linux-next] [6.8-rc1] [FC] [DLPAR] OOps kernel crash after performing dlpar remove test

2024-01-31 Thread Tasmiya Nalatwad
  

[58563.146632] GPR24: 000106b41668  0006 
01000d8a5270
[58563.146632] GPR28: 0006 01000d8a02a0 01000d8a5270 
0006

[58563.146690] NIP [24470cb4] 0x24470cb4
[58563.146694] LR [243e7d04] 0x243e7d04
[58563.146698] --- interrupt: c00
[58563.146701] Code: e9299a20 3d020173 39089a20 7fa94000 419e0038 
e9490018 7fbf5000 409e0020 4870 6000 6000 6000 
 7faaf840 419e0058 e929

[58563.146722] ---[ end trace  ]---

--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center



Re: [mainline] [linux-next] [6.8-rc1] [FC] [DLPAR] OOps kernel crash after performing dlpar remove test

2024-02-01 Thread Tasmiya Nalatwad
] NIP:  200013f21594 LR: 200013e97bf4 CTR: 

[  981.124309] REGS: c000a878be80 TRAP: 0c00   Not tainted 
(6.5.0-rc6-next-20230817-auto)
[  981.124312] MSR:  8280f033 
  CR: 22000282  XER: 

[  981.124321] IRQMASK: 0
[  981.124321] GPR00: 0004 73a55c70 200014007300 
0007
[  981.124321] GPR04: 00013aff5750 0008 fbad2c80 
00013afd02a0
[  981.124321] GPR08: 0001   

[  981.124321] GPR12:  200013b7bc30  

[  981.124321] GPR16:    

[  981.124321] GPR20:    

[  981.124321] GPR24: 00010ef61668  0008 
00013aff5750
[  981.124321] GPR28: 0008 00013afd02a0 00013aff5750 
0008

[  981.124356] NIP [200013f21594] 0x200013f21594
[  981.124358] LR [200013e97bf4] 0x200013e97bf4
[  981.124361] --- interrupt: c00
[  981.124362] Code: 38427bd0 7c0802a6 6000 7c0802a6 fba1ffe8 
fbc1fff0 fbe1fff8 7cbf2b78 38a0 7cdd3378 f8010010 f821ffc1 
 4bff95d1 6000 7c7e1b79

[  981.124374] ---[ end trace  ]---


Thanks and Regards


On 1/31/24 16:18, Robin Murphy wrote:

On 2024-01-31 9:19 am, Tasmiya Nalatwad wrote:

Greetings,

[mainline] [linux-next] [6.8-rc1] [DLPAR] Oops kernel crash after 
performing a DLPAR remove test


--- Traces ---

[58563.146236] BUG: Unable to handle kernel data access at 
0x6b6b6b6b6b6b6b83

[58563.146242] Faulting instruction address: 0xc09c0e60
[58563.146248] Oops: Kernel access of bad area, sig: 11 [#1]
[58563.146252] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=8192 NUMA pSeries
[58563.146258] Modules linked in: isofs cdrom dm_snapshot dm_bufio 
dm_round_robin dm_queue_length exfat vfat fat btrfs blake2b_generic 
xor raid6_pq zstd_compress loop xfs libcrc32c raid0 nvram rpadlpar_io 
rpaphp nfnetlink xsk_diag bonding tls rfkill sunrpc dm_service_time 
dm_multipath dm_mod pseries_rng vmx_crypto binfmt_misc ext4 mbcache 
jbd2 sd_mod sg ibmvscsi scsi_transport_srp ibmveth lpfc nvmet_fc 
nvmet nvme_fc nvme_fabrics nvme_core t10_pi crc64_rocksoft crc64 
scsi_transport_fc fuse
[58563.146326] CPU: 0 PID: 1071247 Comm: drmgr Kdump: loaded Not 
tainted 6.8.0-rc1-auto-gecb1b8288dc7 #1
[58563.146332] Hardware name: IBM,9009-42A POWER9 (raw) 0x4e0202 
0xf05 of:IBM,FW950.A0 (VL950_141) hv:phyp pSeries
[58563.146337] NIP:  c09c0e60 LR: c09c0e28 CTR: 
c09c1584
[58563.146342] REGS: c0007960f260 TRAP: 0380   Not tainted 
(6.8.0-rc1-auto-gecb1b8288dc7)
[58563.146347] MSR:  80009033   CR: 
24822424  XER: 20040006

[58563.146360] CFAR: c09c0e74 IRQMASK: 0
[58563.146360] GPR00: c09c0e28 c0007960f500 
c1482600 c3050540
[58563.146360] GPR04:  c0089a6870c0 
0001 fffe
[58563.146360] GPR08: c2bac020 6b6b6b6b6b6b6b6b 
6b6b6b6b6b6b6b6b 0220
[58563.146360] GPR12: 2000 c308 
 
[58563.146360] GPR16:   
 0001
[58563.146360] GPR20: c1281478  
c1281490 c2bfed80
[58563.146360] GPR24: c0089a6870c0  
 c2b9ffb8
[58563.146360] GPR28:  c2bac0e8 
 

[58563.146421] NIP [c09c0e60] iommu_ops_from_fwnode+0x68/0x118
[58563.146430] LR [c09c0e28] iommu_ops_from_fwnode+0x30/0x118


This implies that iommu_device_list has become corrupted. Looks like 
spapr_tce_setup_phb_iommus_initcall() registers an iommu_device which 
pcibios_free_controller() could free if a PCI controller is removed, 
but there's no path anywhere to ever unregister any of those IOMMUs. 
Presumably this also means that if a PCI controller is dynamically 
added after init, its IOMMU won't be set up properly either.


Thanks,
Robin.


[58563.146437] Call Trace:
[58563.146439] [c0007960f500] [c0007960f560] 
0xc0007960f560 (unreliable)
[58563.146446] [c0007960f530] [c09c0fd0] 
__iommu_probe_device+0xc0/0x5c0
[58563.146454] [c0007960f5a0] [c09c151c] 
iommu_probe_device+0x4c/0xb4
[58563.146462] [c0007960f5e0] [c09c15d0] 
iommu_bus_notifier+0x4c/0x8c
[58563.146469] [c0007960f600] [c019e3d0] 
notifier_call_chain+0xb8/0x1a0
[58563.146476] [c0007960f660] [c019eea0] 
blocking_notifier_call_chain+0x64/0x94
[58563.146483] [c0007960f6a0] [c09d3c5c] 
bus_notify+0x50/0x7c
[58563.146491] [c0007960f6e0] [c09cfba4] 
device_add+0x774/0x9bc
[58563.146498] [c0007960f7a0] [c08abe9c] 
pci_device_add+0x2f4/0x864
[58563.146506] [c0

Re: [mainline] [linux-next] [6.8-rc1] [FC] [DLPAR] OOps kernel crash after performing dlpar remove test

2024-02-01 Thread Tasmiya Nalatwad
: 200013e97bf4 CTR:

[  981.124309] REGS: c000a878be80 TRAP: 0c00   Not tainted
(6.5.0-rc6-next-20230817-auto)
[  981.124312] MSR:  8280f033
  CR: 22000282  XER: 
[  981.124321] IRQMASK: 0
[  981.124321] GPR00: 0004 73a55c70 200014007300
0007
[  981.124321] GPR04: 00013aff5750 0008 fbad2c80
00013afd02a0
[  981.124321] GPR08: 0001  

[  981.124321] GPR12:  200013b7bc30 

[  981.124321] GPR16:   

[  981.124321] GPR20:   

[  981.124321] GPR24: 00010ef61668  0008
00013aff5750
[  981.124321] GPR28: 0008 00013afd02a0 00013aff5750
0008
[  981.124356] NIP [200013f21594] 0x200013f21594
[  981.124358] LR [200013e97bf4] 0x200013e97bf4
[  981.124361] --- interrupt: c00
[  981.124362] Code: 38427bd0 7c0802a6 6000 7c0802a6 fba1ffe8
fbc1fff0 fbe1fff8 7cbf2b78 38a0 7cdd3378 f8010010 f821ffc1
 4bff95d1 6000 7c7e1b79
[  981.124374] ---[ end trace  ]---


Thanks and Regards

On 1/31/24 16:18, Robin Murphy wrote:

On 2024-01-31 9:19 am, Tasmiya Nalatwad wrote:

Greetings,

[mainline] [linux-next] [6.8-rc1] [DLPAR] Oops kernel crash after 
performing a DLPAR remove test


--- Traces ---

[58563.146236] BUG: Unable to handle kernel data access at 
0x6b6b6b6b6b6b6b83

[58563.146242] Faulting instruction address: 0xc09c0e60
[58563.146248] Oops: Kernel access of bad area, sig: 11 [#1]
[58563.146252] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=8192 NUMA pSeries
[58563.146258] Modules linked in: isofs cdrom dm_snapshot dm_bufio 
dm_round_robin dm_queue_length exfat vfat fat btrfs blake2b_generic 
xor raid6_pq zstd_compress loop xfs libcrc32c raid0 nvram rpadlpar_io 
rpaphp nfnetlink xsk_diag bonding tls rfkill sunrpc dm_service_time 
dm_multipath dm_mod pseries_rng vmx_crypto binfmt_misc ext4 mbcache 
jbd2 sd_mod sg ibmvscsi scsi_transport_srp ibmveth lpfc nvmet_fc 
nvmet nvme_fc nvme_fabrics nvme_core t10_pi crc64_rocksoft crc64 
scsi_transport_fc fuse
[58563.146326] CPU: 0 PID: 1071247 Comm: drmgr Kdump: loaded Not 
tainted 6.8.0-rc1-auto-gecb1b8288dc7 #1
[58563.146332] Hardware name: IBM,9009-42A POWER9 (raw) 0x4e0202 
0xf05 of:IBM,FW950.A0 (VL950_141) hv:phyp pSeries
[58563.146337] NIP:  c09c0e60 LR: c09c0e28 CTR: 
c09c1584
[58563.146342] REGS: c0007960f260 TRAP: 0380   Not tainted 
(6.8.0-rc1-auto-gecb1b8288dc7)
[58563.146347] MSR:  80009033   CR: 
24822424  XER: 20040006

[58563.146360] CFAR: c09c0e74 IRQMASK: 0
[58563.146360] GPR00: c09c0e28 c0007960f500 
c1482600 c3050540
[58563.146360] GPR04:  c0089a6870c0 
0001 fffe
[58563.146360] GPR08: c2bac020 6b6b6b6b6b6b6b6b 
6b6b6b6b6b6b6b6b 0220
[58563.146360] GPR12: 2000 c308 
 
[58563.146360] GPR16:   
 0001
[58563.146360] GPR20: c1281478  
c1281490 c2bfed80
[58563.146360] GPR24: c0089a6870c0  
 c2b9ffb8
[58563.146360] GPR28:  c2bac0e8 
 

[58563.146421] NIP [c09c0e60] iommu_ops_from_fwnode+0x68/0x118
[58563.146430] LR [c09c0e28] iommu_ops_from_fwnode+0x30/0x118


This implies that iommu_device_list has become corrupted. Looks like 
spapr_tce_setup_phb_iommus_initcall() registers an iommu_device which 
pcibios_free_controller() could free if a PCI controller is removed, 
but there's no path anywhere to ever unregister any of those IOMMUs. 
Presumably this also means that if a PCI controller is dynamically 
added after init, its IOMMU won't be set up properly either.


Thanks,
Robin.


[58563.146437] Call Trace:
[58563.146439] [c0007960f500] [c0007960f560] 
0xc0007960f560 (unreliable)
[58563.146446] [c0007960f530] [c09c0fd0] 
__iommu_probe_device+0xc0/0x5c0
[58563.146454] [c0007960f5a0] [c09c151c] 
iommu_probe_device+0x4c/0xb4
[58563.146462] [c0007960f5e0] [c09c15d0] 
iommu_bus_notifier+0x4c/0x8c
[58563.146469] [c0007960f600] [c019e3d0] 
notifier_call_chain+0xb8/0x1a0
[58563.146476] [c0007960f660] [c019eea0] 
blocking_notifier_call_chain+0x64/0x94
[58563.146483] [c0007960f6a0] [c09d3c5c] 
bus_notify+0x50/0x7c
[58563.146491] [c0007960f6e0] [c09cfba4] 
device_add+0x774/0x9bc
[58563.146498] [c0007960f7a0] [c08abe9c] 
pci_device_add+0x2f4/0x864
[58563.146506] [c0007960f850] [c007d5a0] 
of_create_pci_dev+0x390/0xa08
[58

[Bisected] [commit dd6c3c544126] [linux-next] [6.8.0-rc2] Task hungs for infinite time call traces

2024-02-05 Thread Tasmiya Nalatwad
32378] 
interrupt_exit_user_prepare_main+0x1ac/0x264
[ 1104.161136] [c001d10b7e20] [c0032580] 
syscall_exit_prepare+0x150/0x178
[ 1104.161141] [c001d10b7e50] [c000d068] 
system_call_vectored_common+0x168/0x2ec
[ 1104.161146] --- interrupt: 3000 at 0x7fffaed4c11c
[ 1104.161149] NIP:  7fffaed4c11c LR:  CTR: 
[ 1104.161152] REGS: c001d10b7e80 TRAP: 3000   Not tainted  
(6.8.0-rc2-next-20240130-auto)
[ 1104.161155] MSR:  8000d033   CR: 48002402  
XER: 
[ 1104.161163] IRQMASK: 0
[ 1104.161163] GPR00: 0034 7fffec649770 7fffaef07f00 

[ 1104.161163] GPR04:  ff00  
0001
[ 1104.161163] GPR08: 00014cd61390   

[ 1104.161163] GPR12:  7fffaefbc140 0ee6b280 
7fffec649a30
[ 1104.161163] GPR16: 7fffec649bd8 000118b66478  

[ 1104.161163] GPR20:    

[ 1104.161163] GPR24: 7fffec64b0b0 000118b663d8 000118b66a58 

[ 1104.161163] GPR28: 00014cd61250  00014cd61370 
00014cd61140
[ 1104.161199] NIP [7fffaed4c11c] 0x7fffaed4c11c
[ 1104.161202] LR [] 0x0
[ 1104.161204] --- interrupt: 3000
[ 1227.040796] INFO: taskumount:32506  blocked for more than 368 seconds.
[ 1227.040810]   Not tainted 6.8.0-rc2-next-20240130-auto #1
[ 1227.040814] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this 
message.
[ 1227.040817]task:umount   state:D  stack:0  pid:32506  tgid:32506 
 ppid:32329   flags:0x00042080
[ 1227.040823] Call Trace:
[ 1227.040826] [c001d10b7560] [c6e9b934] 0xc6e9b934 
(unreliable)
[ 1227.040832] [c001d10b7710] [c001fbac] __switch_to+0x13c/0x220
[ 1227.040839] [c001d10b7770] [c1002840] __schedule+0x268/0x7c4
[ 1227.040844] [c001d10b7840] [c1002ddc] schedule+0x40/0x108
[ 1227.040848] [c001d10b78b0] [c100b748] 
schedule_timeout+0x19c/0x1c0
[ 1227.040853] [c001d10b7980] [c1004044] 
__wait_for_common+0x148/0x340
[ 1227.040857] [c001d10b7a10] [c018fa98] 
__flush_workqueue+0x15c/0x530
[ 1227.040863] [c001d10b7ab0] [c00808f89e9c] 
xfs_inodegc_flush+0x54/0x15c [xfs]
[ 1227.040925] [c001d10b7b00] [c00808f9f47c] xfs_unmountfs+0x30/0x1e4 
[xfs]
[ 1227.040980] [c001d10b7b80] [c00808fa825c] 
xfs_fs_put_super+0x5c/0x110 [xfs]
[ 1227.041035] [c001d10b7bf0] [c05c8774] 
generic_shutdown_super+0xc0/0x16c
[ 1227.041041] [c001d10b7c60] [c05c8a50] kill_block_super+0x30/0x68
[ 1227.041045] [c001d10b7c90] [c00808fa5c54] xfs_kill_sb+0x28/0x4c [xfs]
[ 1227.041099] [c001d10b7cc0] [c05ca9d4] 
deactivate_locked_super+0x70/0x144
[ 1227.041105] [c001d10b7cf0] [c0605728] cleanup_mnt+0x10c/0x1d8
[ 1227.041109] [c001d10b7d40] [c019b5e4] task_work_run+0xe0/0x16c
[ 1227.041113] [c001d10b7d90] [c0022974] 
do_notify_resume+0x134/0x13c
[ 1227.041118] [c001d10b7dc0] [c0032378] 
interrupt_exit_user_prepare_main+0x1ac/0x264
[ 1227.041123] [c001d10b7e20] [c0032580] 
syscall_exit_prepare+0x150/0x178
[ 1227.041128] [c001d10b7e50] [c000d068] 
system_call_vectored_common+0x168/0x2ec
[ 1227.041133] --- interrupt: 3000 at 0x7fffaed4c11c
[ 1227.041136] NIP:  7fffaed4c11c LR:  CTR: 
[ 1227.041139] REGS: c001d10b7e80 TRAP: 3000   Not tainted  
(6.8.0-rc2-next-20240130-auto)
[ 1227.041142] MSR:  8000d033   CR: 48002402  
XER: 
[ 1227.041150] IRQMASK: 0
[ 1227.041150] GPR00: 0034 7fffec649770 7fffaef07f00 

[ 1227.041150] GPR04:  ff00  
0001
[ 1227.041150] GPR08: 00014cd61390   

[ 1227.041150] GPR12:  7fffaefbc140 0ee6b280 
7fffec649a30
[ 1227.041150] GPR16: 7fffec649bd8 000118b66478  

[ 1227.041150] GPR20:    

[ 1227.041150] GPR24: 7fffec64b0b0 000118b663d8 000118b66a58 

[ 1227.041150] GPR28: 00014cd61250  00014cd61370 
00014cd61140
[ 1227.041186] NIP [7fffaed4c11c] 0x7fffaed4c11c
[ 1227.041188] LR [] 0x0
[ 1227.041190] --- interrupt: 3000

--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center


Fwd: [Bisected] [commit dd6c3c544126] [linux-next] [6.8.0-rc2] Task hungs for infinite time call traces

2024-02-05 Thread Tasmiya Nalatwad




 Forwarded Message 
Subject: 	[Bisected] [commit dd6c3c544126] [linux-next] [6.8.0-rc2] Task 
hungs for infinite time call traces

Date:   Mon, 5 Feb 2024 14:10:24 +0530
From:   Tasmiya Nalatwad 
To: 	linux-ker...@vger.kernel.org , 
linuxppc-dev@lists.ozlabs.org , 
linux-bl...@vger.kernel.org , 
linux-n...@vger.kernel.org 
CC: 	t...@kernel.org, jiangshan...@gmail.com, abdha...@linux.vnet.ibm.com 
, sach...@linux.vnet.com 
, mputt...@linux.vnet.com 




Greetings,

[linux-next] [6.8.0-rc2-next-20240130] [FC/XFS] Task hangs for infinite 
time while running bonnie test on XFS filesystem


Bisected the issue. Git bisect points to the below commit
commit dd6c3c5441263723305a9c52c5ccc899a4653000
Author: Tejun Heo 

--- Traces ---

[  981.280811] Call Trace:
[  981.280813] [c001d10b7560] [c6e9b934] 0xc6e9b934 
(unreliable)
[  981.280820] [c001d10b7710] [c001fbac] __switch_to+0x13c/0x220
[  981.280827] [c001d10b7770] [c1002840] __schedule+0x268/0x7c4
[  981.280832] [c001d10b7840] [c1002ddc] schedule+0x40/0x108
[  981.280836] [c001d10b78b0] [c100b748] 
schedule_timeout+0x19c/0x1c0
[  981.280841] [c001d10b7980] [c1004044] 
__wait_for_common+0x148/0x340
[  981.280845] [c001d10b7a10] [c018fa98] 
__flush_workqueue+0x15c/0x530
[  981.280852] [c001d10b7ab0] [c00808f89e9c] 
xfs_inodegc_flush+0x54/0x15c [xfs]
[  981.280913] [c001d10b7b00] [c00808f9f47c] xfs_unmountfs+0x30/0x1e4 
[xfs]
[  981.280969] [c001d10b7b80] [c00808fa825c] 
xfs_fs_put_super+0x5c/0x110 [xfs]
[  981.281023] [c001d10b7bf0] [c05c8774] 
generic_shutdown_super+0xc0/0x16c
[  981.281029] [c001d10b7c60] [c05c8a50] kill_block_super+0x30/0x68
[  981.281034] [c001d10b7c90] [c00808fa5c54] xfs_kill_sb+0x28/0x4c [xfs]
[  981.281088] [c001d10b7cc0] [c05ca9d4] 
deactivate_locked_super+0x70/0x144
[  981.281093] [c001d10b7cf0] [c0605728] cleanup_mnt+0x10c/0x1d8
[  981.281098] [c001d10b7d40] [c019b5e4] task_work_run+0xe0/0x16c
[  981.281102] [c001d10b7d90] [c0022974] 
do_notify_resume+0x134/0x13c
[  981.281107] [c001d10b7dc0] [c0032378] 
interrupt_exit_user_prepare_main+0x1ac/0x264
[  981.281112] [c001d10b7e20] [c0032580] 
syscall_exit_prepare+0x150/0x178
[  981.281116] [c001d10b7e50] [c000d068] 
system_call_vectored_common+0x168/0x2ec
[  981.281122] --- interrupt: 3000 at 0x7fffaed4c11c
[  981.281125] NIP:  7fffaed4c11c LR:  CTR: 
[  981.281128] REGS: c001d10b7e80 TRAP: 3000   Not tainted  
(6.8.0-rc2-next-20240130-auto)
[  981.281131] MSR:  8000d033   CR: 48002402  
XER: 
[  981.281139] IRQMASK: 0
[  981.281139] GPR00: 0034 7fffec649770 7fffaef07f00 

[  981.281139] GPR04:  ff00  
0001
[  981.281139] GPR08: 00014cd61390   

[  981.281139] GPR12:  7fffaefbc140 0ee6b280 
7fffec649a30
[  981.281139] GPR16: 7fffec649bd8 000118b66478  

[  981.281139] GPR20:    

[  981.281139] GPR24: 7fffec64b0b0 000118b663d8 000118b66a58 

[  981.281139] GPR28: 00014cd61250  00014cd61370 
00014cd61140
[  981.281175] NIP [7fffaed4c11c] 0x7fffaed4c11c
[  981.281177] LR [] 0x0
[  981.281179] --- interrupt: 3000
[ 1104.160797] INFO: taskumount:32506  blocked for more than 245 seconds.
[ 1104.160811]   Not tainted 6.8.0-rc2-next-20240130-auto #1
[ 1104.160814] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this 
message.
[ 1104.160818]task:umount   state:D  stack:0  pid:32506  tgid:32506 
 ppid:32329   flags:0x00042080
[ 1104.160826] Call Trace:
[ 1104.160829] [c001d10b7560] [c6e9b934] 0xc6e9b934 
(unreliable)
[ 1104.160836] [c001d10b7710] [c001fbac] __switch_to+0x13c/0x220
[ 1104.160844] [c001d10b7770] [c1002840] __schedule+0x268/0x7c4
[ 1104.160851] [c001d10b7840] [c1002ddc] schedule+0x40/0x108
[ 1104.160856] [c001d10b78b0] [c100b748] 
schedule_timeout+0x19c/0x1c0
[ 1104.160862] [c001d10b7980] [c1004044] 
__wait_for_common+0x148/0x340
[ 1104.160867] [c001d10b7a10] [c018fa98] 
__flush_workqueue+0x15c/0x530
[ 1104.160873] [c001d10b7ab0] [c00808f89e9c] 
xfs_inodegc_flush+0x54/0x15c [xfs]
[ 1104.160938] [c001d10b7b00] [c00808f9f47c] xfs_unmountfs+0x30/0x1e4 
[xfs]
[ 1104.160993] [c001d10b7b80] [c00808fa825c] 
xfs_fs_put_super+0x5c/0x110 [xfs]
[ 1104.161047] [c001d10b7bf0] [c05c8774] 
generic_shutdown_super+0xc0/0x16c
[ 1104.161053] [c001d10b7c60] [c05c8a50] kill_blo

[Bisected] [commit dd6c3c544126] [linux-next] [6.8.0-rc2] Task hungs for infinite time call traces

2024-02-05 Thread Tasmiya Nalatwad
e+0x134/0x13c
[ 1104.161131] [c001d10b7dc0] [c0032378] 
interrupt_exit_user_prepare_main+0x1ac/0x264
[ 1104.161136] [c001d10b7e20] [c0032580] 
syscall_exit_prepare+0x150/0x178
[ 1104.161141] [c001d10b7e50] [c000d068] 
system_call_vectored_common+0x168/0x2ec
[ 1104.161146] --- interrupt: 3000 at 0x7fffaed4c11c
[ 1104.161149] NIP:  7fffaed4c11c LR:  CTR: 
[ 1104.161152] REGS: c001d10b7e80 TRAP: 3000   Not tainted  
(6.8.0-rc2-next-20240130-auto)
[ 1104.161155] MSR:  8000d033   CR: 48002402  
XER: 
[ 1104.161163] IRQMASK: 0
[ 1104.161163] GPR00: 0034 7fffec649770 7fffaef07f00 

[ 1104.161163] GPR04:  ff00  
0001
[ 1104.161163] GPR08: 00014cd61390   

[ 1104.161163] GPR12:  7fffaefbc140 0ee6b280 
7fffec649a30
[ 1104.161163] GPR16: 7fffec649bd8 000118b66478  

[ 1104.161163] GPR20:    

[ 1104.161163] GPR24: 7fffec64b0b0 000118b663d8 000118b66a58 

[ 1104.161163] GPR28: 00014cd61250  00014cd61370 
00014cd61140
[ 1104.161199] NIP [7fffaed4c11c] 0x7fffaed4c11c
[ 1104.161202] LR [] 0x0
[ 1104.161204] --- interrupt: 3000
[ 1227.040796] INFO: taskumount:32506  blocked for more than 368 seconds.
[ 1227.040810]   Not tainted 6.8.0-rc2-next-20240130-auto #1
[ 1227.040814] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this 
message.
[ 1227.040817]task:umount   state:D  stack:0  pid:32506  tgid:32506 
 ppid:32329   flags:0x00042080
[ 1227.040823] Call Trace:
[ 1227.040826] [c001d10b7560] [c6e9b934] 0xc6e9b934 
(unreliable)
[ 1227.040832] [c001d10b7710] [c001fbac] __switch_to+0x13c/0x220
[ 1227.040839] [c001d10b7770] [c1002840] __schedule+0x268/0x7c4
[ 1227.040844] [c001d10b7840] [c1002ddc] schedule+0x40/0x108
[ 1227.040848] [c001d10b78b0] [c100b748] 
schedule_timeout+0x19c/0x1c0
[ 1227.040853] [c001d10b7980] [c1004044] 
__wait_for_common+0x148/0x340
[ 1227.040857] [c001d10b7a10] [c018fa98] 
__flush_workqueue+0x15c/0x530
[ 1227.040863] [c001d10b7ab0] [c00808f89e9c] 
xfs_inodegc_flush+0x54/0x15c [xfs]
[ 1227.040925] [c001d10b7b00] [c00808f9f47c] xfs_unmountfs+0x30/0x1e4 
[xfs]
[ 1227.040980] [c001d10b7b80] [c00808fa825c] 
xfs_fs_put_super+0x5c/0x110 [xfs]
[ 1227.041035] [c001d10b7bf0] [c05c8774] 
generic_shutdown_super+0xc0/0x16c
[ 1227.041041] [c001d10b7c60] [c05c8a50] kill_block_super+0x30/0x68
[ 1227.041045] [c001d10b7c90] [c00808fa5c54] xfs_kill_sb+0x28/0x4c [xfs]
[ 1227.041099] [c001d10b7cc0] [c05ca9d4] 
deactivate_locked_super+0x70/0x144
[ 1227.041105] [c001d10b7cf0] [c0605728] cleanup_mnt+0x10c/0x1d8
[ 1227.041109] [c001d10b7d40] [c019b5e4] task_work_run+0xe0/0x16c
[ 1227.041113] [c001d10b7d90] [c0022974] 
do_notify_resume+0x134/0x13c
[ 1227.041118] [c001d10b7dc0] [c0032378] 
interrupt_exit_user_prepare_main+0x1ac/0x264
[ 1227.041123] [c001d10b7e20] [c0032580] 
syscall_exit_prepare+0x150/0x178
[ 1227.041128] [c001d10b7e50] [c000d068] 
system_call_vectored_common+0x168/0x2ec
[ 1227.041133] --- interrupt: 3000 at 0x7fffaed4c11c
[ 1227.041136] NIP:  7fffaed4c11c LR:  CTR: 
[ 1227.041139] REGS: c001d10b7e80 TRAP: 3000   Not tainted  
(6.8.0-rc2-next-20240130-auto)
[ 1227.041142] MSR:  8000d033   CR: 48002402  
XER: 
[ 1227.041150] IRQMASK: 0
[ 1227.041150] GPR00: 0034 7fffec649770 7fffaef07f00 

[ 1227.041150] GPR04:  ff00  
0001
[ 1227.041150] GPR08: 00014cd61390   

[ 1227.041150] GPR12:  7fffaefbc140 0ee6b280 
7fffec649a30
[ 1227.041150] GPR16: 7fffec649bd8 000118b66478  

[ 1227.041150] GPR20:    

[ 1227.041150] GPR24: 7fffec64b0b0 000118b663d8 000118b66a58 

[ 1227.041150] GPR28: 00014cd61250  00014cd61370 
00014cd61140
[ 1227.041186] NIP [7fffaed4c11c] 0x7fffaed4c11c
[ 1227.041188] LR [] 0x0
[ 1227.041190] --- interrupt: 3000

--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center


14:59,[Bisected] [commit dd6c3c544126] [linux-next] [6.8.0-rc2] Task hungs for infinite time call traces

2024-02-05 Thread Tasmiya Nalatwad
[linux-next] [6.8.0-rc2-next-20240130] [FC/XFS] Task hangs for infinite 
time while running bonnie test on XFS filesystem


Bisected the issue. Git bisect points to the below commit
commit dd6c3c5441263723305a9c52c5ccc899a4653000
  workqueue: Move pwq_dec_nr_in_flight() to the end of work 
item handling


--- Traces ---

[  981.280811] Call Trace:
[  981.280813] [c001d10b7560] [c6e9b934] 0xc6e9b934 
(unreliable)
[  981.280820] [c001d10b7710] [c001fbac] __switch_to+0x13c/0x220
[  981.280827] [c001d10b7770] [c1002840] __schedule+0x268/0x7c4
[  981.280832] [c001d10b7840] [c1002ddc] schedule+0x40/0x108
[  981.280836] [c001d10b78b0] [c100b748] 
schedule_timeout+0x19c/0x1c0
[  981.280841] [c001d10b7980] [c1004044] 
__wait_for_common+0x148/0x340
[  981.280845] [c001d10b7a10] [c018fa98] 
__flush_workqueue+0x15c/0x530
[  981.280852] [c001d10b7ab0] [c00808f89e9c] 
xfs_inodegc_flush+0x54/0x15c [xfs]
[  981.280913] [c001d10b7b00] [c00808f9f47c] xfs_unmountfs+0x30/0x1e4 
[xfs]
[  981.280969] [c001d10b7b80] [c00808fa825c] 
xfs_fs_put_super+0x5c/0x110 [xfs]
[  981.281023] [c001d10b7bf0] [c05c8774] 
generic_shutdown_super+0xc0/0x16c
[  981.281029] [c001d10b7c60] [c05c8a50] kill_block_super+0x30/0x68
[  981.281034] [c001d10b7c90] [c00808fa5c54] xfs_kill_sb+0x28/0x4c [xfs]
[  981.281088] [c001d10b7cc0] [c05ca9d4] 
deactivate_locked_super+0x70/0x144
[  981.281093] [c001d10b7cf0] [c0605728] cleanup_mnt+0x10c/0x1d8
[  981.281098] [c001d10b7d40] [c019b5e4] task_work_run+0xe0/0x16c
[  981.281102] [c001d10b7d90] [c0022974] 
do_notify_resume+0x134/0x13c
[  981.281107] [c001d10b7dc0] [c0032378] 
interrupt_exit_user_prepare_main+0x1ac/0x264
[  981.281112] [c001d10b7e20] [c0032580] 
syscall_exit_prepare+0x150/0x178
[  981.281116] [c001d10b7e50] [c000d068] 
system_call_vectored_common+0x168/0x2ec
[  981.281122] --- interrupt: 3000 at 0x7fffaed4c11c
[  981.281125] NIP:  7fffaed4c11c LR:  CTR: 
[  981.281128] REGS: c001d10b7e80 TRAP: 3000   Not tainted  
(6.8.0-rc2-next-20240130-auto)
[  981.281131] MSR:  8000d033   CR: 48002402  
XER: 
[  981.281139] IRQMASK: 0
[  981.281139] GPR00: 0034 7fffec649770 7fffaef07f00 

[  981.281139] GPR04:  ff00  
0001
[  981.281139] GPR08: 00014cd61390   

[  981.281139] GPR12:  7fffaefbc140 0ee6b280 
7fffec649a30
[  981.281139] GPR16: 7fffec649bd8 000118b66478  

[  981.281139] GPR20:    

[  981.281139] GPR24: 7fffec64b0b0 000118b663d8 000118b66a58 

[  981.281139] GPR28: 00014cd61250  00014cd61370 
00014cd61140
[  981.281175] NIP [7fffaed4c11c] 0x7fffaed4c11c
[  981.281177] LR [] 0x0
[  981.281179] --- interrupt: 3000

--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center


[Bisected] [commit dd6c3c544126] [linux-next] [6.8.0-rc2] Task hungs for infinite time call traces

2024-02-05 Thread Tasmiya Nalatwad

Greetings,

[Linux-next] [6.8.0-rc2-next-20240130] [FC / XFS] Task hangs for 
infinite time while running bonnie test XFS filesystem


Bisected the issue. Git bisect points to the below commit
commit dd6c3c5441263723305a9c52c5ccc899a4653000
  workqueue: Move pwq_dec_nr_in_flight() to the end of work 
item handling


--- Traces ---

[  981.280811] Call Trace:
[  981.280813] [c001d10b7560] [c6e9b934] 0xc6e9b934 
(unreliable)
[  981.280820] [c001d10b7710] [c001fbac] __switch_to+0x13c/0x220
[  981.280827] [c001d10b7770] [c1002840] __schedule+0x268/0x7c4
[  981.280832] [c001d10b7840] [c1002ddc] schedule+0x40/0x108
[  981.280836] [c001d10b78b0] [c100b748] 
schedule_timeout+0x19c/0x1c0
[  981.280841] [c001d10b7980] [c1004044] 
__wait_for_common+0x148/0x340
[  981.280845] [c001d10b7a10] [c018fa98] 
__flush_workqueue+0x15c/0x530
[  981.280852] [c001d10b7ab0] [c00808f89e9c] 
xfs_inodegc_flush+0x54/0x15c [xfs]
[  981.280913] [c001d10b7b00] [c00808f9f47c] xfs_unmountfs+0x30/0x1e4 
[xfs]
[  981.280969] [c001d10b7b80] [c00808fa825c] 
xfs_fs_put_super+0x5c/0x110 [xfs]
[  981.281023] [c001d10b7bf0] [c05c8774] 
generic_shutdown_super+0xc0/0x16c
[  981.281029] [c001d10b7c60] [c05c8a50] kill_block_super+0x30/0x68
[  981.281034] [c001d10b7c90] [c00808fa5c54] xfs_kill_sb+0x28/0x4c [xfs]
[  981.281088] [c001d10b7cc0] [c05ca9d4] 
deactivate_locked_super+0x70/0x144
[  981.281093] [c001d10b7cf0] [c0605728] cleanup_mnt+0x10c/0x1d8
[  981.281098] [c001d10b7d40] [c019b5e4] task_work_run+0xe0/0x16c
[  981.281102] [c001d10b7d90] [c0022974] 
do_notify_resume+0x134/0x13c
[  981.281107] [c001d10b7dc0] [c0032378] 
interrupt_exit_user_prepare_main+0x1ac/0x264
[  981.281112] [c001d10b7e20] [c0032580] 
syscall_exit_prepare+0x150/0x178
[  981.281116] [c001d10b7e50] [c000d068] 
system_call_vectored_common+0x168/0x2ec
[  981.281122] --- interrupt: 3000 at 0x7fffaed4c11c
[  981.281125] NIP:  7fffaed4c11c LR:  CTR: 
[  981.281128] REGS: c001d10b7e80 TRAP: 3000   Not tainted  
(6.8.0-rc2-next-20240130-auto)
[  981.281131] MSR:  8000d033   CR: 48002402  
XER: 
[  981.281139] IRQMASK: 0
[  981.281139] GPR00: 0034 7fffec649770 7fffaef07f00 

[  981.281139] GPR04:  ff00  
0001
[  981.281139] GPR08: 00014cd61390   

[  981.281139] GPR12:  7fffaefbc140 0ee6b280 
7fffec649a30
[  981.281139] GPR16: 7fffec649bd8 000118b66478  

[  981.281139] GPR20:    

[  981.281139] GPR24: 7fffec64b0b0 000118b663d8 000118b66a58 

[  981.281139] GPR28: 00014cd61250  00014cd61370 
00014cd61140
[  981.281175] NIP [7fffaed4c11c] 0x7fffaed4c11c
[  981.281177] LR [] 0x0
[  981.281179] --- interrupt: 3000

--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center


[revert commit 9f079dda1433] [mainline] [6.8.0-rc3] [NVME] OOPS kernel crash while booting

2024-02-06 Thread Tasmiya Nalatwad

Greetings,

[revert commit 9f079dda1433] [mainline] [6.8.0-rc3] [NVME] OOPS kernel 
crash while booting to kernel


Reverting below commit fixes the problem

commit 9f079dda14339ee87d864306a9dc8c6b4e4da40b
    nvme: allow passthru cmd error logging

--- Traces ---

[   15.639835] BUG: Kernel NULL pointer dereference on read at 0x03d8
[   15.639840] Faulting instruction address: 0xc008215b01dc
[   15.639845] Oops: Kernel access of bad area, sig: 11 [#1]
[   15.639849] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=8192 NUMA pSeries
[   15.639855] Modules linked in: xsk_diag bonding tls nft_compat 
nf_tables nfnetlink rfkill binfmt_misc dm_multipath dm_mod pseries_rng 
dax_pmem drm drm_panel_orientation_quirks ext4 mbcache jbd2 ibmvfc 
nd_pmem nd_btt scsi_transport_fc ibmveth nvme papr_scm bnx2x nvme_core 
t10_pi vmx_crypto libnvdimm crc64_rocksoft_generic crc64_rocksoft mdio 
crc64 libcrc32c fuse
[   15.639901] CPU: 1 PID: 3289 Comm: udevadm Not tainted 
6.8.0-rc3-auto-g99bd3cb0d12e #1
[   15.639907] Hardware name: IBM,9009-42A POWER9 (raw) 0x4e0202 
0xf05 of:IBM,FW950.A0 (VL950_141) hv:phyp pSeries
[   15.639913] NIP:  c008215b01dc LR: c0a197bc CTR: 
c008215b01b8
[   15.639918] REGS: c0006f3177f0 TRAP: 0300   Not tainted 
(6.8.0-rc3-auto-g99bd3cb0d12e)
[   15.639923] MSR:  80009033   CR: 
84888480  XER: 2004
[   15.639936] CFAR: c000dbbc DAR: 03d8 DSISR: 
4000 IRQMASK: 0
[   15.639936] GPR00: c0a197bc c0006f317a90 c008215d8200 
c0009281
[   15.639936] GPR04: c008215d2570 c0009281 c0009282 

[   15.639936] GPR08: c0009281  0001 
22888482
[   15.639936] GPR12: c008215b01b8 cf8cf300  

[   15.639936] GPR16:    
0001
[   15.639936] GPR20:  00400cc0 c00086d14c28 
7fff
[   15.639936] GPR24: f000  c00086d14c18 
0001
[   15.639936] GPR28: c0007509c180 c5bc1448 c008215d2570 
c00086d14bf0
[   15.63] NIP [c008215b01dc] 
nvme_io_passthru_err_log_enabled_show+0x24/0x80 [nvme_core]

[   15.640013] LR [c0a197bc] dev_attr_show+0x40/0xac
[   15.640020] Call Trace:
[   15.640023] [c0006f317a90] [c0006f317b10] 0xc0006f317b10 
(unreliable)

[   15.640030] [c0006f317af0] [c0a197bc] dev_attr_show+0x40/0xac
[   15.640037] [c0006f317b60] [c06c11a0] 
sysfs_kf_seq_show+0xcc/0x1f0
[   15.640045] [c0006f317bf0] [c06be224] 
kernfs_seq_show+0x44/0x58
[   15.640052] [c0006f317c10] [c060882c] 
seq_read_iter+0x254/0x69c
[   15.640060] [c0006f317cf0] [c06bed60] 
kernfs_fop_read_iter+0x4c/0x60

[   15.640067] [c0006f317d10] [c05bf61c] vfs_read+0x2bc/0x390
[   15.640074] [c0006f317dc0] [c05c040c] ksys_read+0x84/0x144
[   15.640081] [c0006f317e10] [c0033358] 
system_call_exception+0x138/0x330
[   15.640088] [c0006f317e50] [c000d05c] 
system_call_vectored_common+0x15c/0x2ec

[   15.640096] --- interrupt: 3000 at 0x7fff87d342e4
[   15.640101] NIP:  7fff87d342e4 LR:  CTR: 

[   15.640106] REGS: c0006f317e80 TRAP: 3000   Not tainted 
(6.8.0-rc3-auto-g99bd3cb0d12e)
[   15.640110] MSR:  8280f033 
  CR: 42884482  XER: 

[   15.640126] IRQMASK: 0
[   15.640126] GPR00: 0003 7fffea617e80 7fff87e47200 
0003
[   15.640126] GPR04: 010009494f20 00010008 7fff87e40e18 
0100094a4f20
[   15.640126] GPR08: 00010008   

[   15.640126] GPR12:  7fff88434ba0  

[   15.640126] GPR16:  00013e082f48 7fffea618290 
7fffea617ee8
[   15.640126] GPR20: 003e 7fffea618108 7fffea618110 
8000
[   15.640126] GPR24:  0002 003e 

[   15.640126] GPR28: 00010007 00010008 010009494f20 
0003

[   15.640185] NIP [7fff87d342e4] 0x7fff87d342e4
[   15.640189] LR [] 0x0
[   15.640193] --- interrupt: 3000
[   15.640196] Code: e8410018 00028048  3c4c0003 38428048 
7c0802a6 6000 7c0802a6 f8010010 f821ffa1 e9230078 7ca32b78 
<892903d8> 2c09 4082002c 3d22

[   15.640217] ---[ end trace  ]---

--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center



Re: [revert commit 9f079dda1433] [mainline] [6.8.0-rc3] [NVME] OOPS kernel crash while booting

2024-02-06 Thread Tasmiya Nalatwad
Thanks Keith for the patch. I have tested the patch without reverting 
old commits. The patch fixes the issue.


Tested-by: Tasmiya Nalatwad 

On 2/6/24 23:22, Keith Busch wrote:

On Tue, Feb 06, 2024 at 10:05:20PM +0530, Tasmiya Nalatwad wrote:

Greetings,

[revert commit 9f079dda1433] [mainline] [6.8.0-rc3] [NVME] OOPS kernel crash
while booting to kernel

Reverting below commit fixes the problem

commit 9f079dda14339ee87d864306a9dc8c6b4e4da40b
     nvme: allow passthru cmd error logging

Thanks for the report. Let's take a shot at fixing it before considering
a revert. I copied you on the patch proposal.


--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center



[revert 0d60d8df6f49] [net/net-next] [6.8-rc5] Build Failure

2024-02-28 Thread Tasmiya Nalatwad
/include/linux/dpll.h: In function ‘netdev_dpll_pin’:
./include/linux/rcupdate.h:439:9: error: dereferencing pointer to 
incomplete type ‘struct dpll_pin’

  typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \
 ^
./include/linux/rcupdate.h:587:2: note: in expansion of macro 
‘__rcu_dereference_check’

  __rcu_dereference_check((p), __UNIQUE_ID(rcu), \
  ^~~
./include/linux/rtnetlink.h:70:2: note: in expansion of macro 
‘rcu_dereference_check’

  rcu_dereference_check(p, lockdep_rtnl_is_held())
  ^
./include/linux/dpll.h:175:9: note: in expansion of macro 
‘rcu_dereference_rtnl’

  return rcu_dereference_rtnl(dev->dpll_pin);
 ^~~~
In file included from net/core/rtnetlink.c:60:
./include/linux/dpll.h:179:1: error: control reaches end of non-void 
function [-Werror=return-type]

 }

--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center



Re: [revert 0d60d8df6f49] [netdev/net] [6.8-rc5] Build Failure

2024-02-28 Thread Tasmiya Nalatwad
c:17:
./include/linux/dpll.h: In function ‘netdev_dpll_pin’:
./include/linux/rcupdate.h:439:9: error: dereferencing pointer to 
incomplete type ‘struct dpll_pin’

  typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \
 ^
./include/linux/rcupdate.h:587:2: note: in expansion of macro 
‘__rcu_dereference_check’

  __rcu_dereference_check((p), __UNIQUE_ID(rcu), \
  ^~~
./include/linux/rtnetlink.h:70:2: note: in expansion of macro 
‘rcu_dereference_check’

  rcu_dereference_check(p, lockdep_rtnl_is_held())
  ^
./include/linux/dpll.h:175:9: note: in expansion of macro 
‘rcu_dereference_rtnl’

  return rcu_dereference_rtnl(dev->dpll_pin);
 ^~~~
In file included from net/core/rtnetlink.c:60:
./include/linux/dpll.h:179:1: error: control reaches end of non-void 
function [-Werror=return-type]
 } 


--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center



[netdev] Build failure on powerpc

2024-02-28 Thread Tasmiya Nalatwad
update.h:439:9: error: dereferencing pointer to 
incomplete type ‘struct dpll_pin’

  typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \
 ^
./include/linux/rcupdate.h:587:2: note: in expansion of macro 
‘__rcu_dereference_check’

  __rcu_dereference_check((p), __UNIQUE_ID(rcu), \
  ^~~
./include/linux/rtnetlink.h:70:2: note: in expansion of macro 
‘rcu_dereference_check’

  rcu_dereference_check(p, lockdep_rtnl_is_held())
  ^
./include/linux/dpll.h:175:9: note: in expansion of macro 
‘rcu_dereference_rtnl’

  return rcu_dereference_rtnl(dev->dpll_pin);
 ^~~~
In file included from net/core/rtnetlink.c:60:
./include/linux/dpll.h:179:1: error: control reaches end of non-void 
function [-Werror=return-type]

 }


Reverting the below commit fixes the problem

commit 0d60d8df6f493bb46bf5db40d39dd60a1bafdd4e
    dpll: rely on rcu for netdev_dpll_pin()

--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center



Re: [revert 0d60d8df6f49] [net/net-next] [6.8-rc5] Build Failure

2024-02-29 Thread Tasmiya Nalatwad

Greetings,

I have tried the patch provided below. Moving the struct to the file
"net/core/rtnetlink.c" does not resolve the problem. Please find the
traces below.


--- Traces ---

In file included from ./include/linux/rbtree.h:24,
 from ./include/linux/mm_types.h:11,
 from ./include/linux/mmzone.h:22,
 from ./include/linux/gfp.h:7,
 from ./include/linux/umh.h:4,
 from ./include/linux/kmod.h:9,
 from ./include/linux/module.h:17,
 from net/core/rtnetlink.c:17:
net/core/rtnetlink.c: In function ‘netdev_dpll_pin’:
./include/linux/rcupdate.h:439:9: error: dereferencing pointer to 
incomplete type ‘struct dpll_pin’

  typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \
 ^
./include/linux/rcupdate.h:587:2: note: in expansion of macro 
‘__rcu_dereference_check’

  __rcu_dereference_check((p), __UNIQUE_ID(rcu), \
  ^~~
./include/linux/rtnetlink.h:70:2: note: in expansion of macro 
‘rcu_dereference_check’

  rcu_dereference_check(p, lockdep_rtnl_is_held())
  ^
net/core/rtnetlink.c:1059:15: note: in expansion of macro 
‘rcu_dereference_rtnl’

    return rcu_dereference_rtnl(dev->dpll_pin);
   ^~~~
  CC  crypto/algboss.o
net/core/rtnetlink.c:1063:1: error: control reaches end of non-void 
function [-Werror=return-type]

 }
 ^
  CC  crypto/authenc.o
  CC  crypto/authencesn.o
  CC  crypto/af_alg.o
  CC  crypto/algif_hash.o
  CC  crypto/algif_skcipher.o
  CC  crypto/algif_rng.o
  CC  crypto/algif_aead.o
  AR  arch/powerpc/kernel/built-in.a
cc1: some warnings being treated as errors
make[4]: *** [scripts/Makefile.build:243: net/core/rtnetlink.o] Error 1
make[4]: *** Waiting for unfinished jobs
  CC  lib/kobject_uevent.o
  AR  drivers/net/mdio/built-in.a
  AR  net/802/built-in.a
  AR  drivers/connector/built-in.a
  CC  lib/vsprintf.o
  AR  ipc/built-in.a
  AR  net/nsh/built-in.a
  CC  lib/dynamic_debug.o
In file included from ./arch/powerpc/include/generated/asm/rwonce.h:1,
 from ./include/linux/compiler.h:251,
 from ./include/linux/instrumented.h:10,
 from ./include/linux/uaccess.h:6,
 from net/core/dev.c:71:
net/core/dev.c: In function ‘netdev_dpll_pin_assign’:
./include/linux/rcupdate.h:462:36: error: dereferencing pointer to 
incomplete type ‘struct dpll_pin’

 #define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v)
    ^~~~
./include/asm-generic/rwonce.h:55:33: note: in definition of macro 
‘__WRITE_ONCE’

  *(volatile typeof(x) *)&(x) = (val);    \
 ^~~
./arch/powerpc/include/asm/barrier.h:76:2: note: in expansion of macro 
‘WRITE_ONCE’

  WRITE_ONCE(*p, v);  \
  ^~
./include/asm-generic/barrier.h:172:55: note: in expansion of macro 
‘__smp_store_release’
 #define smp_store_release(p, v) do { kcsan_release(); 
__smp_store_release(p, v); } while (0)

^~~
./include/linux/rcupdate.h:503:3: note: in expansion of macro 
‘smp_store_release’

   smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \
   ^
./include/linux/rcupdate.h:503:25: note: in expansion of macro 
‘RCU_INITIALIZER’

   smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \
 ^~~
net/core/dev.c:9081:2: note: in expansion of macro ‘rcu_assign_pointer’
  rcu_assign_pointer(dev->dpll_pin, dpll_pin);
  ^~

On 2/28/24 20:13, Eric Dumazet wrote:

On Wed, Feb 28, 2024 at 3:07 PM Vadim Fedorenko
 wrote:

On 28/02/2024 11:09, Tasmiya Nalatwad wrote:

Greetings,

[revert 0d60d8df6f49] [net/net-next] [6.8-rc5] Build Failure

Reverting below commit fixes the issue

commit 0d60d8df6f493bb46bf5db40d39dd60a1bafdd4e
  dpll: rely on rcu for netdev_dpll_pin()

--- Traces ---

./include/linux/dpll.h: In function ‘netdev_dpll_pin’:
./include/linux/rcupdate.h:439:9: error: dereferencing pointer to
incomplete type ‘struct dpll_pin’
typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \
   ^
./include/linux/rcupdate.h:587:2: note: in expansion of macro
‘__rcu_dereference_check’
__rcu_dereference_check((p), __UNIQUE_ID(rcu), \
^~~
./include/linux/rtnetlink.h:70:2: note: in expansion of macro
‘rcu_dereference_check’
rcu_dereference_check(p, lockdep_rtnl_is_held())
^
./include/linux/dpll.h:175:9: note: in expansion of macro
‘rcu_dereference_rtnl’
return rcu_dereference_rtnl(dev->dpll_pin);
   ^~~~
make[4]: *** [scripts/Makefile.build:243: drivers/dpll/dpll_core.o] Error 1
make[4]: *** Waiting for unfinished jobs
AR  net/mpls/built-in.a
AR  net/l3mdev/built-in.a
In file included from ./include/linux/rbtree.h:24,

Re: [revert 0d60d8df6f49] [net/net-next] [6.8-rc5] Build Failure

2024-02-29 Thread Tasmiya Nalatwad

The latest changes suggested below work, and the issue is no longer seen.

Tested-by: Tasmiya Nalatwad 

On 2/29/24 14:25, Eric Dumazet wrote:

On Thu, Feb 29, 2024 at 9:04 AM Tasmiya Nalatwad
 wrote:

Greetings,

I have tried the patch provided below. Moving struct to file
"net/core/rtnetlink.c" is not resolving the problem. Please find the
below traces.

--- Traces ---

In file included from ./include/linux/rbtree.h:24,
   from ./include/linux/mm_types.h:11,
   from ./include/linux/mmzone.h:22,
   from ./include/linux/gfp.h:7,
   from ./include/linux/umh.h:4,
   from ./include/linux/kmod.h:9,
   from ./include/linux/module.h:17,
   from net/core/rtnetlink.c:17:
net/core/rtnetlink.c: In function ‘netdev_dpll_pin’:
./include/linux/rcupdate.h:439:9: error: dereferencing pointer to
incomplete type ‘struct dpll_pin’
typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \
   ^
./include/linux/rcupdate.h:587:2: note: in expansion of macro
‘__rcu_dereference_check’
__rcu_dereference_check((p), __UNIQUE_ID(rcu), \
^~~
./include/linux/rtnetlink.h:70:2: note: in expansion of macro
‘rcu_dereference_check’
rcu_dereference_check(p, lockdep_rtnl_is_held())
^
net/core/rtnetlink.c:1059:15: note: in expansion of macro
‘rcu_dereference_rtnl’
  return rcu_dereference_rtnl(dev->dpll_pin);
 ^~~~
CC  crypto/algboss.o
net/core/rtnetlink.c:1063:1: error: control reaches end of non-void
function [-Werror=return-type]
   }
   ^
CC  crypto/authenc.o
CC  crypto/authencesn.o
CC  crypto/af_alg.o
CC  crypto/algif_hash.o
CC  crypto/algif_skcipher.o
CC  crypto/algif_rng.o
CC  crypto/algif_aead.o
AR  arch/powerpc/kernel/built-in.a
cc1: some warnings being treated as errors
make[4]: *** [scripts/Makefile.build:243: net/core/rtnetlink.o] Error 1
make[4]: *** Waiting for unfinished jobs
CC  lib/kobject_uevent.o
AR  drivers/net/mdio/built-in.a
AR  net/802/built-in.a
AR  drivers/connector/built-in.a
CC  lib/vsprintf.o
AR  ipc/built-in.a
AR  net/nsh/built-in.a
CC  lib/dynamic_debug.o
In file included from ./arch/powerpc/include/generated/asm/rwonce.h:1,
   from ./include/linux/compiler.h:251,
   from ./include/linux/instrumented.h:10,
   from ./include/linux/uaccess.h:6,
   from net/core/dev.c:71:
net/core/dev.c: In function ‘netdev_dpll_pin_assign’:
./include/linux/rcupdate.h:462:36: error: dereferencing pointer to
incomplete type ‘struct dpll_pin’
   #define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v)
  ^~~~
./include/asm-generic/rwonce.h:55:33: note: in definition of macro
‘__WRITE_ONCE’
*(volatile typeof(x) *)&(x) = (val);\
   ^~~
./arch/powerpc/include/asm/barrier.h:76:2: note: in expansion of macro
‘WRITE_ONCE’
WRITE_ONCE(*p, v);  \
^~
./include/asm-generic/barrier.h:172:55: note: in expansion of macro
‘__smp_store_release’
   #define smp_store_release(p, v) do { kcsan_release();
__smp_store_release(p, v); } while (0)
^~~
./include/linux/rcupdate.h:503:3: note: in expansion of macro
‘smp_store_release’
 smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \
 ^
./include/linux/rcupdate.h:503:25: note: in expansion of macro
‘RCU_INITIALIZER’
 smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \
   ^~~
net/core/dev.c:9081:2: note: in expansion of macro ‘rcu_assign_pointer’
rcu_assign_pointer(dev->dpll_pin, dpll_pin);
^~

On 2/28/24 20:13, Eric Dumazet wrote:

On Wed, Feb 28, 2024 at 3:07 PM Vadim Fedorenko
 wrote:

On 28/02/2024 11:09, Tasmiya Nalatwad wrote:

Greetings,

[revert 0d60d8df6f49] [net/net-next] [6.8-rc5] Build Failure

Reverting below commit fixes the issue

commit 0d60d8df6f493bb46bf5db40d39dd60a1bafdd4e
   dpll: rely on rcu for netdev_dpll_pin()

--- Traces ---

./include/linux/dpll.h: In function ‘netdev_dpll_pin’:
./include/linux/rcupdate.h:439:9: error: dereferencing pointer to
incomplete type ‘struct dpll_pin’
 typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \
^
./include/linux/rcupdate.h:587:2: note: in expansion of macro
‘__rcu_dereference_check’
 __rcu_dereference_check((p), __UNIQUE_ID(rcu), \
 ^~~
./include/linux/rtnetlink.h:70:2: note: in expansion of macro
‘rcu_dereference_check’
 rcu_dereference_check(p, lockdep_rtnl_is_held())
 ^
./include/linux/dpll.h:175:9: note: in expansion of macro
‘rcu_dereference_rtnl’
 return rcu_dereference_rtnl(dev->dpll_pin);

Re: [revert 0d60d8df6f49] [net/net-next] [6.8-rc5] Build Failure

2024-02-29 Thread Tasmiya Nalatwad

Greetings,

This is tied to the gcc version. The build failure is seen when using gcc-8.5.x.

On 2/29/24 20:17, Jakub Kicinski wrote:

On Thu, 29 Feb 2024 09:55:22 +0100 Eric Dumazet wrote:

I do not see other solution than this, otherwise we have to add more
pollution to include/linux/netdevice.h

Right :(


diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 
a9c973b92294bb110cf3cd336485972127b01b58..40797ea80bc6273cae6b7773d0a3e47459a72150
100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2469,7 +2469,7 @@ struct net_device {
 struct devlink_port *devlink_port;

  #if IS_ENABLED(CONFIG_DPLL)
-   struct dpll_pin __rcu   *dpll_pin;
+   void __rcu *dpll_pin;
  #endif

If DPLL wants to hide its type definitions the helpers must live
in dpll? IOW move netdev_dpll_pin() to drivers/dpll/dpll_core.c

BTW Tasmiya, please do tell us what compiler you're using.


--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center



Re: [revert 0d60d8df6f49] [net/net-next] [6.8-rc5] Build Failure

2024-03-04 Thread Tasmiya Nalatwad

Greetings,

The build error is also seen on linux-next and mainline.

Mainline : 6.8.0-rc7

linux-next : 6.8.0-rc7-next-20240304

--- Traces ---

net/core/dev.c: In function ‘netdev_dpll_pin_assign’:
./include/linux/rcupdate.h:462:36: error: dereferencing pointer to 
incomplete type ‘struct dpll_pin’

 #define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v)
    ^~~~
./include/asm-generic/rwonce.h:55:33: note: in definition of macro 
‘__WRITE_ONCE’

  *(volatile typeof(x) *)&(x) = (val);    \
 ^~~
./arch/powerpc/include/asm/barrier.h:76:2: note: in expansion of macro 
‘WRITE_ONCE’

  WRITE_ONCE(*p, v);  \
  ^~
./include/asm-generic/barrier.h:172:55: note: in expansion of macro 
‘__smp_store_release’
 #define smp_store_release(p, v) do { kcsan_release(); 
__smp_store_release(p, v); } while (0)

^~~
./include/linux/rcupdate.h:503:3: note: in expansion of macro 
‘smp_store_release’

   smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \
   ^
./include/linux/rcupdate.h:503:25: note: in expansion of macro 
‘RCU_INITIALIZER’

   smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \
 ^~~
net/core/dev.c:9081:2: note: in expansion of macro ‘rcu_assign_pointer’
  rcu_assign_pointer(dev->dpll_pin, dpll_pin);

On 2/28/24 19:37, Vadim Fedorenko wrote:

On 28/02/2024 11:09, Tasmiya Nalatwad wrote:

Greetings,

[revert 0d60d8df6f49] [net/net-next] [6.8-rc5] Build Failure

Reverting below commit fixes the issue

commit 0d60d8df6f493bb46bf5db40d39dd60a1bafdd4e
 dpll: rely on rcu for netdev_dpll_pin()

--- Traces ---

./include/linux/dpll.h: In function ‘netdev_dpll_pin’:
./include/linux/rcupdate.h:439:9: error: dereferencing pointer to 
incomplete type ‘struct dpll_pin’

   typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \
  ^
./include/linux/rcupdate.h:587:2: note: in expansion of macro 
‘__rcu_dereference_check’

   __rcu_dereference_check((p), __UNIQUE_ID(rcu), \
   ^~~
./include/linux/rtnetlink.h:70:2: note: in expansion of macro 
‘rcu_dereference_check’

   rcu_dereference_check(p, lockdep_rtnl_is_held())
   ^
./include/linux/dpll.h:175:9: note: in expansion of macro 
‘rcu_dereference_rtnl’

   return rcu_dereference_rtnl(dev->dpll_pin);
  ^~~~
make[4]: *** [scripts/Makefile.build:243: drivers/dpll/dpll_core.o] 
Error 1

make[4]: *** Waiting for unfinished jobs
   AR  net/mpls/built-in.a
   AR  net/l3mdev/built-in.a
In file included from ./include/linux/rbtree.h:24,
  from ./include/linux/mm_types.h:11,
  from ./include/linux/mmzone.h:22,
  from ./include/linux/gfp.h:7,
  from ./include/linux/umh.h:4,
  from ./include/linux/kmod.h:9,
  from ./include/linux/module.h:17,
  from drivers/dpll/dpll_netlink.c:9:
./include/linux/dpll.h: In function ‘netdev_dpll_pin’:
./include/linux/rcupdate.h:439:9: error: dereferencing pointer to 
incomplete type ‘struct dpll_pin’

   typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \
  ^
./include/linux/rcupdate.h:587:2: note: in expansion of macro 
‘__rcu_dereference_check’

   __rcu_dereference_check((p), __UNIQUE_ID(rcu), \
   ^~~
./include/linux/rtnetlink.h:70:2: note: in expansion of macro 
‘rcu_dereference_check’

   rcu_dereference_check(p, lockdep_rtnl_is_held())
   ^
./include/linux/dpll.h:175:9: note: in expansion of macro 
‘rcu_dereference_rtnl’

   return rcu_dereference_rtnl(dev->dpll_pin);
  ^~~~
make[4]: *** [scripts/Makefile.build:243: 
drivers/dpll/dpll_netlink.o] Error 1

make[3]: *** [scripts/Makefile.build:481: drivers/dpll] Error 2
make[3]: *** Waiting for unfinished jobs
In file included from ./arch/powerpc/include/generated/asm/rwonce.h:1,
  from ./include/linux/compiler.h:251,
  from ./include/linux/instrumented.h:10,
  from ./include/linux/uaccess.h:6,
  from net/core/dev.c:71:
net/core/dev.c: In function ‘netdev_dpll_pin_assign’:
./include/linux/rcupdate.h:462:36: error: dereferencing pointer to 
incomplete type ‘struct dpll_pin’

  #define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v)
 ^~~~
./include/asm-generic/rwonce.h:55:33: note: in definition of macro 
‘__WRITE_ONCE’

   *(volatile typeof(x) *)&(x) = (val);    \
  ^~~
./arch/powerpc/include/asm/barrier.h:76:2: note: in expansion of 
macro ‘WRITE_ONCE’

   WRITE_ONCE(*p, v);  \
   ^~
./include/asm-generic/barrier.h:172:55: note: in expansion of macro 
‘__smp_store_release’
  #define smp_store_release(p, v) do { kcsan_release(); 
__smp_store_release(p, v); } while (0)

^~

[linux-next] [FC/EXT4] [PPC] WARNING: CPU: 33 PID: 47869 at block/blk-lib.c:50 __blkdev_issue_discard+0x250/0x280

2022-05-10 Thread Tasmiya Nalatwad

Greetings,

On linux-next kernel 5.18.0-rc5-next-20220506, a WARN_ON is triggered while
running a stress test on an FC disk created with the EXT4 filesystem.


Console Logs :

 md127: detected capacity change from 0 to 62879744
 EXT4-fs (dm-11): mounted filesystem with ordered data mode. Quota 
mode: none.

 md127: detected capacity change from 62879744 to 0
 md: md127 stopped.
 EXT4-fs (dm-11): mounted filesystem with ordered data mode. Quota 
mode: none.

 md127: detected capacity change from 0 to 62879744
 WARNING: CPU: 33 PID: 47869 at block/blk-lib.c:50 
__blkdev_issue_discard+0x250/0x280
 Modules linked in: raid0 rpadlpar_io rpaphp nfnetlink tcp_diag 
udp_diag inet_diag unix_diag af_packet_diag netlink_diag bonding rfkill 
sunrpc pseries_rng xts vmx_crypto gf128mul sch_fq_codel binfmt_misc 
ip_tables ext4 mbcache jbd2 dm_round_robin sd_mod t10_pi crc64_rocksoft 
crc64 sg ibmvfc scsi_transport_fc ibmveth dm_multipath dm_mirror 
dm_region_hash dm_log dm_mod fuse
 CPU: 33 PID: 47869 Comm: mkfs.ext4 Kdump: loaded Not tainted 
5.18.0-rc5-next-20220506-autotest #1

 NIP:  c064beb0 LR: c064bf40 CTR: 
 REGS: c000a7a2f870 TRAP: 0700   Not tainted 
(5.18.0-rc5-next-20220506-autotest)

 MSR:  80029033   CR: 28002282  XER: 
 CFAR: c064bd24 IRQMASK: 0
 GPR00: c064bf40 c000a7a2fb10 c28cbd00 
c000ed96
 GPR04:  8000 0cc0 
c000a7a2fc28
 GPR08:  c000c70a  

 GPR12: 2000 c018ff95ee80  

 GPR16:   0077ef00 
00014e1e56a0
 GPR20: 8000 7fff988b0588 7213dfe8 

 GPR24: 0003  0100 
c000ed96
 GPR28: 0100  c000ed96 
c000ed96

 NIP [c064beb0] __blkdev_issue_discard+0x250/0x280
 LR [c064bf40] blkdev_issue_discard+0x60/0xe0
 Call Trace:
 [c000a7a2fb10] [c000a7a2fb60] 0xc000a7a2fb60 (unreliable)
 [c000a7a2fbe0] [c064bf40] blkdev_issue_discard+0x60/0xe0
 [c000a7a2fc70] [c065e840] blkdev_common_ioctl+0x1b0/0xbf0
 [c000a7a2fd00] [c065f6a8] blkdev_ioctl+0x428/0x6e0
 [c000a7a2fd60] [c04857c8] sys_ioctl+0xf8/0x150
 [c000a7a2fdb0] [c002f468] system_call_exception+0x178/0x380
 [c000a7a2fe10] [c000c64c] system_call_common+0xec/0x250
 --- interrupt: c00 at 0x7fff98524480
 NIP:  7fff98524480 LR: 7fff98867828 CTR: 
 REGS: c000a7a2fe80 TRAP: 0c00   Not tainted 
(5.18.0-rc5-next-20220506-autotest)
 MSR:  8280f033   CR: 
24002288  XER: 

 IRQMASK: 0
 GPR00: 0036 7213dec0 7fff98617100 
0003
 GPR04: 20001277 7213df48 0100 
1000
 GPR08: 0003   

 GPR12:  7fff9895ce40  

 GPR16:   0077ef00 
00014e1e56a0
 GPR20: 8000 7fff988b0588 7213dfe8 

 GPR24: 00011b2a0890 00011b2a08a0 00011b2a0880 
00011b2a00d8
 GPR28:  7fff988b0590 00011b2a00e0 
7213e0a0

 NIP [7fff98524480] 0x7fff98524480
 LR [7fff98867828] 0x7fff98867828
 --- interrupt: c00
 Instruction dump:
 6000 2fa3 419e0018 3c62fe6e 38810068 38630da0 4bb95b21 6000
 3b20ffa1 4b60 6000 6000 <0fe0> 7c0802a6 fb010090 fb4100a0
 ---[ end trace  ]---


--
Regards,
Tasmiya Nalatwad
IBM Linux Technology Center