Hello!
I have found a reproducible way to trigger a kernel trap that appears
both with OpenBSD 7.3-RELEASE running as a guest (VM under KVM) and on a
bare-metal machine with an IDE CD drive - so it is not a hypervisor bug.
To trigger kernel trap, these conditions have to be met:
1. the 7.3-RELEASE kernel uses the i8254 timer (which often happens with
bsd.rd from install73.iso, which has limited clock alternatives)
2. some process is reading a lot of data from the IDE CD (for example
the install sets from install73.iso)
3. the less memory there is, the sooner the trap occurs (with 2GB RAM
within a few seconds; with 8GB RAM it may take several minutes and
several timeouts)
When you start reading, for example, all install sets
from a mounted IDE CD-ROM:
cat /mnt/cdrom/7.3/amd64/*.tgz > /dev/null
Soon one or more timeouts occur:
wdc_atapi_start: not ready, st = 50
Followed by this trap:
uvm_fault(0xfffffd807f55e2d0, 0x37, 0, 2) -> e
kernel: page fault trap, code=0
Stopped at wdc_free_xfer+0xe3: movq %rax,0(%rcx)
TID PID UID PRFLAGS PFLAGS CPU COMMAND
*477705 68753 0 0x100003 0 0 cat
# NOTE: folded lines on "at":
wdc_free_xfer(ffff8000000b6710,fffffd802e5d1dd8)
at wdc_free_xfer+0xe3
wdc_atapi_the_machine(ffff8000000b6710,fffffd802e5d1dd8,2)
at wdc_atapi_the_machine+0x223
wdc_atapi_intr(ffff8000000b6710,fffffd802e5d1dd8,1)
at wdc_atapi_intr+0x5a
wdcintr(ffff8000000b6710)
at wdcintr+0xbd
intr_handler(ffff8000215796b0,ffff800000097980)
at intr_handler+0x38
Xintr_ioapic_edge15_untramp()
at Xintr_ioapic_edge15_untramp+0x18f
Xspllower()
at Xspllower+0x19
breadn(fffffd80434ac2a0,125,800,ffff8000007f6000,ffff8000007f6100,20,1261409224a6f0a6)
at breadn+0x15b
cd9660_read(ffff800021579a08)
at cd9660_read+0x189
VOP_READ(fffffd80434ac2a0,ffff800021579b68,0,fffffd807f7bda28)
at VOP_READ+0x41
vn_read(fffffd80676b53d0,ffff800021579b68,0)
at vn_read+0xa1
dofilereadv(ffff800021505060,3,ffff800021579b68,0,ffff800021579c30)
at dofilereadv+0x146
sys_read(ffff800021505060,ffff800021579be0,ffff800021579c30)
at sys_read+0x51
syscall(ffff800021579ca0)
at syscall+0x354
end trace frame: 0xffff800021579d20, count: 0
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports. Insufficient info makes it difficult to find and fix bugs.
ddb> show registers
rdi 0xffff8000000b6710
rsi 0xfffffd802e5d1dd8
rbp 0xffff800021579570
rbx 0x31
rdx 0xffffffff82466528 wdc_xfer_pool+0x48
rcx 0x37
rax 0xffffffffffffffff
r8 0xffffffff82508020 cleancache+0x20
r9 0xffffffff82508040 cleancache+0x40
r10 0x72c2ddd1139ca6df
r11 0xa89ec081eb37a8a3
r12 0xa
r13 0xffff800021579580
r14 0xfffffd802e5d1dd8
r15 0xffff8000000b6710
rip 0xffffffff811a1bc3 wdc_free_xfer+0xe3
cs 0x8
rflags 0x10213 __ALIGN_SIZE+0xf213
rsp 0xffff800021579540
ss 0x10
wdc_free_xfer+0xe3: movq %rax,0(%rcx)
### NOTICE that %rcx contains 0x37, which is not a valid
### pointer(!)
Mapping source code using objdump on wdc.o:
00000000000033c0 <wdc_free_xfer>:
wdc_free_xfer():
/usr/src/sys/dev/ic/wdc.c:1968
printf '%x\n' $((0x33c0 + 0xe3))
34a3
00000000000033c0 <wdc_free_xfer>:
wdc_free_xfer():
/usr/src/sys/dev/ic/wdc.c:1968
33c0: 4c 8b 1d 00 00 00 00 mov 0(%rip),%r11 # 33c7
<wdc_free_xfer+0x7>
33c3: R_X86_64_PC32
__retguard_246+0xfffffffffffffffc
33c7: 4c 33 1c 24 xor (%rsp),%r11
33cb: 55 push %rbp
33cc: 48 89 e5 mov %rsp,%rbp
33cf: 57 push %rdi
33d0: 56 push %rsi
33d1: 41 53 push %r11
33d3: 41 57 push %r15
33d5: 41 56 push %r14
33d7: 50 push %rax
33d8: 49 89 f6 mov %rsi,%r14
33db: 49 89 ff mov %rdi,%r15
/usr/src/sys/dev/ic/wdc.c:1972
33de: f7 06 00 04 00 00 testl $0x400,(%rsi)
33e4: 75 28 jne 340e <wdc_free_xfer+0x4e>
/usr/src/sys/dev/ic/wdc.c:1978
33e6: bf 06 00 00 00 mov $0x6,%edi
33eb: e8 00 00 00 00 callq 33f0 <wdc_free_xfer+0x30>
33ec: R_X86_64_PLT32 splraise+0xfffffffffffffffc
/usr/src/sys/dev/ic/wdc.c:1979
33f0: 41 80 67 58 fe andb $0xfe,0x58(%r15)
/usr/src/sys/dev/ic/wdc.c:1980
33f5: 49 8b 56 30 mov 0x30(%r14),%rdx
33f9: 49 8b 4e 38 mov 0x38(%r14),%rcx
33fd: 48 85 d2 test %rdx,%rdx
3400: 74 24 je 3426 <wdc_free_xfer+0x66>
3402: 48 87 d0 xchg %rdx,%rax
3405: 48 83 c0 38 add $0x38,%rax
3409: 48 87 d0 xchg %rdx,%rax
340c: eb 29 jmp 3437 <wdc_free_xfer+0x77>
/usr/src/sys/dev/ic/wdc.c:1973
340e: 41 80 67 58 fe andb $0xfe,0x58(%r15)
/usr/src/sys/dev/ic/wdc.c:1974
3413: 49 8b 4e 30 mov 0x30(%r14),%rcx
3417: 49 8b 46 38 mov 0x38(%r14),%rax
341b: 48 85 c9 test %rcx,%rcx
341e: 74 78 je 3498 <wdc_free_xfer+0xd8>
3420: 48 83 c1 38 add $0x38,%rcx
3424: eb 7d jmp 34a3 <wdc_free_xfer+0xe3>
/usr/src/sys/dev/ic/wdc.c:1980
3426: 49 8b 97 e0 04 00 00 mov 0x4e0(%r15),%rdx
342d: 48 87 d0 xchg %rdx,%rax
3430: 48 83 c0 08 add $0x8,%rax
3434: 48 87 d0 xchg %rdx,%rax
3437: 48 89 0a mov %rcx,(%rdx)
343a: 49 8b 4e 30 mov 0x30(%r14),%rcx
343e: 49 8b 56 38 mov 0x38(%r14),%rdx
3442: 48 89 0a mov %rcx,(%rdx)
3445: 49 c7 46 38 ff ff ff movq $0xffffffffffffffff,0x38(%r14)
344c: ff
344d: 49 c7 46 30 ff ff ff movq $0xffffffffffffffff,0x30(%r14)
3454: ff
/usr/src/sys/dev/ic/wdc.c:1981
3455: 41 f7 06 00 08 00 00 testl $0x800,(%r14)
345c: 75 21 jne 347f <wdc_free_xfer+0xbf>
/usr/src/sys/dev/ic/wdc.c:1985
345e: 89 c7 mov %eax,%edi
3460: e8 00 00 00 00 callq 3465 <wdc_free_xfer+0xa5>
3461: R_X86_64_PLT32 spllower+0xfffffffffffffffc
/usr/src/sys/dev/ic/wdc.c:1988
3465: 48 c7 c7 00 00 00 00 mov $0x0,%rdi
3468: R_X86_64_32S wdc_xfer_iopool
346c: 4c 89 f6 mov %r14,%rsi
346f: 48 83 c4 08 add $0x8,%rsp
3473: 41 5e pop %r14
3475: 41 5f pop %r15
3477: 41 5b pop %r11
3479: c9 leaveq
347a: e9 00 00 00 00 jmpq 347f <wdc_free_xfer+0xbf>
347b: R_X86_64_PLT32 scsi_io_put+0xfffffffffffffffc
/usr/src/sys/dev/ic/wdc.c:1982
347f: 41 81 26 ff f7 ff ff andl $0xfffff7ff,(%r14)
/usr/src/sys/dev/ic/wdc.c:1985
3486: 89 c7 mov %eax,%edi
3488: 48 83 c4 08 add $0x8,%rsp
348c: 41 5e pop %r14
348e: 41 5f pop %r15
3490: 41 5b pop %r11
3492: c9 leaveq
3493: e9 00 00 00 00 jmpq 3498 <wdc_free_xfer+0xd8>
3494: R_X86_64_PLT32 spllower+0xfffffffffffffffc
/usr/src/sys/dev/ic/wdc.c:1974
# C code at line 1974:
# TAILQ_REMOVE(&chp->ch_queue->sc_xfer, xfer, c_xferchain);
3498: 49 8b 8f e0 04 00 00 mov 0x4e0(%r15),%rcx
349f: 48 83 c1 08 add $0x8,%rcx
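### CRASH on the instruction below: %rcx = 0x37 (invalid address)
### (0x37 = 0xffffffffffffffff + 0x38, so this was probably reached via the
### jump at 3424 with 0x30(%r14) == -1, the same value that the code at
### 3445/344d stores into both link fields after a removal - it looks
### like this xfer had already been removed from the queue)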
34a3: 48 89 01 mov %rax,(%rcx)
34a6: 49 8b 46 30 mov 0x30(%r14),%rax
34aa: 49 8b 4e 38 mov 0x38(%r14),%rcx
The dev/ic/wdc.c is the standard source from the sys.tgz set from
install73.iso - so the line numbers should match this version
(unfortunately I found no way to automatically interleave the C source
code with the disassembly using "objdump -dlrS wdc.o" - it somehow fails
to do that.)
How to reproduce:
1. Build a kernel where only the i8254 timer is available; here is a
patch for GENERIC:
--- /usr/src/sys/arch/amd64/conf/GENERIC Fri Nov 11 16:27:39 2022
+++ /usr/src/sys/arch/amd64/conf/I8254_PIT Thu Jun 1 12:24:32 2023
@@ -13,6 +13,10 @@
include "../../../conf/GENERIC"
maxusers 80 # estimated number of users
+# HP - added
+# already enabled in GENERIC: option DDB
+option DDB_SAFE_CONSOLE
+
option USER_PCICONF # user-space PCI configuration
option APERTURE # in-kernel aperture driver for XFree86
@@ -38,8 +42,8 @@
pvbus0 at mainbus0
acpi0 at bios0
-acpitimer* at acpi?
-acpihpet* at acpi?
+#acpitimer* at acpi?
+#acpihpet* at acpi?
acpiac* at acpi?
acpibat* at acpi?
acpibtn* at acpi?
@@ -92,7 +96,7 @@
ipmi0 at mainbus? disable # IPMI
vmt0 at pvbus? # VMware Tools
-pvclock0 at pvbus? # KVM pvclock
+#pvclock0 at pvbus? # KVM pvclock
xen0 at pvbus? # Xen HVM domU
xnf* at xen? # Xen Netfront
2. Boot this kernel and verify that i8254 timer is used:
sysctl | fgrep timecount
kern.timecounter.tick=1
kern.timecounter.timestepwarnings=0
kern.timecounter.hardware=i8254
kern.timecounter.choice=i8254(0)
3. mount CD from IDE CD-ROM (for example install73.iso)
4. read all of data from mounted cdrom - for example using:
cat /cdrom/7.3/amd64/*.tgz > /dev/null
5. You should quickly see one or more timeout messages followed by
the kernel trap shown in this e-mail.
This bug is related to
- https://marc.info/?t=168400453600002&r=1&w=2
However, the trap trace is different - I guess this is because there are
many differences when building the bsd.rd (RAMDISK_CD) kernel with
-DSMALL_KERNEL and other changes, while in this example a lightly
modified GENERIC kernel is used.
Please let me know if you need any additional details.
Best regards
--Henryk Paluch