Hello!

I have found a reproducible way to trigger a kernel trap in OpenBSD 7.3-RELEASE. It occurs both when OpenBSD runs as a guest (VM under KVM) and on a bare-metal machine with an IDE CD drive - so it is not a hypervisor bug.

To trigger the kernel trap, these conditions have to be met:

1. the 7.3-RELEASE kernel uses the i8254 timer (which often happens with
   bsd.rd from install73.iso, which has limited clock alternatives)
2. some process is reading a lot of data from an IDE CD (for example
   the install sets from install73.iso)
3. the less memory there is, the sooner the trap occurs (with 2GB RAM
   within a few seconds; with 8GB RAM it may take several minutes and
   several timeouts)

When you start reading, for example, all install sets
from a mounted IDE CD-ROM:

cat /mnt/cdrom/7.3/amd64/*.tgz > /dev/null

Soon one or more timeouts occur:

wdc_atapi_start: not ready, st = 50

Followed by this trap:

uvm_fault(0xfffffd807f55e2d0, 0x37, 0, 2) -> e
kernel: page fault trap, code=0
Stopped at      wdc_free_xfer+0xe3:     movq    %rax,0(%rcx)
    TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
*477705  68753      0    0x100003          0    0  cat

# NOTE: each trace entry below is folded onto two lines at "at":

wdc_free_xfer(ffff8000000b6710,fffffd802e5d1dd8)
 at wdc_free_xfer+0xe3
wdc_atapi_the_machine(ffff8000000b6710,fffffd802e5d1dd8,2)
 at wdc_atapi_the_machine+0x223
wdc_atapi_intr(ffff8000000b6710,fffffd802e5d1dd8,1)
 at wdc_atapi_intr+0x5a
wdcintr(ffff8000000b6710)
 at wdcintr+0xbd
intr_handler(ffff8000215796b0,ffff800000097980)
 at intr_handler+0x38
Xintr_ioapic_edge15_untramp()
 at Xintr_ioapic_edge15_untramp+0x18f
Xspllower()
 at Xspllower+0x19
breadn(fffffd80434ac2a0,125,800,ffff8000007f6000,ffff8000007f6100,20,1261409224a6f0a6)
 at breadn+0x15b
cd9660_read(ffff800021579a08)
 at cd9660_read+0x189
VOP_READ(fffffd80434ac2a0,ffff800021579b68,0,fffffd807f7bda28)
 at VOP_READ+0x41
vn_read(fffffd80676b53d0,ffff800021579b68,0)
 at vn_read+0xa1
dofilereadv(ffff800021505060,3,ffff800021579b68,0,ffff800021579c30)
 at dofilereadv+0x146
sys_read(ffff800021505060,ffff800021579be0,ffff800021579c30)
 at sys_read+0x51
syscall(ffff800021579ca0)
 at syscall+0x354
end trace frame: 0xffff800021579d20, count: 0
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports.  Insufficient info makes it difficult to find and fix bugs.
ddb> show registers
rdi               0xffff8000000b6710
rsi               0xfffffd802e5d1dd8
rbp               0xffff800021579570
rbx                             0x31
rdx               0xffffffff82466528    wdc_xfer_pool+0x48
rcx                             0x37
rax               0xffffffffffffffff
r8                0xffffffff82508020    cleancache+0x20
r9                0xffffffff82508040    cleancache+0x40
r10               0x72c2ddd1139ca6df
r11               0xa89ec081eb37a8a3
r12                              0xa
r13               0xffff800021579580
r14               0xfffffd802e5d1dd8
r15               0xffff8000000b6710
rip               0xffffffff811a1bc3    wdc_free_xfer+0xe3
cs                               0x8
rflags                       0x10213    __ALIGN_SIZE+0xf213
rsp               0xffff800021579540
ss                              0x10
wdc_free_xfer+0xe3:     movq    %rax,0(%rcx)

### NOTICE that %rcx contains 0x37, which is not a valid
### pointer(!)

Mapping the faulting address to source code using objdump on wdc.o:


00000000000033c0 <wdc_free_xfer>:
wdc_free_xfer():
/usr/src/sys/dev/ic/wdc.c:1968

printf '%x\n' $((0x33c0 + 0xe3))

34a3

00000000000033c0 <wdc_free_xfer>:
wdc_free_xfer():
/usr/src/sys/dev/ic/wdc.c:1968
    33c0:       4c 8b 1d 00 00 00 00    mov    0(%rip),%r11        # 33c7 <wdc_free_xfer+0x7>
                        33c3: R_X86_64_PC32     __retguard_246+0xfffffffffffffffc
    33c7:       4c 33 1c 24             xor    (%rsp),%r11
    33cb:       55                      push   %rbp
    33cc:       48 89 e5                mov    %rsp,%rbp
    33cf:       57                      push   %rdi
    33d0:       56                      push   %rsi
    33d1:       41 53                   push   %r11
    33d3:       41 57                   push   %r15
    33d5:       41 56                   push   %r14
    33d7:       50                      push   %rax
    33d8:       49 89 f6                mov    %rsi,%r14
    33db:       49 89 ff                mov    %rdi,%r15
/usr/src/sys/dev/ic/wdc.c:1972
    33de:       f7 06 00 04 00 00       testl  $0x400,(%rsi)
    33e4:       75 28                   jne    340e <wdc_free_xfer+0x4e>
/usr/src/sys/dev/ic/wdc.c:1978
    33e6:       bf 06 00 00 00          mov    $0x6,%edi
    33eb:       e8 00 00 00 00          callq  33f0 <wdc_free_xfer+0x30>
                        33ec: R_X86_64_PLT32    splraise+0xfffffffffffffffc
/usr/src/sys/dev/ic/wdc.c:1979
    33f0:       41 80 67 58 fe          andb   $0xfe,0x58(%r15)
/usr/src/sys/dev/ic/wdc.c:1980
    33f5:       49 8b 56 30             mov    0x30(%r14),%rdx
    33f9:       49 8b 4e 38             mov    0x38(%r14),%rcx
    33fd:       48 85 d2                test   %rdx,%rdx
    3400:       74 24                   je     3426 <wdc_free_xfer+0x66>
    3402:       48 87 d0                xchg   %rdx,%rax
    3405:       48 83 c0 38             add    $0x38,%rax
    3409:       48 87 d0                xchg   %rdx,%rax
    340c:       eb 29                   jmp    3437 <wdc_free_xfer+0x77>
/usr/src/sys/dev/ic/wdc.c:1973
    340e:       41 80 67 58 fe          andb   $0xfe,0x58(%r15)
/usr/src/sys/dev/ic/wdc.c:1974
    3413:       49 8b 4e 30             mov    0x30(%r14),%rcx
    3417:       49 8b 46 38             mov    0x38(%r14),%rax
    341b:       48 85 c9                test   %rcx,%rcx
    341e:       74 78                   je     3498 <wdc_free_xfer+0xd8>
    3420:       48 83 c1 38             add    $0x38,%rcx
    3424:       eb 7d                   jmp    34a3 <wdc_free_xfer+0xe3>
/usr/src/sys/dev/ic/wdc.c:1980
    3426:       49 8b 97 e0 04 00 00    mov    0x4e0(%r15),%rdx
    342d:       48 87 d0                xchg   %rdx,%rax
    3430:       48 83 c0 08             add    $0x8,%rax
    3434:       48 87 d0                xchg   %rdx,%rax
    3437:       48 89 0a                mov    %rcx,(%rdx)
    343a:       49 8b 4e 30             mov    0x30(%r14),%rcx
    343e:       49 8b 56 38             mov    0x38(%r14),%rdx
    3442:       48 89 0a                mov    %rcx,(%rdx)
    3445:       49 c7 46 38 ff ff ff    movq   $0xffffffffffffffff,0x38(%r14)
    344c:       ff
    344d:       49 c7 46 30 ff ff ff    movq   $0xffffffffffffffff,0x30(%r14)
    3454:       ff
/usr/src/sys/dev/ic/wdc.c:1981
    3455:       41 f7 06 00 08 00 00    testl  $0x800,(%r14)
    345c:       75 21                   jne    347f <wdc_free_xfer+0xbf>
/usr/src/sys/dev/ic/wdc.c:1985
    345e:       89 c7                   mov    %eax,%edi
    3460:       e8 00 00 00 00          callq  3465 <wdc_free_xfer+0xa5>
                        3461: R_X86_64_PLT32    spllower+0xfffffffffffffffc
/usr/src/sys/dev/ic/wdc.c:1988
    3465:       48 c7 c7 00 00 00 00    mov    $0x0,%rdi
                        3468: R_X86_64_32S      wdc_xfer_iopool
    346c:       4c 89 f6                mov    %r14,%rsi
    346f:       48 83 c4 08             add    $0x8,%rsp
    3473:       41 5e                   pop    %r14
    3475:       41 5f                   pop    %r15
    3477:       41 5b                   pop    %r11
    3479:       c9                      leaveq
    347a:       e9 00 00 00 00          jmpq   347f <wdc_free_xfer+0xbf>
                        347b: R_X86_64_PLT32    scsi_io_put+0xfffffffffffffffc
/usr/src/sys/dev/ic/wdc.c:1982
    347f:       41 81 26 ff f7 ff ff    andl   $0xfffff7ff,(%r14)
/usr/src/sys/dev/ic/wdc.c:1985
    3486:       89 c7                   mov    %eax,%edi
    3488:       48 83 c4 08             add    $0x8,%rsp
    348c:       41 5e                   pop    %r14
    348e:       41 5f                   pop    %r15
    3490:       41 5b                   pop    %r11
    3492:       c9                      leaveq
    3493:       e9 00 00 00 00          jmpq   3498 <wdc_free_xfer+0xd8>
                        3494: R_X86_64_PLT32    spllower+0xfffffffffffffffc
/usr/src/sys/dev/ic/wdc.c:1974
# C code at line 1974:
#       TAILQ_REMOVE(&chp->ch_queue->sc_xfer, xfer, c_xferchain);
    3498:       49 8b 8f e0 04 00 00    mov    0x4e0(%r15),%rcx
    349f:       48 83 c1 08             add    $0x8,%rcx
### CRASH on the instruction below: %rcx = 0x37 (invalid address)
    34a3:       48 89 01                mov    %rax,(%rcx)
    34a6:       49 8b 46 30             mov    0x30(%r14),%rax
    34aa:       49 8b 4e 38             mov    0x38(%r14),%rcx

The dev/ic/wdc.c is the standard source from the sys.tgz set from
install73.iso - so line numbers should match this version. (Unfortunately
I found no way to automatically interleave the C source code with the
disassembly using "objdump -dlrS wdc.o" - it somehow fails
to do that.)


How to reproduce:

1. Build a kernel where only the i8254 timer is available; here is a patch for GENERIC:

--- /usr/src/sys/arch/amd64/conf/GENERIC        Fri Nov 11 16:27:39 2022
+++ /usr/src/sys/arch/amd64/conf/I8254_PIT      Thu Jun  1 12:24:32 2023
@@ -13,6 +13,10 @@
 include                "../../../conf/GENERIC"
 maxusers       80                      # estimated number of users

+# HP - added
+# already enabled in GENERIC: option DDB
+option DDB_SAFE_CONSOLE
+
 option         USER_PCICONF    # user-space PCI configuration

 option         APERTURE        # in-kernel aperture driver for XFree86
@@ -38,8 +42,8 @@
 pvbus0 at mainbus0

 acpi0          at bios0
-acpitimer*     at acpi?
-acpihpet*      at acpi?
+#acpitimer*    at acpi?
+#acpihpet*     at acpi?
 acpiac*                at acpi?
 acpibat*       at acpi?
 acpibtn*       at acpi?
@@ -92,7 +96,7 @@
 ipmi0  at mainbus? disable     # IPMI

 vmt0   at pvbus?               # VMware Tools
-pvclock0 at pvbus?             # KVM pvclock
+#pvclock0 at pvbus?            # KVM pvclock

 xen0   at pvbus?               # Xen HVM domU
 xnf*   at xen?                 # Xen Netfront

2. Boot this kernel and verify that the i8254 timer is used:

sysctl | fgrep timecount
kern.timecounter.tick=1
kern.timecounter.timestepwarnings=0
kern.timecounter.hardware=i8254
kern.timecounter.choice=i8254(0)

3. Mount a CD in the IDE CD-ROM drive (for example install73.iso)
4. Read all of the data from the mounted CD-ROM - for example using:

cat /cdrom/7.3/amd64/*.tgz > /dev/null

5. You should quickly see one or more timeout messages followed by
   the kernel trap shown in this e-mail.

This bug is related to
- https://marc.info/?t=168400453600002&r=1&w=2
However, the trap trace is different - I guess that is because there are
many differences when building the bsd.rd (RAMDISK_CD) kernel with
-DSMALL_KERNEL and other changes, while in this example a lightly modified
GENERIC kernel is used.

Please let me know if you need any additional details.

Best regards
  --Henryk Paluch

Reply via email to