Dear tech, I have a machine with some kind of hardware defect. Smartctl shows that one SSD regularly has an unexpected power loss (Attribute 174):
174 Unknown_Attribute 0x0032 100 100 000 Old_age Always - 284. On the console I see the following output: root on sd0a (b75e60dd651bd99e.a) swap on sd0b dump on sd0b ahci0: device didn't come ready after reset, TFD: 0x441<ERR> ahci0: unrecoverable errors (IS: 10<UFS>), disabling port. ahci0: device didn't come ready after reset, TFD: 0x1d0<BSY> ahci0: stopping the port, softreset slot 31 was still active. ahci0: device didn't come ready after reset, TFD: 0x441<ERR> ahci0: device didn't come ready after reset, TFD: 0x441<ERR> ahci0: stopping the port, softreset slot 31 was still active. ahci0: device didn't come ready after reset, TFD: 0x441<ERR> ahci0: stopping the port, softreset slot 31 was still active. ahci0: stopping the port, softreset slot 31 was still active. ahci0: stopping the port, softreset slot 31 was still active. ahci0: stopping the port, softreset slot 31 was still active. ahci0: stopping the port, softreset slot 31 was still active. ahci0: stopping the port, softreset slot 31 was still active. 
ahci0: device didn't come ready after reset, TFD: 0x441<ERR> ahci0: device didn't come ready after reset, TFD: 0x441<ERR> mode = 0100644, inum = 7731315, fs = /var panic: ffs_valloc: dup alloc Stopped at db_enter+0x10: popq %rbp TID PID UID PRFLAGS PFLAGS CPU COMMAND 175271 46390 1001 0x2 0x4000000 0 rustc 345274 46390 1001 0x2 0x4000000 1 rustc 224769 52321 1001 0x2 0x4000000 5 rustc *414814 59107 1001 0x1002 0x4080000 2K rustc 87951 59107 1001 0x1002 0x4080000 4 rustc db_enter() at db_enter+0x10 panic(ffffffff81f194f6) at panic+0xbf ffs_inode_alloc(fffffd87b0b8c1e0,81a4,fffffd8fc6cfc5d0,ffff800020a7a948) at ffs_inode_alloc+0x42e ufs_makeinode(81a4,fffffd87ab540438,ffff800020a7ac40,ffff800020a7ac70) at ufs_makeinode+0x79 ufs_create(ffff800020a7a9f8) at ufs_create+0x3c VOP_CREATE(fffffd87ab540438,ffff800020a7ac40,ffff800020a7ac70,ffff800020a7aa50) at VOP_CREATE+0x3f vn_open(ffff800020a7ac10,10602,1a4) at vn_open+0x162 doopenat(ffff800020879cf0,ffffff9c,246e5646200,10601,1b6,ffff800020a7adf0) at doopenat+0x1cd syscall(ffff800020a7ae60) at syscall+0x35f Xsyscall() at Xsyscall+0x128 end of kernel ffs_inode_alloc checks the mode of the newly allocated inode and panics if it is not 0. I suspect that, because of the SSD's soft resets, I/O operations take longer and cause trouble due to missing locking. For example, in ffs_nodealloccg it is possible for an FFS2 file system to sleep in getblk. During the sleep, another call to ffs_nodealloccg might happen with the same ipref. The function first checks whether there is space at the preferred location, then sleeps, and only then reserves it. This is a TOCTOU bug. FreeBSD fixed this in 2013: https://github.com/freebsd/freebsd-src/commit/94f4ac214c403b99cbc44dd6e9cdf4db2cc9b297 Below is a diff that addresses this with an exclusive lock using RW_SLEEPFAIL. 
Index: ufs/ffs/ffs_alloc.c =================================================================== RCS file: /cvs/src/sys/ufs/ffs/ffs_alloc.c,v retrieving revision 1.114 diff -u -p -r1.114 ffs_alloc.c --- ufs/ffs/ffs_alloc.c 11 Mar 2021 13:31:35 -0000 1.114 +++ ufs/ffs/ffs_alloc.c 10 Mar 2023 15:44:52 -0000 @@ -46,6 +46,7 @@ #include <sys/buf.h> #include <sys/vnode.h> #include <sys/mount.h> +#include <sys/rwlock.h> #include <sys/syslog.h> #include <sys/stdint.h> #include <sys/time.h> @@ -1089,6 +1090,7 @@ gotit: } /* inode allocation routine */ +struct rwlock ffs_node_lock = RWLOCK_INITIALIZER("indallck"); daddr_t ffs_nodealloccg(struct inode *ip, u_int cg, daddr_t ipref, int mode) { @@ -1115,6 +1117,9 @@ ffs_nodealloccg(struct inode *ip, u_int return (0); cgp = (struct cg *)bp->b_data; +#ifdef FFS2 + check_nifree: +#endif if (cgp->cg_cs.cs_nifree == 0) { brelse(bp); return (0); @@ -1201,9 +1206,12 @@ gotit: /* Has any inode not been used at least once? */ cgp->cg_initediblk < cgp->cg_ffs2_niblk) { + if (rw_enter(&ffs_node_lock, RW_WRITE | RW_SLEEPFAIL)) + goto check_nifree; ibp = getblk(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, cg * fs->fs_ipg + cgp->cg_initediblk)), (int)fs->fs_bsize, 0, INFSLP); + rw_exit(&ffs_node_lock); memset(ibp->b_data, 0, fs->fs_bsize); dp2 = (struct ufs2_dinode *)(ibp->b_data);