On Sat, Dec 11, 2021 at 09:37:08AM +0000, Visa Hankala wrote:
> On Sat, Dec 11, 2021 at 09:58:54AM +0100, Mark Kettenis wrote:
> > > Date: Sat, 11 Dec 2021 01:13:32 +0100
> > > From: Alexander Bluhm <alexander.bl...@gmx.net>
> 
> > > witness: lock order reversal:
> > >  1st 0xfffffd886921d8d8 vmmaplk (&map->lock)
> > >  2nd 0xffff800022c54130 nfsnode (&np->n_lock)
> > > lock order data w2 -> w1 missing
> > > lock order "&map->lock"(rwlock) -> "&np->n_lock"(rrwlock) first seen at:
> > > #0  rw_enter+0x65
> > > #1  rrw_enter+0x56
> > > #2  VOP_LOCK+0x5b
> > > #3  vn_lock+0xad
> > > #4  vn_rdwr+0x7f
> > > #5  vndstrategy+0x2e6
> > > #6  physio+0x227
> > > #7  spec_write+0x95
> > > #8  VOP_WRITE+0x41
> > > #9  vn_write+0xfc
> > > #10 dofilewritev+0x14d
> > > #11 sys_pwrite+0x5c
> > > #12 syscall+0x374
> > > #13 Xsyscall+0x128
> > 
> > so this is accessing a vnd whichis backed by a file on NFS.  Not
> > terribly surprised that this causes issues.  I think:
> > 
> > > lock order "&map->lock"(rwlock) -> "&np->n_lock"(rrwlock) first seen at:
> > 
> > is the "right" lock order.  Unfortunately data for the "wrong" order
> > is missing...
> 
> Some of these "missing" entries may be caused by transitive lock order
> relations that are not handled by the lock order printer.
> 
> I have somewhere a diff that improves the situation. Let's see if I can
> find it...

Here it is, though I want to tweak it before committing. It has problems
with stack usage because of recursion, and the output is still a mess.


witness: lock order reversal:           
 1st 0xfffffd810891f188 vmmaplk (&map->lock)
 2nd 0xffff8000227cc138 nfsnode (&np->n_lock)
lock order data w2 -> w1 missing          
lock order "&map->lock"(rwlock) -> "&np->n_lock"(rrwlock) first seen at:
#0  rw_enter+0x65                       
#1  rrw_enter+0x56                      
#2  VOP_LOCK+0x5b                       
#3  vn_lock+0xad
#4  vn_rdwr+0x7f
#5  vndstrategy+0x2e6
#6  physio+0x227
#7  spec_write+0x95
#8  VOP_WRITE+0x41
#9  vn_write+0xfc
#10 dofilewritev+0x14d
#11 sys_pwrite+0x5c
#12 syscall+0x3a9
#13 Xsyscall+0x128
lock order (1) &map->lock -> (2) &np->n_lock
#0  rw_enter+0x65
#1  rrw_enter+0x56
#2  VOP_LOCK+0x5b
#3  vn_lock+0xad
#4  vn_rdwr+0x7f
#5  vndstrategy+0x2e6
#6  physio+0x227
#7  spec_write+0x95
#8  VOP_WRITE+0x41
#9  vn_write+0xfc
#10 dofilewritev+0x14d
#11 sys_pwrite+0x5c
#12 syscall+0x3a9
#13 Xsyscall+0x128
lock order (2) &np->n_lock -> (3) netlock
#0  rw_enter_write+0x43
#1  solock+0x4b
#2  sosend+0x136
#3  nfs_send+0x89
#4  nfs_request+0x270
#5  nfs_getattr+0xc8 
#6  VOP_GETATTR+0x51 
#7  mountnfs+0x379
#8  nfs_mount+0x121
#9  sys_mount+0x364
#10 syscall+0x3a9
#11 Xsyscall+0x128
lock order (3) netlock -> (1) &map->lock
#0  rw_enter_read+0x38
#1  uvmfault_lookup+0x8a
#2  uvm_fault_check+0x32
#3  uvm_fault+0xfc
#4  kpageflttrap+0x12b
#5  kerntrap+0x91
#6  alltraps_kern_meltdown+0x7b
#7  copyin+0x5e
#8  sysctl_bounded_arr+0x83
#9  tcp_sysctl+0x387 
#10 sys_sysctl+0x184 
#11 syscall+0x3a9
#12 Xsyscall+0x128


diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c
index ad50f29e066..b4e43d8a1b7 100644
--- a/sys/kern/subr_witness.c
+++ b/sys/kern/subr_witness.c
@@ -369,6 +369,8 @@ static struct witness_lock_order_data       
*witness_lock_order_get(
                                            struct witness *child);
 static void    witness_list_lock(struct lock_instance *instance,
                    int (*prnt)(const char *fmt, ...));
+static void    witness_print_cycle(struct witness *parent,
+                   struct witness *child);
 static void    witness_setflag(struct lock_object *lock, int flag, int set);
 
 /*
@@ -1101,6 +1107,8 @@ witness_checkorder(struct lock_object *lock, int flags,
                                        printf("lock order data "
                                            "w1 -> w2 missing\n");
                                }
+
+                               witness_print_cycle(w1, w);
                        }
                        witness_debugger(0);
                        goto out_splx;
@@ -1870,6 +1885,88 @@ witness_list_lock(struct lock_instance *instance,
                stacktrace_print(&instance->li_stack->ls_stack, prnt);
 }
 
+static int
+witness_dls(struct witness *w, struct witness *target, struct witness **path,
+    int depth, int *remaining)
+{
+       int i, any_remaining;
+
+       if (depth == 0) {
+               *remaining = 1;
+               return (w == target);
+       }
+
+       any_remaining = 0;
+       for (i = 1; i <= w_max_used_index; i++) {
+               if (w_rmatrix[w->w_index][i] & WITNESS_PARENT) {
+                       if (witness_dls(&w_data[i], target, path, depth - 1,
+                           remaining)) {
+                               path[depth - 1] = &w_data[i];
+                               *remaining = 1;
+                               return 1;
+                       }
+                       if (remaining)
+                               any_remaining = 1;
+               }
+       }
+       *remaining = any_remaining;
+       return 0;
+}
+
+static void
+witness_print_cycle_edge(struct witness *parent, struct witness *child,
+    int step, int last)
+{
+       struct witness_lock_order_data *wlod;
+       int next;
+
+       if (last)
+               next = 1;
+       else
+               next = step + 1;
+       printf("lock order (%d) %s -> (%d) %s\n",
+           step, parent->w_type->lt_name,
+           next, child->w_type->lt_name);
+       if (witness_watch > 1) {
+               mtx_enter(&w_mtx);
+               wlod = witness_lock_order_get(parent, child);
+               mtx_leave(&w_mtx);
+
+               if (wlod != NULL)
+                       stacktrace_print(&wlod->wlod_stack, printf);
+               else
+                       printf("lock order data c %p -> p %p missing\n",
+                           child, parent);
+       }
+}
+
+static void
+witness_print_cycle(struct witness *parent, struct witness *child)
+{
+       struct witness *path[4];
+       struct witness *w;
+       int depth, remaining;
+       int step = 0;
+
+       /* Find the shortest path from child to parent. */
+       for (depth = 1; depth < nitems(path); depth++) {
+               if (witness_dls(child, parent, path, depth, &remaining))
+                       goto found;
+               if (!remaining)
+                       break;
+       }
+       printf("incomplete path, depth %d\n", depth);
+       return;
+
+found:
+       witness_print_cycle_edge(parent, child, ++step, 0);
+       for (w = child; depth > 0; depth--) {
+               witness_print_cycle_edge(w, path[depth - 1], ++step,
+                   depth == 1);
+               w = path[depth - 1];
+       }
+}
+
 #ifdef DDB
 static int
 witness_thread_has_locks(struct proc *p)

Reply via email to