On Sun, Mar 09, 2014 at 02:16:57PM -0400, Glen Barber wrote:
> panic: vm_fault: fault on nofault entry, addr: fffffe03becbc000

I see, this panic is for an access to the kernel map, not to the direct map.
I think that this is a race with another CPU unmapping some page in the
kernel map, which cannot be solved by access checks.

Please try the following.  I booted with the patch and checked that
"kgdb /boot/kernel/kernel /dev/mem" works, but did not try to reproduce
the issue.

diff --git a/sys/amd64/amd64/mem.c b/sys/amd64/amd64/mem.c
index abbbb21..5a4d8a9 100644
--- a/sys/amd64/amd64/mem.c
+++ b/sys/amd64/amd64/mem.c
@@ -76,14 +76,16 @@ MALLOC_DEFINE(M_MEMDESC, "memdesc", "memory range 
descriptors");
 int
 memrw(struct cdev *dev, struct uio *uio, int flags)
 {
-       int o;
-       u_long c = 0, v;
        struct iovec *iov;
-       int error = 0;
+       u_long c, v;
+       int error, o, sflags;
        vm_offset_t addr, eaddr;
 
        GIANT_REQUIRED;
 
+       error = 0;
+       c = 0;
+       sflags = curthread_pflags_set(TDP_DEVMEMIO);
        while (uio->uio_resid > 0 && error == 0) {
                iov = uio->uio_iov;
                if (iov->iov_len == 0) {
@@ -98,7 +100,15 @@ memrw(struct cdev *dev, struct uio *uio, int flags)
 kmemphys:
                        o = v & PAGE_MASK;
                        c = min(uio->uio_resid, (u_int)(PAGE_SIZE - o));
-                       error = uiomove((void *)PHYS_TO_DMAP(v), (int)c, uio);
+                       v = PHYS_TO_DMAP(v);
+                       if (v < DMAP_MIN_ADDRESS ||
+                           (v > DMAP_MIN_ADDRESS + dmaplimit &&
+                           v <= DMAP_MAX_ADDRESS) ||
+                           pmap_kextract(v) == 0) {
+                               error = EFAULT;
+                               goto ret;
+                       }
+                       error = uiomove((void *)v, (int)c, uio);
                        continue;
                }
                else if (dev2unit(dev) == CDEV_MINOR_KMEM) {
@@ -119,22 +129,30 @@ kmemphys:
                        addr = trunc_page(v);
                        eaddr = round_page(v + c);
 
-                       if (addr < VM_MIN_KERNEL_ADDRESS)
-                               return (EFAULT);
-                       for (; addr < eaddr; addr += PAGE_SIZE) 
-                               if (pmap_extract(kernel_pmap, addr) == 0)
-                                       return (EFAULT);
-
+                       if (addr < VM_MIN_KERNEL_ADDRESS) {
+                               error = EFAULT;
+                               goto ret;
+                       }
+                       for (; addr < eaddr; addr += PAGE_SIZE) {
+                               if (pmap_extract(kernel_pmap, addr) == 0) {
+                                       error = EFAULT;
+                                       goto ret;
+                               }
+                       }
                        if (!kernacc((caddr_t)(long)v, c,
                            uio->uio_rw == UIO_READ ? 
-                           VM_PROT_READ : VM_PROT_WRITE))
-                               return (EFAULT);
+                           VM_PROT_READ : VM_PROT_WRITE)) {
+                               error = EFAULT;
+                               goto ret;
+                       }
 
                        error = uiomove((caddr_t)(long)v, (int)c, uio);
                        continue;
                }
                /* else panic! */
        }
+ret:
+       curthread_pflags_restore(sflags);
        return (error);
 }
 
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index f7d0afd..b1cbdbc 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -787,6 +787,12 @@ nogo:
                        frame->tf_rip = (long)curpcb->pcb_onfault;
                        return (0);
                }
+               if ((td->td_pflags & TDP_DEVMEMIO) != 0) {
+                       KASSERT(curpcb->pcb_onfault != NULL,
+                           ("/dev/mem without pcb_onfault"));
+                       frame->tf_rip = (long)curpcb->pcb_onfault;
+                       return (0);
+               }
                trap_fatal(frame, eva);
                return (-1);
        }
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index fce1f8a..e7cd022 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -424,6 +424,7 @@ do {                                                        
                \
 #define        TDP_RESETSPUR   0x04000000 /* Reset spurious page fault 
history. */
 #define        TDP_NERRNO      0x08000000 /* Last errno is already in td_errno 
*/
 #define        TDP_UIOHELD     0x10000000 /* Current uio has pages held in 
td_ma */
+#define        TDP_DEVMEMIO    0x20000000 /* Accessing memory for /dev/mem */
 
 /*
  * Reasons that the current thread can not be run yet.
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 4a6495f..023860c 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -269,6 +269,8 @@ RetryFault:;
        map_generation = fs.map->timestamp;
 
        if (fs.entry->eflags & MAP_ENTRY_NOFAULT) {
+               if ((curthread->td_pflags & TDP_DEVMEMIO) != 0)
+                       return (KERN_FAILURE);
                panic("vm_fault: fault on nofault entry, addr: %lx",
                    (u_long)vaddr);
        }

Attachment: pgp0oNMBj_FI2.pgp
Description: PGP signature

Reply via email to