On Sun, Dec 06, 2015 at 06:51:36PM +0100, Fabian Keil wrote:
> > > #16 0xffffffff80877d5a in bcopy () at 
> > > /usr/src/sys/amd64/amd64/support.S:118
> > > #17 0xffffffff805f64e8 in uiomove_faultflag (cp=<value optimized out>, 
> > > n=<value optimized out>, uio=0xfffffe009444aae0, nofault=<value optimized 
> > > out>) at /usr/src/sys/kern/subr_uio.c:208
> > > #18 0xffffffff8046236f in msdosfs_read (ap=<value optimized out>) at 
> > > /usr/src/sys/fs/msdosfs/msdosfs_vnops.c:596
> > > #19 0xffffffff808feb20 in VOP_READ_APV (vop=<value optimized out>, 
> > > a=<value optimized out>) at vnode_if.c:930
> > > #20 0xffffffff8039bf3a in mdstart_vnode (sc=0xfffff8004c7ce000, 
> > > bp=0xfffff80028fc81f0) at vnode_if.h:384  
> > From frame 20, do 'p *bp' in kgdb and mail the result.  Do you have
> > any non-standard values for buffer cache knobs, esp. for MAXPHYS?
> 
> (kgdb) p *bp
> $1 = {bio_cmd = 1 '\001', bio_flags = 16 '\020', bio_cflags = 0 '\0', 
> bio_pflags = 0 '\0', bio_dev = 0x0, bio_disk = 0x0, bio_offset = 0, 
> bio_bcount = 0, 
>   bio_data = 0xfffffe0077d94000 <Address 0xfffffe0077d94000 out of bounds>, 
> bio_ma = 0xfffff8000275bc00, bio_ma_offset = 960,
> bio_ma_n = 33,
This is the issue.  The upper layer (ZFS?) passed down a request that is
max-sized (bio_length == 131072 bytes, i.e. 32 pages) but not page-aligned
(bio_ma_offset == 960), so it spans 33 pages (bio_ma_n == 33).
The physical buffer used for the transient mapping cannot handle this.

> bio_error = 0, bio_resid = 0, 
>   bio_done = 0xffffffff804e51d0 <g_std_done>, bio_driver1 = 0x0, bio_driver2 
> = 0x0, bio_caller1 = 0x0, bio_caller2 = 0x0, bio_queue = {tqe_next = 0x0, 
> tqe_prev = 0xfffff8004c7ce018}, bio_attribute = 0x0, 
>   bio_from = 0xfffff80010131d80, bio_to = 0xfffff800694f2a00, bio_length = 
> 131072, bio_completed = 0, bio_children = 0, bio_inbed = 0, bio_parent = 
> 0xfffff8000628bd90, bio_t0 = {sec = 33029, 
>     frac = 13163670047247984455}, bio_task = 0, bio_task_arg = 0x0, 
> bio_classifier1 = 0x0, bio_classifier2 = 0x0, bio_pblkno = 0}
>  
> I don't use non-standard values for MAXPHYS or other buffer cache settings.
> 

Try the following patch.

diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c
index a47066e..52142ed 100644
--- a/sys/dev/md/md.c
+++ b/sys/dev/md/md.c
@@ -836,8 +836,8 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
        struct buf *pb;
        bus_dma_segment_t *vlist;
        struct thread *td;
-       off_t len, zerosize;
-       int ma_offs;
+       off_t iolen, len, zerosize;
+       int ma_offs, npages;
 
        switch (bp->bio_cmd) {
        case BIO_READ:
@@ -858,6 +858,7 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
        pb = NULL;
        piov = NULL;
        ma_offs = bp->bio_ma_offset;
+       len = bp->bio_length;
 
        /*
         * VNODE I/O
@@ -890,7 +891,6 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
                auio.uio_iovcnt = howmany(bp->bio_length, zerosize);
                piov = malloc(sizeof(*piov) * auio.uio_iovcnt, M_MD, M_WAITOK);
                auio.uio_iov = piov;
-               len = bp->bio_length;
                while (len > 0) {
                        piov->iov_base = __DECONST(void *, zero_region);
                        piov->iov_len = len;
@@ -904,7 +904,6 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
                piov = malloc(sizeof(*piov) * bp->bio_ma_n, M_MD, M_WAITOK);
                auio.uio_iov = piov;
                vlist = (bus_dma_segment_t *)bp->bio_data;
-               len = bp->bio_length;
                while (len > 0) {
                        piov->iov_base = (void *)(uintptr_t)(vlist->ds_addr +
                            ma_offs);
@@ -920,11 +919,20 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
                piov = auio.uio_iov;
        } else if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
                pb = getpbuf(&md_vnode_pbuf_freecnt);
-               pmap_qenter((vm_offset_t)pb->b_data, bp->bio_ma, bp->bio_ma_n);
-               aiov.iov_base = (void *)((vm_offset_t)pb->b_data + ma_offs);
-               aiov.iov_len = bp->bio_length;
+               bp->bio_resid = len;
+unmapped_step:
+               npages = min(MAXPHYS, roundup2(len + ma_offs, PAGE_SIZE)) /
+                   PAGE_SIZE;
+               iolen = min(npages * PAGE_SIZE - ma_offs, len);
+               KASSERT(iolen > 0, ("zero iolen"));
+               pmap_qenter((vm_offset_t)pb->b_data,
+                   &bp->bio_ma[ma_offs / PAGE_SIZE], npages);
+               aiov.iov_base = (void *)((vm_offset_t)pb->b_data +
+                   ma_offs % PAGE_SIZE);
+               aiov.iov_len = iolen;
                auio.uio_iov = &aiov;
                auio.uio_iovcnt = 1;
+               auio.uio_resid = aiov.iov_len;
        } else {
                aiov.iov_base = bp->bio_data;
                aiov.iov_len = bp->bio_length;
@@ -948,15 +956,21 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
                vn_finished_write(mp);
        }
 
-       if (pb) {
-               pmap_qremove((vm_offset_t)pb->b_data, bp->bio_ma_n);
+       if (pb != NULL) {
+               pmap_qremove((vm_offset_t)pb->b_data, npages);
+               if (error == 0) {
+                       len -= iolen;
+                       bp->bio_resid -= iolen;
+                       ma_offs += iolen;
+                       if (len > 0)
+                               goto unmapped_step;
+               }
                relpbuf(pb, &md_vnode_pbuf_freecnt);
        }
 
-       if (piov != NULL)
-               free(piov, M_MD);
-
-       bp->bio_resid = auio.uio_resid;
+       free(piov, M_MD);
+       if (pb == NULL)
+               bp->bio_resid = auio.uio_resid;
        return (error);
 }
 
_______________________________________________
freebsd-current@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-current
To unsubscribe, send any mail to "freebsd-current-unsubscr...@freebsd.org"

Reply via email to