Module Name: src Committed By: bouyer Date: Tue Jul 25 16:15:50 UTC 2023
Modified Files: src/sys/arch/xen/xen: xbd_xenbus.c Log Message: Propoerly handle 4k sector size backends: - report the backend's sector size to upper layers, not DEV_BSIZE. Adjust the number of sectors accordingly. - Use sc_secsize instead of XEN_BSIZE where appropriate. The sectors numbers in I/O requests are still in XEN_BSIZE units, but must be a multiple of sc_secsize/XEN_BSIZE. - As a consequence of previous, the buffer has to be aligned to sc_secsize, aligned to XEN_BSIZE may not be enough. This means that we may have to xbd_map_align() more buffer, including some without B_PHYS set. - Add some more DPRINTF lines, related to I/O requests Tested with a linux dom0. thanks to Christian Kujau for providing access to his hardware for testing and debugging. To generate a diff of this commit: cvs rdiff -u -r1.133 -r1.134 src/sys/arch/xen/xen/xbd_xenbus.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/xen/xen/xbd_xenbus.c diff -u src/sys/arch/xen/xen/xbd_xenbus.c:1.133 src/sys/arch/xen/xen/xbd_xenbus.c:1.134 --- src/sys/arch/xen/xen/xbd_xenbus.c:1.133 Fri Jul 21 11:28:50 2023 +++ src/sys/arch/xen/xen/xbd_xenbus.c Tue Jul 25 16:15:50 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: xbd_xenbus.c,v 1.133 2023/07/21 11:28:50 bouyer Exp $ */ +/* $NetBSD: xbd_xenbus.c,v 1.134 2023/07/25 16:15:50 bouyer Exp $ */ /* * Copyright (c) 2006 Manuel Bouyer. @@ -50,7 +50,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.133 2023/07/21 11:28:50 bouyer Exp $"); +__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.134 2023/07/25 16:15:50 bouyer Exp $"); #include "opt_xen.h" @@ -169,7 +169,7 @@ struct xbd_xenbus_softc { #define BLKIF_SHUTDOWN_REMOTE 1 /* backend-initiated shutdown in progress */ #define BLKIF_SHUTDOWN_LOCAL 2 /* locally-initiated shutdown in progress */ - uint64_t sc_sectors; /* number of XEN_BSIZE sectors for this device */ + uint64_t sc_sectors; /* number of sc_secsize sectors for this device */ u_long sc_secsize; /* sector size */ uint64_t sc_xbdsize; /* size of disk in DEV_BSIZE */ u_long sc_info; /* VDISK_* */ @@ -674,15 +674,14 @@ xbd_backend_changed(void *arg, XenbusSta xbd_connect(sc); sc->sc_shutdown = BLKIF_SHUTDOWN_RUN; sc->sc_xbdsize = - sc->sc_sectors * (uint64_t)XEN_BSIZE / DEV_BSIZE; + sc->sc_sectors * (uint64_t)sc->sc_secsize / DEV_BSIZE; dg = &sc->sc_dksc.sc_dkdev.dk_geom; memset(dg, 0, sizeof(*dg)); - dg->dg_secperunit = sc->sc_xbdsize; - dg->dg_secsize = DEV_BSIZE; + dg->dg_secperunit = sc->sc_sectors; + dg->dg_secsize = sc->sc_secsize; dg->dg_ntracks = 1; - // XXX: Ok to hard-code DEV_BSIZE? - dg->dg_nsectors = 1024 * (1024 / dg->dg_secsize); + dg->dg_nsectors = (1024 * 1024) / dg->dg_secsize; dg->dg_ncylinders = dg->dg_secperunit / dg->dg_nsectors; bufq_alloc(&sc->sc_dksc.sc_bufq, "fcfs", 0); @@ -693,10 +692,10 @@ xbd_backend_changed(void *arg, XenbusSta hypervisor_unmask_event(sc->sc_evtchn); format_bytes(buf, uimin(9, sizeof(buf)), - sc->sc_sectors * XEN_BSIZE); + sc->sc_sectors * dg->dg_secsize); aprint_normal_dev(sc->sc_dksc.sc_dev, "%s, %d bytes/sect x %" PRIu64 " sectors\n", - buf, (int)dg->dg_secsize, sc->sc_xbdsize); + buf, (int)dg->dg_secsize, sc->sc_sectors); snprintb(buf, sizeof(buf), BLKIF_FEATURE_BITS, sc->sc_features); aprint_normal_dev(sc->sc_dksc.sc_dev, @@ -739,14 +738,6 @@ xbd_connect(struct xbd_xenbus_softc *sc) panic("%s: can't read number from %s/virtual-device\n", device_xname(sc->sc_dksc.sc_dev), sc->sc_xbusd->xbusd_otherend); - err = xenbus_read_ull(NULL, - sc->sc_xbusd->xbusd_otherend, "sectors", §ors, 10); - if (err) - panic("%s: can't read number from %s/sectors\n", - device_xname(sc->sc_dksc.sc_dev), - sc->sc_xbusd->xbusd_otherend); - sc->sc_sectors = sectors; - err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend, "info", &sc->sc_info, 10); if (err) @@ -760,6 +751,14 @@ xbd_connect(struct xbd_xenbus_softc *sc) device_xname(sc->sc_dksc.sc_dev), sc->sc_xbusd->xbusd_otherend); + err = xenbus_read_ull(NULL, + sc->sc_xbusd->xbusd_otherend, "sectors", §ors, 10); + if (err) + panic("%s: can't read number from %s/sectors\n", + device_xname(sc->sc_dksc.sc_dev), + sc->sc_xbusd->xbusd_otherend); + sc->sc_sectors = sectors * (uint64_t)XEN_BSIZE / sc->sc_secsize; + xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateConnected); } @@ -839,6 +838,8 @@ again: bp, (long)bp->b_bcount)); if (bp->b_error != 0 || rep->status != BLKIF_RSP_OKAY) { + DPRINTF(("%s: error %d status %d\n", __func__, + bp->b_error, rep->status)); bp->b_error = EIO; bp->b_resid = bp->b_bcount; } @@ -1137,7 +1138,7 @@ xbd_diskstart(device_t self, struct buf goto out; } - if (bp->b_rawblkno < 0 || bp->b_rawblkno > sc->sc_xbdsize) { + if (bp->b_rawblkno < 0 || bp->b_rawblkno > sc->sc_sectors) { /* invalid block number */ error = EINVAL; goto out; @@ -1173,7 +1174,7 @@ xbd_diskstart(device_t self, struct buf bp->b_resid = bp->b_bcount; xbdreq->req_bp = bp; xbdreq->req_data = bp->b_data; - if (__predict_false((vaddr_t)bp->b_data & (XEN_BSIZE - 1))) { + if (__predict_false((vaddr_t)bp->b_data & (sc->sc_secsize - 1))) { if (__predict_false(xbd_map_align(sc, xbdreq) != 0)) { DPRINTF(("xbd_diskstart: no align\n")); error = EAGAIN; @@ -1274,8 +1275,12 @@ xbd_diskstart_submit(struct xbd_xenbus_s req->id = req_id; req->operation = bp->b_flags & B_READ ? BLKIF_OP_READ : BLKIF_OP_WRITE; - req->sector_number = bp->b_rawblkno + (start >> XEN_BSHIFT); + req->sector_number = (bp->b_rawblkno * sc->sc_secsize / XEN_BSIZE) + + (start >> XEN_BSHIFT); req->handle = sc->sc_handle; + DPRINTF(("%s: id %" PRIu64 " op %d sn %" PRIu64 " handle %d\n", + __func__, req->id, req->operation, req->sector_number, + req->handle)); size = uimin(bp->b_bcount - start, XBD_MAX_CHUNK); for (dmaseg = 0; dmaseg < dmamap->dm_nsegs && size > 0; dmaseg++) { @@ -1295,9 +1300,9 @@ xbd_diskstart_submit(struct xbd_xenbus_s } size -= nbytes; - KASSERT(((ma & PAGE_MASK) & (XEN_BSIZE - 1)) == 0); - KASSERT((nbytes & (XEN_BSIZE - 1)) == 0); - KASSERT((size & (XEN_BSIZE - 1)) == 0); + KASSERT(((ma & PAGE_MASK) & (sc->sc_secsize - 1)) == 0); + KASSERT((nbytes & (sc->sc_secsize - 1)) == 0); + KASSERT((size & (sc->sc_secsize - 1)) == 0); first_sect = (ma & PAGE_MASK) >> XEN_BSHIFT; nsects = nbytes >> XEN_BSHIFT; @@ -1306,6 +1311,8 @@ xbd_diskstart_submit(struct xbd_xenbus_s reqseg->last_sect = first_sect + nsects - 1; KASSERT(reqseg->first_sect <= reqseg->last_sect); KASSERT(reqseg->last_sect < (PAGE_SIZE / XEN_BSIZE)); + DPRINTF(("%s: seg %d fs %d ls %d\n", __func__, segidx, + reqseg->first_sect, reqseg->last_sect)); reqseg->gref = gntref[dmaseg]; } @@ -1331,8 +1338,11 @@ xbd_diskstart_submit_indirect(struct xbd req->operation = BLKIF_OP_INDIRECT; req->indirect_op = bp->b_flags & B_READ ? BLKIF_OP_READ : BLKIF_OP_WRITE; - req->sector_number = bp->b_rawblkno; + req->sector_number = bp->b_rawblkno * sc->sc_secsize / XEN_BSIZE; req->handle = sc->sc_handle; + DPRINTF(("%s: id %" PRIu64 " op %d sn %" PRIu64 " handle %d\n", + __func__, req->id, req->indirect_op, req->sector_number, + req->handle)); xbdreq->req_indirect = SLIST_FIRST(&sc->sc_indirect_head); KASSERT(xbdreq->req_indirect != NULL); /* always as many as reqs */ @@ -1346,12 +1356,17 @@ xbd_diskstart_submit_indirect(struct xbd ma = ds->ds_addr; nbytes = ds->ds_len; + KASSERT(((ma & PAGE_MASK) & (sc->sc_secsize - 1)) == 0); + KASSERT((nbytes & (sc->sc_secsize - 1)) == 0); + first_sect = (ma & PAGE_MASK) >> XEN_BSHIFT; nsects = nbytes >> XEN_BSHIFT; reqseg->first_sect = first_sect; reqseg->last_sect = first_sect + nsects - 1; reqseg->gref = xbdreq->req_gntref[dmaseg]; + DPRINTF(("%s: seg %d fs %d ls %d\n", __func__, dmaseg, + reqseg->first_sect, reqseg->last_sect)); KASSERT(reqseg->first_sect <= reqseg->last_sect); KASSERT(reqseg->last_sect < (PAGE_SIZE / XEN_BSIZE)); @@ -1367,12 +1382,6 @@ xbd_diskstart_submit_indirect(struct xbd static int xbd_map_align(struct xbd_xenbus_softc *sc, struct xbd_req *req) { - /* - * Only can get here if this is physio() request, block I/O - * uses DEV_BSIZE-aligned buffers. - */ - KASSERT((req->req_bp->b_flags & B_PHYS) != 0); - sc->sc_cnt_map_unalign.ev_count++; if (sc->sc_unalign_used) {