On 28/04/18(Sat) 16:55, Martin Pieuchot wrote:
> On 26/04/18(Thu) 23:06, Landry Breuil wrote:
> > On Thu, Apr 26, 2018 at 10:01:25PM +0200, Martin Pieuchot wrote:
> > > This is the 4th attempt to implement clustering for msdosfs. See [0]
> > > for the previous story.
> > >
> > > This version implements the strict necessary to read a maximum of 64k
> > > per read. Unlike the previous version we no longer mix cluster and
> > > block numbers.
> > >
> > > This speeds up msdosfs reads significantly. Regress tests are passing.
> > >
> > > I'd appreciate more tests and reviews.
> >
> > Well, i see a 2.5x dropout in perfs.. rsyncing a 100mb dir from the same
> > usb key:
> >
> > sd1 at scsibus4 targ 1 lun 0: <Kingston, DataTraveler G2, 1.00> SCSI2
> > 0/direct removable serial.09511624F0417921125B
> > sd1: 3689MB, 512 bytes/sector, 7555528 sectors
> >
> > without the diff:
> > sent 101,634,811 bytes received 153 bytes 2,540,870.35 bytes/sec
> >
> > with the diff:
> > sent 101,634,811 bytes received 153 bytes 936,727.78 bytes/sec
> > sent 101,634,811 bytes received 153 bytes 928,173.19 bytes/sec
> >
> > To be analysed ...
>
> Thanks to landry I now understand why we cannot keep using "block numbers"
> as logical value to index buffers.
>
> The VFS clustering code assume that the next logical block number correspond
> to the next block on disk. In the case of FAT it is true for cluster.
>
> So I believe we should commit the diff below.
Here's an updated version that addresses some comments from krw@.
Mainly use 'uint32_t' for cluster numbers, 'daddr_t' should be kept
for DEV_BSIZE values.
Still ok?
Index: msdosfs/denode.h
===================================================================
RCS file: /cvs/src/sys/msdosfs/denode.h,v
retrieving revision 1.32
diff -u -p -r1.32 denode.h
--- msdosfs/denode.h 13 Jun 2017 18:13:18 -0000 1.32
+++ msdosfs/denode.h 28 Apr 2018 14:47:10 -0000
@@ -142,7 +142,6 @@ struct denode {
struct vnode *de_devvp; /* vnode of blk dev we live on */
uint32_t de_flag; /* flag bits */
dev_t de_dev; /* device where direntry lives */
- daddr_t de_lastr;
uint32_t de_dirclust; /* cluster of the directory file containing
this entry */
uint32_t de_diroffset; /* offset of this entry in the directory
cluster */
uint32_t de_fndoffset; /* offset of found dir entry */
Index: msdosfs/msdosfs_fat.c
===================================================================
RCS file: /cvs/src/sys/msdosfs/msdosfs_fat.c,v
retrieving revision 1.31
diff -u -p -r1.31 msdosfs_fat.c
--- msdosfs/msdosfs_fat.c 30 Dec 2017 20:47:00 -0000 1.31
+++ msdosfs/msdosfs_fat.c 28 Apr 2018 14:47:10 -0000
@@ -1020,14 +1020,12 @@ extendfile(struct denode *dep, uint32_t
bp = getblk(pmp->pm_devvp, cntobn(pmp,
cn++),
pmp->pm_bpcluster, 0, 0);
else {
- bp = getblk(DETOV(dep), de_cn2bn(pmp,
frcn++),
+ bp = getblk(DETOV(dep), frcn++,
pmp->pm_bpcluster, 0, 0);
/*
* Do the bmap now, as in msdosfs_write
*/
- if (pcbmap(dep,
- de_bn2cn(pmp, bp->b_lblkno),
- &bp->b_blkno, 0, 0))
+ if (pcbmap(dep, bp->b_lblkno,
&bp->b_blkno, 0, 0))
bp->b_blkno = -1;
if (bp->b_blkno == -1)
panic("extendfile: pcbmap");
Index: msdosfs/msdosfs_vnops.c
===================================================================
RCS file: /cvs/src/sys/msdosfs/msdosfs_vnops.c,v
retrieving revision 1.118
diff -u -p -r1.118 msdosfs_vnops.c
--- msdosfs/msdosfs_vnops.c 28 Apr 2018 03:13:05 -0000 1.118
+++ msdosfs/msdosfs_vnops.c 29 Apr 2018 14:01:02 -0000
@@ -78,13 +78,13 @@
static uint32_t fileidhash(uint64_t);
+int msdosfs_bmaparray(struct vnode *, uint32_t, daddr_t *, int *);
int msdosfs_kqfilter(void *);
int filt_msdosfsread(struct knote *, long);
int filt_msdosfswrite(struct knote *, long);
int filt_msdosfsvnode(struct knote *, long);
void filt_msdosfsdetach(struct knote *);
-
/*
* Some general notes:
*
@@ -510,18 +510,15 @@ int
msdosfs_read(void *v)
{
struct vop_read_args *ap = v;
- int error = 0;
- uint32_t diff;
- int blsize;
- int isadir;
- uint32_t n;
- long on;
- daddr_t lbn, rablock, rablkno;
- struct buf *bp;
struct vnode *vp = ap->a_vp;
struct denode *dep = VTODE(vp);
struct msdosfsmount *pmp = dep->de_pmp;
struct uio *uio = ap->a_uio;
+ int isadir, error = 0;
+ uint32_t n, diff, size, on;
+ struct buf *bp;
+ uint32_t cn;
+ daddr_t bn;
/*
* If they didn't ask for any data, then we are done.
@@ -536,7 +533,8 @@ msdosfs_read(void *v)
if (uio->uio_offset >= dep->de_FileSize)
return (0);
- lbn = de_cluster(pmp, uio->uio_offset);
+ cn = de_cluster(pmp, uio->uio_offset);
+ size = pmp->pm_bpcluster;
on = uio->uio_offset & pmp->pm_crbomask;
n = ulmin(pmp->pm_bpcluster - on, uio->uio_resid);
@@ -549,31 +547,22 @@ msdosfs_read(void *v)
if (diff < n)
n = diff;
- /* convert cluster # to block # if a directory */
- if (isadir) {
- error = pcbmap(dep, lbn, &lbn, 0, &blsize);
- if (error)
- return (error);
- }
/*
* If we are operating on a directory file then be sure to
* do i/o with the vnode for the filesystem instead of the
* vnode for the directory.
*/
if (isadir) {
- error = bread(pmp->pm_devvp, lbn, blsize, &bp);
+ /* convert cluster # to block # */
+ error = pcbmap(dep, cn, &bn, 0, &size);
+ if (error)
+ return (error);
+ error = bread(pmp->pm_devvp, bn, size, &bp);
} else {
- rablock = lbn + 1;
- rablkno = de_cn2bn(pmp, rablock);
- if (dep->de_lastr + 1 == lbn &&
- de_cn2off(pmp, rablock) < dep->de_FileSize)
- error = breadn(vp, de_cn2bn(pmp, lbn),
- pmp->pm_bpcluster, &rablkno,
- &pmp->pm_bpcluster, 1, &bp);
+ if (de_cn2off(pmp, cn + 1) >= dep->de_FileSize)
+ error = bread(vp, cn, size, &bp);
else
- error = bread(vp, de_cn2bn(pmp, lbn),
- pmp->pm_bpcluster, &bp);
- dep->de_lastr = lbn;
+ error = bread_cluster(vp, cn, size, &bp);
}
n = min(n, pmp->pm_bpcluster - bp->b_resid);
if (error) {
@@ -601,8 +590,7 @@ msdosfs_write(void *v)
int extended = 0;
uint32_t osize;
int error = 0;
- uint32_t count, lastcn;
- daddr_t bn;
+ uint32_t count, lastcn, cn;
struct buf *bp;
int ioflag = ap->a_ioflag;
struct uio *uio = ap->a_uio;
@@ -679,30 +667,30 @@ msdosfs_write(void *v)
lastcn = de_clcount(pmp, osize) - 1;
do {
- if (de_cluster(pmp, uio->uio_offset) > lastcn) {
+ croffset = uio->uio_offset & pmp->pm_crbomask;
+ cn = de_cluster(pmp, uio->uio_offset);
+
+ if (cn > lastcn) {
error = ENOSPC;
break;
}
- bn = de_blk(pmp, uio->uio_offset);
- if ((uio->uio_offset & pmp->pm_crbomask) == 0
- && (de_blk(pmp, uio->uio_offset + uio->uio_resid) >
de_blk(pmp, uio->uio_offset)
- || uio->uio_offset + uio->uio_resid >=
dep->de_FileSize)) {
+ if (croffset == 0 &&
+ (de_cluster(pmp, uio->uio_offset + uio->uio_resid) > cn ||
+ (uio->uio_offset + uio->uio_resid) >= dep->de_FileSize)) {
/*
* If either the whole cluster gets written,
* or we write the cluster from its start beyond EOF,
* then no need to read data from disk.
*/
- bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0);
+ bp = getblk(thisvp, cn, pmp->pm_bpcluster, 0, 0);
clrbuf(bp);
/*
* Do the bmap now, since pcbmap needs buffers
* for the fat table. (see msdosfs_strategy)
*/
if (bp->b_blkno == bp->b_lblkno) {
- error = pcbmap(dep,
- de_bn2cn(pmp, bp->b_lblkno),
- &bp->b_blkno, 0, 0);
+ error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno,
0, 0);
if (error)
bp->b_blkno = -1;
}
@@ -714,16 +702,16 @@ msdosfs_write(void *v)
}
} else {
/*
- * The block we need to write into exists, so read it
in.
+ * The block we need to write into exists, so
+ * read it in.
*/
- error = bread(thisvp, bn, pmp->pm_bpcluster, &bp);
+ error = bread(thisvp, cn, pmp->pm_bpcluster, &bp);
if (error) {
brelse(bp);
break;
}
}
- croffset = uio->uio_offset & pmp->pm_crbomask;
n = ulmin(uio->uio_resid, pmp->pm_bpcluster - croffset);
if (uio->uio_offset + n > dep->de_FileSize) {
dep->de_FileSize = uio->uio_offset + n;
@@ -1756,7 +1744,7 @@ msdosfs_islocked(void *v)
/*
* vp - address of vnode file the file
- * bn - which cluster we are interested in mapping to a filesystem block
number.
+ * bn - which cluster we are interested in mapping to a filesystem block
number
* vpp - returns the vnode for the block special file holding the filesystem
* containing the file of interest
* bnp - address of where to return the filesystem relative block number
@@ -1766,19 +1754,56 @@ msdosfs_bmap(void *v)
{
struct vop_bmap_args *ap = v;
struct denode *dep = VTODE(ap->a_vp);
- struct msdosfsmount *pmp = dep->de_pmp;
+ uint32_t cn;
if (ap->a_vpp != NULL)
*ap->a_vpp = dep->de_devvp;
if (ap->a_bnp == NULL)
return (0);
- if (ap->a_runp) {
+
+ cn = ap->a_bn;
+ if (cn != ap->a_bn)
+ return (EFBIG);
+
+ return (msdosfs_bmaparray(ap->a_vp, cn, ap->a_bnp, ap->a_runp));
+}
+
+int
+msdosfs_bmaparray(struct vnode *vp, uint32_t cn, daddr_t *bnp, int *runp)
+{
+ struct denode *dep = VTODE(vp);
+ struct msdosfsmount *pmp = dep->de_pmp;
+ struct mount *mp;
+ int error, maxrun = 0, run;
+ daddr_t runbn;
+
+ mp = vp->v_mount;
+
+ if (runp) {
/*
- * Sequential clusters should be counted here.
+ * XXX
+ * If MAXBSIZE is the largest transfer the disks can handle,
+ * we probably want maxrun to be 1 block less so that we
+ * don't create a block larger than the device can handle.
*/
- *ap->a_runp = 0;
+ *runp = 0;
+ maxrun = min(MAXBSIZE / mp->mnt_stat.f_iosize - 1,
+ pmp->pm_maxcluster - cn);
}
- return (pcbmap(dep, de_bn2cn(pmp, ap->a_bn), ap->a_bnp, 0, 0));
+
+ if ((error = pcbmap(dep, cn, bnp, 0, 0)) != 0)
+ return (error);
+
+ for (run = 1; run <= maxrun; run++) {
+ error = pcbmap(dep, cn + run, &runbn, 0, 0);
+ if (error != 0 || (runbn != *bnp + de_cn2bn(pmp, run)))
+ break;
+ }
+
+ if (runp)
+ *runp = run - 1;
+
+ return (0);
}
int
@@ -1800,8 +1825,7 @@ msdosfs_strategy(void *v)
* don't allow files with holes, so we shouldn't ever see this.
*/
if (bp->b_blkno == bp->b_lblkno) {
- error = pcbmap(dep, de_bn2cn(dep->de_pmp, bp->b_lblkno),
- &bp->b_blkno, 0, 0);
+ error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno, 0, 0);
if (error)
bp->b_blkno = -1;
if (bp->b_blkno == -1)