On Friday 25 June 2010 20:01:42 Garrett Cooper wrote:
> On Fri, Jun 25, 2010 at 9:52 AM, David Naylor <naylor.b.da...@gmail.com> 
wrote:
> > Hi,
> > 
> > I've created a patch that increases the performance of mtree.  This is of
> > particular use during a port install.  In an extreme case I have
> > experienced a ~20% increase [1].
> > 
> > For a full discussion see PR bin/143732.  This arose out of [2] where I
> > experienced the increase.
> > 
> > For your convenience I have attached the patch.
> > 
> > Please review this patch and if it is acceptable, commit it.
> > 
> > Regards,
> > 
> > David
> > 
> > 1] http://markmail.org/message/iju3l6hyv7s7emrb
> > 2] http://markmail.org/message/gfztjpszl5dozzii
> 
> Hmm... this has other interesting applications other than just ports,
> but unfortunately pkg_install won't really feel as much of a
> performance boost (because it uses mtree -e -U when +MTREE exists in
> the package).
> 
> Comments follow.
> Thanks!
> -Garrett

I've included some more changes.  I do not think -[uU] needs to be set when 
skipping vwalk.  I also move sflag so it will get called even if eflag is set 
(bug in my patch).  

I'm not sure sflag will produce the same results as check() and vwalk() may 
call compare() in different orders?  Will this impact on crc?

> --- /usr/src/usr.sbin/mtree/verify.c  2010-02-07 15:07:28.000000000 +0200
> +++ verify.c  2010-02-07 15:04:10.000000000 +0200
> @@ -50,17 +50,23 @@
>  static NODE *root;
>  static char path[MAXPATHLEN];
> 
> -static void  miss(NODE *, char *);
> +static int   miss(NODE *, char *);
> +static int   check(NODE *, char *);
>  static int   vwalk(void);
> 
>  int
>  mtree_verifyspec(FILE *fi)
>  {
> -     int rval;
> +     int rval = 0;
> 
>       root = mtree_readspec(fi);
> -     rval = vwalk();
> -     miss(root, path);
> +     /*
> +      * No need to walk entire tree if we are only updating the structure
> +      * and extra files are ignored.
> +      */
> +     if (!(uflag && eflag))
> +             rval = vwalk();
> 
> gcooper> This is where the performance boost is coming from as you're
> not walking the directory tree, correct?

Yes, if an update is requested without reporting extra files.  In some cases 
with a well populated /usr/local (and a slowish HDD) it could take some time.  

I think it is possible to even use this optimisation if uflag is not set?  

> +     rval |= miss(root, path);
>       return (rval);
>  }
> 
> @@ -155,15 +161,47 @@
>       return (rval);
>  }
> 
> -static void
> +static int
> +check(NODE *p, char *tail)
> +{
> +     FTSENT fts;
> +     struct stat fts_stat;
> +
> +     strcpy(tail, p->name);
> 
> gcooper> Dangerous. Please use strlcpy with appropriate bounds.

This is the same code used in miss.c:171.  

It appears the code assumes that a path a mtree file describes will not exceed 
MAXPATHLEN and does not check to see if the buffer overflows.  I could create a 
patch to fix that.  Perhaps a separate patch?

> +     /*
> +      * It is assumed that compare() only requires fts_accpath and fts_statp
> +      * fields in the FTSENT structure.
> +      */
> +     fts.fts_accpath = path;
> +     fts.fts_statp = &fts_stat;
> +
> +     if (stat(path, fts.fts_statp))
> +             return (0);
> 
> gcooper> What about symlink functionality? lstat(2)?

Fixed.  This should now be consistent with fts_read.  

> +     p->flags |= F_VISIT;
> +     if ((p->flags & F_NOCHANGE) == 0 && compare(p->name, p, &fts))
> +             return (MISMATCHEXIT);
> +     else
> +             return (0);
> +
> +     /*
> +      * tail is not restored to '\0' as the next time tail (or path) is used
> +      * is with a strcpy (thus overriding the '\0').  See +19 lines below.
> +      */
> +}
> +
> +static int
>  miss(NODE *p, char *tail)
>  {
>       int create;
>       char *tp;
>       const char *type, *what;
> -     int serr;
> +     int serr, rval = 0;
> 
> gcooper> This isn't correct as per-style(9). Please do:
> gcooper>
> gcooper> int rval = 0;
> gcooper> int serr;
> gcooper>
> gcooper> This reduces diff churn and is more style(9)-istically correct.

I didn't know that.  Good point.  

>       for (; p; p = p->next) {
> +             if (uflag && eflag)
> +                     rval |= check(p, tail);
>               if (p->flags & F_OPT && !(p->flags & F_VISIT))
>                       continue;
>               if (p->type != F_DIR && (dflag || p->flags & F_VISIT))
> @@ -256,4 +294,5 @@
>                       (void)printf("%s: file flags not set: %s\n",
>                           path, strerror(errno));
>       }
> +     return (rval);
>  }
--- mtree/verify.c	2010-05-27 13:47:20.000000000 +0200
+++ verify.c	2010-06-25 22:04:40.000000000 +0200
@@ -50,17 +50,25 @@
 static NODE *root;
 static char path[MAXPATHLEN];
 
-static void	miss(NODE *, char *);
+static int	miss(NODE *, char *);
+static int	check(NODE *, char *);
 static int	vwalk(void);
 
 int
 mtree_verifyspec(FILE *fi)
 {
-	int rval;
+	int rval = 0;
 
 	root = mtree_readspec(fi);
-	rval = vwalk();
-	miss(root, path);
+	/* No need to walk tree if we are ignoring extra files. */
+	if (!eflag)
+		rval = vwalk();
+	rval |= miss(root, path);
+
+	/* Called here to make sure vwalk() and check() have been called */
+	if (sflag)
+		warnx("%s checksum: %lu", fullpath, (unsigned long)crc_total);
+
 	return (rval);
 }
 
@@ -135,35 +143,72 @@
 		if (ep)
 			continue;
 extra:
-		if (!eflag) {
-			(void)printf("%s extra", RP(p));
-			if (rflag) {
-				if ((S_ISDIR(p->fts_statp->st_mode)
-				    ? rmdir : unlink)(p->fts_accpath)) {
-					(void)printf(", not removed: %s",
-					    strerror(errno));
-				} else
-					(void)printf(", removed");
-			}
-			(void)putchar('\n');
+		(void)printf("%s extra", RP(p));
+		if (rflag) {
+			if ((S_ISDIR(p->fts_statp->st_mode)
+			    ? rmdir : unlink)(p->fts_accpath)) {
+				(void)printf(", not removed: %s",
+				    strerror(errno));
+			} else
+				(void)printf(", removed");
 		}
+		(void)putchar('\n');
 		(void)fts_set(t, p, FTS_SKIP);
 	}
 	(void)fts_close(t);
-	if (sflag)
-		warnx("%s checksum: %lu", fullpath, (unsigned long)crc_total);
 	return (rval);
 }
 
-static void
+static int
+check(NODE *p, char *tail)
+{
+	FTSENT fts;
+	struct stat fts_stat;
+
+	strcpy(tail, p->name);
+
+	/*
+	 * It is assumed that compare() only requires fts_accpath and fts_statp
+	 * fields in the FTSENT structure.
+	 */
+	fts.fts_accpath = path;
+	fts.fts_statp = &fts_stat;
+
+	if (ftsoptions & FTS_LOGICAL) {
+		if (stat(path, fts.fts_statp) || lstat(path, fts.fts_statp))
+			return (0);
+	} else if (lstat(path, fts.fts_statp))
+		return (0);
+
+	p->flags |= F_VISIT;
+	if ((p->flags & F_NOCHANGE) == 0 && compare(p->name, p, &fts))
+		return (MISMATCHEXIT);
+	else
+		return (0);
+
+	/*
+	 * tail is not restored to '\0' as the next time tail (or path) is used
+	 * is with a strcpy (thus overriding the '\0').  See +25 lines below.
+	 */
+}
+
+static int
 miss(NODE *p, char *tail)
 {
 	int create;
 	char *tp;
 	const char *type, *what;
+	int rval = 0;
 	int serr;
 
 	for (; p; p = p->next) {
+		/*
+		 * If check() needs to be called (eflag set) and dflag is set 
+		 * then only call for directories (as we are ignoring 
+		 * everything else) otherwise always call.
+		 */
+		if (eflag && (!dflag || (p->type == F_DIR)))
+			rval |= check(p, tail);
 		if (p->flags & F_OPT && !(p->flags & F_VISIT))
 			continue;
 		if (p->type != F_DIR && (dflag || p->flags & F_VISIT))
@@ -256,4 +301,5 @@
 			(void)printf("%s: file flags not set: %s\n",
 			    path, strerror(errno));
 	}
+	return (rval);
 }

Attachment: signature.asc
Description: This is a digitally signed message part.

Reply via email to