On Mon, Mar 15, 2010 at 9:13 PM, Seth Falcon <s...@userprimary.net> wrote:

> I'm curious to know if this is a problem you have encountered while using R.
>
> My initial thought is that there isn't much benefit of making this part
> of the code smarter.  If your patch simplifies things, I'd be more
> interested.
>
> + seth
>

Yes. I had noticed that R occasionally segfaults (especially when I
run many concurrent R processes), so I used valgrind to log every use
of R. In the valgrind logs, I tracked the problem to list_files().

I attached a patch to platform.c (for trunk). Unfortunately, I am
having trouble building R from the subversion trunk--it is taking a
very long time decompressing/installing the recommended packages--so I
haven't been able to verify the fix yet. But my version of platform.c
does compile, and it does simplify the code b/c count_files() is no
longer needed.
--- platform.c~	2010-03-17 09:57:26.268110062 -0400
+++ platform.c	2010-03-17 09:56:09.346917055 -0400
@@ -871,61 +871,10 @@
 
 #include <tre/tre.h>
 
-static void count_files(const char *dnp, int *count,
-			Rboolean allfiles, Rboolean recursive,
-			const regex_t *reg)
-{
-    DIR *dir;
-    struct dirent *de;
-    char p[PATH_MAX];
-#ifdef Windows
-    /* For consistency with other functions */
-    struct _stati64 sb;
-#else
-    struct stat sb;
-#endif
-
-    if (strlen(dnp) >= PATH_MAX)  /* should not happen! */
-	error(_("directory/folder path name too long"));
-    if ((dir = opendir(dnp)) == NULL) {
-	warning(_("list.files: '%s' is not a readable directory"), dnp);
-    } else {
-	while ((de = readdir(dir))) {
-	    if (allfiles || !R_HiddenFile(de->d_name)) {
-		if (recursive) {
-#ifdef Win32
-		    if (strlen(dnp) == 2 && dnp[1] == ':')
-			snprintf(p, PATH_MAX, "%s%s", dnp, de->d_name);
-		    else
-			snprintf(p, PATH_MAX, "%s%s%s", dnp, R_FileSep, de->d_name);
-#else
-		    snprintf(p, PATH_MAX, "%s%s%s", dnp, R_FileSep, de->d_name);
-#endif		    
-#ifdef Windows
-		    _stati64(p, &sb);
-#else
-		    stat(p, &sb);
-#endif
-		    if ((sb.st_mode & S_IFDIR) > 0) {
-			if (strcmp(de->d_name, ".") && strcmp(de->d_name, ".."))
-				count_files(p, count, allfiles, recursive, reg);
-			continue;
-		    }
-		}
-		if (reg) {
-		    if (tre_regexec(reg, de->d_name, 0, NULL, 0) == 0) (*count)++;
-		} else (*count)++;
-	    }
-	}
-	closedir(dir);
-    }
-}
-
 static void list_files(const char *dnp, const char *stem, int *count, SEXP ans,
 		       Rboolean allfiles, Rboolean recursive,
-                       const regex_t *reg)
+		       int pattern, regex_t reg, int *countmax, PROTECT_INDEX *idx)
 {
-    int ans_len = length(ans);
     DIR *dir;
     struct dirent *de;
     char p[PATH_MAX], stem2[PATH_MAX];
@@ -971,20 +920,28 @@
 			    } else
 				strcpy(stem2, de->d_name);
 			    list_files(p, stem2, count, ans, allfiles,
-				       recursive, reg);
+				       recursive, pattern, reg, countmax, idx);
 			}
 			continue;
 		    }
 		}
-                /* number of files could have changed since call to count_files */
-                if (*count >= ans_len) break;
-		if (reg) {
-		    if (tre_regexec(reg, de->d_name, 0, NULL, 0) == 0)
+		if (pattern) {
+                    if (tre_regexec(&reg, de->d_name, 0, NULL, 0) == 0) {
+                        if (*count == *countmax - 1) {
+                            *countmax *= 2;
+                            REPROTECT(ans = lengthgets(ans, *countmax), *idx);
+                        }
 			SET_STRING_ELT(ans, (*count)++,
 				       filename(stem, de->d_name));
-		} else
+                    }
+		} else {
+                    if (*count == *countmax - 1) {
+                        *countmax *= 2;
+                        REPROTECT(ans = lengthgets(ans, *countmax), *idx);
+                    }
 		    SET_STRING_ELT(ans, (*count)++,
 				   filename(stem, de->d_name));
+                }
 	    }
 	}
 	closedir(dir);
@@ -993,11 +950,13 @@
 
 SEXP attribute_hidden do_listfiles(SEXP call, SEXP op, SEXP args, SEXP rho)
 {
+    PROTECT_INDEX ipx;
     SEXP d, p, ans;
     int allfiles, fullnames, count, pattern, recursive, igcase, flags;
     int i, ndir;
     const char *dnp;
     regex_t reg;
+    int countmax = 100;
 
     checkArity(op, args);
     d = CAR(args);  args = CDR(args);
@@ -1019,19 +978,13 @@
 
     if (pattern && tre_regcomp(&reg, translateChar(STRING_ELT(p, 0)), flags))
 	error(_("invalid 'pattern' regular expression"));
-    count = 0;
-    for (i = 0; i < ndir ; i++) {
-	if (STRING_ELT(d, i) == NA_STRING) continue;
-	dnp = R_ExpandFileName(translateChar(STRING_ELT(d, i)));
-	count_files(dnp, &count, allfiles, recursive, pattern ? &reg : NULL);
-    }
-    PROTECT(ans = allocVector(STRSXP, count));
+    PROTECT_WITH_INDEX(ans = allocVector(STRSXP, count), &ipx);
     count = 0;
     for (i = 0; i < ndir ; i++) {
 	if (STRING_ELT(d, i) == NA_STRING) continue;
 	dnp = R_ExpandFileName(translateChar(STRING_ELT(d, i)));
 	list_files(dnp, fullnames ? dnp : NULL, &count, ans, allfiles,
-		   recursive, pattern ? &reg : NULL);
+		   recursive, pattern, reg, &countmax, &ipx);
     }
     if (pattern)
 	tre_regfree(&reg);
______________________________________________
R-devel@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-devel

Reply via email to