Hi Noah, If possible, please add a commit message and send the patch using git send-email or git format-patch. Feel free to use the description of https://sourceware.org/bugzilla/show_bug.cgi?id=27711#c0 that Frank wrote as the commit message. It clearly explains the intention.
On Thu, 2021-07-01 at 16:38 -0400, Noah Sanci via Elfutils-devel wrote: > diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog > index 286c910a..5afbafcd 100644 > --- a/debuginfod/ChangeLog > +++ b/debuginfod/ChangeLog > @@ -1,3 +1,8 @@ > +2021-07-01 Noah Sanci <nsa...@redhat.com> > + PR27711 > + * debuginfod.cxx (groom): Allowed the use of regexes during the > + grooming process. Slightly better would be to describe the changes, e.g.: (options): Add --regex-groom, -r option. (regex_groom): New static bool defaults to false. (parse_opt): Handle 'r' option by setting regex_groom to true. (groom): Introduce and use reg_include and reg_exclude. > 2021-06-03 Frank Ch. Eigler <f...@redhat.com> > > PR27863 > diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx > index 543044c6..4f7fd2d5 100644 > --- a/debuginfod/debuginfod.cxx > +++ b/debuginfod/debuginfod.cxx > @@ -360,6 +360,7 @@ static const struct argp_option options[] = > { "database", 'd', "FILE", 0, "Path to sqlite database.", 0 }, > { "ddl", 'D', "SQL", 0, "Apply extra sqlite ddl/pragma to connection.", 0 > }, > { "verbose", 'v', NULL, 0, "Increase verbosity.", 0 }, > + { "regex-groom", 'r', NULL, 0,"Uses regexes from -I and -X > arguments to groom the database.",0}, > #define ARGP_KEY_FDCACHE_FDS 0x1001 > { "fdcache-fds", ARGP_KEY_FDCACHE_FDS, "NUM", 0, "Maximum number > of archive files to keep in fdcache.", 0 }, > #define ARGP_KEY_FDCACHE_MBS 0x1002 > @@ -407,6 +408,7 @@ static map<string,string> scan_archives; > static vector<string> extra_ddl; > static regex_t file_include_regex; > static regex_t file_exclude_regex; > +static bool regex_groom = false; > static bool traverse_logical; > static long fdcache_fds; > static long fdcache_mbs; > @@ -527,6 +529,9 @@ parse_opt (int key, char *arg, > if (rc != 0) > argp_failure(state, 1, EINVAL, "regular expression"); > break; > + case 'r': > + regex_groom = true; > + break; > case ARGP_KEY_FDCACHE_FDS: > fdcache_fds = atol (arg); > break; > @@ -3249,8 
+3254,11 @@ void groom() > int64_t fileid = sqlite3_column_int64 (files, 1); > const char* filename = ((const char*) sqlite3_column_text > (files, 2) ?: ""); > struct stat s; > + bool reg_include = !regexec (&file_include_regex, filename, 0, 0, 0); > + bool reg_exclude = !regexec (&file_exclude_regex, filename, 0, 0, 0); > + > rc = stat(filename, &s); > - if (rc < 0 || (mtime != (int64_t) s.st_mtime)) > + if ( (regex_groom && reg_exclude && !reg_include) || rc < 0 || > (mtime != (int64_t) s.st_mtime) ) > { OK, so we groom the file as before when rc < 0 || (mtime != (int64_t) s.st_mtime), but also (if -r is given) if the file matches the exclude regexp but not the include one. So if I read this right, an exclude regexp match means groom that file, but an include regexp match means don't groom (except if it disappeared by itself). > if (verbose > 2) > obatched(clog) << "groom: forgetting file=" << filename > << " mtime=" << mtime << endl; > @@ -3261,7 +3269,6 @@ void groom() > } > else > inc_metric("groomed_total", "decision", "fresh"); > - > if (sigusr1 != forced_rescan_count) // stop early if scan triggered > break; > } Spurious line removed? > diff --git a/doc/debuginfod.8 b/doc/debuginfod.8 > index 1ba42cf6..1adf703a 100644 > --- a/doc/debuginfod.8 > +++ b/doc/debuginfod.8 > @@ -159,6 +159,9 @@ scan, independent of the rescan time (including if > it was zero), > interrupting a groom pass (if any). > > .TP > +.B "\-r" > +Apply the -I and -X during groom cycles, so that files excluded by > the regexes are removed from the index. These parameters are in > addition to what normally qualifies a file for grooming, not a > replacement. > + OK. That matches my reading of the code. Good. > .B "\-g SECONDS" "\-\-groom\-time=SECONDS" > Set the groom time for the index database. 
This is the amount of time > the grooming thread will wait after finishing a grooming pass before > diff --git a/tests/ChangeLog b/tests/ChangeLog > index d8fa97fa..346b9e6e 100644 > --- a/tests/ChangeLog > +++ b/tests/ChangeLog > @@ -1,3 +1,8 @@ > +2021-07-01 Noah Sanci <nsa...@redhat.com> > + PR2711 > + * run-debuginfod-find.sh: Added test case for grooming the database > + using regexes. > + > 2021-06-16 Frank Ch. Eigler <f...@redhat.com> > > * run-debuginfod-find.sh: Fix intermittent groom/stale failure, > diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh > index 456dc2f8..bd78bf46 100755 > --- a/tests/run-debuginfod-find.sh > +++ b/tests/run-debuginfod-find.sh > @@ -36,13 +36,14 @@ export DEBUGINFOD_CACHE_PATH=${PWD}/.client_cache > PID1=0 > PID2=0 > PID3=0 > +PID4=0 > > cleanup() > { > - if [ $PID1 -ne 0 ]; then kill $PID1 || true; wait $PID1; fi > - if [ $PID2 -ne 0 ]; then kill $PID2 || true; wait $PID2; fi > - if [ $PID3 -ne 0 ]; then kill $PID3 || true; wait $PID3; fi > - > + if [ $PID1 -ne 0 ]; then kill $PID1; wait $PID1; fi > + if [ $PID2 -ne 0 ]; then kill $PID2; wait $PID2; fi > + if [ $PID3 -ne 0 ]; then kill $PID3; wait $PID3; fi > + if [ $PID4 -ne 0 ]; then kill $PID4; wait $PID4; fi > rm -rf F R D L Z ${PWD}/foobar ${PWD}/mocktree > ${PWD}/.client_cache* ${PWD}/tmp* > exit_cleanup > } > @@ -293,7 +294,7 @@ kill -USR1 $PID1 > wait_ready $PORT1 'thread_work_total{role="traverse"}' 3 > wait_ready $PORT1 'thread_work_pending{role="scan"}' 0 > wait_ready $PORT1 'thread_busy{role="scan"}' 0 > - > +cp $DB $DB.backup > # Rerun same tests for the prog2 binary > filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find -v > debuginfo $BUILDID2 2>vlog` > cmp $filename F/prog2 > @@ -705,4 +706,29 @@ DEBUGINFOD_URLS="file://${PWD}/mocktree/" > filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find > source aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd /my/path/main.c` > cmp $filename ${local_dir}/main.c > > -exit 0 > 
+######################################################################## > +## PR27711 > +# Test to ensure the -A removes files from the index using a given regex > +while true; do > + PORT3=`expr '(' $RANDOM % 1000 ')' + 9000` > + ss -atn | fgrep ":$PORT3" || break > +done > +env LD_LIBRARY_PATH=$ldpath > DEBUGINFOD_URLS="http://127.0.0.1:$PORT3/" > ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -p $PORT3 -t0 -g0 > --regex-groom --include="^$" --exclude=".*" -d $DB.backup > > vlog$PORT3 2>&1 & > +PID4=$! > +wait_ready $PORT3 'ready' 1 > +tempfiles vlog$PORT3 > +errfiles vlog$PORT3 > + > +kill -USR2 $PID4 > +wait_ready $PORT3 'thread_work_total{role="groom"}' 1 > +wait_ready $PORT3 'groom{statistic="archive d/e"}' 0 > +wait_ready $PORT3 'groom{statistic="archive sdef"}' 0 > +wait_ready $PORT3 'groom{statistic="archive sref"}' 0 > +wait_ready $PORT3 'groom{statistic="buildids"}' 0 > +wait_ready $PORT3 'groom{statistic="file d/e"}' 0 > +wait_ready $PORT3 'groom{statistic="file s"}' 0 > +wait_ready $PORT3 'groom{statistic="files scanned (#)"}' 0 > +wait_ready $PORT3 'groom{statistic="files scanned (mb)"}' 0 > + > +kill $PID4 > +exit 0; OK, so this checks that nothing ^$ gets included (kept) and everything .* gets excluded (removed/groomed)? Thanks, Mark