Nguyễn Thái Ngọc Duy <[email protected]> writes:
> prepare_packed_git_one() is modified to allow count-objects to hook a
> report function to so we don't need to duplicate the pack searching
> logic in count-objects.c. When report_pack_garbage is NULL, the
> overhead is insignificant.
>
> The garbage is reported with warning() instead of error() in packed
> garbage case because it's not an error to have garbage. Loose garbage
> is still reported as errors and will be converted to warnings later.
>
> Signed-off-by: Nguyễn Thái Ngọc Duy <[email protected]>
> ---
Thanks.
Tests look good and the series is getting much closer.
> diff --git a/sha1_file.c b/sha1_file.c
> index 239bee7..5bedf78 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -21,6 +21,7 @@
> #include "sha1-lookup.h"
> #include "bulk-checkin.h"
> #include "streaming.h"
> +#include "dir.h"
>
> #ifndef O_NOATIME
> #if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
> @@ -1000,6 +1001,57 @@ void install_packed_git(struct packed_git *pack)
> packed_git = pack;
> }
>
> +void (*report_garbage)(const char *desc, const char *path);
> +
> +static void report_helper(const struct string_list *list,
> + int seen_bits, int first, int last)
> +{
> + const char *msg;
> + switch (seen_bits) {
> + case 0: msg = "no corresponding .idx nor .pack"; break;
> + case 1: msg = "no corresponding .idx"; break;
> + case 2: msg = "no corresponding .pack"; break;
That's dense.
> + default:
> + return;
> + }
> + for (; first <= last; first++)
This looks odd. If you use the usual last+1 convention between the
caller and callee, you do not have to do this, or call this function
with "i - 1" and "list->nr -1" as the last parameter.
> +static void report_pack_garbage(struct string_list *list)
> +{
> + int i, baselen = -1, first = 0, seen_bits = 0;
> +
> + if (!report_garbage)
> + return;
> +
> + sort_string_list(list);
> +
> + for (i = 0; i < list->nr; i++) {
> + const char *path = list->items[i].string;
> + if (baselen != -1 &&
> + strncmp(path, list->items[first].string, baselen)) {
> + report_helper(list, seen_bits, first, i - 1);
> + baselen = -1;
> + seen_bits = 0;
> + }
> + if (baselen == -1) {
> + const char *dot = strrchr(path, '.');
> + if (!dot) {
> + report_garbage("garbage found", path);
> + continue;
> + }
> + baselen = dot - path + 1;
> + first = i;
> + }
> + if (!strcmp(path + baselen, "pack"))
> + seen_bits |= 1;
> + else if (!strcmp(path + baselen, "idx"))
> + seen_bits |= 2;
> + }
> + report_helper(list, seen_bits, first, list->nr - 1);
> +}
> @@ -1009,6 +1061,7 @@ static void prepare_packed_git_one(char *objdir, int
> local)
> int len;
> DIR *dir;
> struct dirent *de;
> + struct string_list garbage = STRING_LIST_INIT_DUP;
>
> sprintf(path, "%s/pack", objdir);
> len = strlen(path);
> ...
> @@ -1043,8 +1106,20 @@ static void prepare_packed_git_one(char *objdir, int
> local)
> (p = add_packed_git(path, len + namelen, local)) !=
> NULL)
> install_packed_git(p);
> }
> +
> + if (!report_garbage)
> + continue;
> +
> + if (has_extension(de->d_name, ".idx") ||
> + has_extension(de->d_name, ".pack") ||
> + has_extension(de->d_name, ".keep"))
> + string_list_append(&garbage, path);
It might be OK to put .pack and .keep in the same "if (A || B)" as
it may happen to be that they do not need any special treatment
right now, but I do not think this is a good idea in general.
You would want to do things differently for ".idx", e.g.
diff --git a/sha1_file.c b/sha1_file.c
index 5bedf78..450521f 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1076,6 +1076,7 @@ static void prepare_packed_git_one(char *objdir, int
local)
while ((de = readdir(dir)) != NULL) {
int namelen = strlen(de->d_name);
struct packed_git *p;
+ int is_a_bad_idx = 0;
if (len + namelen + 1 > sizeof(path)) {
if (report_garbage) {
@@ -1105,12 +1106,14 @@ static void prepare_packed_git_one(char *objdir, int
local)
*/
(p = add_packed_git(path, len + namelen, local)) !=
NULL)
install_packed_git(p);
+ else
+ is_a_bad_idx = 1;
}
if (!report_garbage)
continue;
- if (has_extension(de->d_name, ".idx") ||
+ if ((has_extension(de->d_name, ".idx") && !is_a_bad_idx) ||
has_extension(de->d_name, ".pack") ||
has_extension(de->d_name, ".keep"))
string_list_append(&garbage, path);
so that you can say something about .pack/.keep files that do not
have a working .idx file. In the above example, the only special
thing you would do for .idx is just to check if it is a bad one, but
in later patches you may have to do different things in the body
(i.e. something else in addition to string_list_append(&garbage))
not just in the condition. Collapsing these into a condition to a
single "if (A||B||C)" may be suffering from a lack of foresight.
> diff --git a/t/t5304-prune.sh b/t/t5304-prune.sh
> index d645328..e4bb3a1 100755
> --- a/t/t5304-prune.sh
> +++ b/t/t5304-prune.sh
> @@ -195,4 +195,30 @@ test_expect_success 'gc: prune old objects after local
> clone' '
> )
> '
>
> +test_expect_success 'garbage report in count-objects -v' '
> + : >.git/objects/pack/foo &&
> + : >.git/objects/pack/foo.bar &&
> + : >.git/objects/pack/foo.keep &&
> + : >.git/objects/pack/foo.pack &&
> + : >.git/objects/pack/fake.bar &&
> + : >.git/objects/pack/fake.keep &&
> + : >.git/objects/pack/fake.pack &&
> + : >.git/objects/pack/fake.idx &&
> + : >.git/objects/pack/fake2.keep &&
> + : >.git/objects/pack/fake3.idx &&
> + git count-objects -v 2>stderr &&
> + grep "index file .git/objects/pack/fake.idx is too small" stderr &&
The above suggested change will make a difference to
fake.{pack,keep} because of this breakage, I think.
> + grep "^warning:" stderr | sort >actual &&
> + cat >expected <<\EOF &&
> +warning: garbage found: .git/objects/pack/fake.bar
> +warning: garbage found: .git/objects/pack/foo
> +warning: garbage found: .git/objects/pack/foo.bar
> +warning: no corresponding .idx nor .pack: .git/objects/pack/fake2.keep
> +warning: no corresponding .idx: .git/objects/pack/foo.keep
> +warning: no corresponding .idx: .git/objects/pack/foo.pack
> +warning: no corresponding .pack: .git/objects/pack/fake3.idx
> +EOF
> + test_cmp expected actual
> +'
> +
> test_done
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html