This is the sixteenth series of hash function transition patches.

Note that the khash patches use a different name (kh_oid_map_t)
than the SHA-1 type (kh_sha1_t). The _oid names are already used by the
oidset code, which uses a set approach, not a map approach.

This series has slightly more patches than before, but it also does a
little more conversion than before.

Changes from v1:
* Add support for object_id khash maps.
* Use this support in the pack bitmap code.
* Improve comments in notes code.
* Fix mistranslation of fast-import code.
* Provide more descriptive values and comments in fast-import code.
* Fix miscapitalization in commit message.
* Add code to look up a hash algorithm by length.
* Use this code, along with René's patch, to restructure the archive
  patches.
* Fix a preexisting off-by-one issue on error in builtin/difftool.
* Adopt Ævar's suggested changes to Gitweb.

René Scharfe (1):
  get-tar-commit-id: parse comment record

brian m. carlson (34):
  t/lib-submodule-update: use appropriate length constant
  khash: move oid hash table definition
  pack-bitmap: make bitmap header handling hash agnostic
  pack-bitmap: convert struct stored_bitmap to object_id
  pack-bitmap: replace sha1_to_hex
  pack-bitmap: switch hard-coded constants to the_hash_algo
  pack-bitmap: switch hash tables to use struct object_id
  submodule: avoid hard-coded constants
  notes-merge: switch to use the_hash_algo
  notes: make hash size independent
  notes: replace sha1_to_hex
  object-store: rename and expand packed_git's sha1 member
  builtin/name-rev: make hash-size independent
  fast-import: make hash-size independent
  fast-import: replace sha1_to_hex
  builtin/am: make hash size independent
  builtin/pull: make hash-size independent
  http-push: convert to use the_hash_algo
  http-backend: allow 64-character hex names
  http-push: remove remaining uses of sha1_to_hex
  http-walker: replace sha1_to_hex
  http: replace hard-coded constant with the_hash_algo
  http: compute hash of downloaded objects using the_hash_algo
  http: replace sha1_to_hex
  remote-curl: make hash size independent
  hash: add a function to lookup hash algorithm by length
  builtin/get-tar-commit-id: make hash size independent
  archive: convert struct archiver_args to object_id
  refspec: make hash size independent
  builtin/difftool: use parse_oid_hex
  dir: make untracked cache extension hash size independent
  read-cache: read data in a hash-independent way
  Git.pm: make hash size independent
  gitweb: make hash size independent

 archive-tar.c               |  7 +--
 archive-zip.c               | 10 ++--
 archive.c                   |  8 +--
 archive.h                   |  2 +-
 builtin/am.c                |  9 ++--
 builtin/difftool.c          | 10 ++--
 builtin/get-tar-commit-id.c | 14 +++++-
 builtin/name-rev.c          | 14 +++---
 builtin/pack-objects.c      |  6 +--
 builtin/pack-redundant.c    |  2 +-
 builtin/pull.c              | 11 +++--
 dir.c                       | 28 +++++------
 fast-import.c               | 66 +++++++++++++++----------
 gitweb/gitweb.perl          | 97 +++++++++++++++++++++++++------------
 hash.h                      |  2 +
 http-backend.c              |  3 ++
 http-push.c                 | 29 +++++------
 http-walker.c               | 18 +++----
 http.c                      | 33 +++++++------
 http.h                      |  2 +-
 khash.h                     | 18 +++++++
 merge-recursive.c           |  2 +-
 notes-merge.c               |  6 +--
 notes.c                     | 44 +++++++++--------
 object-store.h              |  2 +-
 oidset.h                    | 12 -----
 pack-bitmap-write.c         |  8 +--
 pack-bitmap.c               | 76 ++++++++++++++---------------
 pack-bitmap.h               |  4 +-
 packfile.c                  |  6 +--
 perl/Git.pm                 |  2 +-
 read-cache.c                | 74 +++++++++++-----------------
 refspec.c                   |  2 +-
 remote-curl.c               | 11 +++--
 sha1-file.c                 |  8 +++
 submodule.c                 |  2 +-
 t/lib-submodule-update.sh   |  3 +-
 37 files changed, 362 insertions(+), 289 deletions(-)

Range-diff against v1:
 -:  ---------- >  1:  a8262704bf khash: move oid hash table definition
 1:  78b7a887d1 =  2:  6ea91e43bb pack-bitmap: make bitmap header handling hash agnostic
 2:  307dd4d7f3 =  3:  2c1e7d56b4 pack-bitmap: convert struct stored_bitmap to object_id
 3:  7b31ed912b =  4:  cc74a66e9c pack-bitmap: replace sha1_to_hex
 4:  90a2cbba90 =  5:  a6d0161ade pack-bitmap: switch hard-coded constants to the_hash_algo
 -:  ---------- >  6:  55db506411 pack-bitmap: switch hash tables to use struct object_id
 5:  869587b01d =  7:  e8ed86d773 submodule: avoid hard-coded constants
 6:  ce253521c0 =  8:  3f7e1da6d2 notes-merge: switch to use the_hash_algo
 7:  36da40abe0 !  9:  f367ddef94 notes: make hash size independent
    @@ -82,7 +82,7 @@
      
     -/* hex SHA1 + 19 * '/' + NUL */
     -#define FANOUT_PATH_MAX GIT_SHA1_HEXSZ + FANOUT_PATH_SEPARATORS + 1
    -+/* hex oid + one slash between each pair + NUL */
    ++/* hex oid + '/' between each pair of hex digits + NUL */
     +#define FANOUT_PATH_MAX GIT_MAX_HEXSZ + FANOUT_PATH_SEPARATORS_MAX + 1
      
      static void construct_path_with_fanout(const unsigned char *sha1,
 8:  91829a63e3 ! 10:  8e3508e891 notes: replace sha1_to_hex
    @@ -12,7 +12,7 @@
      --- a/notes.c
      +++ b/notes.c
     @@
    - /* hex oid + one slash between each pair + NUL */
    + /* hex oid + '/' between each pair of hex digits + NUL */
      #define FANOUT_PATH_MAX GIT_MAX_HEXSZ + FANOUT_PATH_SEPARATORS_MAX + 1
      
     -static void construct_path_with_fanout(const unsigned char *sha1,
 9:  0b049ec2b0 = 11:  a3d1f218dd object-store: rename and expand packed_git's sha1 member
10:  3b3a389040 = 12:  7d9a9a5c12 builtin/name-rev: make hash-size independent
11:  f6cf848d3e ! 13:  fe457d42f0 fast-import: make hash-size independent
    @@ -11,6 +11,44 @@
      diff --git a/fast-import.c b/fast-import.c
      --- a/fast-import.c
      +++ b/fast-import.c
    +@@
    +  */
    + #define NO_DELTA S_ISUID
    + 
    ++/*
    ++ * The amount of additional space required in order to write an object into the
    ++ * current pack. This is the hash lengths at the end of the pack, plus the
    ++ * length of one object ID.
    ++ */
    ++#define PACK_SIZE_THRESHOLD (the_hash_algo->rawsz * 3)
    ++
    + struct object_entry {
    +   struct pack_idx_entry idx;
    +   struct object_entry *next;
    +@@
    +   git_deflate_end(&s);
    + 
    +   /* Determine if we should auto-checkpoint. */
    +-  if ((max_packsize && (pack_size + 60 + s.total_out) > max_packsize)
    +-          || (pack_size + 60 + s.total_out) < pack_size) {
    ++  if ((max_packsize
    ++          && (pack_size + PACK_SIZE_THRESHOLD + s.total_out) > max_packsize)
    ++          || (pack_size + PACK_SIZE_THRESHOLD + s.total_out) < pack_size) {
    + 
    +           /* This new object needs to *not* have the current pack_id. */
    +           e->pack_id = pack_id + 1;
    +@@
    +   int status = Z_OK;
    + 
    +   /* Determine if we should auto-checkpoint. */
    +-  if ((max_packsize && (pack_size + 60 + len) > max_packsize)
    +-          || (pack_size + 60 + len) < pack_size)
    ++  if ((max_packsize
    ++          && (pack_size + PACK_SIZE_THRESHOLD + len) > max_packsize)
    ++          || (pack_size + PACK_SIZE_THRESHOLD + len) < pack_size)
    +           cycle_packfile();
    + 
    +   hashfile_checkpoint(pack_file, &checkpoint);
     @@
                c += e->name->str_len + 1;
                hashcpy(e->versions[0].oid.hash, (unsigned char *)c);
    @@ -34,7 +72,8 @@
        uintmax_t num_notes = 0;
        struct object_id oid;
     -  char realpath[60];
    -+  char realpath[GIT_MAX_RAWSZ * 3];
    ++  /* hex oid + '/' between each pair of hex digits + NUL */
    ++  char realpath[GIT_MAX_HEXSZ + ((GIT_MAX_HEXSZ / 2) - 1) + 1];
     +  const unsigned hexsz = the_hash_algo->hexsz;
      
        if (!root->tree)
    @@ -71,7 +110,7 @@
                                                       commit_type, &size,
                                                       &commit_oid);
     -          if (!buf || size < 46)
    -+          if (!buf || size < the_hash_algo->hexsz)
    ++          if (!buf || size < the_hash_algo->hexsz + 6)
                        die("Not a valid commit: %s", p);
                free(buf);
        } else
    @@ -89,7 +128,7 @@
                                                               commit_type,
                                                               &size, &n->oid);
     -                  if (!buf || size < 46)
    -+                  if (!buf || size < the_hash_algo->hexsz)
    ++                  if (!buf || size < the_hash_algo->hexsz + 6)
                                die("Not a valid commit: %s", from);
                        free(buf);
                } else
12:  366df3eeb4 = 14:  66999cae86 fast-import: replace sha1_to_hex
13:  3d3b79cbed = 15:  8dd1749b3d builtin/am: make hash size independent
14:  3a3f8ddd55 <  -:  ---------- builtin/pull: make hash-size independent
 -:  ---------- > 16:  253a42571e builtin/pull: make hash-size independent
15:  fc22aed0ad = 17:  16c417edda http-push: convert to use the_hash_algo
16:  a2da549b64 = 18:  6d867f375d http-backend: allow 64-character hex names
17:  c7481c69d8 = 19:  9e53e3be47 http-push: remove remaining uses of sha1_to_hex
18:  dbb3840e5c ! 20:  0124870940 http-walker: replace sha1_to_hex
    @@ -3,7 +3,7 @@
         http-walker: replace sha1_to_hex
     
         Since sha1_to_hex is limited to SHA-1, replace the uses of it in this
    -    file with hasH_to_hex.  Rename several variables accordingly to reflect
    +    file with hash_to_hex.  Rename several variables accordingly to reflect
         that they are no longer limited to SHA-1.
     
         Signed-off-by: brian m. carlson <sand...@crustytoothpaste.net>
19:  9b0a16a9d9 = 21:  d9107144c0 http: replace hard-coded constant with the_hash_algo
20:  bf433661da = 22:  f8d7da7253 http: compute hash of downloaded objects using the_hash_algo
21:  a66eb80a2f = 23:  f0e47aa063 http: replace sha1_to_hex
22:  423b42feca = 24:  26f115ab3f remote-curl: make hash size independent
23:  4b15d67a24 <  -:  ---------- archive-tar: make hash size independent
 -:  ---------- > 25:  b22c25095c hash: add a function to lookup hash algorithm by length
 -:  ---------- > 26:  89d15c7609 get-tar-commit-id: parse comment record
 -:  ---------- > 27:  5ccbfd2ff1 builtin/get-tar-commit-id: make hash size independent
24:  ee52d16b11 ! 28:  ede9b2c9b9 archive: convert struct archiver_args to object_id
    @@ -3,8 +3,8 @@
         archive: convert struct archiver_args to object_id
     
         Change the commit_sha1 member to be called "commit_oid" and change it to
    -    be a pointer to struct object_id.  Additionally, update two uses of
    -    GIT_SHA1_HEXSZ to use the_hash_algo instead.
    +    be a pointer to struct object_id.  Additionally, update some uses of
    +    GIT_SHA1_HEXSZ and hard-coded values to use the_hash_algo instead.
     
         Signed-off-by: brian m. carlson <sand...@crustytoothpaste.net>
     
    @@ -15,20 +15,21 @@
      
      static void write_global_extended_header(struct archiver_args *args)
      {
    --  const unsigned char *hash = args->commit_sha1;
    +-  const unsigned char *sha1 = args->commit_sha1;
     +  const struct object_id *oid = args->commit_oid;
        struct strbuf ext_header = STRBUF_INIT;
        struct ustar_header header;
        unsigned int mode;
      
    --  if (hash)
    +-  if (sha1)
     +  if (oid)
                strbuf_append_ext_header(&ext_header, "comment",
    --                                   hash_to_hex(hash),
    +-                                   sha1_to_hex(sha1), 40);
     +                                   oid_to_hex(oid),
    -                                    the_hash_algo->hexsz);
    ++                                   the_hash_algo->hexsz);
        if (args->time > USTAR_MAX_MTIME) {
                strbuf_append_ext_header_uint(&ext_header, "mtime",
    +                                         args->time);
     
      diff --git a/archive-zip.c b/archive-zip.c
      --- a/archive-zip.c
25:  47ddaca720 = 29:  4334a5d833 refspec: make hash size independent
26:  8f2437f0ef ! 30:  c74bb05533 builtin/difftool: use parse_oid_hex
    @@ -5,6 +5,11 @@
         Instead of using get_oid_hex and adding constants to the result, use
         parse_oid_hex to make this code independent of the hash size.
     
    +    Additionally, correct a typo that would cause us to print one too few
    +    characters on error, since we will already have incremented the pointer
    +    to point to the beginning of the object ID before we get to printing the
    +    error message.
    +
         Signed-off-by: brian m. carlson <sand...@crustytoothpaste.net>
     
      diff --git a/builtin/difftool.c b/builtin/difftool.c
    @@ -15,15 +20,17 @@
        if (*p != ' ')
                return error("expected ' ', got '%c'", *p);
     -  if (get_oid_hex(++p, oid1))
    -+  if (parse_oid_hex(++p, oid1, (const char **)&p))
    -           return error("expected object ID, got '%s'", p + 1);
    +-          return error("expected object ID, got '%s'", p + 1);
     -  p += GIT_SHA1_HEXSZ;
    ++  if (parse_oid_hex(++p, oid1, (const char **)&p))
    ++          return error("expected object ID, got '%s'", p);
        if (*p != ' ')
                return error("expected ' ', got '%c'", *p);
     -  if (get_oid_hex(++p, oid2))
    -+  if (parse_oid_hex(++p, oid2, (const char **)&p))
    -           return error("expected object ID, got '%s'", p + 1);
    +-          return error("expected object ID, got '%s'", p + 1);
     -  p += GIT_SHA1_HEXSZ;
    ++  if (parse_oid_hex(++p, oid2, (const char **)&p))
    ++          return error("expected object ID, got '%s'", p);
        if (*p != ' ')
                return error("expected ' ', got '%c'", *p);
        *status = *++p;
27:  bf2f8ae68b = 31:  d7618969e2 dir: make untracked cache extension hash size independent
28:  0465f487fd = 32:  faa9e37821 read-cache: read data in a hash-independent way
29:  7396961044 = 33:  0086840da3 Git.pm: make hash size independent
30:  8777c5e1f3 ! 34:  c91abe5eb7 gitweb: make hash size independent
    @@ -8,11 +8,15 @@
         hex characters, and use this variable anywhere we would have previously
         hard-coded a 40 in a regex.
     
    +    Add some helper functions which allow us to write tighter regexes that
    +    match exactly the number of hex characters we're expecting.
    +
         Similarly, switch the code that looks for deleted diffinfo information
         to look for either 40 or 64 zeros, and update one piece of code to use
         this function.  Finally, when formatting a log line, allow an
         abbreviated describe output to contain up to 64 characters.
     
    +    Helped-by: Ævar Arnfjörð Bjarmason <ava...@gmail.com>
         Signed-off-by: brian m. carlson <sand...@crustytoothpaste.net>
     
      diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl
    @@ -22,8 +26,37 @@
      # ======================================================================
      # input validation and dispatch
      
    ++# Various hash size-related values.
    ++my $sha1_len = 40;
    ++my $sha256_extra_len = 24;
    ++my $sha256_len = $sha1_len + $sha256_extra_len;
    ++
    ++# A regex matching $len hex characters. $len may be a range (e.g. 7,64).
    ++sub oid_nlen_regex {
    ++  my $len = shift;
    ++  my $hchr = qr/[0-9a-fA-F]/;
    ++  return qr/(?:(?:$hchr){$len})/;
    ++}
    ++
    ++# A regex matching two sets of $nlen hex characters, prefixed by the literal
    ++# string $prefix and with the literal string $infix between them.
    ++sub oid_nlen_prefix_infix_regex {
    ++  my $nlen = shift;
    ++  my $prefix = shift;
    ++  my $infix = shift;
    ++
    ++  my $rx = oid_nlen_regex($nlen);
    ++
    ++  return qr/^\Q$prefix\E$rx\Q$infix\E$rx$/;
    ++}
    ++
     +# A regex matching a valid object ID.
    -+our $oid_regex = qr/(?:[0-9a-fA-F]{40}(?:[0-9a-fA-F]{24})?)/;
    ++our $oid_regex;
    ++{
    ++  my $x = oid_nlen_regex($sha1_len);
    ++  my $y = oid_nlen_regex($sha256_extra_len);
    ++  $oid_regex = qr/(?:$x(?:$y)?)/;
    ++}
     +
      # input parameters can be collected from a variety of sources (presently, CGI
      # and PATH_INFO), so we define an %input_params hash that collects them all
    @@ -37,16 +70,26 @@
                return 1;
        }
        # it must be correct pathname
    +@@
    + sub format_log_line_html {
    +   my $line = shift;
    + 
    ++  # Potentially abbreviated OID.
    ++  my $regex = oid_nlen_regex("7,64");
    ++
    +   $line = esc_html($line, -nbsp=>1);
    +   $line =~ s{
    +         \b
     @@
                  (?<!-) # see strbuf_check_tag_ref(). Tags can't start with -
                  [A-Za-z0-9.-]+
                  (?!\.) # refs can't end with ".", see check_refname_format()
     -            -g[0-9a-fA-F]{7,40}
    -+            -g[0-9a-fA-F]{7,64}
    ++            -g$regex
                  |
                  # Just a normal looking Git SHA1
     -            [0-9a-fA-F]{7,40}
    -+            [0-9a-fA-F]{7,64}
    ++      $regex
              )
              \b
          }{
    @@ -55,7 +98,8 @@
        }
        # match <hash>
     -  if ($line =~ m/^index [0-9a-fA-F]{40},[0-9a-fA-F]{40}/) {
    -+  if ($line =~ m/^index $oid_regex,$oid_regex/) {
    ++  if ($line =~ oid_nlen_prefix_infix_regex($sha1_len, "index ", ",") |
    ++      $line =~ oid_nlen_prefix_infix_regex($sha256_len, "index ", ",")) {
                # can match only for combined diff
                $line = 'index ';
                for (my $i = 0; $i < $diffinfo->{'nparents'}; $i++) {
    @@ -64,7 +108,8 @@
                }
      
     -  } elsif ($line =~ m/^index [0-9a-fA-F]{40}..[0-9a-fA-F]{40}/) {
    -+  } elsif ($line =~ m/^index $oid_regex..$oid_regex/) {
    ++  } elsif ($line =~ oid_nlen_prefix_infix_regex($sha1_len, "index ", "..") |
    ++           $line =~ oid_nlen_prefix_infix_regex($sha256_len, "index ", "..")) {
                # can match only for ordinary diff
                my ($from_link, $to_link);
                if ($from->{'href'}) {

Reply via email to