While stress testing `git filter-repo`, I noticed an issue with
encoding; further digging led to the fixes and features in this series.
See the individual commit messages for details.

Changes since v1 (full range-diff below):
  * Applied style fixes Eric pointed out in his review (thanks!)
  * Rebased on latest master (83232e38, "The seventh batch"), resolving
    a trivial merge conflict.  Now merges cleanly with next and pu as
    well.

I'm a bit under the weather so I may be slow to respond...

Elijah Newren (5):
  t9350: fix encoding test to actually test reencoding
  fast-import: support 'encoding' commit header
  fast-export: avoid stripping encoding header if we cannot reencode
  fast-export: differentiate between explicitly utf-8 and implicitly
    utf-8
  fast-export: do automatic reencoding of commit messages only if
    requested

 Documentation/git-fast-import.txt |  7 ++++
 builtin/fast-export.c             | 44 +++++++++++++++++++++----
 fast-import.c                     | 11 +++++--
 t/t9300-fast-import.sh            | 20 ++++++++++++
 t/t9350-fast-export.sh            | 53 +++++++++++++++++++++++++------
 5 files changed, 118 insertions(+), 17 deletions(-)

Range-diff:
1:  d6efd05142 ! 1:  9cc04242bd t9350: fix encoding test to actually test reencoding
    @@ -26,8 +26,7 @@
     -  # use author and committer name in ISO-8859-1 to match it.
     -  . "$TEST_DIRECTORY"/t3901/8859-1.txt &&
     +  test_when_finished "git reset --hard HEAD~1" &&
    -+  test_when_finished "git config --unset i18n.commitencoding" &&
    -+  git config i18n.commitencoding iso-8859-7 &&
    ++  test_config i18n.commitencoding iso-8859-7 &&
        test_tick &&
        echo rosten >file &&
     -  git commit -s -m den file &&
2:  02f48c7559 ! 2:  0cd023ac7a fast-import: support 'encoding' commit header
    @@ -51,9 +51,8 @@
        }
        if (!committer)
                die("Expected committer but didn't get one");
    -+  if (skip_prefix(command_buf.buf, "encoding ", &encoding)) {
    ++  if (skip_prefix(command_buf.buf, "encoding ", &encoding))
     +          read_next_command();
    -+  }
        parse_data(&msg, 0, NULL);
        read_next_command();
        parse_from(b);
    @@ -69,7 +68,7 @@
     +          strbuf_addf(&new_data,
     +                  "encoding %s\n",
     +                  encoding);
    -+  strbuf_addf(&new_data, "\n");
    ++  strbuf_addch(&new_data, '\n');
        strbuf_addbuf(&new_data, &msg);
        free(author);
        free(committer);
    @@ -78,14 +77,14 @@
      --- a/t/t9300-fast-import.sh
      +++ b/t/t9300-fast-import.sh
     @@
    -   background_import_still_running
    +   sed -e s/LFs/LLL/ W-input | tr L "\n" | test_must_fail git fast-import
      '
      
     +###
    -+### series W (other new features)
    ++### series X (other new features)
     +###
     +
    -+test_expect_success 'W: handling encoding' '
    ++test_expect_success 'X: handling encoding' '
     +  test_tick &&
     +  cat >input <<-INPUT_END &&
     +  commit refs/heads/encoding
3:  86c348402d ! 3:  1fddf51402 fast-export: avoid stripping encoding header if we cannot reencode
    @@ -41,8 +41,7 @@
     +test_expect_success 'encoding preserved if reencoding fails' '
     +
     +  test_when_finished "git reset --hard HEAD~1" &&
    -+  test_when_finished "git config --unset i18n.commitencoding" &&
    -+  git config i18n.commitencoding iso-8859-7 &&
    ++  test_config i18n.commitencoding iso-8859-7 &&
     +  echo rosten >file &&
     +  git commit -s -m "$(printf "Pi: \360; Invalid: \377")" file &&
     +  git fast-export wer^..wer >iso-8859-7.fi &&
4:  c09b23bc59 = 4:  4a2e04b3ae fast-export: differentiate between explicitly utf-8 and implicitly utf-8
5:  24b69a0db9 ! 5:  44aacb1a0b fast-export: do automatic reencoding of commit messages only if requested
    @@ -92,8 +92,7 @@
     +test_expect_success 'reencoding iso-8859-7' '
      
        test_when_finished "git reset --hard HEAD~1" &&
    -   test_when_finished "git config --unset i18n.commitencoding" &&
    -@@
    +   test_config i18n.commitencoding iso-8859-7 &&
        test_tick &&
        echo rosten >file &&
        git commit -s -m "$(printf "Pi: \360")" file &&
    @@ -109,8 +108,7 @@
     +test_expect_success 'aborting on iso-8859-7' '
     +
     +  test_when_finished "git reset --hard HEAD~1" &&
    -+  test_when_finished "git config --unset i18n.commitencoding" &&
    -+  git config i18n.commitencoding iso-8859-7 &&
    ++  test_config i18n.commitencoding iso-8859-7 &&
     +  echo rosten >file &&
     +  git commit -s -m "$(printf "Pi: \360")" file &&
     +  test_must_fail git fast-export --reencode=abort wer^..wer >iso-8859-7.fi
    @@ -119,8 +117,7 @@
     +test_expect_success 'preserving iso-8859-7' '
     +
     +  test_when_finished "git reset --hard HEAD~1" &&
    -+  test_when_finished "git config --unset i18n.commitencoding" &&
    -+  git config i18n.commitencoding iso-8859-7 &&
    ++  test_config i18n.commitencoding iso-8859-7 &&
     +  echo rosten >file &&
     +  git commit -s -m "$(printf "Pi: \360")" file &&
     +  git fast-export --reencode=no wer^..wer >iso-8859-7.fi &&
    @@ -134,8 +131,7 @@
      test_expect_success 'encoding preserved if reencoding fails' '
      
        test_when_finished "git reset --hard HEAD~1" &&
    -@@
    -   git config i18n.commitencoding iso-8859-7 &&
    +   test_config i18n.commitencoding iso-8859-7 &&
        echo rosten >file &&
        git commit -s -m "$(printf "Pi: \360; Invalid: \377")" file &&
     -  git fast-export wer^..wer >iso-8859-7.fi &&
-- 
2.21.0.782.g44aacb1a0b

Reply via email to