It seems that `tr' in GNU coreutils does not recognize multibyte
characters, but other implementations, e.g. HP-UX and Solaris, do recognize them.

As a result, [ echo AB | LC_ALL=ja_JP.eucJP tr AB '\244\263' ] is
treated as [ echo AB | LC_ALL=ja_JP.eucJP tr A '\244\263' ], because
'\244\263' is recognized as a single multibyte character.  That is not
what we expect.
From 852b1290450995644c604270720ec5b432e323b5 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <nori...@kcn.ne.jp>
Date: Sat, 8 Nov 2014 14:26:56 +0900
Subject: [PATCH] tests: fix encoding with `tr' to support multibyte in test
 suite

tests/euc-mb, tests/fgrep-infloop, tests/invalid-multibyte-infloop,
tests/prefix-of-multibyte, tests/sjis-mb: Fix them.
---
 tests/euc-mb                    | 2 +-
 tests/fgrep-infloop             | 2 +-
 tests/invalid-multibyte-infloop | 2 +-
 tests/prefix-of-multibyte       | 2 +-
 tests/sjis-mb                   | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/euc-mb b/tests/euc-mb
index b625046..c62f99f 100755
--- a/tests/euc-mb
+++ b/tests/euc-mb
@@ -12,7 +12,7 @@ require_compiled_in_MB_support
 locale=ja_JP.EUC-JP
 
 make_input () {
-  echo "$1" | tr AB '\244\263'
+  echo "$1" | LC_ALL=C tr AB '\244\263'
 }
 
 euc_grep () {
diff --git a/tests/fgrep-infloop b/tests/fgrep-infloop
index 015ec74..0da5be1 100755
--- a/tests/fgrep-infloop
+++ b/tests/fgrep-infloop
@@ -6,7 +6,7 @@ require_en_utf8_locale_
 require_timeout_
 require_compiled_in_MB_support
 
-encode() { echo "$1" | tr ABC '\357\274\241'; }
+encode() { echo "$1" | LC_ALL=C tr ABC '\357\274\241'; }
 
 encode ABC > in || framework_failure_
 fail=0
diff --git a/tests/invalid-multibyte-infloop b/tests/invalid-multibyte-infloop
index d7c6165..8b3458f 100755
--- a/tests/invalid-multibyte-infloop
+++ b/tests/invalid-multibyte-infloop
@@ -7,7 +7,7 @@ require_en_utf8_locale_
 require_compiled_in_MB_support
 require_timeout_
 
-encode() { echo "$1" | tr A '\202'; }
+encode() { echo "$1" | LC_ALL=C tr A '\202'; }
 
 encode AA > input
 
diff --git a/tests/prefix-of-multibyte b/tests/prefix-of-multibyte
index 2228a22..b00c64e 100755
--- a/tests/prefix-of-multibyte
+++ b/tests/prefix-of-multibyte
@@ -5,7 +5,7 @@
 require_en_utf8_locale_
 require_compiled_in_MB_support
 
-encode() { echo "$1" | tr ABC '\357\274\241'; }
+encode() { echo "$1" | LC_ALL=C tr ABC '\357\274\241'; }
 
 encode ABC >exp1
 encode aABC >exp2
diff --git a/tests/sjis-mb b/tests/sjis-mb
index 990c770..8735792 100755
--- a/tests/sjis-mb
+++ b/tests/sjis-mb
@@ -21,7 +21,7 @@ locale=ja_JP.SHIFT_JIS
 #   matching "A" against "@A", or mistaking a valid "A" match for the second
 #   byte of a multi-byte character.
 
-encode() { echo "$1" | tr @% '\203\301'; }
+encode() { echo "$1" | LC_ALL=C tr @% '\203\301'; }
 
 k=0
 test_grep_reject() {
-- 
2.1.3

Reply via email to