On 7. 6. 25 03:11, Jun Omae wrote:
On 2025/06/07 1:22, Timofei Zhakov wrote:
Yay! It makes sense. Thank you so much for pointing me out.

Is it worthed to change the locale to utf8 for all tests?

However, if not, I think I could temporarily modify it, and set it back to C after the test body. The patch:

[[[
Index: subversion/tests/cmdline/basic_tests.py
===================================================================
--- subversion/tests/cmdline/basic_tests.py (revision 1926036)
+++ subversion/tests/cmdline/basic_tests.py (working copy)
@@ -3349,37 +3349,42 @@
 def unicode_arguments_test(sbox: svntest.sandbox.Sandbox):
   """test unicode arguments"""

-  UNICODE_TEST_STRING = '\U0001f449\U0001f448'
-  sbox.build(read_only=False, empty=True)
+  oldlocale = os.environ["LC_ALL"]
+  os.environ["LC_ALL"] = "C.utf8"

-  unicode_item = sbox.ospath(UNICODE_TEST_STRING)
-  test_item = sbox.ospath("test")
+  try:
+    UNICODE_TEST_STRING = '\U0001f449\U0001f448'
+    sbox.build(read_only=False, empty=True)

-  svntest.actions.run_and_verify_svn2(None, [], 0, "mkdir", unicode_item)
-  svntest.actions.run_and_verify_svn2(None, [], 0, "mkdir", test_item)
-  svntest.actions.run_and_verify_svn2(None, [], 0, "propset",
-                                      "name", UNICODE_TEST_STRING, unicode_item)
-  svntest.actions.run_and_verify_svn2(None, [], 0, "ci", sbox.wc_dir,
-                                      "-m", UNICODE_TEST_STRING,
-                                      "--with-revprop",
-                                      "revprop=" + UNICODE_TEST_STRING)
+    unicode_item = sbox.ospath(UNICODE_TEST_STRING)
+    test_item = sbox.ospath("test")

-  expected_disk = wc.State("", {
-    UNICODE_TEST_STRING: Item(props={ "name": UNICODE_TEST_STRING }),
-    "test"             : Item(),
-  })
+    svntest.actions.run_and_verify_svn2(None, [], 0, "mkdir", unicode_item)
+    svntest.actions.run_and_verify_svn2(None, [], 0, "mkdir", test_item)
+    svntest.actions.run_and_verify_svn2(None, [], 0, "propset",
+                                        "name", UNICODE_TEST_STRING, unicode_item)
+    svntest.actions.run_and_verify_svn2(None, [], 0, "ci", sbox.wc_dir,
+                                        "-m", UNICODE_TEST_STRING,
+                                        "--with-revprop",
+                                        "revprop=" + UNICODE_TEST_STRING)

-  svntest.actions.verify_disk(sbox.wc_dir, expected_disk, check_props=True)
-  os.chdir(sbox.wc_dir)
-  svntest.actions.run_and_verify_log_xml(
-    expected_revprops=[{
- "svn:author": "jrandom",
- "svn:date": "",
- "svn:log": UNICODE_TEST_STRING,
-      "revprop": UNICODE_TEST_STRING
-    }],
-    args=["-r1", "--with-all-revprops"])
+    expected_disk = wc.State("", {
+      UNICODE_TEST_STRING: Item(props={ "name": UNICODE_TEST_STRING }),
+      "test"             : Item(),
+    })

+    svntest.actions.verify_disk(sbox.wc_dir, expected_disk, check_props=True)
+    os.chdir(sbox.wc_dir)
+    svntest.actions.run_and_verify_log_xml(
+      expected_revprops=[{
+ "svn:author": "jrandom",
+ "svn:date": "",
+ "svn:log": UNICODE_TEST_STRING,
+        "revprop": UNICODE_TEST_STRING
+      }],
+      args=["-r1", "--with-all-revprops"])
+  finally:
+    os.environ["LC_ALL"] = oldlocale

 ########################################################################
 # Run the tests
]]]

--
Timofei Zhakov

Sounds good. However, I think we could add a decorator to set UTF-8 locale and restore original locale like the following patch.

Also, C.UTF-8 (or en_US.UTF-8) locale is not available in all POSIX environments. We should check the locale available before the using. The unit test will be skipped if the locale unavailable in the proposed patch.

[[[
diff --git a/subversion/tests/cmdline/basic_tests.py 
b/subversion/tests/cmdline/basic_tests.py
index 213c07d82..419f24ca3 100755
--- a/subversion/tests/cmdline/basic_tests.py
+++ b/subversion/tests/cmdline/basic_tests.py
@@ -40,6 +40,7 @@ XFail = svntest.testcase.XFail_deco
  Issues = svntest.testcase.Issues_deco
  Issue = svntest.testcase.Issue_deco
  Wimp = svntest.testcase.Wimp_deco
+Utf8LocaleTest = svntest.testcase.Utf8LocaleTest_deco
  Item = wc.StateItem

  # Generic UUID-matching regular expression
@@ -3346,6 +3347,7 @@ def argv_with_best_fit_chars(sbox):
    if count == 0:
      raise svntest.Skip('No best fit characters in code page %r' % codepage)

+@Utf8LocaleTest
  def unicode_arguments_test(sbox: svntest.sandbox.Sandbox):
    """test unicode arguments"""

diff --git a/subversion/tests/cmdline/svntest/testcase.py 
b/subversion/tests/cmdline/svntest/testcase.py
index 85deec4c6..aff866944 100644
--- a/subversion/tests/cmdline/svntest/testcase.py
+++ b/subversion/tests/cmdline/svntest/testcase.py
@@ -349,3 +349,48 @@ def SkipDumpLoadCrossCheck_deco(cond_func = lambda: True):

  # Create a singular alias, for linguistic correctness
  Issue_deco = Issues_deco
+
+
+def _detect_utf8_locale():
+    import locale
+
+    orig = None
+    try:
+        for name in ('C.UTF-8', 'en_US.UTF-8'):
+            try:
+                orig = locale.setlocale(locale.LC_ALL, name)
+            except locale.Error:
+                continue
+            else:
+                return name
+    finally:
+        if orig is not None:
+            locale.setlocale(locale.LC_ALL, orig)
+
+    return None  # utf-8 locale unavailable
+
+
+_utf8_locale = _detect_utf8_locale() if os.name != 'nt' else False
+
+
+def Utf8LocaleTest_deco(f):
+
+    import functools
+
+    @functools.wraps(f)
+    def wrapper(sbox: svntest.sandbox.Sandbox):
+        if _utf8_locale is None:
+            raise svntest.Skip
+        if _utf8_locale is not False:
+            orig = os.environ.get('LC_ALL')
+            os.environ['LC_ALL'] = _utf8_locale
+        try:
+            return f(sbox)
+        finally:
+            if _utf8_locale is not False:
+                if orig is None:
+                    del os.environ['LC_ALL']
+                else:
+                    os.environ['LC_ALL'] = orig
+
+    return wrapper
]]]
--
Jun Omae<jun6...@gmail.com>  (大前 潤)

+1 for this solution with the decorator, I'd only prefer to call it @RequireUtf8 – but that's a minor point.

Setting Python's global default encoding is a non-starter; not only because Subversion runs on systems that don't support UTF-8, but also because it would make the tests stop exercising our encoding conversion functions.

-- Brane

Reply via email to