libsvn_subr/utf-test failure due to unaligned memory access

James McCoy Sat, 03 Mar 2018 11:18:56 -0800

This test has been failing for some time on Debian's alpha buildd[0].
One of the alpha porters looked into it and gave a thorough diagnosis[1]
of the issue which basically boils down to: using a char* as
apr_uint16_t*/apr_int32_t* is going to result in unaligned access.


In a (much belated) reply[2], I proposed the attached patch which
memcpy()s the string to an array of the appropriate type before passing
it through to svn_utf__utf{16,32}_to_utf8().  I also took the
opportunity to consolidate the handling of counted and non-counted
conversions.

Does this look appropriate?  If so, I can commit to trunk and nominate
for 1.10.

[0]: https://buildd.debian.org/status/logs.php?pkg=subversion&arch=alpha
[1]: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=823133#22
[2]: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=823133#27

Cheers,
-- 
James
GPG Key: 4096R/91BF BF4D 6956 BD5D F7B7  2D23 DFE6 91AE 331B A3DB

Index: trunk/subversion/tests/libsvn_subr/utf-test.c
===================================================================
--- trunk/subversion/tests/libsvn_subr/utf-test.c	(revision 1825748)
+++ trunk/subversion/tests/libsvn_subr/utf-test.c	(working copy)
@@ -752,8 +752,10 @@
   {
     svn_boolean_t sixteenbit;
     svn_boolean_t bigendian;
+    apr_size_t sourcelen;
     const char *source;
     const char *result;
+    svn_boolean_t counted;
   } tests[] = {
 
 #define UTF_32_LE FALSE, FALSE
@@ -762,33 +764,37 @@
 #define UTF_16_BE TRUE, TRUE
 
     /* Normal character conversion */
-    { UTF_32_LE, "t\0\0\0" "e\0\0\0" "s\0\0\0" "t\0\0\0" "\0\0\0\0", "test" },
-    { UTF_32_BE, "\0\0\0t" "\0\0\0e" "\0\0\0s" "\0\0\0t" "\0\0\0\0", "test" },
-    { UTF_16_LE, "t\0" "e\0" "s\0" "t\0" "\0\0", "test" },
-    { UTF_16_BE, "\0t" "\0e" "\0s" "\0t" "\0\0", "test" },
+    { UTF_32_LE, 4, "t\0\0\0" "e\0\0\0" "s\0\0\0" "t\0\0\0" "\0\0\0\0", "test", FALSE },
+    { UTF_32_BE, 4, "\0\0\0t" "\0\0\0e" "\0\0\0s" "\0\0\0t" "\0\0\0\0", "test", FALSE },
+    { UTF_16_LE, 4, "t\0" "e\0" "s\0" "t\0" "\0\0", "test", FALSE },
+    { UTF_16_BE, 4, "\0t" "\0e" "\0s" "\0t" "\0\0", "test", FALSE },
 
     /* Valid surrogate pairs */
-    { UTF_16_LE, "\x00\xD8" "\x00\xDC" "\0\0", "\xf0\x90\x80\x80" }, /* U+010000 */
-    { UTF_16_LE, "\x34\xD8" "\x1E\xDD" "\0\0", "\xf0\x9d\x84\x9e" }, /* U+01D11E */
-    { UTF_16_LE, "\xFF\xDB" "\xFD\xDF" "\0\0", "\xf4\x8f\xbf\xbd" }, /* U+10FFFD */
+    { UTF_16_LE, 2, "\x00\xD8" "\x00\xDC" "\0\0", "\xf0\x90\x80\x80", FALSE }, /* U+010000 */
+    { UTF_16_LE, 2, "\x34\xD8" "\x1E\xDD" "\0\0", "\xf0\x9d\x84\x9e", FALSE }, /* U+01D11E */
+    { UTF_16_LE, 2, "\xFF\xDB" "\xFD\xDF" "\0\0", "\xf4\x8f\xbf\xbd", FALSE }, /* U+10FFFD */
 
-    { UTF_16_BE, "\xD8\x00" "\xDC\x00" "\0\0", "\xf0\x90\x80\x80" }, /* U+010000 */
-    { UTF_16_BE, "\xD8\x34" "\xDD\x1E" "\0\0", "\xf0\x9d\x84\x9e" }, /* U+01D11E */
-    { UTF_16_BE, "\xDB\xFF" "\xDF\xFD" "\0\0", "\xf4\x8f\xbf\xbd" }, /* U+10FFFD */
+    { UTF_16_BE, 2, "\xD8\x00" "\xDC\x00" "\0\0", "\xf0\x90\x80\x80", FALSE }, /* U+010000 */
+    { UTF_16_BE, 2, "\xD8\x34" "\xDD\x1E" "\0\0", "\xf0\x9d\x84\x9e", FALSE }, /* U+01D11E */
+    { UTF_16_BE, 2, "\xDB\xFF" "\xDF\xFD" "\0\0", "\xf4\x8f\xbf\xbd", FALSE }, /* U+10FFFD */
 
     /* Swapped, single and trailing surrogate pairs */
-    { UTF_16_LE, "*\0" "\x00\xDC" "\x00\xD8" "*\0\0\0", "*\xed\xb0\x80" "\xed\xa0\x80*" },
-    { UTF_16_LE, "*\0" "\x1E\xDD" "*\0\0\0", "*\xed\xb4\x9e*" },
-    { UTF_16_LE, "*\0" "\xFF\xDB" "*\0\0\0", "*\xed\xaf\xbf*" },
-    { UTF_16_LE, "\x1E\xDD" "\0\0", "\xed\xb4\x9e" },
-    { UTF_16_LE, "\xFF\xDB" "\0\0", "\xed\xaf\xbf" },
+    { UTF_16_LE, 4, "*\0" "\x00\xDC" "\x00\xD8" "*\0\0\0", "*\xed\xb0\x80" "\xed\xa0\x80*", FALSE },
+    { UTF_16_LE, 3, "*\0" "\x1E\xDD" "*\0\0\0", "*\xed\xb4\x9e*", FALSE },
+    { UTF_16_LE, 3, "*\0" "\xFF\xDB" "*\0\0\0", "*\xed\xaf\xbf*", FALSE },
+    { UTF_16_LE, 1, "\x1E\xDD" "\0\0", "\xed\xb4\x9e", FALSE },
+    { UTF_16_LE, 1, "\xFF\xDB" "\0\0", "\xed\xaf\xbf", FALSE },
 
-    { UTF_16_BE, "\0*" "\xDC\x00" "\xD8\x00" "\0*\0\0", "*\xed\xb0\x80" "\xed\xa0\x80*" },
-    { UTF_16_BE, "\0*" "\xDD\x1E" "\0*\0\0", "*\xed\xb4\x9e*" },
-    { UTF_16_BE, "\0*" "\xDB\xFF" "\0*\0\0", "*\xed\xaf\xbf*" },
-    { UTF_16_BE, "\xDD\x1E" "\0\0", "\xed\xb4\x9e" },
-    { UTF_16_BE, "\xDB\xFF" "\0\0", "\xed\xaf\xbf" },
+    { UTF_16_BE, 4, "\0*" "\xDC\x00" "\xD8\x00" "\0*\0\0", "*\xed\xb0\x80" "\xed\xa0\x80*", FALSE },
+    { UTF_16_BE, 3, "\0*" "\xDD\x1E" "\0*\0\0", "*\xed\xb4\x9e*", FALSE },
+    { UTF_16_BE, 3, "\0*" "\xDB\xFF" "\0*\0\0", "*\xed\xaf\xbf*", FALSE },
+    { UTF_16_BE, 1, "\xDD\x1E" "\0\0", "\xed\xb4\x9e", FALSE },
+    { UTF_16_BE, 1, "\xDB\xFF" "\0\0", "\xed\xaf\xbf", FALSE },
 
+    /* Counted strings with NUL characters */
+    { UTF_16_LE, 3, "x\0" "\0\0" "y\0" "*\0", "x\0y", TRUE },
+    { UTF_32_BE, 3, "\0\0\0x" "\0\0\0\0" "\0\0\0y" "\0\0\0*", "x\0y", TRUE },
+
 #undef UTF_32_LE
 #undef UTF_32_BE
 #undef UTF_16_LE
@@ -799,33 +805,35 @@
 
   const struct cvt_test_t *tc;
   const svn_string_t *result;
-  int i;
+#define SRCLEN 5
+  apr_uint16_t source16[SRCLEN];
+  apr_int32_t source32[SRCLEN];
 
-  for (i = 1, tc = tests; tc->source; ++tc, ++i)
+  for (tc = tests; tc->source; ++tc)
     {
       if (tc->sixteenbit)
-        SVN_ERR(svn_utf__utf16_to_utf8(&result, (const void*)tc->source,
-                                       SVN_UTF__UNKNOWN_LENGTH,
-                                       tc->bigendian, pool, pool));
+        {
+          memset(&source16, 0, SRCLEN * sizeof(*source16));
+          memcpy(&source16, tc->source, (tc->sourcelen + 1) * sizeof(*source16));
+          SVN_ERR(svn_utf__utf16_to_utf8(&result, source16,
+                                         tc->counted ? tc->sourcelen : SVN_UTF__UNKNOWN_LENGTH,
+                                         tc->bigendian, pool, pool));
+        }
       else
-        SVN_ERR(svn_utf__utf32_to_utf8(&result, (const void*)tc->source,
-                                       SVN_UTF__UNKNOWN_LENGTH,
-                                       tc->bigendian, pool, pool));
-      SVN_ERR_ASSERT(0 == strcmp(result->data, tc->result));
+        {
+          memset(&source32, 0, SRCLEN * sizeof(*source32));
+          memcpy(&source32, tc->source, (tc->sourcelen + 1) * sizeof(*source32));
+          SVN_ERR(svn_utf__utf32_to_utf8(&result, source32,
+                                         tc->counted ? tc->sourcelen : SVN_UTF__UNKNOWN_LENGTH,
+                                         tc->bigendian, pool, pool));
+        }
+      if (tc->counted)
+        SVN_ERR_ASSERT(0 == memcmp(result->data, tc->result, tc->sourcelen));
+      else
+        SVN_ERR_ASSERT(0 == strcmp(result->data, tc->result));
     }
+#undef SRCLEN
 
-  /* Test counted strings with NUL characters */
-  SVN_ERR(svn_utf__utf16_to_utf8(
-              &result, (void*)("x\0" "\0\0" "y\0" "*\0"), 3,
-              FALSE, pool, pool));
-  SVN_ERR_ASSERT(0 == memcmp(result->data, "x\0y", 3));
-
-  SVN_ERR(svn_utf__utf32_to_utf8(
-              &result,
-              (void*)("\0\0\0x" "\0\0\0\0" "\0\0\0y" "\0\0\0*"), 3,
-              TRUE, pool, pool));
-  SVN_ERR_ASSERT(0 == memcmp(result->data, "x\0y", 3));
-
   return SVN_NO_ERROR;
 }

libsvn_subr/utf-test failure due to unaligned memory access

Reply via email to