This is an automated email from the ASF dual-hosted git repository.

swebb2066 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/logging-log4cxx.git


The following commit(s) were added to refs/heads/master by this push:
     new 0e24ad2c Fix ISO Latin-1 decoder sign extension (#660)
0e24ad2c is described below

commit 0e24ad2c9a15c30d828446650790d95610b32b43
Author: metsw24-max <[email protected]>
AuthorDate: Tue May 12 10:26:06 2026 +0530

    Fix ISO Latin-1 decoder sign extension (#660)
---
 src/main/cpp/charsetdecoder.cpp                 |  2 +-
 src/test/cpp/helpers/charsetdecodertestcase.cpp | 32 +++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/src/main/cpp/charsetdecoder.cpp b/src/main/cpp/charsetdecoder.cpp
index b7f852a7..569e7c5c 100644
--- a/src/main/cpp/charsetdecoder.cpp
+++ b/src/main/cpp/charsetdecoder.cpp
@@ -355,7 +355,7 @@ class ISOLatinCharsetDecoder : public CharsetDecoder
 
                        while (src < srcEnd)
                        {
-                               auto sv = static_cast<unsigned int>(*src++);
+                               auto sv = static_cast<unsigned int>(static_cast<unsigned char>(*src++));
                                Transcoder::encode(sv, out);
                        }
                        in.increment_position(availableByteCount);
diff --git a/src/test/cpp/helpers/charsetdecodertestcase.cpp b/src/test/cpp/helpers/charsetdecodertestcase.cpp
index ad357232..12b04a19 100644
--- a/src/test/cpp/helpers/charsetdecodertestcase.cpp
+++ b/src/test/cpp/helpers/charsetdecodertestcase.cpp
@@ -40,6 +40,7 @@ LOGUNIT_CLASS(CharsetDecoderTestCase)
        LOGUNIT_TEST(decode2);
        LOGUNIT_TEST(decode3);
        LOGUNIT_TEST(decode4);
+       LOGUNIT_TEST(testISOLatinHighBytes);
 #if LOG4CXX_LOGCHAR_IS_WCHAR && LOG4CXX_HAS_MBSRTOWCS
     LOGUNIT_TEST(testMbstowcsInfiniteLoop);
 #endif
@@ -152,6 +153,37 @@ public:
                }
        }
 
+       /**
+        * Decoding ISO-8859-1 must map every byte 0x80..0xFF to the
+        * code point of the same numeric value. On platforms where plain
+        * char is signed (default on MSVC/GCC/Clang for x86/x64), a
+        * static_cast<unsigned int>(*src) sign-extends bytes >= 0x80 into
+        * 0xFFFFFFxx, which Transcoder::encode then treats as out-of-range
+        * Unicode and replaces with U+FFFD (or appends garbage on wchar_t
+        * builds). The .properties configuration loader uses this decoder
+        * per the Java spec, so the bug silently corrupts any non-ASCII
+        * Latin-1 byte that appears in a log4cxx configuration file.
+        */
+       void testISOLatinHighBytes()
+       {
+               char buf[1]; // one-byte input, overwritten on every iteration
+               auto dec = CharsetDecoder::getISOLatinDecoder();
+               for (unsigned int b = 0x80; b <= 0xFF; ++b) // counter wider than a byte so the loop ends after 0xFF
+               {
+                       buf[0] = static_cast<char>(b); // may hold a negative value where plain char is signed
+                       ByteBuffer in(buf, 1); // fresh ByteBuffer over buf with length 1
+                       LogString out;
+                       log4cxx_status_t stat = dec->decode(in, out);
+                       LOGUNIT_ASSERT_EQUAL(APR_SUCCESS, stat); // decoding a single high byte must report success
+
+                       // Build the expected LogString by encoding code point b
+                       // through the same Transcoder path the decoder uses.
+                       LogString expected;
+                       Transcoder::encode(b, expected);
+                       LOGUNIT_ASSERT_EQUAL(expected, out); // output must equal the encoding of code point b itself
+               }
+       }
+
 #if LOG4CXX_LOGCHAR_IS_WCHAR && LOG4CXX_HAS_MBSRTOWCS
     /**
      * Tests that we don't loop infinitely when mbsrtowcs refuses to consume

Reply via email to