tools/source/misc/json_writer.cxx |  135 ++++++++++++++++++++++----------------
 1 file changed, 79 insertions(+), 56 deletions(-)

New commits:
commit aff3ce8837e00db6e91d0deb6c4f2ccb0d520c54
Author:     Mike Kaganski <mike.kagan...@collabora.com>
AuthorDate: Wed Oct 13 13:17:23 2021 +0200
Commit:     Mike Kaganski <mike.kagan...@collabora.com>
CommitDate: Wed Oct 13 19:34:13 2021 +0200

    Improve JsonWriter's escapement code
    
    It should now be conformant to JSON spec; and additionally,
    it escapes two characters that are valid in JSON, but invalid
    in JavaScript (as described in [1]).
    
    [1] http://web.archive.org/web/20201203234157/http://timelessrepo.com/json-isnt-a-javascript-subset
    
    Change-Id: I1081ade89a57fefefde672f2b8fa08e97627fc50
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/123510
    Tested-by: Jenkins
    Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com>

diff --git a/tools/source/misc/json_writer.cxx b/tools/source/misc/json_writer.cxx
index 09f34c25c3c7..7024b580c7fd 100644
--- a/tools/source/misc/json_writer.cxx
+++ b/tools/source/misc/json_writer.cxx
@@ -119,6 +119,56 @@ void JsonWriter::endStruct()
     mbFirstFieldInNode = false;
 }
 
+static char getEscapementChar(char ch)
+{
+    switch (ch)
+    {
+        case '\b':
+            return 'b';
+        case '\t':
+            return 't';
+        case '\n':
+            return 'n';
+        case '\f':
+            return 'f';
+        case '\r':
+            return 'r';
+        default:
+            return ch;
+    }
+}
+
+static bool writeEscapedSequence(sal_uInt32 ch, char*& pos)
+{
+    switch (ch)
+    {
+        case '\b':
+        case '\t':
+        case '\n':
+        case '\f':
+        case '\r':
+        case '"':
+        case '/':
+        case '\\':
+            *pos++ = '\\';
+            *pos++ = getEscapementChar(ch);
+            return true;
+        // Special processing of U+2028 and U+2029, which are valid JSON, but invalid JavaScript
+        // Write them in escaped '\u2028' or '\u2029' form
+        case 0x2028:
+        case 0x2029:
+            *pos++ = '\\';
+            *pos++ = 'u';
+            *pos++ = '2';
+            *pos++ = '0';
+            *pos++ = '2';
+            *pos++ = ch == 0x2028 ? '8' : '9';
+            return true;
+        default:
+            return false;
+    }
+}
+
 void JsonWriter::writeEscapedOUString(const OUString& rPropVal)
 {
     // Convert from UTF-16 to UTF-8 and perform escaping
@@ -126,42 +176,9 @@ void JsonWriter::writeEscapedOUString(const OUString& rPropVal)
     while (i < rPropVal.getLength())
     {
         sal_uInt32 ch = rPropVal.iterateCodePoints(&i);
-        if (ch == '\\')
-        {
-            *mPos = static_cast<char>(ch);
-            ++mPos;
-            *mPos = static_cast<char>(ch);
-            ++mPos;
-        }
-        else if (ch == '"')
-        {
-            *mPos = '\\';
-            ++mPos;
-            *mPos = static_cast<char>(ch);
-            ++mPos;
-        }
-        else if (ch == '\n')
-        {
-            *mPos = '\\';
-            ++mPos;
-            *mPos = 'n';
-            ++mPos;
-        }
-        else if (ch == '\r')
-        {
-            *mPos = '\\';
-            ++mPos;
-            *mPos = 'r';
-            ++mPos;
-        }
-        else if (ch == '\f')
-        {
-            *mPos = '\\';
-            ++mPos;
-            *mPos = 'f';
-            ++mPos;
-        }
-        else if (ch <= 0x7F)
+        if (writeEscapedSequence(ch, mPos))
+            continue;
+        if (ch <= 0x7F)
         {
             *mPos = static_cast<char>(ch);
             ++mPos;
@@ -200,9 +217,8 @@ void JsonWriter::put(const char* pPropName, const OUString& rPropVal)
 {
     auto nPropNameLength = strlen(pPropName);
     // But values can be any UTF-8,
-    // see rtl_ImplGetFastUTF8ByteLen in sal/rtl/string.cxx for why a factor 3
-    // is the worst case
-    auto nWorstCasePropValLength = rPropVal.getLength() * 3;
+    // if the string only contains of 0x2028, it will be expanded 6 times (see writeEscapedSequence)
+    auto nWorstCasePropValLength = rPropVal.getLength() * 6;
     ensureSpace(nPropNameLength + nWorstCasePropValLength + 8);
 
     addCommaBeforeField();
@@ -241,24 +257,31 @@ void JsonWriter::put(const char* pPropName, std::string_view rPropVal)
     for (size_t i = 0; i < rPropVal.size(); ++i)
     {
         char ch = rPropVal[i];
-        if (ch == '\\')
-        {
-            *mPos = ch;
-            ++mPos;
-            *mPos = ch;
-            ++mPos;
-        }
-        else if (ch == '"')
+        switch (ch)
         {
-            *mPos = '\\';
-            ++mPos;
-            *mPos = ch;
-            ++mPos;
-        }
-        else
-        {
-            *mPos = ch;
-            ++mPos;
+            case '\b':
+            case '\t':
+            case '\n':
+            case '\f':
+            case '\r':
+            case '"':
+            case '/':
+            case '\\':
+                writeEscapedSequence(ch, mPos);
+                break;
+            case '\xE2': // Special processing of U+2028 and U+2029
+                if (i + 2 < rPropVal.size() && rPropVal[i + 1] == '\x80'
+                    && (rPropVal[i + 2] == '\xA8' || rPropVal[i + 2] == '\xA9'))
+                {
+                    writeEscapedSequence(rPropVal[i + 2] == '\xA8' ? 0x2028 : 0x2029, mPos);
+                    i += 2;
+                    break;
+                }
+                [[fallthrough]];
+            default:
+                *mPos = ch;
+                ++mPos;
+                break;
         }
     }
 

Reply via email to