Title: [131836] trunk/Source
Revision
131836
Author
msab...@apple.com
Date
2012-10-18 18:22:21 -0700 (Thu, 18 Oct 2012)

Log Message

convertUTF8ToUTF16() Should Check for ASCII Input
https://bugs.webkit.org/show_bug.cgi?id=99739

Reviewed by Geoffrey Garen.

Source/JavaScriptCore: 

Using the updated convertUTF8ToUTF16(), we can determine if it makes more sense to 
create a string using the 8-bit source.  Added a new OpaqueJSString::create(LChar*, unsigned).
Had to add a cast in JSStringCreateWithCFString to differentiate which create() to call.

* API/JSStringRef.cpp:
(JSStringCreateWithUTF8CString):
* API/JSStringRefCF.cpp:
(JSStringCreateWithCFString):
* API/OpaqueJSString.h:
(OpaqueJSString::create):
(OpaqueJSString):
(OpaqueJSString::OpaqueJSString):

Source/WTF: 

Added code to accumulate the "or" of all characters seen during the UTF8 to UTF16 conversion.  This is
used to check to see if all characters are ASCII and is returned via a bool*.

* wtf/unicode/UTF8.cpp:
(WTF::Unicode::convertUTF8ToUTF16):
* wtf/unicode/UTF8.h:

Modified Paths

Diff

Modified: trunk/Source/_javascript_Core/API/JSStringRef.cpp (131835 => 131836)


--- trunk/Source/_javascript_Core/API/JSStringRef.cpp	2012-10-19 01:13:41 UTC (rev 131835)
+++ trunk/Source/_javascript_Core/API/JSStringRef.cpp	2012-10-19 01:22:21 UTC (rev 131836)
@@ -46,8 +46,12 @@
         size_t length = strlen(string);
         Vector<UChar, 1024> buffer(length);
         UChar* p = buffer.data();
-        if (conversionOK == convertUTF8ToUTF16(&string, string + length, &p, p + length))
+        bool sourceIsAllASCII;
+        if (conversionOK == convertUTF8ToUTF16(&string, string + length, &p, p + length, &sourceIsAllASCII)) {
+            if (sourceIsAllASCII)
+                return OpaqueJSString::create(reinterpret_cast<const LChar*>(string), length).leakRef();
             return OpaqueJSString::create(buffer.data(), p - buffer.data()).leakRef();
+        }
     }
 
     // Null string.

Modified: trunk/Source/_javascript_Core/API/JSStringRefCF.cpp (131835 => 131836)


--- trunk/Source/_javascript_Core/API/JSStringRefCF.cpp	2012-10-19 01:13:41 UTC (rev 131835)
+++ trunk/Source/_javascript_Core/API/JSStringRefCF.cpp	2012-10-19 01:22:21 UTC (rev 131836)
@@ -46,7 +46,7 @@
         COMPILE_ASSERT(sizeof(UniChar) == sizeof(UChar), unichar_and_uchar_must_be_same_size);
         return OpaqueJSString::create(reinterpret_cast<UChar*>(buffer.get()), length).leakRef();
     } else {
-        return OpaqueJSString::create(0, 0).leakRef();
+        return OpaqueJSString::create(static_cast<const LChar*>(0), 0).leakRef();
     }
 }
 

Modified: trunk/Source/_javascript_Core/API/OpaqueJSString.h (131835 => 131836)


--- trunk/Source/_javascript_Core/API/OpaqueJSString.h	2012-10-19 01:13:41 UTC (rev 131835)
+++ trunk/Source/_javascript_Core/API/OpaqueJSString.h	2012-10-19 01:22:21 UTC (rev 131836)
@@ -41,6 +41,11 @@
         return adoptRef(new OpaqueJSString);
     }
 
+    static PassRefPtr<OpaqueJSString> create(const LChar* characters, unsigned length)
+    {
+        return adoptRef(new OpaqueJSString(characters, length));
+    }
+
     static PassRefPtr<OpaqueJSString> create(const UChar* characters, unsigned length)
     {
         return adoptRef(new OpaqueJSString(characters, length));
@@ -70,6 +75,11 @@
             m_string = String(string.characters16(), string.length());
     }
 
+    OpaqueJSString(const LChar* characters, unsigned length)
+    {
+        m_string = String(characters, length);
+    }
+
     OpaqueJSString(const UChar* characters, unsigned length)
     {
         m_string = String(characters, length);

Modified: trunk/Source/_javascript_Core/ChangeLog (131835 => 131836)


--- trunk/Source/_javascript_Core/ChangeLog	2012-10-19 01:13:41 UTC (rev 131835)
+++ trunk/Source/_javascript_Core/ChangeLog	2012-10-19 01:22:21 UTC (rev 131836)
@@ -1,3 +1,23 @@
+2012-10-18  Michael Saboff  <msab...@apple.com>
+
+        convertUTF8ToUTF16() Should Check for ASCII Input
+        ihttps://bugs.webkit.org/show_bug.cgi?id=99739
+
+        Reviewed by Geoffrey Garen.
+
+        Using the updated convertUTF8ToUTF16() , we can determine if is makes more sense to 
+        create a string using the 8 bit source.  Added a new OpaqueJSString::create(LChar*, unsigned).
+        Had to add a cast n JSStringCreateWithCFString to differentiate which create() to call.
+
+        * API/JSStringRef.cpp:
+        (JSStringCreateWithUTF8CString):
+        * API/JSStringRefCF.cpp:
+        (JSStringCreateWithCFString):
+        * API/OpaqueJSString.h:
+        (OpaqueJSString::create):
+        (OpaqueJSString):
+        (OpaqueJSString::OpaqueJSString):
+
 2012-10-18  Oliver Hunt  <oli...@apple.com>
 
         Unbreak jsc tests.  Last minute "clever"-ness is clearly just not

Modified: trunk/Source/WTF/ChangeLog (131835 => 131836)


--- trunk/Source/WTF/ChangeLog	2012-10-19 01:13:41 UTC (rev 131835)
+++ trunk/Source/WTF/ChangeLog	2012-10-19 01:22:21 UTC (rev 131836)
@@ -1,5 +1,19 @@
 2012-10-18  Michael Saboff  <msab...@apple.com>
 
+        convertUTF8ToUTF16() Should Check for ASCII Input
+        ihttps://bugs.webkit.org/show_bug.cgi?id=99739
+
+        Reviewed by Geoffrey Garen.
+
+        Added code to accumulate the "or" of all characters seen during the UTF8 to UTF16 conversion.  This is
+        used to check to see if all characters are ASCII and is returned via a bool*.
+
+        * wtf/unicode/UTF8.cpp:
+        (WTF::Unicode::convertUTF8ToUTF16):
+        * wtf/unicode/UTF8.h:
+
+2012-10-18  Michael Saboff  <msab...@apple.com>
+
         Mac WTF build checks dependencies before copying header files
         https://bugs.webkit.org/show_bug.cgi?id=99770
 

Modified: trunk/Source/WTF/wtf/unicode/UTF8.cpp (131835 => 131836)


--- trunk/Source/WTF/wtf/unicode/UTF8.cpp	2012-10-19 01:13:41 UTC (rev 131835)
+++ trunk/Source/WTF/wtf/unicode/UTF8.cpp	2012-10-19 01:22:21 UTC (rev 131836)
@@ -297,11 +297,12 @@
 
 ConversionResult convertUTF8ToUTF16(
     const char** sourceStart, const char* sourceEnd, 
-    UChar** targetStart, UChar* targetEnd, bool strict)
+    UChar** targetStart, UChar* targetEnd, bool* sourceAllASCII, bool strict)
 {
     ConversionResult result = conversionOK;
     const char* source = *sourceStart;
     UChar* target = *targetStart;
+    UChar orAllData = 0;
     while (source < sourceEnd) {
         int utf8SequenceLength = inlineUTF8SequenceLength(*source);
         if (sourceEnd - source < utf8SequenceLength)  {
@@ -329,10 +330,14 @@
                     source -= utf8SequenceLength; // return to the illegal value itself
                     result = sourceIllegal;
                     break;
-                } else
+                } else {
                     *target++ = replacementCharacter;
-            } else
+                    orAllData |= replacementCharacter;
+                }
+            } else {
                 *target++ = character; // normal case
+                orAllData |= character;
+            }
         } else if (U_IS_SUPPLEMENTARY(character)) {
             // target is a character in range 0xFFFF - 0x10FFFF
             if (target + 1 >= targetEnd) {
@@ -342,17 +347,24 @@
             }
             *target++ = U16_LEAD(character);
             *target++ = U16_TRAIL(character);
+            orAllData = 0xffff;
         } else {
             if (strict) {
                 source -= utf8SequenceLength; // return to the start
                 result = sourceIllegal;
                 break; // Bail out; shouldn't continue
-            } else
+            } else {
                 *target++ = replacementCharacter;
+                orAllData |= replacementCharacter;
+            }
         }
     }
     *sourceStart = source;
     *targetStart = target;
+
+    if (sourceAllASCII)
+        *sourceAllASCII = !(orAllData & 0x7f);
+
     return result;
 }
 

Modified: trunk/Source/WTF/wtf/unicode/UTF8.h (131835 => 131836)


--- trunk/Source/WTF/wtf/unicode/UTF8.h	2012-10-19 01:13:41 UTC (rev 131835)
+++ trunk/Source/WTF/wtf/unicode/UTF8.h	2012-10-19 01:22:21 UTC (rev 131836)
@@ -64,7 +64,7 @@
 
     WTF_EXPORT_PRIVATE ConversionResult convertUTF8ToUTF16(
                     const char** sourceStart, const char* sourceEnd, 
-                    UChar** targetStart, UChar* targetEnd, bool strict = true);
+                    UChar** targetStart, UChar* targetEnd, bool* isSourceAllASCII = 0, bool strict = true);
 
     WTF_EXPORT_PRIVATE ConversionResult convertLatin1ToUTF8(
                     const LChar** sourceStart, const LChar* sourceEnd, 
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
http://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to