- Revision
- 131836
- Author
- msab...@apple.com
- Date
- 2012-10-18 18:22:21 -0700 (Thu, 18 Oct 2012)
Log Message
convertUTF8ToUTF16() Should Check for ASCII Input
https://bugs.webkit.org/show_bug.cgi?id=99739
Reviewed by Geoffrey Garen.
Source/_javascript_Core:
Using the updated convertUTF8ToUTF16(), we can determine if it makes more sense to
create a string using the 8 bit source. Added a new OpaqueJSString::create(LChar*, unsigned).
Had to add a cast in JSStringCreateWithCFString to differentiate which create() to call.
* API/JSStringRef.cpp:
(JSStringCreateWithUTF8CString):
* API/JSStringRefCF.cpp:
(JSStringCreateWithCFString):
* API/OpaqueJSString.h:
(OpaqueJSString::create):
(OpaqueJSString):
(OpaqueJSString::OpaqueJSString):
Source/WTF:
Added code to accumulate the "or" of all characters seen during the UTF8 to UTF16 conversion. This is
used to check to see if all characters are ASCII and is returned via a bool*.
* wtf/unicode/UTF8.cpp:
(WTF::Unicode::convertUTF8ToUTF16):
* wtf/unicode/UTF8.h:
Modified Paths
Diff
Modified: trunk/Source/_javascript_Core/API/JSStringRef.cpp (131835 => 131836)
--- trunk/Source/_javascript_Core/API/JSStringRef.cpp 2012-10-19 01:13:41 UTC (rev 131835)
+++ trunk/Source/_javascript_Core/API/JSStringRef.cpp 2012-10-19 01:22:21 UTC (rev 131836)
@@ -46,8 +46,12 @@
size_t length = strlen(string);
Vector<UChar, 1024> buffer(length);
UChar* p = buffer.data();
- if (conversionOK == convertUTF8ToUTF16(&string, string + length, &p, p + length))
+ bool sourceIsAllASCII;
+ if (conversionOK == convertUTF8ToUTF16(&string, string + length, &p, p + length, &sourceIsAllASCII)) {
+ if (sourceIsAllASCII)
+ return OpaqueJSString::create(reinterpret_cast<const LChar*>(string), length).leakRef();
return OpaqueJSString::create(buffer.data(), p - buffer.data()).leakRef();
+ }
}
// Null string.
Modified: trunk/Source/_javascript_Core/API/JSStringRefCF.cpp (131835 => 131836)
--- trunk/Source/_javascript_Core/API/JSStringRefCF.cpp 2012-10-19 01:13:41 UTC (rev 131835)
+++ trunk/Source/_javascript_Core/API/JSStringRefCF.cpp 2012-10-19 01:22:21 UTC (rev 131836)
@@ -46,7 +46,7 @@
COMPILE_ASSERT(sizeof(UniChar) == sizeof(UChar), unichar_and_uchar_must_be_same_size);
return OpaqueJSString::create(reinterpret_cast<UChar*>(buffer.get()), length).leakRef();
} else {
- return OpaqueJSString::create(0, 0).leakRef();
+ return OpaqueJSString::create(static_cast<const LChar*>(0), 0).leakRef();
}
}
Modified: trunk/Source/_javascript_Core/API/OpaqueJSString.h (131835 => 131836)
--- trunk/Source/_javascript_Core/API/OpaqueJSString.h 2012-10-19 01:13:41 UTC (rev 131835)
+++ trunk/Source/_javascript_Core/API/OpaqueJSString.h 2012-10-19 01:22:21 UTC (rev 131836)
@@ -41,6 +41,11 @@
return adoptRef(new OpaqueJSString);
}
+ static PassRefPtr<OpaqueJSString> create(const LChar* characters, unsigned length)
+ {
+ return adoptRef(new OpaqueJSString(characters, length));
+ }
+
static PassRefPtr<OpaqueJSString> create(const UChar* characters, unsigned length)
{
return adoptRef(new OpaqueJSString(characters, length));
@@ -70,6 +75,11 @@
m_string = String(string.characters16(), string.length());
}
+ OpaqueJSString(const LChar* characters, unsigned length)
+ {
+ m_string = String(characters, length);
+ }
+
OpaqueJSString(const UChar* characters, unsigned length)
{
m_string = String(characters, length);
Modified: trunk/Source/_javascript_Core/ChangeLog (131835 => 131836)
--- trunk/Source/_javascript_Core/ChangeLog 2012-10-19 01:13:41 UTC (rev 131835)
+++ trunk/Source/_javascript_Core/ChangeLog 2012-10-19 01:22:21 UTC (rev 131836)
@@ -1,3 +1,23 @@
+2012-10-18 Michael Saboff <msab...@apple.com>
+
+ convertUTF8ToUTF16() Should Check for ASCII Input
+ ihttps://bugs.webkit.org/show_bug.cgi?id=99739
+
+ Reviewed by Geoffrey Garen.
+
+ Using the updated convertUTF8ToUTF16() , we can determine if is makes more sense to
+ create a string using the 8 bit source. Added a new OpaqueJSString::create(LChar*, unsigned).
+ Had to add a cast n JSStringCreateWithCFString to differentiate which create() to call.
+
+ * API/JSStringRef.cpp:
+ (JSStringCreateWithUTF8CString):
+ * API/JSStringRefCF.cpp:
+ (JSStringCreateWithCFString):
+ * API/OpaqueJSString.h:
+ (OpaqueJSString::create):
+ (OpaqueJSString):
+ (OpaqueJSString::OpaqueJSString):
+
2012-10-18 Oliver Hunt <oli...@apple.com>
Unbreak jsc tests. Last minute "clever"-ness is clearly just not
Modified: trunk/Source/WTF/ChangeLog (131835 => 131836)
--- trunk/Source/WTF/ChangeLog 2012-10-19 01:13:41 UTC (rev 131835)
+++ trunk/Source/WTF/ChangeLog 2012-10-19 01:22:21 UTC (rev 131836)
@@ -1,5 +1,19 @@
2012-10-18 Michael Saboff <msab...@apple.com>
+ convertUTF8ToUTF16() Should Check for ASCII Input
+ ihttps://bugs.webkit.org/show_bug.cgi?id=99739
+
+ Reviewed by Geoffrey Garen.
+
+ Added code to accumulate the "or" of all characters seen during the UTF8 to UTF16 conversion. This is
+ used to check to see if all characters are ASCII and is returned via a bool*.
+
+ * wtf/unicode/UTF8.cpp:
+ (WTF::Unicode::convertUTF8ToUTF16):
+ * wtf/unicode/UTF8.h:
+
+2012-10-18 Michael Saboff <msab...@apple.com>
+
Mac WTF build checks dependencies before copying header files
https://bugs.webkit.org/show_bug.cgi?id=99770
Modified: trunk/Source/WTF/wtf/unicode/UTF8.cpp (131835 => 131836)
--- trunk/Source/WTF/wtf/unicode/UTF8.cpp 2012-10-19 01:13:41 UTC (rev 131835)
+++ trunk/Source/WTF/wtf/unicode/UTF8.cpp 2012-10-19 01:22:21 UTC (rev 131836)
@@ -297,11 +297,12 @@
ConversionResult convertUTF8ToUTF16(
const char** sourceStart, const char* sourceEnd,
- UChar** targetStart, UChar* targetEnd, bool strict)
+ UChar** targetStart, UChar* targetEnd, bool* sourceAllASCII, bool strict)
{
ConversionResult result = conversionOK;
const char* source = *sourceStart;
UChar* target = *targetStart;
+ UChar orAllData = 0;
while (source < sourceEnd) {
int utf8SequenceLength = inlineUTF8SequenceLength(*source);
if (sourceEnd - source < utf8SequenceLength) {
@@ -329,10 +330,14 @@
source -= utf8SequenceLength; // return to the illegal value itself
result = sourceIllegal;
break;
- } else
+ } else {
*target++ = replacementCharacter;
- } else
+ orAllData |= replacementCharacter;
+ }
+ } else {
*target++ = character; // normal case
+ orAllData |= character;
+ }
} else if (U_IS_SUPPLEMENTARY(character)) {
// target is a character in range 0xFFFF - 0x10FFFF
if (target + 1 >= targetEnd) {
@@ -342,17 +347,24 @@
}
*target++ = U16_LEAD(character);
*target++ = U16_TRAIL(character);
+ orAllData = 0xffff;
} else {
if (strict) {
source -= utf8SequenceLength; // return to the start
result = sourceIllegal;
break; // Bail out; shouldn't continue
- } else
+ } else {
*target++ = replacementCharacter;
+ orAllData |= replacementCharacter;
+ }
}
}
*sourceStart = source;
*targetStart = target;
+
+ if (sourceAllASCII)
+ *sourceAllASCII = !(orAllData & 0x7f);
+
return result;
}
Modified: trunk/Source/WTF/wtf/unicode/UTF8.h (131835 => 131836)
--- trunk/Source/WTF/wtf/unicode/UTF8.h 2012-10-19 01:13:41 UTC (rev 131835)
+++ trunk/Source/WTF/wtf/unicode/UTF8.h 2012-10-19 01:22:21 UTC (rev 131836)
@@ -64,7 +64,7 @@
WTF_EXPORT_PRIVATE ConversionResult convertUTF8ToUTF16(
const char** sourceStart, const char* sourceEnd,
- UChar** targetStart, UChar* targetEnd, bool strict = true);
+ UChar** targetStart, UChar* targetEnd, bool* isSourceAllASCII = 0, bool strict = true);
WTF_EXPORT_PRIVATE ConversionResult convertLatin1ToUTF8(
const LChar** sourceStart, const LChar* sourceEnd,