Diff
Modified: trunk/LayoutTests/ChangeLog (103245 => 103246)
--- trunk/LayoutTests/ChangeLog 2011-12-19 18:11:20 UTC (rev 103245)
+++ trunk/LayoutTests/ChangeLog 2011-12-19 18:30:20 UTC (rev 103246)
@@ -1,3 +1,15 @@
+2011-12-19 Adam Barth <aba...@webkit.org>
+
+ WebKit should support HTML entities that expand to more than one character
+ https://bugs.webkit.org/show_bug.cgi?id=74826
+
+ Reviewed by Darin Adler.
+
+ Show test progression.
+
+ * html5lib/runner-expected.txt:
+ * platform/chromium/html5lib/runner-expected.txt:
+
2011-12-19 Eric Carlson <eric.carl...@apple.com>
Render text tracks
Modified: trunk/LayoutTests/html5lib/runner-expected.txt (103245 => 103246)
--- trunk/LayoutTests/html5lib/runner-expected.txt 2011-12-19 18:11:20 UTC (rev 103245)
+++ trunk/LayoutTests/html5lib/runner-expected.txt 2011-12-19 18:30:20 UTC (rev 103246)
@@ -166,103 +166,8 @@
resources/tests23.dat: PASS
-resources/tests24.dat:
-1
-2
-3
-4
-5
-6
+resources/tests24.dat: PASS
-Test 1 of 8 in resources/tests24.dat failed. Input:
-<!DOCTYPE html>≂̸
-Got:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "≂̸"
-Expected:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "≂̸"
-
-Test 2 of 8 in resources/tests24.dat failed. Input:
-<!DOCTYPE html>≂̸A
-Got:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "≂̸A"
-Expected:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "≂̸A"
-
-Test 3 of 8 in resources/tests24.dat failed. Input:
-<!DOCTYPE html>  
-Got:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "  "
-Expected:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| " "
-
-Test 4 of 8 in resources/tests24.dat failed. Input:
-<!DOCTYPE html>  A
-Got:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "  A"
-Expected:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| " A"
-
-Test 5 of 8 in resources/tests24.dat failed. Input:
-<!DOCTYPE html>⊂⃒
-Got:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "⊂⃒"
-Expected:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "⊂⃒"
-
-Test 6 of 8 in resources/tests24.dat failed. Input:
-<!DOCTYPE html>⊂⃒A
-Got:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "⊂⃒A"
-Expected:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "⊂⃒A"
resources/tests25.dat: PASS
resources/tests26.dat: PASS
Modified: trunk/LayoutTests/platform/chromium/html5lib/runner-expected.txt (103245 => 103246)
--- trunk/LayoutTests/platform/chromium/html5lib/runner-expected.txt 2011-12-19 18:11:20 UTC (rev 103245)
+++ trunk/LayoutTests/platform/chromium/html5lib/runner-expected.txt 2011-12-19 18:30:20 UTC (rev 103246)
@@ -166,103 +166,8 @@
resources/tests23.dat: PASS
-resources/tests24.dat:
-1
-2
-3
-4
-5
-6
+resources/tests24.dat: PASS
-Test 1 of 8 in resources/tests24.dat failed. Input:
-<!DOCTYPE html>≂̸
-Got:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "≂̸"
-Expected:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "≂̸"
-
-Test 2 of 8 in resources/tests24.dat failed. Input:
-<!DOCTYPE html>≂̸A
-Got:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "≂̸A"
-Expected:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "≂̸A"
-
-Test 3 of 8 in resources/tests24.dat failed. Input:
-<!DOCTYPE html>  
-Got:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "  "
-Expected:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| " "
-
-Test 4 of 8 in resources/tests24.dat failed. Input:
-<!DOCTYPE html>  A
-Got:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "  A"
-Expected:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| " A"
-
-Test 5 of 8 in resources/tests24.dat failed. Input:
-<!DOCTYPE html>⊂⃒
-Got:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "⊂⃒"
-Expected:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "⊂⃒"
-
-Test 6 of 8 in resources/tests24.dat failed. Input:
-<!DOCTYPE html>⊂⃒A
-Got:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "⊂⃒A"
-Expected:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| "⊂⃒A"
resources/tests25.dat: PASS
resources/tests26.dat: PASS
Modified: trunk/Source/WebCore/ChangeLog (103245 => 103246)
--- trunk/Source/WebCore/ChangeLog 2011-12-19 18:11:20 UTC (rev 103245)
+++ trunk/Source/WebCore/ChangeLog 2011-12-19 18:30:20 UTC (rev 103246)
@@ -1,3 +1,55 @@
+2011-12-19 Adam Barth <aba...@webkit.org>
+
+ WebKit should support HTML entities that expand to more than one character
+ https://bugs.webkit.org/show_bug.cgi?id=74826
+
+ Reviewed by Darin Adler.
+
+ Tests: html5lib/runner.html
+
+ * html/parser/HTMLEntityNames.in:
+ - Add missing HTML entities from HTML5 spec. I'll sort this file
+ in a followup patch. (It's not quite sorted perfectly and
+ sorting in this patch would introduce noise into the patch.)
+ * html/parser/HTMLEntityParser.cpp:
+ (WebCore::decodeNamedEntity):
+ - convertToUTF16 always returns true, so make it return void instead.
+ - Teach the entity parse that some entities expand to two characters.
+ * html/parser/HTMLEntityParser.h:
+ - Add a warning that decodeNamedEntity is really a broken API.
+ - This patch doesn't actually change any behavior of this API, but
+ it does illustrate that the two callers of this API (the two XML
+ parsers) really need to move a more sensible API.
+ * html/parser/HTMLEntitySearch.cpp:
+ (WebCore::HTMLEntitySearch::HTMLEntitySearch):
+ (WebCore::HTMLEntitySearch::advance):
+ * html/parser/HTMLEntitySearch.h:
+ (WebCore::HTMLEntitySearch::fail):
+ - Remove the concept of currentValue. This isn't really used for
+ anything and conflicts with the idea that entities can expand
+ to more than one character.
+ * html/parser/HTMLEntityTable.h:
+ - Add storage for two UChar32 values per entity.
+ * html/parser/create-html-entity-table:
+ (convert_value_to_int):
+ - Teach this script to handle entities that expand to multiple
+ Unicode characters.
+ * xml/parser/CharacterReferenceParserInlineMethods.h:
+ (WebCore::consumeCharacterReference):
+ - Update this function now that convertToUTF16 returns void.
+ * xml/parser/XMLCharacterReferenceParser.cpp:
+ - The XML version of convertToUTF16 also needs to return void to
+ match the HTML signature. (It used to return true all the time
+ as well.)
+ * xml/parser/XMLTreeBuilder.cpp:
+ (WebCore::XMLTreeBuilder::processHTMLEntity):
+ - Update this caller use leftValue instead of value. My sense is
+ that this code is moderately broken today because it's using HTML
+ entities in parsing XML. I've added a FIXME. This code is
+ disabled in all builds, so I don't feel a big need to fix this
+ issue in this patch. We should either finish this project or
+ delete this complexity from the project.
+
2011-12-19 Andreas Kling <kl...@webkit.org>
Avoid instantiating ScrollAnimators when possible.
Modified: trunk/Source/WebCore/html/parser/HTMLEntityNames.in (103245 => 103246)
--- trunk/Source/WebCore/html/parser/HTMLEntityNames.in 2011-12-19 18:11:20 UTC (rev 103245)
+++ trunk/Source/WebCore/html/parser/HTMLEntityNames.in 2011-12-19 18:30:20 UTC (rev 103246)
@@ -1814,8 +1814,8 @@
"shcy;","U+00448"
"shortmid;","U+02223"
"shortparallel;","U+02225"
-"shy;","U+000AD "
-"shy","U+000AD "
+"shy;","U+000AD"
+"shy","U+000AD"
"sigma;","U+003C3"
"sigmaf;","U+003C2"
"sigmav;","U+003C2"
@@ -2136,3 +2136,96 @@
"zscr;","U+1D4CF"
"zwj;","U+0200D"
"zwnj;","U+0200C"
+"acE;","U+0223E U+00333"
+"bne;","U+0003D U+020E5"
+"bnequiv;","U+02261 U+020E5"
+"caps;","U+02229 U+0FE00"
+"cups;","U+0222A U+0FE00"
+"fjlig;","U+00066 U+0006A"
+"gesl;","U+022DB U+0FE00"
+"gvertneqq;","U+02269 U+0FE00"
+"gvnE;","U+02269 U+0FE00"
+"lates;","U+02AAD U+0FE00"
+"lesg;","U+022DA U+0FE00"
+"lvertneqq;","U+02268 U+0FE00"
+"lvnE;","U+02268 U+0FE00"
+"nang;","U+02220 U+020D2"
+"napE;","U+02A70 U+00338"
+"napid;","U+0224B U+00338"
+"nbump;","U+0224E U+00338"
+"nbumpe;","U+0224F U+00338"
+"ncongdot;","U+02A6D U+00338"
+"nedot;","U+02250 U+00338"
+"nesim;","U+02242 U+00338"
+"ngE;","U+02267 U+00338"
+"ngeqq;","U+02267 U+00338"
+"ngeqslant;","U+02A7E U+00338"
+"nges;","U+02A7E U+00338"
+"nGg;","U+022D9 U+00338"
+"nGt;","U+0226B U+020D2"
+"nGtv;","U+0226B U+00338"
+"nlE;","U+02266 U+00338"
+"nleqq;","U+02266 U+00338"
+"nleqslant;","U+02A7D U+00338"
+"nles;","U+02A7D U+00338"
+"nLl;","U+022D8 U+00338"
+"nLt;","U+0226A U+020D2"
+"nLtv;","U+0226A U+00338"
+"NotEqualTilde;","U+02242 U+00338"
+"NotGreaterFullEqual;","U+02267 U+00338"
+"NotGreaterGreater;","U+0226B U+00338"
+"NotGreaterSlantEqual;","U+02A7E U+00338"
+"NotHumpDownHump;","U+0224E U+00338"
+"NotHumpEqual;","U+0224F U+00338"
+"notindot;","U+022F5 U+00338"
+"notinE;","U+022F9 U+00338"
+"NotLeftTriangleBar;","U+029CF U+00338"
+"NotLessLess;","U+0226A U+00338"
+"NotLessSlantEqual;","U+02A7D U+00338"
+"NotNestedGreaterGreater;","U+02AA2 U+00338"
+"NotNestedLessLess;","U+02AA1 U+00338"
+"NotPrecedesEqual;","U+02AAF U+00338"
+"NotRightTriangleBar;","U+029D0 U+00338"
+"NotSquareSubset;","U+0228F U+00338"
+"NotSquareSuperset;","U+02290 U+00338"
+"NotSubset;","U+02282 U+020D2"
+"NotSucceedsEqual;","U+02AB0 U+00338"
+"NotSucceedsTilde;","U+0227F U+00338"
+"NotSuperset;","U+02283 U+020D2"
+"nparsl;","U+02AFD U+020E5"
+"npart;","U+02202 U+00338"
+"npre;","U+02AAF U+00338"
+"npreceq;","U+02AAF U+00338"
+"nrarrc;","U+02933 U+00338"
+"nrarrw;","U+0219D U+00338"
+"nsce;","U+02AB0 U+00338"
+"nsubE;","U+02AC5 U+00338"
+"nsubset;","U+02282 U+020D2"
+"nsubseteqq;","U+02AC5 U+00338"
+"nsucceq;","U+02AB0 U+00338"
+"nsupE;","U+02AC6 U+00338"
+"nsupset;","U+02283 U+020D2"
+"nsupseteqq;","U+02AC6 U+00338"
+"nvap;","U+0224D U+020D2"
+"nvge;","U+02265 U+020D2"
+"nvgt;","U+0003E U+020D2"
+"nvle;","U+02264 U+020D2"
+"nvlt;","U+0003C U+020D2"
+"nvltrie;","U+022B4 U+020D2"
+"nvrtrie;","U+022B5 U+020D2"
+"nvsim;","U+0223C U+020D2"
+"race;","U+0223D U+00331"
+"smtes;","U+02AAC U+0FE00"
+"sqcaps;","U+02293 U+0FE00"
+"sqcups;","U+02294 U+0FE00"
+"ThickSpace;","U+0205F U+0200A"
+"varsubsetneq;","U+0228A U+0FE00"
+"varsubsetneqq;","U+02ACB U+0FE00"
+"varsupsetneq;","U+0228B U+0FE00"
+"varsupsetneqq;","U+02ACC U+0FE00"
+"vnsub;","U+02282 U+020D2"
+"vnsup;","U+02283 U+020D2"
+"vsubnE;","U+02ACB U+0FE00"
+"vsubne;","U+0228A U+0FE00"
+"vsupnE;","U+02ACC U+0FE00"
+"vsupne;","U+0228B U+0FE00"
Modified: trunk/Source/WebCore/html/parser/HTMLEntityParser.cpp (103245 => 103246)
--- trunk/Source/WebCore/html/parser/HTMLEntityParser.cpp 2011-12-19 18:11:20 UTC (rev 103245)
+++ trunk/Source/WebCore/html/parser/HTMLEntityParser.cpp 2011-12-19 18:30:20 UTC (rev 103246)
@@ -70,17 +70,16 @@
return value;
}
- inline static bool convertToUTF16(UChar32 value, StringBuilder& decodedEntity)
+ inline static void convertToUTF16(UChar32 value, StringBuilder& decodedEntity)
{
if (U_IS_BMP(value)) {
UChar character = static_cast<UChar>(value);
ASSERT(character == value);
decodedEntity.append(character);
- return true;
+ return;
}
decodedEntity.append(U16_LEAD(value));
decodedEntity.append(U16_TRAIL(value));
- return true;
}
inline static bool acceptMalformed() { return true; }
@@ -105,7 +104,6 @@
return false;
}
if (!entitySearch.mostRecentMatch()) {
- ASSERT(!entitySearch.currentValue());
unconsumeCharacters(source, consumedCharacters);
return false;
}
@@ -129,7 +127,10 @@
if (entitySearch.mostRecentMatch()->lastCharacter() == ';'
|| !additionalAllowedCharacter
|| !(isAlphaNumeric(cc) || cc == '=')) {
- return convertToUTF16(entitySearch.mostRecentMatch()->value, decodedEntity);
+ convertToUTF16(entitySearch.mostRecentMatch()->firstValue, decodedEntity);
+ if (entitySearch.mostRecentMatch()->secondValue)
+ convertToUTF16(entitySearch.mostRecentMatch()->secondValue, decodedEntity);
+ return true;
}
unconsumeCharacters(source, consumedCharacters);
return false;
@@ -138,7 +139,6 @@
}
-
bool consumeHTMLEntity(SegmentedString& source, StringBuilder& decodedEntity, bool& notEnoughCharacters, UChar additionalAllowedCharacter)
{
return consumeCharacterReference<HTMLEntityParser>(source, decodedEntity, notEnoughCharacters, additionalAllowedCharacter);
@@ -153,14 +153,16 @@
return 0;
}
search.advance(';');
- UChar32 entityValue = search.currentValue();
- if (U16_LENGTH(entityValue) != 1) {
- // Callers need to move off this API if the entity table has values
- // which do no fit in a 16 bit UChar!
- ASSERT_NOT_REACHED();
+ if (!search.isEntityPrefix())
return 0;
+
+ UChar32 firstValue = search.mostRecentMatch()->firstValue;
+ if (U16_LENGTH(firstValue) != 1 || search.mostRecentMatch()->secondValue) {
+ // FIXME: Callers need to move off this API. Not all entities can be
+ // represented in a single UChar!
+ return 0;
}
- return static_cast<UChar>(entityValue);
+ return static_cast<UChar>(firstValue);
}
} // namespace WebCore
Modified: trunk/Source/WebCore/html/parser/HTMLEntityParser.h (103245 => 103246)
--- trunk/Source/WebCore/html/parser/HTMLEntityParser.h 2011-12-19 18:11:20 UTC (rev 103245)
+++ trunk/Source/WebCore/html/parser/HTMLEntityParser.h 2011-12-19 18:30:20 UTC (rev 103246)
@@ -34,6 +34,7 @@
bool consumeHTMLEntity(SegmentedString&, StringBuilder& decodedEntity, bool& notEnoughCharacters, UChar additionalAllowedCharacter = '\0');
// Used by the XML parser. Not suitable for use in HTML parsing. Use consumeHTMLEntity instead.
+// FIXME: Move the XML parser to an entity decoding function works for non-BMP characters!
UChar decodeNamedEntity(const char*);
}
Modified: trunk/Source/WebCore/html/parser/HTMLEntitySearch.cpp (103245 => 103246)
--- trunk/Source/WebCore/html/parser/HTMLEntitySearch.cpp 2011-12-19 18:11:20 UTC (rev 103245)
+++ trunk/Source/WebCore/html/parser/HTMLEntitySearch.cpp 2011-12-19 18:30:20 UTC (rev 103246)
@@ -41,7 +41,6 @@
HTMLEntitySearch::HTMLEntitySearch()
: m_currentLength(0)
- , m_currentValue(0)
, m_mostRecentMatch(0)
, m_first(HTMLEntityTable::firstEntry())
, m_last(HTMLEntityTable::lastEntry())
@@ -124,11 +123,9 @@
}
++m_currentLength;
if (m_first->length != m_currentLength) {
- m_currentValue = 0;
return;
}
m_mostRecentMatch = m_first;
- m_currentValue = m_mostRecentMatch->value;
}
}
Modified: trunk/Source/WebCore/html/parser/HTMLEntitySearch.h (103245 => 103246)
--- trunk/Source/WebCore/html/parser/HTMLEntitySearch.h 2011-12-19 18:11:20 UTC (rev 103245)
+++ trunk/Source/WebCore/html/parser/HTMLEntitySearch.h 2011-12-19 18:30:20 UTC (rev 103246)
@@ -39,7 +39,6 @@
void advance(UChar);
bool isEntityPrefix() const { return !!m_first; }
- UChar32 currentValue() const { return m_currentValue; }
int currentLength() const { return m_currentLength; }
const HTMLEntityTableEntry* mostRecentMatch() const { return m_mostRecentMatch; }
@@ -57,13 +56,11 @@
void fail()
{
- m_currentValue = 0;
m_first = 0;
m_last = 0;
}
int m_currentLength;
- UChar32 m_currentValue;
const HTMLEntityTableEntry* m_mostRecentMatch;
const HTMLEntityTableEntry* m_first;
Modified: trunk/Source/WebCore/html/parser/HTMLEntityTable.h (103245 => 103246)
--- trunk/Source/WebCore/html/parser/HTMLEntityTable.h 2011-12-19 18:11:20 UTC (rev 103245)
+++ trunk/Source/WebCore/html/parser/HTMLEntityTable.h 2011-12-19 18:30:20 UTC (rev 103246)
@@ -35,7 +35,8 @@
const UChar* entity;
int length;
- UChar32 value;
+ UChar32 firstValue;
+ UChar32 secondValue;
};
class HTMLEntityTable {
Modified: trunk/Source/WebCore/html/parser/create-html-entity-table (103245 => 103246)
--- trunk/Source/WebCore/html/parser/create-html-entity-table 2011-12-19 18:11:20 UTC (rev 103245)
+++ trunk/Source/WebCore/html/parser/create-html-entity-table 2011-12-19 18:30:20 UTC (rev 103246)
@@ -47,6 +47,8 @@
def convert_value_to_int(value):
+ if not value:
+ return "0";
assert(value[0] == "U")
assert(value[1] == "+")
return "0x" + value[2:]
@@ -124,10 +126,13 @@
letter = entry[ENTITY][0]
if not index.get(letter):
index[letter] = offset
- output_file.write(' { %s, %s, %s },\n' % (
+ values = entry[VALUE].split(' ')
+ assert len(values) <= 2, values
+ output_file.write(' { %s, %s, %s, %s },\n' % (
convert_entity_to_cpp_name(entry[ENTITY]),
len(entry[ENTITY]),
- convert_value_to_int(entry[VALUE])))
+ convert_value_to_int(values[0]),
+ convert_value_to_int(values[1] if len(values) >= 2 else "")))
offset += 1
output_file.write("""};
Modified: trunk/Source/WebCore/xml/parser/CharacterReferenceParserInlineMethods.h (103245 => 103246)
--- trunk/Source/WebCore/xml/parser/CharacterReferenceParserInlineMethods.h 2011-12-19 18:11:20 UTC (rev 103245)
+++ trunk/Source/WebCore/xml/parser/CharacterReferenceParserInlineMethods.h 2011-12-19 18:30:20 UTC (rev 103246)
@@ -128,10 +128,12 @@
result = result * 16 + 10 + cc - 'A';
else if (cc == ';') {
source.advanceAndASSERT(cc);
- return ParserFunctions::convertToUTF16(ParserFunctions::legalEntityFor(result), decodedCharacter);
- } else if (ParserFunctions::acceptMalformed())
- return ParserFunctions::convertToUTF16(ParserFunctions::legalEntityFor(result), decodedCharacter);
- else {
+ ParserFunctions::convertToUTF16(ParserFunctions::legalEntityFor(result), decodedCharacter);
+ return true;
+ } else if (ParserFunctions::acceptMalformed()) {
+ ParserFunctions::convertToUTF16(ParserFunctions::legalEntityFor(result), decodedCharacter);
+ return true;
+ } else {
unconsumeCharacters(source, consumedCharacters);
return false;
}
@@ -142,10 +144,12 @@
result = result * 10 + cc - '0';
else if (cc == ';') {
source.advanceAndASSERT(cc);
- return ParserFunctions::convertToUTF16(ParserFunctions::legalEntityFor(result), decodedCharacter);
- } else if (ParserFunctions::acceptMalformed())
- return ParserFunctions::convertToUTF16(ParserFunctions::legalEntityFor(result), decodedCharacter);
- else {
+ ParserFunctions::convertToUTF16(ParserFunctions::legalEntityFor(result), decodedCharacter);
+ return true;
+ } else if (ParserFunctions::acceptMalformed()) {
+ ParserFunctions::convertToUTF16(ParserFunctions::legalEntityFor(result), decodedCharacter);
+ return true;
+ } else {
unconsumeCharacters(source, consumedCharacters);
return false;
}
Modified: trunk/Source/WebCore/xml/parser/XMLCharacterReferenceParser.cpp (103245 => 103246)
--- trunk/Source/WebCore/xml/parser/XMLCharacterReferenceParser.cpp 2011-12-19 18:11:20 UTC (rev 103245)
+++ trunk/Source/WebCore/xml/parser/XMLCharacterReferenceParser.cpp 2011-12-19 18:30:20 UTC (rev 103246)
@@ -46,17 +46,16 @@
return value;
}
- inline static bool convertToUTF16(UChar32 value, StringBuilder& decodedCharacter)
+ inline static void convertToUTF16(UChar32 value, StringBuilder& decodedCharacter)
{
if (U_IS_BMP(value)) {
- UChar character = static_cast<UChar>(value);
- ASSERT(character == value);
- decodedCharacter.append(character);
- return true;
+ UChar character = static_cast<UChar>(value);
+ ASSERT(character == value);
+ decodedCharacter.append(character);
+ return;
}
decodedCharacter.append(U16_LEAD(value));
decodedCharacter.append(U16_TRAIL(value));
- return true;
}
inline static bool acceptMalformed() { return false; }
Modified: trunk/Source/WebCore/xml/parser/XMLTreeBuilder.cpp (103245 => 103246)
--- trunk/Source/WebCore/xml/parser/XMLTreeBuilder.cpp 2011-12-19 18:11:20 UTC (rev 103245)
+++ trunk/Source/WebCore/xml/parser/XMLTreeBuilder.cpp 2011-12-19 18:30:20 UTC (rev 103246)
@@ -33,7 +33,9 @@
#include "DocumentFragment.h"
#include "DocumentType.h"
#include "Frame.h"
+// FIXME: Why are we including HTML entity information in the XML parser?
#include "HTMLEntitySearch.h"
+#include "HTMLEntityTable.h"
#include "NewXMLDocumentParser.h"
#include "ProcessingInstruction.h"
#include "ScriptElement.h"
@@ -370,9 +372,19 @@
}
}
search.advance(';');
- UChar32 entityValue = search.currentValue();
+ if (!search.isEntityPrefix()) {
+ m_parser->stopParsing();
+ return;
+ }
+ UChar32 entityValue = search.mostRecentMatch()->firstValue;
+ // FIXME: We need to account for secondValue if any XML entities are longer
+ // than one unicode character.
+ ASSERT_NOT_REACHED();
+ // Darin Adler writes:
+ // You can see given the code above that this else is dead code. This code is in a strange state.
+ // And the reinterpret_cast to UChar* makes the code little-endian-specific. That is not good!
if (entityValue <= 0xFFFF)
- appendToText(reinterpret_cast<UChar*>(&entityValue), 1);
+ appendToText(reinterpret_cast<UChar*>(&entityValue), 1);
else {
UChar utf16Pair[2] = { U16_LEAD(entityValue), U16_TRAIL(entityValue) };
appendToText(utf16Pair, 2);