Title: [102095] trunk/Source/WebCore
Revision
102095
Author
[email protected]
Date
2011-12-05 22:13:01 -0800 (Mon, 05 Dec 2011)

Log Message

Could save a lot of memory in CharacterData by not always storing a String
https://bugs.webkit.org/show_bug.cgi?id=72404

Reviewed by Ojan Vafai.

When a Text node is created by the parser we check if the string is all whitespace
and if so we put it in the AtomicString table so that all future identical whitespace
strings can share the StringImpl.

No new tests. Covered by existing tests.

* html/parser/HTMLConstructionSite.cpp:
(WebCore::HTMLNames::isAllWhitespace):
(WebCore::HTMLConstructionSite::insertTextNode):

    If we do not know whether the string is all whitespace this now checks the string
    If the string is all whitespace we create an AtomicString for it.

* html/parser/HTMLConstructionSite.h:
* html/parser/HTMLTreeBuilder.cpp:
(WebCore::HTMLTreeBuilder::ExternalCharacterTokenBuffer::skipLeadingNonWhitespace): We never cared about the return value here.
(WebCore::HTMLTreeBuilder::processCharacterBuffer): Pass WhitespaceMode in the case we know whether the string is all whitespace or not.
(WebCore::HTMLTreeBuilder::defaultForInTableText): Ditto.

Modified Paths

Diff

Modified: trunk/Source/WebCore/ChangeLog (102094 => 102095)


--- trunk/Source/WebCore/ChangeLog	2011-12-06 06:10:15 UTC (rev 102094)
+++ trunk/Source/WebCore/ChangeLog	2011-12-06 06:13:01 UTC (rev 102095)
@@ -1,3 +1,29 @@
+2011-12-05  Erik Arvidsson  <[email protected]>
+
+        Could save a lot of memory in CharacterData by not always storing a String
+        https://bugs.webkit.org/show_bug.cgi?id=72404
+
+        Reviewed by Ojan Vafai.
+
+        When a Text node is created by the parser we check if the string is all whitespace
+        and if so we put it in the AtomicString table so that all future identical whitespace
+        strings can share the StringImpl.
+
+        No new tests. Covered by existing tests.
+
+        * html/parser/HTMLConstructionSite.cpp:
+        (WebCore::HTMLNames::isAllWhitespace):
+        (WebCore::HTMLConstructionSite::insertTextNode):
+
+            If we do not know whether the string is all whitespace this now checks the string
+            If the string is all whitespace we create an AtomicString for it.
+
+        * html/parser/HTMLConstructionSite.h:
+        * html/parser/HTMLTreeBuilder.cpp:
+        (WebCore::HTMLTreeBuilder::ExternalCharacterTokenBuffer::skipLeadingNonWhitespace): We never cared about the return value here.
+        (WebCore::HTMLTreeBuilder::processCharacterBuffer): Pass WhitespaceMode in the case we know whether the string is all whitespace or not.
+        (WebCore::HTMLTreeBuilder::defaultForInTableText): Ditto.
+
 2011-12-05  Benjamin Poulain  <[email protected]>
 
         Update KURL's copy copyASCII to avoid String::characters()

Modified: trunk/Source/WebCore/html/parser/HTMLConstructionSite.cpp (102094 => 102095)


--- trunk/Source/WebCore/html/parser/HTMLConstructionSite.cpp	2011-12-06 06:10:15 UTC (rev 102094)
+++ trunk/Source/WebCore/html/parser/HTMLConstructionSite.cpp	2011-12-06 06:13:01 UTC (rev 102095)
@@ -37,6 +37,7 @@
 #include "HTMLFormElement.h"
 #include "HTMLHtmlElement.h"
 #include "HTMLNames.h"
+#include "HTMLParserIdioms.h"
 #include "HTMLScriptElement.h"
 #include "HTMLToken.h"
 #include "HTMLTokenizer.h"
@@ -79,6 +80,11 @@
         || tagName == trTag;
 }
 
+inline bool isAllWhitespace(const String& string)
+{
+    return string.isAllSpecialCharacters<isHTMLSpace>();
+}
+
 } // namespace
 
 template<typename ChildType>
@@ -332,7 +338,7 @@
         m_openElements.push(element);
 }
 
-void HTMLConstructionSite::insertTextNode(const String& characters)
+void HTMLConstructionSite::insertTextNode(const String& characters, WhitespaceMode whitespaceMode)
 {
     AttachmentSite site;
     site.parent = currentNode();
@@ -340,6 +346,11 @@
     if (shouldFosterParent())
         findFosterSite(site);
 
+    // Strings composed entirely of whitespace are likely to be repeated.
+    // Turn them into AtomicString so we share a single string for each.
+    bool shouldUseAtomicString = whitespaceMode == AllWhitespace
+        || (whitespaceMode == WhitespaceUnknown && isAllWhitespace(characters));
+
     unsigned currentPosition = 0;
 
     // FIXME: Splitting text nodes into smaller chunks contradicts HTML5 spec, but is currently necessary
@@ -354,10 +365,12 @@
     }
 
     while (currentPosition < characters.length()) {
-        RefPtr<Text> textNode = Text::createWithLengthLimit(site.parent->document(), characters, currentPosition);
+        RefPtr<Text> textNode = Text::createWithLengthLimit(site.parent->document(), shouldUseAtomicString ? AtomicString(characters).string() : characters, currentPosition);
         // If we have a whole string of unbreakable characters the above could lead to an infinite loop. Exceeding the length limit is the lesser evil.
-        if (!textNode->length())
-            textNode = Text::create(site.parent->document(), characters.substring(currentPosition));
+        if (!textNode->length()) {
+            String substring = characters.substring(currentPosition);
+            textNode = Text::create(site.parent->document(), shouldUseAtomicString ? AtomicString(substring).string() : substring);
+        }
 
         currentPosition += textNode->length();
         ASSERT(currentPosition <= characters.length());

Modified: trunk/Source/WebCore/html/parser/HTMLConstructionSite.h (102094 => 102095)


--- trunk/Source/WebCore/html/parser/HTMLConstructionSite.h	2011-12-06 06:10:15 UTC (rev 102094)
+++ trunk/Source/WebCore/html/parser/HTMLConstructionSite.h	2011-12-06 06:13:01 UTC (rev 102095)
@@ -37,6 +37,12 @@
 
 namespace WebCore {
 
+enum WhitespaceMode {
+    AllWhitespace,
+    NotAllWhitespace,
+    WhitespaceUnknown
+};
+
 class AtomicHTMLToken;
 class Document;
 class Element;
@@ -61,7 +67,7 @@
     void insertHTMLBodyElement(AtomicHTMLToken&);
     void insertHTMLFormElement(AtomicHTMLToken&, bool isDemoted = false);
     void insertScriptElement(AtomicHTMLToken&);
-    void insertTextNode(const String&);
+    void insertTextNode(const String&, WhitespaceMode = WhitespaceUnknown);
     void insertForeignElement(AtomicHTMLToken&, const AtomicString& namespaceURI);
 
     void insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken&);

Modified: trunk/Source/WebCore/html/parser/HTMLTreeBuilder.cpp (102094 => 102095)


--- trunk/Source/WebCore/html/parser/HTMLTreeBuilder.cpp	2011-12-06 06:10:15 UTC (rev 102094)
+++ trunk/Source/WebCore/html/parser/HTMLTreeBuilder.cpp	2011-12-06 06:13:01 UTC (rev 102095)
@@ -278,9 +278,9 @@
         return takeLeading<isHTMLSpace>();
     }
 
-    String takeLeadingNonWhitespace()
+    void skipLeadingNonWhitespace()
     {
-        return takeLeading<isNotHTMLSpace>();
+        skipLeading<isNotHTMLSpace>();
     }
 
     String takeRemaining()
@@ -2464,7 +2464,7 @@
         ASSERT(insertionMode() == InHeadMode);
         String leadingWhitespace = buffer.takeLeadingWhitespace();
         if (!leadingWhitespace.isEmpty())
-            m_tree.insertTextNode(leadingWhitespace);
+            m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
         if (buffer.isEmpty())
             return;
         defaultForInHead();
@@ -2474,7 +2474,7 @@
         ASSERT(insertionMode() == AfterHeadMode);
         String leadingWhitespace = buffer.takeLeadingWhitespace();
         if (!leadingWhitespace.isEmpty())
-            m_tree.insertTextNode(leadingWhitespace);
+            m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
         if (buffer.isEmpty())
             return;
         defaultForAfterHead();
@@ -2509,13 +2509,13 @@
         ASSERT(insertionMode() == InColumnGroupMode);
         String leadingWhitespace = buffer.takeLeadingWhitespace();
         if (!leadingWhitespace.isEmpty())
-            m_tree.insertTextNode(leadingWhitespace);
+            m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
         if (buffer.isEmpty())
             return;
         if (!processColgroupEndTagForInColumnGroup()) {
             ASSERT(isParsingFragment());
             // The spec tells us to drop these characters on the floor.
-            buffer.takeLeadingNonWhitespace();
+            buffer.skipLeadingNonWhitespace();
             if (buffer.isEmpty())
                 return;
         }
@@ -2540,7 +2540,7 @@
         ASSERT(insertionMode() == InHeadNoscriptMode);
         String leadingWhitespace = buffer.takeLeadingWhitespace();
         if (!leadingWhitespace.isEmpty())
-            m_tree.insertTextNode(leadingWhitespace);
+            m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
         if (buffer.isEmpty())
             return;
         defaultForInHeadNoscript();
@@ -2552,7 +2552,7 @@
         ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
         String leadingWhitespace = buffer.takeRemainingWhitespace();
         if (!leadingWhitespace.isEmpty())
-            m_tree.insertTextNode(leadingWhitespace);
+            m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
         // FIXME: We should generate a parse error if we skipped over any
         // non-whitespace characters.
         break;
@@ -2575,7 +2575,7 @@
         String leadingWhitespace = buffer.takeRemainingWhitespace();
         if (!leadingWhitespace.isEmpty()) {
             m_tree.reconstructTheActiveFormattingElements();
-            m_tree.insertTextNode(leadingWhitespace);
+            m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
         }
         // FIXME: We should generate a parse error if we skipped over any
         // non-whitespace characters.
@@ -2727,7 +2727,7 @@
         // FIXME: parse error
         HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
         m_tree.reconstructTheActiveFormattingElements();
-        m_tree.insertTextNode(characters);
+        m_tree.insertTextNode(characters, NotAllWhitespace);
         m_framesetOk = false;
         setInsertionMode(m_originalInsertionMode);
         prepareToReprocessToken();
_______________________________________________
webkit-changes mailing list
[email protected]
http://lists.webkit.org/mailman/listinfo.cgi/webkit-changes

Reply via email to