Diff
Modified: trunk/LayoutTests/ChangeLog (197533 => 197534)
--- trunk/LayoutTests/ChangeLog 2016-03-04 01:07:04 UTC (rev 197533)
+++ trunk/LayoutTests/ChangeLog 2016-03-04 01:24:28 UTC (rev 197534)
@@ -1,5 +1,17 @@
-2016-03-03 Keith Miller <keith_mil...@apple.com>
+2016-03-03 Michael Saboff <msab...@apple.com>
+ [ES6] Make Unicode RegExp pattern parsing conform to the spec
+ https://bugs.webkit.org/show_bug.cgi?id=154988
+
+ Reviewed by Benjamin Poulain.
+
+ Added test cases.
+
+ * js/regexp-unicode-expected.txt:
+ * js/script-tests/regexp-unicode.js:
+ (shouldThrowInvalidEscape):
+
+
[ES6] Add support for Symbol.toPrimitive
https://bugs.webkit.org/show_bug.cgi?id=154877
Modified: trunk/LayoutTests/js/regexp-unicode-expected.txt (197533 => 197534)
--- trunk/LayoutTests/js/regexp-unicode-expected.txt 2016-03-04 01:07:04 UTC (rev 197533)
+++ trunk/LayoutTests/js/regexp-unicode-expected.txt 2016-03-04 01:24:28 UTC (rev 197534)
@@ -3,19 +3,19 @@
On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".
-PASS "a".match(/a/)[0].length is 1
-PASS "a".match(/A/i)[0].length is 1
PASS "a".match(/a/u)[0].length is 1
+PASS "a".match(/A/ui)[0].length is 1
+PASS "a".match(/a/u)[0].length is 1
PASS "a".match(/A/iu)[0].length is 1
-PASS "Ȓ".match(/Ȓ/)[0].length is 1
PASS "Ȓ".match(/Ȓ/u)[0].length is 1
-PASS "ሴ".match(/ሴ/)[0].length is 1
+PASS "Ȓ".match(/Ȓ/u)[0].length is 1
PASS "ሴ".match(/ሴ/u)[0].length is 1
-PASS "⪼".match(/⪼/)[0].length is 1
+PASS "ሴ".match(/ሴ/u)[0].length is 1
+PASS "⪼".match(/⪼/u)[0].length is 1
PASS "㿭".match(/㿭/u)[0].length is 1
PASS "𒍅".match(/𒍅/u)[0].length is 2
PASS "𒍅".match(/𒍅/u)[0].length is 2
-PASS "𝌆".match(/𝌆/)[0].length is 2
+PASS "𝌆".match(/𝌆/u)[0].length is 2
PASS /𐑏/u.test("𐑏") is true
PASS /𐑏/u.test("𐑏") is true
PASS "𝌆".match(/𝌆/u)[0].length is 2
@@ -41,6 +41,8 @@
PASS "Ťx".match(/ťx/iu)[0].length is 2
PASS "𝌆".match(/^.$/u)[0].length is 2
PASS "It is 78°".match(/.*/u)[0].length is 9
+PASS stringWithDanglingFirstSurrogate.match(/.*/u)[0].length is 3
+PASS stringWithDanglingSecondSurrogate.match(/.*/u)[0].length is 3
PASS "𝌆".match(/[𝌆a]/)[0].length is 1
PASS "𝌆".match(/[a𝌆]/u)[0].length is 2
PASS "𝌆".match(/[𝌆a]/u)[0].length is 2
@@ -91,6 +93,22 @@
PASS /abc/ui.test("ẚbc") is true
PASS /texẗ/ui.test("text") is true
PASS /text/ui.test("ẗext") is true
+PASS /\u{1}/.test("u") is true
+PASS /\u{4}/.test("u") is false
+PASS /\u{4}/.test("uuuu") is true
+PASS "800-555-1212".match(/[0-9\-]*/u)[0].length is 12
+PASS "this is ba test".match(/is b\cha test/u)[0].length is 11
+PASS new RegExp("\\/", "u").source is "\\/"
+PASS r = new RegExp("\\u{110000}", "u") threw exception SyntaxError: Invalid regular expression: invalid unicode {} escape.
+PASS r = new RegExp("\\-", "u") threw exception SyntaxError: Invalid regular expression: invalid escaped character for unicode pattern.
+PASS r = new RegExp("\\a", "u") threw exception SyntaxError: Invalid regular expression: invalid escaped character for unicode pattern.
+PASS r = new RegExp("[\\a]", "u") threw exception SyntaxError: Invalid regular expression: invalid escaped character for unicode pattern.
+PASS r = new RegExp("[\\b]", "u") threw exception SyntaxError: Invalid regular expression: invalid escaped character for unicode pattern.
+PASS r = new RegExp("[\\B]", "u") threw exception SyntaxError: Invalid regular expression: invalid escaped character for unicode pattern.
+PASS r = new RegExp("\\x", "u") threw exception SyntaxError: Invalid regular expression: invalid escaped character for unicode pattern.
+PASS r = new RegExp("[\\x]", "u") threw exception SyntaxError: Invalid regular expression: invalid escaped character for unicode pattern.
+PASS r = new RegExp("\\u", "u") threw exception SyntaxError: Invalid regular expression: invalid escaped character for unicode pattern.
+PASS r = new RegExp("[\\u]", "u") threw exception SyntaxError: Invalid regular expression: invalid escaped character for unicode pattern.
PASS successfullyParsed is true
TEST COMPLETE
Modified: trunk/LayoutTests/js/script-tests/regexp-unicode.js (197533 => 197534)
--- trunk/LayoutTests/js/script-tests/regexp-unicode.js 2016-03-04 01:07:04 UTC (rev 197533)
+++ trunk/LayoutTests/js/script-tests/regexp-unicode.js 2016-03-04 01:24:28 UTC (rev 197534)
@@ -3,19 +3,19 @@
);
// Test \u{} escapes in a regular expression
-shouldBe('"a".match(/\u{61}/)[0].length', '1');
-shouldBe('"a".match(/\u{41}/i)[0].length', '1');
+shouldBe('"a".match(/\u{61}/u)[0].length', '1');
+shouldBe('"a".match(/\u{41}/ui)[0].length', '1');
shouldBe('"a".match(/\u{061}/u)[0].length', '1');
shouldBe('"a".match(/\u{041}/iu)[0].length', '1');
-shouldBe('"\u{212}".match(/\u{212}/)[0].length', '1');
+shouldBe('"\u{212}".match(/\u{212}/u)[0].length', '1');
shouldBe('"\u{212}".match(/\u{0212}/u)[0].length', '1');
-shouldBe('"\u{1234}".match(/\u{1234}/)[0].length', '1');
+shouldBe('"\u{1234}".match(/\u{1234}/u)[0].length', '1');
shouldBe('"\u{1234}".match(/\u{01234}/u)[0].length', '1');
-shouldBe('"\u{2abc}".match(/\u{2abc}/)[0].length', '1');
+shouldBe('"\u{2abc}".match(/\u{2abc}/u)[0].length', '1');
shouldBe('"\u{03fed}".match(/\u{03fed}/u)[0].length', '1');
shouldBe('"\u{12345}".match(/\u{12345}/u)[0].length', '2');
shouldBe('"\u{12345}".match(/\u{012345}/u)[0].length', '2');
-shouldBe('"\u{1d306}".match(/\u{1d306}/)[0].length', '2');
+shouldBe('"\u{1d306}".match(/\u{1d306}/u)[0].length', '2');
shouldBeTrue('/\u{1044f}/u.test("\ud801\udc4f")');
shouldBeTrue('/\ud801\udc4f/u.test("\u{1044f}")');
@@ -47,15 +47,16 @@
// Test . matches with Unicode flag
shouldBe('"\u{1D306}".match(/^.$/u)[0].length', '2');
shouldBe('"It is 78\u00B0".match(/.*/u)[0].length', '9');
-// FIXME: These tests are disabled until https://bugs.webkit.org/show_bug.cgi?id=154863 is fixed
-// shouldBe('"\ud801XXX".match(/.*/u)[0].length', '4'); // We should match a dangling first surrogate as 1 character
-// shouldBe('"X\udfffXX".match(/.*/u)[0].length', '4'); // We should match a dangling second surrogate as 1 character
+var stringWithDanglingFirstSurrogate = "X\uD801X";
+shouldBe('stringWithDanglingFirstSurrogate.match(/.*/u)[0].length', '3'); // We should match a dangling first surrogate as 1 character
+var stringWithDanglingSecondSurrogate = "X\uDF01X";
+shouldBe('stringWithDanglingSecondSurrogate.match(/.*/u)[0].length', '3'); // We should match a dangling second surrogate as 1 character
// Test character classes with unicode characters with and without unicode flag
-shouldBe('"\u{1d306}".match(/[\u{1d306}a]/)[0].length', '1');
+shouldBe('"\u{1d306}".match(/[\uD834\uDF06a]/)[0].length', '1');
shouldBe('"\u{1d306}".match(/[a\u{1d306}]/u)[0].length', '2');
shouldBe('"\u{1d306}".match(/[\u{1d306}a]/u)[0].length', '2');
-shouldBe('"\u{1d306}".match(/[a-\u{1d306}]/)[0].length', '1');
+shouldBe('"\u{1d306}".match(/[a-\uD834\uDF06]/)[0].length', '1');
shouldBe('"\u{1d306}".match(/[a-\u{1d306}]/u)[0].length', '2');
// Test a character class that is a range from one UTF16 to a Unicode character
@@ -63,7 +64,7 @@
shouldBe('"\u1000".match(/[\u0020-\ud801\udc4f]/u)[0].length', '1');
shouldBe('"\ud801\udc27".match(/[\u0020-\ud801\udc4f]/u)[0].length', '2');
-var re1 = new RegExp("[^\u0020-\ud801\udc4f]", "u");
+var re1 = new RegExp("[^\u0020-\uD801\uDC4F]", "u");
shouldBeFalse('re1.test("Z")');
shouldBeFalse('re1.test("\u{1000}")');
shouldBeFalse('re1.test("\u{10400}")');
@@ -135,8 +136,44 @@
shouldBeUndefined('match6[1]');
shouldBe('match6[2]', '"\u{10412}\u{10412}"');
-// Miscellaneous tests
+// Check unicode case insensitive matches
shouldBeTrue('/\u1e9Abc/ui.test("abc")');
shouldBeTrue('/abc/ui.test("\u1e9Abc")');
shouldBeTrue('/tex\u1e97/ui.test("text")');
shouldBeTrue('/text/ui.test("\u1e97ext")');
+
+// Verify that without the unicode flag, \u{} doesn't parse as a unicode escape, but as a counted match of the character 'u'.
+shouldBeTrue('/\\u{1}/.test("u")');
+shouldBeFalse('/\\u{4}/.test("u")');
+shouldBeTrue('/\\u{4}/.test("uuuu")');
+
+// Check that \- escape works in a character class for a unicode pattern
+shouldBe('"800-555-1212".match(/[0-9\\-]*/u)[0].length', '12');
+
+// Check that control letter escapes work with unicode flag
+shouldBe('"this is b\ba test".match(/is b\\cha test/u)[0].length', '11');
+
+// Check that invalid unicode patterns throw exceptions
+shouldBe('new RegExp("\\\\/", "u").source', '"\\\\/"');
+shouldThrow('r = new RegExp("\\\\u{110000}", "u")', '"SyntaxError: Invalid regular expression: invalid unicode {} escape"');
+
+var invalidEscapeException = "SyntaxError: Invalid regular expression: invalid escaped character for unicode pattern";
+var newRegExp;
+
+function shouldThrowInvalidEscape(pattern)
+{
+ newRegExp = 'r = new RegExp("' + pattern + '", "u")';
+
+ shouldThrow(newRegExp, 'invalidEscapeException');
+}
+
+shouldThrowInvalidEscape("\\\\-");
+shouldThrowInvalidEscape("\\\\a");
+shouldThrowInvalidEscape("[\\\\a]");
+shouldThrowInvalidEscape("[\\\\b]");
+shouldThrowInvalidEscape("[\\\\B]");
+shouldThrowInvalidEscape("\\\\x");
+shouldThrowInvalidEscape("[\\\\x]");
+shouldThrowInvalidEscape("\\\\u");
+shouldThrowInvalidEscape("[\\\\u]");
+
Modified: trunk/Source/JavaScriptCore/ChangeLog (197533 => 197534)
--- trunk/Source/JavaScriptCore/ChangeLog 2016-03-04 01:07:04 UTC (rev 197533)
+++ trunk/Source/JavaScriptCore/ChangeLog 2016-03-04 01:24:28 UTC (rev 197534)
@@ -1,3 +1,60 @@
+2016-03-03 Michael Saboff <msab...@apple.com>
+
+ [ES6] Make Unicode RegExp pattern parsing conform to the spec
+ https://bugs.webkit.org/show_bug.cgi?id=154988
+
+ Reviewed by Benjamin Poulain.
+
+ Updated RegExp pattern processing with 'u' (Unicode) flag to conform to the
+ spec (https://tc39.github.io/ecma262/2016/#sec-patterns). In the spec, the
+ grammar is annotated with [U] annotations. Productions that are prefixed with
+ [+U] are only available with the Unicode flag while productions prefixed with
+ [~U] are only available without the Unicode flag.
+
+ Added flags argument to Yarr::checkSyntax() so we can catch Unicode flag related
+ parsing errors at syntax checking time. Restricted what escapes are available for
+ Unicode patterns. Most of this is defined in the IdentityEscape rule in the
+ pattern grammar.
+
+ Added \- as a CharacterClass only escape in Unicode patterns.
+
+ Updated the tests for these changes.
+
+ Made changes suggested in https://bugs.webkit.org/show_bug.cgi?id=154842#c22 after
+ change set r197426 was landed.
+
+ * parser/ASTBuilder.h:
+ (JSC::ASTBuilder::createRegExp):
+ * parser/Parser.cpp:
+ (JSC::Parser<LexerType>::parsePrimaryExpression):
+ * parser/SyntaxChecker.h:
+ (JSC::SyntaxChecker::createRegExp):
+ * yarr/YarrInterpreter.cpp:
+ (JSC::Yarr::Interpreter::InputStream::readChecked):
+ (JSC::Yarr::Interpreter::InputStream::readSurrogatePairChecked):
+ (JSC::Yarr::Interpreter::InputStream::reread):
+ (JSC::Yarr::Interpreter::InputStream::uncheckInput):
+ (JSC::Yarr::Interpreter::InputStream::atStart):
+ (JSC::Yarr::Interpreter::InputStream::atEnd):
+ (JSC::Yarr::Interpreter::testCharacterClass):
+ (JSC::Yarr::Interpreter::backtrackPatternCharacter):
+ (JSC::Yarr::Interpreter::matchDisjunction):
+ (JSC::Yarr::ByteCompiler::atomPatternCharacter):
+ * yarr/YarrParser.h:
+ (JSC::Yarr::Parser::Parser):
+ (JSC::Yarr::Parser::isIdentityEscapeAnError):
+ (JSC::Yarr::Parser::parseEscape):
+ (JSC::Yarr::Parser::parse):
+ * yarr/YarrPattern.cpp:
+ (JSC::Yarr::CharacterClassConstructor::putChar):
+ (JSC::Yarr::CharacterClassConstructor::putRange):
+ (JSC::Yarr::CharacterClassConstructor::addSorted):
+ (JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
+ * yarr/YarrSyntaxChecker.cpp:
+ (JSC::Yarr::SyntaxChecker::disjunction):
+ (JSC::Yarr::checkSyntax):
+ * yarr/YarrSyntaxChecker.h:
+
2016-03-03 Saam barati <sbar...@apple.com>
[ES6] Implement Proxy.[[DefineOwnProperty]]
Modified: trunk/Source/JavaScriptCore/parser/ASTBuilder.h (197533 => 197534)
--- trunk/Source/JavaScriptCore/parser/ASTBuilder.h 2016-03-04 01:07:04 UTC (rev 197533)
+++ trunk/Source/JavaScriptCore/parser/ASTBuilder.h 2016-03-04 01:24:28 UTC (rev 197534)
@@ -311,7 +311,7 @@
ExpressionNode* createRegExp(const JSTokenLocation& location, const Identifier& pattern, const Identifier& flags, const JSTextPosition& start)
{
- if (Yarr::checkSyntax(pattern.string()))
+ if (Yarr::checkSyntax(pattern.string(), flags.string()))
return 0;
RegExpNode* node = new (m_parserArena) RegExpNode(location, pattern, flags);
int size = pattern.length() + 2; // + 2 for the two /'s
Modified: trunk/Source/JavaScriptCore/parser/Parser.cpp (197533 => 197534)
--- trunk/Source/JavaScriptCore/parser/Parser.cpp 2016-03-04 01:07:04 UTC (rev 197533)
+++ trunk/Source/JavaScriptCore/parser/Parser.cpp 2016-03-04 01:24:28 UTC (rev 197534)
@@ -3684,7 +3684,7 @@
next();
TreeExpression re = context.createRegExp(location, *pattern, *flags, start);
if (!re) {
- const char* yarrErrorMsg = Yarr::checkSyntax(pattern->string());
+ const char* yarrErrorMsg = Yarr::checkSyntax(pattern->string(), flags->string());
regexFail(yarrErrorMsg);
}
return re;
Modified: trunk/Source/JavaScriptCore/parser/SyntaxChecker.h (197533 => 197534)
--- trunk/Source/JavaScriptCore/parser/SyntaxChecker.h 2016-03-04 01:07:04 UTC (rev 197533)
+++ trunk/Source/JavaScriptCore/parser/SyntaxChecker.h 2016-03-04 01:24:28 UTC (rev 197534)
@@ -172,7 +172,7 @@
ExpressionType createNull(const JSTokenLocation&) { return NullExpr; }
ExpressionType createBracketAccess(const JSTokenLocation&, ExpressionType, ExpressionType, bool, int, int, int) { return BracketExpr; }
ExpressionType createDotAccess(const JSTokenLocation&, ExpressionType, const Identifier*, int, int, int) { return DotExpr; }
- ExpressionType createRegExp(const JSTokenLocation&, const Identifier& pattern, const Identifier&, int) { return Yarr::checkSyntax(pattern.string()) ? 0 : RegExpExpr; }
+ ExpressionType createRegExp(const JSTokenLocation&, const Identifier& pattern, const Identifier& flags, int) { return Yarr::checkSyntax(pattern.string(), flags.string()) ? 0 : RegExpExpr; }
ExpressionType createNewExpr(const JSTokenLocation&, ExpressionType, int, int, int, int) { return NewExpr; }
ExpressionType createNewExpr(const JSTokenLocation&, ExpressionType, int, int) { return NewExpr; }
ExpressionType createConditionalExpr(const JSTokenLocation&, ExpressionType, ExpressionType, ExpressionType) { return ConditionalExpr; }
Modified: trunk/Source/JavaScriptCore/yarr/YarrInterpreter.cpp (197533 => 197534)
--- trunk/Source/JavaScriptCore/yarr/YarrInterpreter.cpp 2016-03-04 01:07:04 UTC (rev 197533)
+++ trunk/Source/JavaScriptCore/yarr/YarrInterpreter.cpp 2016-03-04 01:24:28 UTC (rev 197534)
@@ -208,8 +208,7 @@
unsigned p = pos - negativePositionOffest;
ASSERT(p < length);
int result = input[p];
- if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + 1 < length
- && U16_IS_TRAIL(input[p + 1])) {
+ if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + 1 < length && U16_IS_TRAIL(input[p + 1])) {
if (atEnd())
return -1;
@@ -219,17 +218,18 @@
return result;
}
- int readSurrogatePairChecked(unsigned negativePositionOffest)
+ int readSurrogatePairChecked(unsigned negativePositionOffset)
{
- RELEASE_ASSERT(pos >= negativePositionOffest);
- unsigned p = pos - negativePositionOffest;
+ RELEASE_ASSERT(pos >= negativePositionOffset);
+ unsigned p = pos - negativePositionOffset;
ASSERT(p < length);
if (p + 1 >= length)
return -1;
int first = input[p];
- if (U16_IS_LEAD(first) && U16_IS_TRAIL(input[p + 1]))
- return U16_GET_SUPPLEMENTARY(first, input[p + 1]);
+ int second = input[p + 1];
+ if (U16_IS_LEAD(first) && U16_IS_TRAIL(second))
+ return U16_GET_SUPPLEMENTARY(first, second);
return -1;
}
@@ -238,11 +238,8 @@
{
ASSERT(from < length);
int result = input[from];
- if (U16_IS_LEAD(result) && decodeSurrogatePairs && from + 1 < length
- && U16_IS_TRAIL(input[from + 1])) {
-
+ if (U16_IS_LEAD(result) && decodeSurrogatePairs && from + 1 < length && U16_IS_TRAIL(input[from + 1]))
result = U16_GET_SUPPLEMENTARY(result, input[from + 1]);
- }
return result;
}
@@ -294,9 +291,9 @@
pos -= count;
}
- bool atStart(unsigned negativePositionOffest)
+ bool atStart(unsigned negativePositionOffset)
{
- return pos == negativePositionOffest;
+ return pos == negativePositionOffset;
}
bool atEnd(unsigned negativePositionOffest)
@@ -319,7 +316,7 @@
bool testCharacterClass(CharacterClass* characterClass, int ch)
{
- if (ch & 0x1FFF80) {
+ if (!isASCII(ch)) {
for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
if (ch == characterClass->m_matchesUnicode[i])
return true;
@@ -433,10 +430,7 @@
case QuantifierGreedy:
if (backTrack->matchAmount) {
--backTrack->matchAmount;
- if (unicode && !U_IS_BMP(term.atom.patternCharacter))
- input.uncheckInput(2);
- else
- input.uncheckInput(1);
+ input.uncheckInput(U16_LENGTH(term.atom.patternCharacter));
return true;
}
break;
@@ -1267,7 +1261,7 @@
case ByteTerm::TypePatternCasedCharacterOnce:
case ByteTerm::TypePatternCasedCharacterFixed: {
if (unicode) {
- // Case insensitive matching of unicode charaters are handled as TypeCharacterClass
+ // Case insensitive matching of unicode characters is handled as TypeCharacterClass.
ASSERT(U_IS_BMP(currentTerm().atom.patternCharacter));
unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
@@ -1290,7 +1284,7 @@
case ByteTerm::TypePatternCasedCharacterGreedy: {
BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
- // Case insensitive matching of unicode charaters are handled as TypeCharacterClass
+ // Case insensitive matching of unicode characters is handled as TypeCharacterClass.
ASSERT(!unicode || U_IS_BMP(currentTerm().atom.patternCharacter));
unsigned matchAmount = 0;
@@ -1308,7 +1302,7 @@
case ByteTerm::TypePatternCasedCharacterNonGreedy: {
BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
- // Case insensitive matching of unicode charaters are handled as TypeCharacterClass
+ // Case insensitive matching of unicode characters is handled as TypeCharacterClass.
ASSERT(!unicode || U_IS_BMP(currentTerm().atom.patternCharacter));
backTrack->matchAmount = 0;
@@ -1618,9 +1612,6 @@
void atomPatternCharacter(UChar32 ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
{
if (m_pattern.m_ignoreCase) {
- ASSERT(u_tolower(ch) <= UCHAR_MAX_VALUE);
- ASSERT(u_toupper(ch) <= UCHAR_MAX_VALUE);
-
UChar32 lo = u_tolower(ch);
UChar32 hi = u_toupper(ch);
Modified: trunk/Source/JavaScriptCore/yarr/YarrParser.h (197533 => 197534)
--- trunk/Source/JavaScriptCore/yarr/YarrParser.h 2016-03-04 01:07:04 UTC (rev 197533)
+++ trunk/Source/JavaScriptCore/yarr/YarrParser.h 2016-03-04 01:24:28 UTC (rev 197534)
@@ -61,6 +61,7 @@
CharacterClassOutOfOrder,
EscapeUnterminated,
InvalidUnicodeEscape,
+ InvalidIdentityEscape,
NumberOfErrorCodes
};
@@ -241,6 +242,19 @@
{
}
+ // The handling of IdentityEscapes is different depending on the unicode flag.
+ // For Unicode patterns, IdentityEscapes only include SyntaxCharacters or '/'.
+ // For non-unicode patterns, most any character can be escaped.
+ bool isIdentityEscapeAnError(int ch)
+ {
+ if (m_isUnicode && !strchr("^$\\.*+?()[]{}|/", ch)) {
+ m_err = InvalidIdentityEscape;
+ return true;
+ }
+
+ return false;
+ }
+
/*
* parseEscape():
*
@@ -277,18 +291,24 @@
// Assertions
case 'b':
consume();
- if (inCharacterClass)
+ if (inCharacterClass) {
+ if (isIdentityEscapeAnError('b'))
+ break;
+
delegate.atomPatternCharacter('\b');
- else {
+ } else {
delegate.assertionWordBoundary(false);
return false;
}
break;
case 'B':
consume();
- if (inCharacterClass)
+ if (inCharacterClass) {
+ if (isIdentityEscapeAnError('B'))
+ break;
+
delegate.atomPatternCharacter('B');
- else {
+ } else {
delegate.assertionWordBoundary(true);
return false;
}
@@ -403,9 +423,12 @@
case 'x': {
consume();
int x = tryConsumeHex(2);
- if (x == -1)
+ if (x == -1) {
+ if (isIdentityEscapeAnError('x'))
+ break;
+
delegate.atomPatternCharacter('x');
- else
+ } else
delegate.atomPatternCharacter(x);
break;
}
@@ -414,20 +437,23 @@
case 'u': {
consume();
if (atEndOfPattern()) {
+ if (isIdentityEscapeAnError('u'))
+ break;
+
delegate.atomPatternCharacter('u');
break;
}
- if (peek() == '{') {
+ if (m_isUnicode && peek() == '{') {
consume();
UChar32 codePoint = 0;
do {
if (atEndOfPattern())
m_err = InvalidUnicodeEscape;
- if (!WTF::isASCIIHexDigit(peek()))
+ if (!isASCIIHexDigit(peek()))
m_err = InvalidUnicodeEscape;
- codePoint = (codePoint << 4) | WTF::toASCIIHexValue(consume());
+ codePoint = (codePoint << 4) | toASCIIHexValue(consume());
if (codePoint > UCHAR_MAX_VALUE)
m_err = InvalidUnicodeEscape;
@@ -441,9 +467,12 @@
break;
}
int u = tryConsumeHex(4);
- if (u == -1)
+ if (u == -1) {
+ if (isIdentityEscapeAnError('u'))
+ break;
+
delegate.atomPatternCharacter('u');
- else {
+ } else {
// If we have the first of a surrogate pair, look for the second.
if (U16_IS_LEAD(u) && m_isUnicode && (patternRemaining() >= 6) && peek() == '\\') {
ParseState state = saveState();
@@ -467,6 +496,17 @@
// IdentityEscape
default:
+ int ch = peek();
+
+ if (ch == '-' && m_isUnicode && inCharacterClass) {
+ // \- is allowed for ClassEscape with unicode flag.
+ delegate.atomPatternCharacter(consume());
+ break;
+ }
+
+ if (isIdentityEscapeAnError(ch))
+ break;
+
delegate.atomPatternCharacter(consume());
}
@@ -762,8 +802,9 @@
REGEXP_ERROR_PREFIX "unrecognized character after (?",
REGEXP_ERROR_PREFIX "missing terminating ] for character class",
REGEXP_ERROR_PREFIX "range out of order in character class",
- REGEXP_ERROR_PREFIX "\\ at end of pattern"
- REGEXP_ERROR_PREFIX "invalid unicode {} escape"
+ REGEXP_ERROR_PREFIX "\\ at end of pattern",
+ REGEXP_ERROR_PREFIX "invalid unicode {} escape",
+ REGEXP_ERROR_PREFIX "invalid escaped character for unicode pattern"
};
return errorMessages[m_err];
Modified: trunk/Source/JavaScriptCore/yarr/YarrPattern.cpp (197533 => 197534)
--- trunk/Source/JavaScriptCore/yarr/YarrPattern.cpp 2016-03-04 01:07:04 UTC (rev 197533)
+++ trunk/Source/JavaScriptCore/yarr/YarrPattern.cpp 2016-03-04 01:24:28 UTC (rev 197534)
@@ -69,7 +69,7 @@
void putChar(UChar32 ch)
{
// Handle ascii cases.
- if (ch <= 0x7f) {
+ if (isASCII(ch)) {
if (m_isCaseInsensitive && isASCIIAlpha(ch)) {
addSorted(m_matches, toASCIIUpper(ch));
addSorted(m_matches, toASCIILower(ch));
@@ -108,7 +108,7 @@
void putRange(UChar32 lo, UChar32 hi)
{
- if (lo <= 0x7f) {
+ if (isASCII(lo)) {
char asciiLo = lo;
char asciiHi = std::min(hi, (UChar32)0x7f);
addSortedRange(m_ranges, lo, asciiHi);
@@ -120,7 +120,7 @@
addSortedRange(m_ranges, std::max(asciiLo, 'a')+('A'-'a'), std::min(asciiHi, 'z')+('A'-'a'));
}
}
- if (hi <= 0x7f)
+ if (isASCII(hi))
return;
lo = std::max(lo, (UChar32)0x80);
@@ -190,7 +190,7 @@
private:
void addSorted(UChar32 ch)
{
- addSorted(ch <= 0x7f ? m_matches : m_matchesUnicode, ch);
+ addSorted(isASCII(ch) ? m_matches : m_matchesUnicode, ch);
}
void addSorted(Vector<UChar32>& matches, UChar32 ch)
@@ -603,7 +603,7 @@
currentCallFrameSize += YarrStackSpaceForBackTrackInfoPatternCharacter;
alternative->m_hasFixedSize = false;
} else if (m_pattern.m_unicode) {
- currentInputPosition += (!U_IS_BMP(term.patternCharacter) ? 2 : 1) * term.quantityCount;
+ currentInputPosition += U16_LENGTH(term.patternCharacter) * term.quantityCount;
} else
currentInputPosition += term.quantityCount;
break;
Modified: trunk/Source/JavaScriptCore/yarr/YarrSyntaxChecker.cpp (197533 => 197534)
--- trunk/Source/JavaScriptCore/yarr/YarrSyntaxChecker.cpp 2016-03-04 01:07:04 UTC (rev 197533)
+++ trunk/Source/JavaScriptCore/yarr/YarrSyntaxChecker.cpp 2016-03-04 01:24:28 UTC (rev 197534)
@@ -50,10 +50,10 @@
void disjunction() {}
};
-const char* checkSyntax(const String& pattern)
+const char* checkSyntax(const String& pattern, const String& flags)
{
SyntaxChecker syntaxChecker;
- return parse(syntaxChecker, pattern, false);
+ return parse(syntaxChecker, pattern, flags.contains('u'));
}
}} // JSC::YARR
Modified: trunk/Source/JavaScriptCore/yarr/YarrSyntaxChecker.h (197533 => 197534)
--- trunk/Source/JavaScriptCore/yarr/YarrSyntaxChecker.h 2016-03-04 01:07:04 UTC (rev 197533)
+++ trunk/Source/JavaScriptCore/yarr/YarrSyntaxChecker.h 2016-03-04 01:24:28 UTC (rev 197534)
@@ -30,7 +30,7 @@
namespace JSC { namespace Yarr {
-const char* checkSyntax(const String& pattern);
+const char* checkSyntax(const String& pattern, const String& flags);
}} // JSC::YARR