Merge r3236, r3243, r3249 and r3258 from bleeding_edge to trunk.
This fixes issue 486 (incorrect handling of cyrillic characters).
Review URL: http://codereview.chromium.org/389001
git-svn-id: http://v8.googlecode.com/svn/trunk@3268 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
diff --git a/src/jsregexp.cc b/src/jsregexp.cc
index c77f32d..04d1944 100644
--- a/src/jsregexp.cc
+++ b/src/jsregexp.cc
@@ -2432,16 +2432,19 @@
}
-void TextNode::MakeCaseIndependent() {
+void TextNode::MakeCaseIndependent(bool is_ascii) {
int element_count = elms_->length();
for (int i = 0; i < element_count; i++) {
TextElement elm = elms_->at(i);
if (elm.type == TextElement::CHAR_CLASS) {
RegExpCharacterClass* cc = elm.data.u_char_class;
+ // None of the standard character classes is different in the case
+ // independent case and it slows us down if we don't know that.
+ if (cc->is_standard()) continue;
ZoneList<CharacterRange>* ranges = cc->ranges();
int range_count = ranges->length();
- for (int i = 0; i < range_count; i++) {
- ranges->at(i).AddCaseEquivalents(ranges);
+ for (int j = 0; j < range_count; j++) {
+ ranges->at(j).AddCaseEquivalents(ranges, is_ascii);
}
}
}
@@ -3912,19 +3915,31 @@
}
-void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges) {
+static void AddUncanonicals(ZoneList<CharacterRange>* ranges,
+ int bottom,
+ int top);
+
+
+void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,
+ bool is_ascii) {
+ uc16 bottom = from();
+ uc16 top = to();
+ if (is_ascii) {
+ if (bottom > String::kMaxAsciiCharCode) return;
+ if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode;
+ }
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
- if (IsSingleton()) {
+ if (top == bottom) {
// If this is a singleton we just expand the one character.
- int length = uncanonicalize.get(from(), '\0', chars);
+ int length = uncanonicalize.get(bottom, '\0', chars);
for (int i = 0; i < length; i++) {
uc32 chr = chars[i];
- if (chr != from()) {
+ if (chr != bottom) {
ranges->Add(CharacterRange::Singleton(chars[i]));
}
}
- } else if (from() <= kRangeCanonicalizeMax &&
- to() <= kRangeCanonicalizeMax) {
+ } else if (bottom <= kRangeCanonicalizeMax &&
+ top <= kRangeCanonicalizeMax) {
// If this is a range we expand the characters block by block,
// expanding contiguous subranges (blocks) one at a time.
// The approach is as follows. For a given start character we
@@ -3943,14 +3958,14 @@
// completely contained in a block we do this for all the blocks
// covered by the range.
unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth];
- // First, look up the block that contains the 'from' character.
- int length = canonrange.get(from(), '\0', range);
+ // First, look up the block that contains the 'bottom' character.
+ int length = canonrange.get(bottom, '\0', range);
if (length == 0) {
- range[0] = from();
+ range[0] = bottom;
} else {
ASSERT_EQ(1, length);
}
- int pos = from();
+ int pos = bottom;
// The start of the current block. Note that except for the first
// iteration 'start' is always equal to 'pos'.
int start;
@@ -3961,10 +3976,10 @@
} else {
start = pos;
}
- // Then we add the ranges on at a time, incrementing the current
+ // Then we add the ranges one at a time, incrementing the current
// position to be after the last block each time. The position
// always points to the start of a block.
- while (pos < to()) {
+ while (pos < top) {
length = canonrange.get(start, '\0', range);
if (length == 0) {
range[0] = start;
@@ -3975,20 +3990,122 @@
// The start point of a block contains the distance to the end
// of the range.
int block_end = start + (range[0] & kPayloadMask) - 1;
- int end = (block_end > to()) ? to() : block_end;
+ int end = (block_end > top) ? top : block_end;
length = uncanonicalize.get(start, '\0', range);
for (int i = 0; i < length; i++) {
uc32 c = range[i];
uc16 range_from = c + (pos - start);
uc16 range_to = c + (end - start);
- if (!(from() <= range_from && range_to <= to())) {
+ if (!(bottom <= range_from && range_to <= top)) {
ranges->Add(CharacterRange(range_from, range_to));
}
}
start = pos = block_end + 1;
}
} else {
- // TODO(plesner) when we've fixed the 2^11 bug in unibrow.
+ // Unibrow ranges don't work for high characters due to the "2^11 bug".
+ // Therefore we do something dumber for these ranges.
+ AddUncanonicals(ranges, bottom, top);
+ }
+}
+
+
+static void AddUncanonicals(ZoneList<CharacterRange>* ranges,
+ int bottom,
+ int top) {
+ unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
+ // Zones with no case mappings. There is a DEBUG-mode loop to assert that
+ // this table is correct.
+ // 0x0600 - 0x0fff
+ // 0x1100 - 0x1cff
+ // 0x2000 - 0x20ff
+ // 0x2200 - 0x23ff
+ // 0x2500 - 0x2bff
+ // 0x2e00 - 0xa5ff
+ // 0xa800 - 0xfaff
+ // 0xfc00 - 0xfeff
+ const int boundary_count = 18;
+ // The ASCII boundary and the kRangeCanonicalizeMax boundary are also in this
+ // array. This is to split up big ranges and not because they actually denote
+ // a case-mapping-free-zone.
+ ASSERT(CharacterRange::kRangeCanonicalizeMax < 0x600);
+ const int kFirstRealCaselessZoneIndex = 2;
+ int boundaries[] = {0x80, CharacterRange::kRangeCanonicalizeMax,
+ 0x600, 0x1000, 0x1100, 0x1d00, 0x2000, 0x2100, 0x2200, 0x2400, 0x2500,
+ 0x2c00, 0x2e00, 0xa600, 0xa800, 0xfb00, 0xfc00, 0xff00};
+
+ // Special ASCII rule from spec can save us some work here.
+ if (bottom == 0x80 && top == 0xffff) return;
+
+ // We have optimized support for this range.
+ if (top <= CharacterRange::kRangeCanonicalizeMax) {
+ CharacterRange range(bottom, top);
+ range.AddCaseEquivalents(ranges, false);
+ return;
+ }
+
+ // Split up very large ranges. This helps remove ranges where there are no
+ // case mappings.
+ for (int i = 0; i < boundary_count; i++) {
+ if (bottom < boundaries[i] && top >= boundaries[i]) {
+ AddUncanonicals(ranges, bottom, boundaries[i] - 1);
+ AddUncanonicals(ranges, boundaries[i], top);
+ return;
+ }
+ }
+
+ // If we are completely in a zone with no case mappings then we are done.
+ // We start at 2 so as not to except the ASCII range from mappings.
+ for (int i = kFirstRealCaselessZoneIndex; i < boundary_count; i += 2) {
+ if (bottom >= boundaries[i] && top < boundaries[i + 1]) {
+#ifdef DEBUG
+ for (int j = bottom; j <= top; j++) {
+ unsigned current_char = j;
+ int length = uncanonicalize.get(current_char, '\0', chars);
+ for (int k = 0; k < length; k++) {
+ ASSERT(chars[k] == current_char);
+ }
+ }
+#endif
+ return;
+ }
+ }
+
+ // Step through the range finding equivalent characters.
+ ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100);
+ for (int i = bottom; i <= top; i++) {
+ int length = uncanonicalize.get(i, '\0', chars);
+ for (int j = 0; j < length; j++) {
+ uc32 chr = chars[j];
+ if (chr != i && (chr < bottom || chr > top)) {
+ characters->Add(chr);
+ }
+ }
+ }
+
+ // Step through the equivalent characters finding simple ranges and
+ // adding ranges to the character class.
+ if (characters->length() > 0) {
+ int new_from = characters->at(0);
+ int new_to = new_from;
+ for (int i = 1; i < characters->length(); i++) {
+ int chr = characters->at(i);
+ if (chr == new_to + 1) {
+ new_to++;
+ } else {
+ if (new_to == new_from) {
+ ranges->Add(CharacterRange::Singleton(new_from));
+ } else {
+ ranges->Add(CharacterRange(new_from, new_to));
+ }
+ new_from = new_to = chr;
+ }
+ }
+ if (new_to == new_from) {
+ ranges->Add(CharacterRange::Singleton(new_from));
+ } else {
+ ranges->Add(CharacterRange(new_from, new_to));
+ }
}
}
@@ -4234,7 +4351,7 @@
void Analysis::VisitText(TextNode* that) {
if (ignore_case_) {
- that->MakeCaseIndependent();
+ that->MakeCaseIndependent(is_ascii_);
}
EnsureAnalyzed(that->on_success());
if (!has_failed()) {
@@ -4452,7 +4569,7 @@
}
}
data->node = node;
- Analysis analysis(ignore_case);
+ Analysis analysis(ignore_case, is_ascii);
analysis.EnsureAnalyzed(node);
if (analysis.has_failed()) {
const char* error_message = analysis.error_message();
diff --git a/src/jsregexp.h b/src/jsregexp.h
index 84f8d98..b681119 100644
--- a/src/jsregexp.h
+++ b/src/jsregexp.h
@@ -200,7 +200,7 @@
bool is_valid() { return from_ <= to_; }
bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; }
bool IsSingleton() { return (from_ == to_); }
- void AddCaseEquivalents(ZoneList<CharacterRange>* ranges);
+ void AddCaseEquivalents(ZoneList<CharacterRange>* ranges, bool is_ascii);
static void Split(ZoneList<CharacterRange>* base,
Vector<const uc16> overlay,
ZoneList<CharacterRange>** included,
@@ -703,7 +703,7 @@
int characters_filled_in,
bool not_at_start);
ZoneList<TextElement>* elements() { return elms_; }
- void MakeCaseIndependent();
+ void MakeCaseIndependent(bool is_ascii);
virtual int GreedyLoopTextLength();
virtual TextNode* Clone() {
TextNode* result = new TextNode(*this);
@@ -1212,8 +1212,10 @@
// +-------+ +------------+
class Analysis: public NodeVisitor {
public:
- explicit Analysis(bool ignore_case)
- : ignore_case_(ignore_case), error_message_(NULL) { }
+ Analysis(bool ignore_case, bool is_ascii)
+ : ignore_case_(ignore_case),
+ is_ascii_(is_ascii),
+ error_message_(NULL) { }
void EnsureAnalyzed(RegExpNode* node);
#define DECLARE_VISIT(Type) \
@@ -1232,6 +1234,7 @@
}
private:
bool ignore_case_;
+ bool is_ascii_;
const char* error_message_;
DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
diff --git a/src/version.cc b/src/version.cc
index 9a567ab..d4127a5 100644
--- a/src/version.cc
+++ b/src/version.cc
@@ -35,7 +35,7 @@
#define MAJOR_VERSION 1
#define MINOR_VERSION 3
#define BUILD_NUMBER 18
-#define PATCH_LEVEL 2
+#define PATCH_LEVEL 3
#define CANDIDATE_VERSION false
// Define SONAME to have the SCons build the put a specific SONAME into the
diff --git a/test/cctest/test-regexp.cc b/test/cctest/test-regexp.cc
index 81c2205..3b664a1 100644
--- a/test/cctest/test-regexp.cc
+++ b/test/cctest/test-regexp.cc
@@ -1466,7 +1466,7 @@
ZoneScope zone_scope(DELETE_ON_EXIT);
int count = expected.length();
ZoneList<CharacterRange>* list = new ZoneList<CharacterRange>(count);
- input.AddCaseEquivalents(list);
+ input.AddCaseEquivalents(list, false);
CHECK_EQ(count, list->length());
for (int i = 0; i < list->length(); i++) {
CHECK_EQ(expected[i].from(), list->at(i).from());
diff --git a/test/mjsunit/cyrillic.js b/test/mjsunit/cyrillic.js
new file mode 100644
index 0000000..13775b0
--- /dev/null
+++ b/test/mjsunit/cyrillic.js
@@ -0,0 +1,208 @@
+// Copyright 2009 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Test Unicode character ranges in regexps.
+
+
+// Cyrillic.
+var cyrillic = {
+ FIRST: "\u0410", // A
+ first: "\u0430", // a
+ LAST: "\u042f", // YA
+ last: "\u044f", // ya
+ MIDDLE: "\u0427", // CHE
+ middle: "\u0447", // che
+ // Actually no characters are between the cases in Cyrillic.
+ BetweenCases: false};
+
+var SIGMA = "\u03a3";
+var sigma = "\u03c3";
+var alternative_sigma = "\u03c2";
+
+// Greek.
+var greek = {
+ FIRST: "\u0391", // ALPHA
+ first: "\u03b1", // alpha
+ LAST: "\u03a9", // OMEGA
+ last: "\u03c9", // omega
+ MIDDLE: SIGMA, // SIGMA
+ middle: sigma, // sigma
+ // Epsilon acute is between ALPHA-OMEGA and alpha-omega, i.e. it
+ // is between OMEGA and alpha.
+ BetweenCases: "\u03ad"};
+
+
+function Range(from, to, flags) {
+ return new RegExp("[" + from + "-" + to + "]", flags);
+}
+
+// Test Cyrillic and Greek separately.
+for (var lang = 0; lang < 2; lang++) {
+ var chars = (lang == 0) ? cyrillic : greek;
+
+ for (var i = 0; i < 2; i++) {
+ var lc = (i == 0); // Lower case.
+ var first = lc ? chars.first : chars.FIRST;
+ var middle = lc ? chars.middle : chars.MIDDLE;
+ var last = lc ? chars.last : chars.LAST;
+ var first_other_case = lc ? chars.FIRST : chars.first;
+ var middle_other_case = lc ? chars.MIDDLE : chars.middle;
+ var last_other_case = lc ? chars.LAST : chars.last;
+
+ assertTrue(Range(first, last).test(first), 1);
+ assertTrue(Range(first, last).test(middle), 2);
+ assertTrue(Range(first, last).test(last), 3);
+
+ assertFalse(Range(first, last).test(first_other_case), 4);
+ assertFalse(Range(first, last).test(middle_other_case), 5);
+ assertFalse(Range(first, last).test(last_other_case), 6);
+
+ assertTrue(Range(first, last, "i").test(first), 7);
+ assertTrue(Range(first, last, "i").test(middle), 8);
+ assertTrue(Range(first, last, "i").test(last), 9);
+
+ assertTrue(Range(first, last, "i").test(first_other_case), 10);
+ assertTrue(Range(first, last, "i").test(middle_other_case), 11);
+ assertTrue(Range(first, last, "i").test(last_other_case), 12);
+
+ if (chars.BetweenCases) {
+ assertFalse(Range(first, last).test(chars.BetweenCases), 13);
+ assertFalse(Range(first, last, "i").test(chars.BetweenCases), 14);
+ }
+ }
+ if (chars.BetweenCases) {
+ assertTrue(Range(chars.FIRST, chars.last).test(chars.BetweenCases), 15);
+ assertTrue(Range(chars.FIRST, chars.last, "i").test(chars.BetweenCases), 16);
+ }
+}
+
+// Test range that covers both greek and cyrillic characters.
+for (key in greek) {
+ assertTrue(Range(greek.FIRST, cyrillic.last).test(greek[key]), 17 + key);
+ if (cyrillic[key]) {
+ assertTrue(Range(greek.FIRST, cyrillic.last).test(cyrillic[key]), 18 + key);
+ }
+}
+
+for (var i = 0; i < 2; i++) {
+ var ignore_case = (i == 0);
+ var flag = ignore_case ? "i" : "";
+ assertTrue(Range(greek.first, cyrillic.LAST, flag).test(greek.first), 19);
+ assertTrue(Range(greek.first, cyrillic.LAST, flag).test(greek.middle), 20);
+ assertTrue(Range(greek.first, cyrillic.LAST, flag).test(greek.last), 21);
+
+ assertTrue(Range(greek.first, cyrillic.LAST, flag).test(cyrillic.FIRST), 22);
+ assertTrue(Range(greek.first, cyrillic.LAST, flag).test(cyrillic.MIDDLE), 23);
+ assertTrue(Range(greek.first, cyrillic.LAST, flag).test(cyrillic.LAST), 24);
+
+ // A range that covers the lower case greek letters and the upper case cyrillic
+ // letters.
+ assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(greek.FIRST), 25);
+ assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(greek.MIDDLE), 26);
+ assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(greek.LAST), 27);
+
+ assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(cyrillic.first), 28);
+ assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(cyrillic.middle), 29);
+ assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(cyrillic.last), 30);
+}
+
+
+// Sigma is special because there are two lower case versions of the same upper
+// case character. JS requires that case independence means that you should
+// convert everything to upper case, so the two sigma variants are equal to each
+// other in a case independent comparison.
+for (var i = 0; i < 2; i++) {
+ var simple = (i != 0);
+ var name = simple ? "" : "[]";
+ var regex = simple ? SIGMA : "[" + SIGMA + "]";
+
+ assertFalse(new RegExp(regex).test(sigma), 31 + name);
+ assertFalse(new RegExp(regex).test(alternative_sigma), 32 + name);
+ assertTrue(new RegExp(regex).test(SIGMA), 33 + name);
+
+ assertTrue(new RegExp(regex, "i").test(sigma), 34 + name);
+ // JSC and Tracemonkey fail this one.
+ assertTrue(new RegExp(regex, "i").test(alternative_sigma), 35 + name);
+ assertTrue(new RegExp(regex, "i").test(SIGMA), 36 + name);
+
+ regex = simple ? sigma : "[" + sigma + "]";
+
+ assertTrue(new RegExp(regex).test(sigma), 41 + name);
+ assertFalse(new RegExp(regex).test(alternative_sigma), 42 + name);
+ assertFalse(new RegExp(regex).test(SIGMA), 43 + name);
+
+ assertTrue(new RegExp(regex, "i").test(sigma), 44 + name);
+ // JSC and Tracemonkey fail this one.
+ assertTrue(new RegExp(regex, "i").test(alternative_sigma), 45 + name);
+ assertTrue(new RegExp(regex, "i").test(SIGMA), 46 + name);
+
+ regex = simple ? alternative_sigma : "[" + alternative_sigma + "]";
+
+ assertFalse(new RegExp(regex).test(sigma), 51 + name);
+ assertTrue(new RegExp(regex).test(alternative_sigma), 52 + name);
+ assertFalse(new RegExp(regex).test(SIGMA), 53 + name);
+
+ // JSC and Tracemonkey fail this one.
+ assertTrue(new RegExp(regex, "i").test(sigma), 54 + name);
+ assertTrue(new RegExp(regex, "i").test(alternative_sigma), 55 + name);
+ // JSC and Tracemonkey fail this one.
+ assertTrue(new RegExp(regex, "i").test(SIGMA), 56 + name);
+}
+
+
+// Test all non-ASCII characters individually to ensure that our optimizations
+// didn't break anything.
+for (var i = 0x80; i <= 0xfffe; i++) {
+ var c = String.fromCharCode(i);
+ var c2 = String.fromCharCode(i + 1);
+ var re = new RegExp("[" + c + "-" + c2 + "]", "i");
+ assertTrue(re.test(c), 57);
+}
+
+for (var add_non_ascii_character_to_subject = 0;
+ add_non_ascii_character_to_subject < 2;
+ add_non_ascii_character_to_subject++) {
+ var suffix = add_non_ascii_character_to_subject ? "\ufffe" : "";
+ // A range that covers both ASCII and non-ASCII.
+ for (var i = 0; i < 2; i++) {
+ var full = (i != 0);
+ var mixed = full ? "[a-\uffff]" : "[a-" + cyrillic.LAST + "]";
+ var f = full ? "f" : "c";
+ for (var j = 0; j < 2; j++) {
+ var ignore_case = (j == 0);
+ var flag = ignore_case ? "i" : "";
+ var re = new RegExp(mixed, flag);
+ assertEquals(ignore_case || (full && add_non_ascii_character_to_subject),
+ re.test("A" + suffix),
+ 58 + flag + f);
+ assertTrue(re.test("a" + suffix), 59 + flag + f);
+ assertTrue(re.test("~" + suffix), 60 + flag + f);
+ assertTrue(re.test(cyrillic.MIDDLE), 61 + flag + f);
+ assertEquals(ignore_case || full, re.test(cyrillic.middle), 62 + flag + f);
+ }
+ }
+}
diff --git a/test/mjsunit/mjsunit.status b/test/mjsunit/mjsunit.status
index 15f62b0..7995a82 100644
--- a/test/mjsunit/mjsunit.status
+++ b/test/mjsunit/mjsunit.status
@@ -39,6 +39,9 @@
# Issue 488: this test sometimes times out.
array-constructor: PASS || TIMEOUT
+# Issue 499
+cyrillic: PASS, TIMEOUT if ($arch == arm)
+
[ $arch == arm ]
# Slow tests which times out in debug mode.
diff --git a/test/mjsunit/regress/regress-486.js b/test/mjsunit/regress/regress-486.js
new file mode 100644
index 0000000..c1e29a6
--- /dev/null
+++ b/test/mjsunit/regress/regress-486.js
@@ -0,0 +1,30 @@
+// Copyright 2009 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+var st = "\u0422\u0435\u0441\u0442"; // Test in Cyrillic characters.
+var cyrillicMatch = /^[\u0430-\u044fa-z]+$/i.test(st); // a-ja a-z.
+assertTrue(cyrillicMatch);