Blame - test/intl/string/normalization.js - platform/external/v8 - Gitiles

blob: 446d6277db174878de2a732cf0b0d449ed2f09f9 [file] [log] [blame]

Ben Murdoch	b8a8cc1	2014-11-26 15:28:44 +0000	[diff] [blame^]	1	// Copyright 2013 the V8 project authors. All rights reserved.
				2	// Redistribution and use in source and binary forms, with or without
				3	// modification, are permitted provided that the following conditions are
				4	// met:
				5	//
				6	// * Redistributions of source code must retain the above copyright
				7	// notice, this list of conditions and the following disclaimer.
				8	// * Redistributions in binary form must reproduce the above
				9	// copyright notice, this list of conditions and the following
				10	// disclaimer in the documentation and/or other materials provided
				11	// with the distribution.
				12	// * Neither the name of Google Inc. nor the names of its
				13	// contributors may be used to endorse or promote products derived
				14	// from this software without specific prior written permission.
				15	//
				16	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				17	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				18	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				19	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				20	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				21	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				22	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				23	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				24	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				25	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				26	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				27
				28	// Tests the new String.prototype.normalize method.
				29
				30
				31	// Common use case when searching for 'not very exact' match.
				32	// These are examples of data one might encounter in real use.
				33	var testRealUseCases = function() {
				34	// Vietnamese legacy text, old Windows 9x / non-Unicode applications use
				35	// windows-1258 code page, which is neither precomposed, nor decomposed.
				36	assertEquals('ti\u00ea\u0301ng Vi\u00ea\u0323t'.normalize('NFKD'),
				37	'ti\u1ebfng Vi\u1ec7t'.normalize('NFKD')); // all precomposed
				38
				39	// Various kinds of spaces
				40	assertEquals('Google\u0020Maps'.normalize('NFKD'), // normal space
				41	'Google\u00a0Maps'.normalize('NFKD')); // non-breaking space
				42	assertEquals('Google\u0020Maps'.normalize('NFKD'), // normal space
				43	'Google\u2002Maps'.normalize('NFKD')); // en-space
				44	assertEquals('Google\u0020Maps'.normalize('NFKD'), // normal space
				45	'Google\u2003Maps'.normalize('NFKD')); // em-space
				46	assertEquals('Google\u0020Maps'.normalize('NFKD'), // normal space
				47	'Google\u3000Maps'.normalize('NFKC')); // ideographic space
				48
				49	// Latin small ligature "fi"
				50	assertEquals('fi'.normalize('NFKD'), '\ufb01'.normalize('NFKD'));
				51
				52	// ŀ, Latin small L with middle dot, used in Catalan and often represented
				53	// as decomposed for non-Unicode environments ( l + ·)
				54	assertEquals('l\u00b7'.normalize('NFKD'), '\u0140'.normalize('NFKD'));
				55
				56	// Legacy text, Japanese narrow Kana (MS-DOS & Win 3.x time)
				57	assertEquals('\u30d1\u30bd\u30b3\u30f3'.normalize('NFKD'), // パソコン : wide
				58	'\uff8a\uff9f\uff7f\uff7a\uff9d'.normalize('NFKD')); // ﾊﾟｿｺﾝ : narrow
				59	// Also for Japanese, Latin fullwidth forms vs. ASCII
				60	assertEquals('ABCD'.normalize('NFKD'),
				61	'\uff21\uff22\uff23\uff24'.normalize('NFKD')); // ＡＢＣＤ, fullwidth
				62	}();
				63
				64
				65	var testEdgeCases = function() {
				66	// Make sure we throw RangeError, as the standard requires.
				67	assertThrows('"".normalize(1234)', RangeError);
				68	assertThrows('"".normalize("BAD")', RangeError);
				69
				70	// The standard does not say what kind of exceptions we should throw, so we
				71	// will not be specific. But we still test that we throw errors.
				72	assertThrows('s.normalize()'); // s is not defined
				73	assertThrows('var s = null; s.normalize()');
				74	assertThrows('var s = undefined; s.normalize()');
				75	assertThrows('var s = 1234; s.normalize()'); // no normalize for non-strings
				76	}();
				77
				78
				79	// Several kinds of mappings. No need to be comprehensive, we don't test
				80	// the ICU functionality, we only test C - JavaScript 'glue'
				81	var testData = [
				82	// org, default, NFC, NFD, NKFC, NKFD
				83	['\u00c7', // Ç : Combining sequence, Latin 1
				84	'\u00c7', '\u0043\u0327',
				85	'\u00c7', '\u0043\u0327'],
				86	['\u0218', // Ș : Combining sequence, non-Latin 1
				87	'\u0218', '\u0053\u0326',
				88	'\u0218', '\u0053\u0326'],
				89	['\uac00', // 가 : Hangul
				90	'\uac00', '\u1100\u1161',
				91	'\uac00', '\u1100\u1161'],
				92	['\uff76', // ｶ : Narrow Kana
				93	'\uff76', '\uff76',
				94	'\u30ab', '\u30ab'],
				95	['\u00bc', // ¼ : Fractions
				96	'\u00bc', '\u00bc',
				97	'\u0031\u2044\u0034', '\u0031\u2044\u0034'],
				98	['\u01c6', // ǆ : Latin ligature
				99	'\u01c6', '\u01c6',
				100	'\u0064\u017e', '\u0064\u007a\u030c'],
				101	['s\u0307\u0323', // s + dot above + dot below, ordering of combining marks
				102	'\u1e69', 's\u0323\u0307',
				103	'\u1e69', 's\u0323\u0307'],
				104	['\u3300', // ㌀ : Squared characters
				105	'\u3300', '\u3300',
				106	'\u30a2\u30d1\u30fc\u30c8', // アパート
				107	'\u30a2\u30cf\u309a\u30fc\u30c8'], // アパート
				108	['\ufe37', // ︷ : Vertical forms
				109	'\ufe37', '\ufe37',
				110	'{' , '{'],
				111	['\u2079', // ⁹ : superscript 9
				112	'\u2079', '\u2079',
				113	'9', '9'],
				114	['\ufee5\ufee6\ufee7\ufee8', // Arabic forms
				115	'\ufee5\ufee6\ufee7\ufee8', '\ufee5\ufee6\ufee7\ufee8',
				116	'\u0646\u0646\u0646\u0646', '\u0646\u0646\u0646\u0646'],
				117	['\u2460', // ① : Circled
				118	'\u2460', '\u2460',
				119	'1', '1'],
				120	['\u210c', // ℌ : Font variants
				121	'\u210c', '\u210c',
				122	'H', 'H'],
				123	['\u2126', // Ω : Singleton, OHM sign vs. Greek capital letter OMEGA
				124	'\u03a9', '\u03a9',
				125	'\u03a9', '\u03a9'],
				126	['\ufdfb', // Long ligature, ARABIC LIGATURE JALLAJALALOUHOU
				127	'\ufdfb', '\ufdfb',
				128	'\u062C\u0644\u0020\u062C\u0644\u0627\u0644\u0647',
				129	'\u062C\u0644\u0020\u062C\u0644\u0627\u0644\u0647']
				130	];
				131
				132	var testArray = function() {
				133	var kNFC = 1, kNFD = 2, kNFKC = 3, kNFKD = 4;
				134	for (var i = 0; i < testData.length; ++i) {
				135	// the original, NFC and NFD should normalize to the same thing
				136	for (var column = 0; column < 3; ++column) {
				137	var str = testData[i][column];
				138	assertEquals(str.normalize(), testData[i][kNFC]); // defaults to NFC
				139	assertEquals(str.normalize('NFC'), testData[i][kNFC]);
				140	assertEquals(str.normalize('NFD'), testData[i][kNFD]);
				141	assertEquals(str.normalize('NFKC'), testData[i][kNFKC]);
				142	assertEquals(str.normalize('NFKD'), testData[i][kNFKD]);
				143	}
				144	}
				145	}();