Ben Murdoch | b8a8cc1 | 2014-11-26 15:28:44 +0000 | [diff] [blame^] | 1 | // Copyright 2013 the V8 project authors. All rights reserved. |
| 2 | // Redistribution and use in source and binary forms, with or without |
| 3 | // modification, are permitted provided that the following conditions are |
| 4 | // met: |
| 5 | // |
| 6 | // * Redistributions of source code must retain the above copyright |
| 7 | // notice, this list of conditions and the following disclaimer. |
| 8 | // * Redistributions in binary form must reproduce the above |
| 9 | // copyright notice, this list of conditions and the following |
| 10 | // disclaimer in the documentation and/or other materials provided |
| 11 | // with the distribution. |
| 12 | // * Neither the name of Google Inc. nor the names of its |
| 13 | // contributors may be used to endorse or promote products derived |
| 14 | // from this software without specific prior written permission. |
| 15 | // |
| 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | |
| 28 | // Tests the new String.prototype.normalize method. |
| 29 | |
| 30 | |
// Common use case when searching for a 'not very exact' match.
// These are examples of data one might encounter in real use.
//
// Each pair below consists of two spellings of the same text; both sides are
// run through NFKD and must come out identical. The function is invoked
// immediately, so the assertions run as soon as this statement is evaluated.
// assertEquals is expected to be provided by the surrounding test harness.
var testRealUseCases = function() {
  // Vietnamese legacy text, old Windows 9x / non-Unicode applications use
  // windows-1258 code page, which is neither precomposed, nor decomposed.
  assertEquals('ti\u00ea\u0301ng Vi\u00ea\u0323t'.normalize('NFKD'),
               'ti\u1ebfng Vi\u1ec7t'.normalize('NFKD'));  // all precomposed

  // Various kinds of spaces
  assertEquals('Google\u0020Maps'.normalize('NFKD'),  // normal space
               'Google\u00a0Maps'.normalize('NFKD'));  // non-breaking space
  assertEquals('Google\u0020Maps'.normalize('NFKD'),  // normal space
               'Google\u2002Maps'.normalize('NFKD'));  // en-space
  assertEquals('Google\u0020Maps'.normalize('NFKD'),  // normal space
               'Google\u2003Maps'.normalize('NFKD'));  // em-space
  // Same form on both sides, like every other comparison in this function.
  assertEquals('Google\u0020Maps'.normalize('NFKD'),  // normal space
               'Google\u3000Maps'.normalize('NFKD'));  // ideographic space

  // Latin small ligature "fi"
  assertEquals('fi'.normalize('NFKD'), '\ufb01'.normalize('NFKD'));

  // ŀ, Latin small L with middle dot, used in Catalan and often represented
  // as decomposed for non-Unicode environments ( l + ·)
  assertEquals('l\u00b7'.normalize('NFKD'), '\u0140'.normalize('NFKD'));

  // Legacy text, Japanese narrow Kana (MS-DOS & Win 3.x time)
  assertEquals('\u30d1\u30bd\u30b3\u30f3'.normalize('NFKD'),  // パソコン : wide
               '\uff8a\uff9f\uff7f\uff7a\uff9d'.normalize('NFKD'));  // パソコン : narrow
  // Also for Japanese, Latin fullwidth forms vs. ASCII
  assertEquals('ABCD'.normalize('NFKD'),
               '\uff21\uff22\uff23\uff24'.normalize('NFKD'));  // ABCD, fullwidth
}();
| 63 | |
| 64 | |
// Error-path checks for normalize(). Invoked immediately; each snippet is
// handed to assertThrows as source text (assertThrows is expected to be
// provided by the surrounding test harness).
var testEdgeCases = function() {
  // Make sure we throw RangeError, as the standard requires.
  var invalidFormSnippets = [
    '"".normalize(1234)',
    '"".normalize("BAD")'
  ];
  for (var f = 0; f < invalidFormSnippets.length; ++f) {
    assertThrows(invalidFormSnippets[f], RangeError);
  }

  // The standard does not say what kind of exceptions we should throw, so we
  // will not be specific. But we still test that we throw errors.
  var invalidReceiverSnippets = [
    's.normalize()',  // s is not defined
    'var s = null; s.normalize()',
    'var s = undefined; s.normalize()',
    'var s = 1234; s.normalize()'  // no normalize for non-strings
  ];
  for (var r = 0; r < invalidReceiverSnippets.length; ++r) {
    assertThrows(invalidReceiverSnippets[r]);
  }
}();
| 77 | |
| 78 | |
// Several kinds of mappings. No need to be comprehensive, we don't test
// the ICU functionality, we only test C - JavaScript 'glue'
//
// Each row has exactly five columns:
//   [0] original string
//   [1] expected NFC result (also the result of normalize() with no argument)
//   [2] expected NFD result
//   [3] expected NFKC result
//   [4] expected NFKD result
var testData = [
  // org, NFC, NFD, NFKC, NFKD
  ['\u00c7',  // Ç : Combining sequence, Latin 1
   '\u00c7', '\u0043\u0327',
   '\u00c7', '\u0043\u0327'],
  ['\u0218',  // Ș : Combining sequence, non-Latin 1
   '\u0218', '\u0053\u0326',
   '\u0218', '\u0053\u0326'],
  ['\uac00',  // 가 : Hangul
   '\uac00', '\u1100\u1161',
   '\uac00', '\u1100\u1161'],
  ['\uff76',  // カ : Narrow Kana
   '\uff76', '\uff76',
   '\u30ab', '\u30ab'],
  ['\u00bc',  // ¼ : Fractions
   '\u00bc', '\u00bc',
   '\u0031\u2044\u0034', '\u0031\u2044\u0034'],
  ['\u01c6',  // dž : Latin ligature
   '\u01c6', '\u01c6',
   '\u0064\u017e', '\u0064\u007a\u030c'],
  ['s\u0307\u0323',  // s + dot above + dot below, ordering of combining marks
   '\u1e69', 's\u0323\u0307',
   '\u1e69', 's\u0323\u0307'],
  ['\u3300',  // ㌀ : Squared characters
   '\u3300', '\u3300',
   '\u30a2\u30d1\u30fc\u30c8',  // アパート
   '\u30a2\u30cf\u309a\u30fc\u30c8'],  // アパート
  ['\ufe37',  // ︷ : Vertical forms
   '\ufe37', '\ufe37',
   '{', '{'],
  ['\u2079',  // ⁹ : superscript 9
   '\u2079', '\u2079',
   '9', '9'],
  ['\ufee5\ufee6\ufee7\ufee8',  // Arabic forms
   '\ufee5\ufee6\ufee7\ufee8', '\ufee5\ufee6\ufee7\ufee8',
   '\u0646\u0646\u0646\u0646', '\u0646\u0646\u0646\u0646'],
  ['\u2460',  // ① : Circled
   '\u2460', '\u2460',
   '1', '1'],
  ['\u210c',  // ℌ : Font variants
   '\u210c', '\u210c',
   'H', 'H'],
  ['\u2126',  // Ω : Singleton, OHM sign vs. Greek capital letter OMEGA
   '\u03a9', '\u03a9',
   '\u03a9', '\u03a9'],
  ['\ufdfb',  // Long ligature, ARABIC LIGATURE JALLAJALALOUHOU
   '\ufdfb', '\ufdfb',
   '\u062C\u0644\u0020\u062C\u0644\u0627\u0644\u0647',
   '\u062C\u0644\u0020\u062C\u0644\u0627\u0644\u0647']
];
| 131 | |
// Runs every row of testData through all normalization forms and compares
// the results with the expected columns of that row. Invoked immediately, so
// the checks run as soon as this statement is evaluated.
var testArray = function() {
  // Column indices of the expected results within a testData row.
  var kNFC = 1, kNFD = 2, kNFKC = 3, kNFKD = 4;
  // Only columns 0..2 (original, NFC, NFD) are used as inputs; the NFKC and
  // NFKD columns serve purely as expected values.
  var kInputColumns = 3;

  for (var i = 0; i < testData.length; ++i) {
    var row = testData[i];
    // the original, NFC and NFD should normalize to the same thing
    for (var column = 0; column < kInputColumns; ++column) {
      var str = row[column];
      // Identifies the failing input when an assertion trips.
      var where = 'testData[' + i + '][' + column + ']';
      // NOTE(review): arguments are passed as (expected, found, label), which
      // is the order V8's assert helpers use - confirm against the harness
      // this file runs under. Equality is unaffected by the order; only the
      // wording of a failure message is.
      assertEquals(row[kNFC], str.normalize(), where + ' default');  // defaults to NFC
      assertEquals(row[kNFC], str.normalize('NFC'), where + ' NFC');
      assertEquals(row[kNFD], str.normalize('NFD'), where + ' NFD');
      assertEquals(row[kNFKC], str.normalize('NFKC'), where + ' NFKC');
      assertEquals(row[kNFKD], str.normalize('NFKD'), where + ' NFKD');
    }
  }
}();