/*
 * Portions Copyright 2005 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
| 25 | |
| 26 | /* |
| 27 | */ |
| 28 | |
/*
 * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
 * (C) Copyright IBM Corp. 1996 - 1998 - All Rights Reserved
 *
 * The original version of this source code and documentation
 * is copyrighted and owned by Taligent, Inc., a wholly-owned
 * subsidiary of IBM. These materials are provided under terms
 * of a License Agreement between Taligent and Sun. This technology
 * is protected by multiple US and International patents.
 *
 * This notice and attribution to Taligent may not be removed.
 * Taligent is a registered trademark of Taligent, Inc.
 *
 */
| 43 | |
| 44 | package sun.text.resources; |
| 45 | |
| 46 | import java.util.ListResourceBundle; |
| 47 | |
/**
 * Resource bundle holding the Thai ("th") collation tailoring.
 *
 * <p>The bundle exposes a single entry, keyed {@code "Rule"}, whose value is
 * the rule string assembled in {@link #THAI_RULES}. The rule text is
 * presumably consumed by {@code java.text.RuleBasedCollator}; the consumer is
 * not visible in this file, so confirm against the caller before relying on
 * that.
 */
public class CollationData_th extends ListResourceBundle {

    /**
     * The complete Thai tailoring, built from compile-time constant pieces so
     * that each character can carry its own name as a comment. The order of
     * the pieces is significant: every relation is relative to the entry (or
     * reset point, introduced by {@code &}) that precedes it.
     */
    private static final String THAI_RULES =
        "! "                    // First turn on the SE Asian vowel/consonant
                                // swapping rule

        //
        // Consonants: all of them are placed after Latin Z
        //
        + "& Z "
        + "< \u0E01 "           // KO KAI
        + "< \u0E02 "           // KHO KHAI
        + "< \u0E03 "           // KHO KHUAT
        + "< \u0E04 "           // KHO KHWAI
        + "< \u0E05 "           // KHO KHON
        + "< \u0E06 "           // KHO RAKHANG
        + "< \u0E07 "           // NGO NGU
        + "< \u0E08 "           // CHO CHAN
        + "< \u0E09 "           // CHO CHING
        + "< \u0E0A "           // CHO CHANG
        + "< \u0E0B "           // SO SO
        + "< \u0E0C "           // CHO CHOE
        + "< \u0E0D "           // YO YING
        + "< \u0E0E "           // DO CHADA
        + "< \u0E0F "           // TO PATAK
        + "< \u0E10 "           // THO THAN
        + "< \u0E11 "           // THO NANGMONTHO
        + "< \u0E12 "           // THO PHUTHAO
        + "< \u0E13 "           // NO NEN
        + "< \u0E14 "           // DO DEK
        + "< \u0E15 "           // TO TAO
        + "< \u0E16 "           // THO THUNG
        + "< \u0E17 "           // THO THAHAN
        + "< \u0E18 "           // THO THONG
        + "< \u0E19 "           // NO NU
        + "< \u0E1A "           // BO BAIMAI
        + "< \u0E1B "           // PO PLA
        + "< \u0E1C "           // PHO PHUNG
        + "< \u0E1D "           // FO FA
        + "< \u0E1E "           // PHO PHAN
        + "< \u0E1F "           // FO FAN
        + "< \u0E20 "           // PHO SAMPHAO
        + "< \u0E21 "           // MO MA
        + "< \u0E22 "           // YO YAK
        + "< \u0E23 "           // RO RUA
        + "< \u0E24 "           // RU
        + "< \u0E25 "           // LO LING
        + "< \u0E26 "           // LU
        + "< \u0E27 "           // WO WAEN
        + "< \u0E28 "           // SO SALA
        + "< \u0E29 "           // SO RUSI
        + "< \u0E2A "           // SO SUA
        + "< \u0E2B "           // HO HIP
        + "< \u0E2C "           // LO CHULA
        + "< \u0E2D "           // O ANG
        + "< \u0E2E "           // HO NOKHUK

        //
        // Normal vowels
        //
        + "< \u0E30 "           // SARA A
        + "< \u0E31 "           // MAI HAN-AKAT
        + "< \u0E32 "           // SARA AA

        // Normalizer will decompose SARA AM into NIKHAHIT followed by
        // SARA AA (U+0E4D U+0E32). That is unfortunate, because the
        // separate characters should sort differently from the single
        // one. Since there is no public way to set the decomposition used
        // when creating a collator, there is no way around this right now.
        // The character is left in anyway, because it occurs in this form
        // far more often than as separate characters.
        + "< \u0E33 "           // SARA AM

        + "< \u0E34 "           // SARA I
        + "< \u0E35 "           // SARA II
        + "< \u0E36 "           // SARA UE
        + "< \u0E37 "           // SARA UEE
        + "< \u0E38 "           // SARA U
        + "< \u0E39 "           // SARA UU

        //
        // Preceding vowels
        //
        + "< \u0E40 "           // SARA E
        + "< \u0E41 "           // SARA AE
        + "< \u0E42 "           // SARA O
        + "< \u0E43 "           // SARA AI MAIMUAN
        + "< \u0E44 "           // SARA AI MAIMALAI

        //
        // Digits
        //
        + "< \u0E50 "           // DIGIT ZERO
        + "< \u0E51 "           // DIGIT ONE
        + "< \u0E52 "           // DIGIT TWO
        + "< \u0E53 "           // DIGIT THREE
        + "< \u0E54 "           // DIGIT FOUR
        + "< \u0E55 "           // DIGIT FIVE
        + "< \u0E56 "           // DIGIT SIX
        + "< \u0E57 "           // DIGIT SEVEN
        + "< \u0E58 "           // DIGIT EIGHT
        + "< \u0E59 "           // DIGIT NINE

        // Something like a tonal mark, but perhaps not really one
        + "< \u0E4D "           // NIKHAHIT

        //
        // Thai symbols are supposed to sort "after white space". Here that
        // is interpreted as sorting just after the normal Latin-1 symbols,
        // which in turn come after the white space.
        //
        + "&'\u007d'"           // reset to right-brace (quoted in the rule)
        + "< \u0E2F "           // PAIYANNOI (ellipsis, abbreviation)
        + "< \u0E46 "           // MAIYAMOK
        + "< \u0E4F "           // FONGMAN
        + "< \u0E5A "           // ANGKHANKHU
        + "< \u0E5B "           // KHOMUT
        + "< \u0E3F "           // CURRENCY SYMBOL BAHT

        // This symbol is supposed to come "after all characters"
        + "< \u0E4E "           // YAMAKKAN

        // This rare symbol also comes after all characters. When it is
        // combined with RU or LU, however, the pair is treated as a
        // separate letter, like "CH" sorting after "C" in traditional
        // Spanish.
        + "< \u0E45 "           // LAKKHANGYAO
        + "& \u0E24 < \u0E24\u0E45 "    // RU, then RU + LAKKHANGYAO
        + "& \u0E26 < \u0E26\u0E45 "    // LU, then LU + LAKKHANGYAO

        // Tonal marks are primary ignorables but are treated as secondary
        // differences
        + "& \u0301 "           // reset to the combining acute accent
        + "; \u0E47 "           // MAITAIKHU
        + "; \u0E48 "           // MAI EK
        + "; \u0E49 "           // MAI THO
        + "; \u0E4A "           // MAI TRI
        + "; \u0E4B "           // MAI CHATTAWA
        + "; \u0E4C "           // THANTHAKHAT

        // These are supposed to be ignored, so they are made equal to a
        // control character
        + "& \u0001 "
        + "= \u0E3A "           // PHINTHU
        + "= '.' ";             // period

    /**
     * Supplies the bundle contents: one key/value pair mapping
     * {@code "Rule"} to the Thai rule string.
     *
     * @return a newly allocated array containing the single
     *         {@code { "Rule", rules }} entry
     */
    @Override
    protected final Object[][] getContents() {
        return new Object[][] {
            { "Rule", THAI_RULES }
        };
    }
}