blob: a12eb7d9134a2612350992efadf063e3a7093ae5 [file] [log] [blame]
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (c) 2002-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: October 30 2002
* Since: ICU 2.4
* 2010nov19 Markus Scherer  Rewrite for formatVersion 2.
**********************************************************************
*/
14#include "propname.h"
15#include "unicode/uchar.h"
16#include "unicode/udata.h"
Craig Cornelius83a171d2012-10-09 17:03:29 -070017#include "unicode/uscript.h"
Jean-Baptiste Querub13da9d2009-07-17 17:53:22 -070018#include "umutex.h"
19#include "cmemory.h"
20#include "cstring.h"
Jean-Baptiste Querub13da9d2009-07-17 17:53:22 -070021#include "uarrsort.h"
clairehob26ce3a2012-01-10 17:54:41 -080022#include "uinvchar.h"
23
24#define INCLUDED_FROM_PROPNAME_CPP
25#include "propname_data.h"
Jean-Baptiste Querub13da9d2009-07-17 17:53:22 -070026
27U_CDECL_BEGIN
28
29/**
30 * Get the next non-ignorable ASCII character from a property name
31 * and lowercases it.
32 * @return ((advance count for the name)<<8)|character
33 */
34static inline int32_t
35getASCIIPropertyNameChar(const char *name) {
36 int32_t i;
37 char c;
38
39 /* Ignore delimiters '-', '_', and ASCII White_Space */
40 for(i=0;
41 (c=name[i++])==0x2d || c==0x5f ||
42 c==0x20 || (0x09<=c && c<=0x0d);
43 ) {}
44
45 if(c!=0) {
46 return (i<<8)|(uint8_t)uprv_asciitolower((char)c);
47 } else {
48 return i<<8;
49 }
50}
51
52/**
53 * Get the next non-ignorable EBCDIC character from a property name
54 * and lowercases it.
55 * @return ((advance count for the name)<<8)|character
56 */
57static inline int32_t
58getEBCDICPropertyNameChar(const char *name) {
59 int32_t i;
60 char c;
61
62 /* Ignore delimiters '-', '_', and EBCDIC White_Space */
63 for(i=0;
64 (c=name[i++])==0x60 || c==0x6d ||
65 c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d;
66 ) {}
67
68 if(c!=0) {
69 return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c);
70 } else {
71 return i<<8;
72 }
73}
74
75/**
76 * Unicode property names and property value names are compared "loosely".
77 *
78 * UCD.html 4.0.1 says:
79 * For all property names, property value names, and for property values for
80 * Enumerated, Binary, or Catalog properties, use the following
81 * loose matching rule:
82 *
83 * LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
84 *
85 * This function does just that, for (char *) name strings.
86 * It is almost identical to ucnv_compareNames() but also ignores
87 * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
88 *
89 * @internal
90 */
91
92U_CAPI int32_t U_EXPORT2
93uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
94 int32_t rc, r1, r2;
95
96 for(;;) {
97 r1=getASCIIPropertyNameChar(name1);
98 r2=getASCIIPropertyNameChar(name2);
99
100 /* If we reach the ends of both strings then they match */
101 if(((r1|r2)&0xff)==0) {
102 return 0;
103 }
clairehob26ce3a2012-01-10 17:54:41 -0800104
Jean-Baptiste Querub13da9d2009-07-17 17:53:22 -0700105 /* Compare the lowercased characters */
106 if(r1!=r2) {
107 rc=(r1&0xff)-(r2&0xff);
108 if(rc!=0) {
109 return rc;
110 }
111 }
112
113 name1+=r1>>8;
114 name2+=r2>>8;
115 }
116}
117
118U_CAPI int32_t U_EXPORT2
119uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
120 int32_t rc, r1, r2;
121
122 for(;;) {
123 r1=getEBCDICPropertyNameChar(name1);
124 r2=getEBCDICPropertyNameChar(name2);
125
126 /* If we reach the ends of both strings then they match */
127 if(((r1|r2)&0xff)==0) {
128 return 0;
129 }
clairehob26ce3a2012-01-10 17:54:41 -0800130
Jean-Baptiste Querub13da9d2009-07-17 17:53:22 -0700131 /* Compare the lowercased characters */
132 if(r1!=r2) {
133 rc=(r1&0xff)-(r2&0xff);
134 if(rc!=0) {
135 return rc;
136 }
137 }
138
139 name1+=r1>>8;
140 name2+=r2>>8;
141 }
142}
143
144U_CDECL_END
145
146U_NAMESPACE_BEGIN
147
clairehob26ce3a2012-01-10 17:54:41 -0800148int32_t PropNameData::findProperty(int32_t property) {
149 int32_t i=1; // valueMaps index, initially after numRanges
150 for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) {
151 // Read and skip the start and limit of this range.
152 int32_t start=valueMaps[i];
153 int32_t limit=valueMaps[i+1];
154 i+=2;
155 if(property<start) {
156 break;
157 }
158 if(property<limit) {
159 return i+(property-start)*2;
160 }
161 i+=(limit-start)*2; // Skip all entries for this range.
162 }
163 return 0;
164}
Jean-Baptiste Querub13da9d2009-07-17 17:53:22 -0700165
clairehob26ce3a2012-01-10 17:54:41 -0800166int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) {
167 if(valueMapIndex==0) {
168 return 0; // The property does not have named values.
169 }
170 ++valueMapIndex; // Skip the BytesTrie offset.
171 int32_t numRanges=valueMaps[valueMapIndex++];
172 if(numRanges<0x10) {
173 // Ranges of values.
174 for(; numRanges>0; --numRanges) {
175 // Read and skip the start and limit of this range.
176 int32_t start=valueMaps[valueMapIndex];
177 int32_t limit=valueMaps[valueMapIndex+1];
178 valueMapIndex+=2;
179 if(value<start) {
180 break;
181 }
182 if(value<limit) {
183 return valueMaps[valueMapIndex+value-start];
184 }
185 valueMapIndex+=limit-start; // Skip all entries for this range.
186 }
187 } else {
188 // List of values.
189 int32_t valuesStart=valueMapIndex;
190 int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
191 do {
192 int32_t v=valueMaps[valueMapIndex];
193 if(value<v) {
194 break;
195 }
196 if(value==v) {
197 return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];
198 }
199 } while(++valueMapIndex<nameGroupOffsetsStart);
200 }
201 return 0;
202}
203
204const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {
205 int32_t numNames=*nameGroup++;
206 if(nameIndex<0 || numNames<=nameIndex) {
Jean-Baptiste Querub13da9d2009-07-17 17:53:22 -0700207 return NULL;
208 }
clairehob26ce3a2012-01-10 17:54:41 -0800209 // Skip nameIndex names.
210 for(; nameIndex>0; --nameIndex) {
211 nameGroup=uprv_strchr(nameGroup, 0)+1;
Jean-Baptiste Querub13da9d2009-07-17 17:53:22 -0700212 }
clairehob26ce3a2012-01-10 17:54:41 -0800213 if(*nameGroup==0) {
214 return NULL; // no name (Property[Value]Aliases.txt has "n/a")
Jean-Baptiste Querub13da9d2009-07-17 17:53:22 -0700215 }
clairehob26ce3a2012-01-10 17:54:41 -0800216 return nameGroup;
Jean-Baptiste Querub13da9d2009-07-17 17:53:22 -0700217}
218
clairehob26ce3a2012-01-10 17:54:41 -0800219UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
220 if(name==NULL) {
221 return FALSE;
Jean-Baptiste Querub13da9d2009-07-17 17:53:22 -0700222 }
clairehob26ce3a2012-01-10 17:54:41 -0800223 UStringTrieResult result=USTRINGTRIE_NO_VALUE;
224 char c;
225 while((c=*name++)!=0) {
226 c=uprv_invCharToLowercaseAscii(c);
227 // Ignore delimiters '-', '_', and ASCII White_Space.
228 if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
229 continue;
Jean-Baptiste Querub13da9d2009-07-17 17:53:22 -0700230 }
clairehob26ce3a2012-01-10 17:54:41 -0800231 if(!USTRINGTRIE_HAS_NEXT(result)) {
232 return FALSE;
233 }
234 result=trie.next((uint8_t)c);
Jean-Baptiste Querub13da9d2009-07-17 17:53:22 -0700235 }
clairehob26ce3a2012-01-10 17:54:41 -0800236 return USTRINGTRIE_HAS_VALUE(result);
Jean-Baptiste Querub13da9d2009-07-17 17:53:22 -0700237}
238
clairehob26ce3a2012-01-10 17:54:41 -0800239const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {
240 int32_t valueMapIndex=findProperty(property);
241 if(valueMapIndex==0) {
242 return NULL; // Not a known property.
243 }
244 return getName(nameGroups+valueMaps[valueMapIndex], nameChoice);
Jean-Baptiste Querub13da9d2009-07-17 17:53:22 -0700245}
246
clairehob26ce3a2012-01-10 17:54:41 -0800247const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) {
248 int32_t valueMapIndex=findProperty(property);
249 if(valueMapIndex==0) {
250 return NULL; // Not a known property.
251 }
252 int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
253 if(nameGroupOffset==0) {
254 return NULL;
255 }
256 return getName(nameGroups+nameGroupOffset, nameChoice);
257}
258
259int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
260 BytesTrie trie(bytesTries+bytesTrieOffset);
261 if(containsName(trie, alias)) {
262 return trie.getValue();
263 } else {
264 return UCHAR_INVALID_CODE;
265 }
266}
267
268int32_t PropNameData::getPropertyEnum(const char *alias) {
269 return getPropertyOrValueEnum(0, alias);
270}
271
272int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) {
273 int32_t valueMapIndex=findProperty(property);
274 if(valueMapIndex==0) {
275 return UCHAR_INVALID_CODE; // Not a known property.
276 }
277 valueMapIndex=valueMaps[valueMapIndex+1];
278 if(valueMapIndex==0) {
279 return UCHAR_INVALID_CODE; // The property does not have named values.
280 }
281 // valueMapIndex is the start of the property's valueMap,
282 // where the first word is the BytesTrie offset.
283 return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
284}
285U_NAMESPACE_END
286
//----------------------------------------------------------------------
// Public API implementation
289
Jean-Baptiste Querub13da9d2009-07-17 17:53:22 -0700290U_CAPI const char* U_EXPORT2
291u_getPropertyName(UProperty property,
292 UPropertyNameChoice nameChoice) {
clairehob26ce3a2012-01-10 17:54:41 -0800293 U_NAMESPACE_USE
294 return PropNameData::getPropertyName(property, nameChoice);
Jean-Baptiste Querub13da9d2009-07-17 17:53:22 -0700295}
296
297U_CAPI UProperty U_EXPORT2
298u_getPropertyEnum(const char* alias) {
clairehob26ce3a2012-01-10 17:54:41 -0800299 U_NAMESPACE_USE
300 return (UProperty)PropNameData::getPropertyEnum(alias);
Jean-Baptiste Querub13da9d2009-07-17 17:53:22 -0700301}
302
303U_CAPI const char* U_EXPORT2
304u_getPropertyValueName(UProperty property,
305 int32_t value,
306 UPropertyNameChoice nameChoice) {
clairehob26ce3a2012-01-10 17:54:41 -0800307 U_NAMESPACE_USE
308 return PropNameData::getPropertyValueName(property, value, nameChoice);
Jean-Baptiste Querub13da9d2009-07-17 17:53:22 -0700309}
310
311U_CAPI int32_t U_EXPORT2
312u_getPropertyValueEnum(UProperty property,
313 const char* alias) {
clairehob26ce3a2012-01-10 17:54:41 -0800314 U_NAMESPACE_USE
315 return PropNameData::getPropertyValueEnum(property, alias);
Jean-Baptiste Querub13da9d2009-07-17 17:53:22 -0700316}
Craig Cornelius83a171d2012-10-09 17:03:29 -0700317
318U_CAPI const char* U_EXPORT2
319uscript_getName(UScriptCode scriptCode){
320 return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
321 U_LONG_PROPERTY_NAME);
322}
323
324U_CAPI const char* U_EXPORT2
325uscript_getShortName(UScriptCode scriptCode){
326 return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
327 U_SHORT_PROPERTY_NAME);
328}