blob: 5691fe9a77fba4608744418ef7f02d1f50da7b52 [file] [log] [blame]
Victor Chang73229502020-09-17 13:39:19 +01001// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5* Copyright (C) 2004-2016, International Business Machines
6* Corporation and others. All Rights Reserved.
7**********************************************************************
8*/
9
10#ifndef ULOCIMP_H
11#define ULOCIMP_H
12
13#include "unicode/bytestream.h"
14#include "unicode/uloc.h"
15
Victor Changce4bf3c2021-01-19 16:34:24 +000016#include "charstr.h"
17
Victor Chang73229502020-09-17 13:39:19 +010018/**
19 * Create an iterator over the specified keywords list.
20 * @param keywordList double-null terminated list. Will be copied.
21 * @param keywordListSize size in bytes of keywordList
22 * @param status pointer to the error code
23 * @return enumeration (owned by caller) of the keyword list.
24 * @internal ICU 3.0
25 */
26U_CAPI UEnumeration* U_EXPORT2
27uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status);
28
29/**
30 * Look up a resource bundle table item with fallback on the table level.
31 * This is accessible so it can be called by C++ code.
 *
 * NOTE(review): the parameter descriptions below are inferred from the
 * parameter names only; confirm them against the implementation.
 *
 * @param path        presumably the resource bundle path to look in
 * @param locale      presumably the locale ID whose data is searched
 * @param tableKey    presumably the key of the table to search
 * @param subTableKey presumably the key of a sub-table within that table
 * @param itemKey     presumably the key of the item to return
 * @param pLength     presumably receives the length of the returned string
 * @param pErrorCode  pointer to the error code
 * @return pointer to UChar text for the item (ownership not visible here)
32 */
33U_CAPI const UChar * U_EXPORT2
34uloc_getTableStringWithFallback(
35 const char *path,
36 const char *locale,
37 const char *tableKey,
38 const char *subTableKey,
39 const char *itemKey,
40 int32_t *pLength,
41 UErrorCode *pErrorCode);
42
/**
 * Returns true if a is an ID separator ('_' or '-'), false otherwise.
 *
 * The argument is parenthesized in the expansion so that an expression
 * argument (for example a conditional or bitwise expression) is compared as
 * a whole instead of being re-associated by operator precedence.
 * Note that a is evaluated up to twice, so do not pass an argument with
 * side effects.
 */
#define _isIDSeparator(a) ((a) == '_' || (a) == '-')
45
/**
 * Takes a country ID string and returns a country ID string.
 * NOTE(review): judging by the name, this presumably maps an outdated
 * country ID to its current replacement; what is returned when there is no
 * replacement is not visible here - confirm against the implementation.
 */
46U_CFUNC const char*
47uloc_getCurrentCountryID(const char* oldID);
48
/**
 * Takes a language ID string and returns a language ID string.
 * NOTE(review): judging by the name, this presumably maps an outdated
 * language ID to its current replacement; what is returned when there is no
 * replacement is not visible here - confirm against the implementation.
 */
49U_CFUNC const char*
50uloc_getCurrentLanguageID(const char* oldID);
51
/**
 * Keyword extraction helper that reports its result through a ByteSink.
 * NOTE(review): the descriptions below are inferred from the parameter
 * names only; confirm them against the implementation.
 *
 * @param localeID  the locale ID to read keywords from
 * @param prev      presumably the character that precedes the keyword list
 * @param sink      presumably the output sink receiving the keywords
 * @param valuesToo presumably true to output keyword values as well as names
 * @param status    pointer to the error code
 */
Victor Changce4bf3c2021-01-19 16:34:24 +000052U_CFUNC void
53ulocimp_getKeywords(const char *localeID,
54 char prev,
55 icu::ByteSink& sink,
56 UBool valuesToo,
57 UErrorCode *status);
58
/**
 * Returns an icu::CharString computed from the given locale ID.
 * NOTE(review): judging by the names, presumably the language subtag of
 * localeID, with *pEnd receiving the position where parsing stopped;
 * whether pEnd may be NULL is not visible here - confirm.
 *
 * @param localeID the input locale ID
 * @param pEnd     presumably an output pointer into localeID
 * @param status   the error code (passed by reference)
 */
59icu::CharString U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +010060ulocimp_getLanguage(const char *localeID,
Victor Changce4bf3c2021-01-19 16:34:24 +000061 const char **pEnd,
62 UErrorCode &status);
Victor Chang73229502020-09-17 13:39:19 +010063
/**
 * Returns an icu::CharString computed from the given locale ID.
 * NOTE(review): judging by the names, presumably the script subtag of
 * localeID, with *pEnd receiving the position where parsing stopped;
 * whether pEnd may be NULL is not visible here - confirm.
 *
 * @param localeID the input locale ID
 * @param pEnd     presumably an output pointer into localeID
 * @param status   the error code (passed by reference)
 */
Victor Changce4bf3c2021-01-19 16:34:24 +000064icu::CharString U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +010065ulocimp_getScript(const char *localeID,
Victor Changce4bf3c2021-01-19 16:34:24 +000066 const char **pEnd,
67 UErrorCode &status);
Victor Chang73229502020-09-17 13:39:19 +010068
/**
 * Returns an icu::CharString computed from the given locale ID.
 * NOTE(review): judging by the names, presumably the country (region)
 * subtag of localeID, with *pEnd receiving the position where parsing
 * stopped; whether pEnd may be NULL is not visible here - confirm.
 *
 * @param localeID the input locale ID
 * @param pEnd     presumably an output pointer into localeID
 * @param status   the error code (passed by reference)
 */
Victor Changce4bf3c2021-01-19 16:34:24 +000069icu::CharString U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +010070ulocimp_getCountry(const char *localeID,
Victor Changce4bf3c2021-01-19 16:34:24 +000071 const char **pEnd,
72 UErrorCode &status);
Victor Chang73229502020-09-17 13:39:19 +010073
/**
 * ByteSink-based helper taking a locale ID.
 * NOTE(review): judging by the name, presumably writes the full name of
 * localeID to sink; confirm against the implementation.
 *
 * @param localeID the input locale ID
 * @param sink     presumably the output sink receiving the result
 * @param err      pointer to the error code
 */
Victor Changce4bf3c2021-01-19 16:34:24 +000074U_CAPI void U_EXPORT2
Victor Changd8aa9d52021-01-05 23:49:57 +000075ulocimp_getName(const char* localeID,
76 icu::ByteSink& sink,
77 UErrorCode* err);
78
/**
 * ByteSink-based helper taking a locale ID.
 * NOTE(review): judging by the name, presumably writes the base name of
 * localeID (the name without keywords) to sink; confirm against the
 * implementation.
 *
 * @param localeID the input locale ID
 * @param sink     presumably the output sink receiving the result
 * @param err      pointer to the error code
 */
Victor Changce4bf3c2021-01-19 16:34:24 +000079U_CAPI void U_EXPORT2
Victor Changd8aa9d52021-01-05 23:49:57 +000080ulocimp_getBaseName(const char* localeID,
81 icu::ByteSink& sink,
82 UErrorCode* err);
83
/**
 * ByteSink-based helper taking a locale ID.
 * NOTE(review): judging by the name, presumably writes the canonicalized
 * form of localeID to sink; confirm against the implementation.
 *
 * @param localeID the input locale ID
 * @param sink     presumably the output sink receiving the result
 * @param err      pointer to the error code
 */
Victor Changce4bf3c2021-01-19 16:34:24 +000084U_CAPI void U_EXPORT2
Victor Changd8aa9d52021-01-05 23:49:57 +000085ulocimp_canonicalize(const char* localeID,
86 icu::ByteSink& sink,
87 UErrorCode* err);
88
/**
 * ByteSink-based helper taking a locale ID and a keyword name.
 * NOTE(review): judging by the names, presumably writes the value of the
 * keyword keywordName in localeID to sink; behavior when the keyword is
 * absent is not visible here - confirm against the implementation.
 *
 * @param localeID    the input locale ID
 * @param keywordName the name of the keyword to look up
 * @param sink        presumably the output sink receiving the value
 * @param status      pointer to the error code
 */
Victor Changce4bf3c2021-01-19 16:34:24 +000089U_CAPI void U_EXPORT2
90ulocimp_getKeywordValue(const char* localeID,
91 const char* keywordName,
92 icu::ByteSink& sink,
93 UErrorCode* status);
94
Victor Chang73229502020-09-17 13:39:19 +010095/**
96 * Writes a well-formed language tag for this locale ID.
97 *
Victor Changce4bf3c2021-01-19 16:34:24 +000098 * **Note**: When `strict` is false, any locale fields which do not satisfy the
Victor Chang73229502020-09-17 13:39:19 +010099 * BCP47 syntax requirement will be omitted from the result. When `strict` is
Victor Changce4bf3c2021-01-19 16:34:24 +0000100 * true, this function sets U_ILLEGAL_ARGUMENT_ERROR to the `err` if any locale
Victor Chang73229502020-09-17 13:39:19 +0100101 * fields do not satisfy the BCP47 syntax requirement.
102 *
103 * @param localeID the input locale ID
104 * @param sink the output sink receiving the BCP47 language
105 * tag for this Locale.
106 * @param strict boolean value indicating if the function returns
107 * an error for an ill-formed input locale ID.
108 * @param err error information if receiving the language
109 * tag failed.
110 * (No return value: the function is declared void and the tag goes to `sink`.)
111 *
112 * @internal ICU 64
113 */
Victor Changce4bf3c2021-01-19 16:34:24 +0000114U_CAPI void U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +0100115ulocimp_toLanguageTag(const char* localeID,
116 icu::ByteSink& sink,
117 UBool strict,
118 UErrorCode* err);
119
120/**
121 * Returns a locale ID for the specified BCP47 language tag string.
122 * If the specified language tag contains any ill-formed subtags,
123 * the first such subtag and all following subtags are ignored.
124 * <p>
Victor Changce4bf3c2021-01-19 16:34:24 +0000125 * This implements the 'Language-Tag' production of BCP 47, and so
126 * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
127 * (regular and irregular) as well as private use language tags.
128 *
129 * Private use tags are represented as 'x-whatever',
130 * and legacy tags are converted to their canonical replacements where they exist.
131 *
132 * Note that a few legacy tags have no modern replacement;
133 * these will be converted using the fallback described in
Victor Chang73229502020-09-17 13:39:19 +0100134 * the first paragraph, so some information might be lost.
Victor Changce4bf3c2021-01-19 16:34:24 +0000135 *
Victor Chang73229502020-09-17 13:39:19 +0100136 * @param langtag the input BCP47 language tag.
137 * @param tagLen the length of langtag, or -1 to call uprv_strlen().
138 * @param sink the output sink receiving a locale ID for the
139 * specified BCP47 language tag.
140 * @param parsedLength if not NULL, successfully parsed length
141 * for the input language tag is set.
142 * @param err error information if receiving the locale ID
143 * failed.
144 * @internal ICU 63
145 */
146U_CAPI void U_EXPORT2
147ulocimp_forLanguageTag(const char* langtag,
148 int32_t tagLen,
149 icu::ByteSink& sink,
150 int32_t* parsedLength,
151 UErrorCode* err);
152
153/**
154 * Get the region to use for supplemental data lookup. Uses
155 * (1) any region specified by locale tag "rg"; if none then
156 * (2) any unicode_region_tag in the locale ID; if none then
Victor Changce4bf3c2021-01-19 16:34:24 +0000157 * (3) if inferRegion is true, the region suggested by
Victor Chang73229502020-09-17 13:39:19 +0100158 * getLikelySubtags on the localeID.
159 * If no region is found, returns 0.
160 *
161 * @param localeID
162 * The complete locale ID (with keywords) from which
163 * to get the region to use for supplemental data.
164 * @param inferRegion
Victor Changce4bf3c2021-01-19 16:34:24 +0000165 * If true, will try to infer region from localeID if
Victor Chang73229502020-09-17 13:39:19 +0100166 * no other region is found.
167 * @param region
168 * Buffer in which to put the region ID found; should
169 * have a capacity of at least ULOC_COUNTRY_CAPACITY.
170 * @param regionCapacity
171 * The actual capacity of the region buffer.
172 * @param status
173 * Pointer to in/out UErrorCode value for latest status.
174 * @return
175 * The length of any region code found, or 0 if none.
176 * @internal ICU 57
177 */
178U_CAPI int32_t U_EXPORT2
179ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
180 char *region, int32_t regionCapacity, UErrorCode* status);
181
182/**
183 * Add the likely subtags for a provided locale ID, per the algorithm described
184 * in the following CLDR technical report:
185 *
186 * http://www.unicode.org/reports/tr35/#Likely_Subtags
187 *
188 * If localeID is already in the maximal form, or there is no data available
189 * for maximization, it will be copied to the output sink. For example,
190 * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
191 *
192 * Examples:
193 *
194 * "en" maximizes to "en_Latn_US"
195 *
196 * "de" maximizes to "de_Latn_DE"
197 *
198 * "sr" maximizes to "sr_Cyrl_RS"
199 *
200 * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
201 *
202 * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
203 *
204 * @param localeID The locale to maximize
205 * @param sink The output sink receiving the maximized locale
206 * @param err Error information if maximizing the locale failed. If the length
207 * of the localeID and the null-terminator is greater than the maximum allowed size,
208 * or the localeID is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
209 * @internal ICU 64
210 */
Victor Changce4bf3c2021-01-19 16:34:24 +0000211U_CAPI void U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +0100212ulocimp_addLikelySubtags(const char* localeID,
213 icu::ByteSink& sink,
214 UErrorCode* err);
215
216/**
217 * Minimize the subtags for a provided locale ID, per the algorithm described
218 * in the following CLDR technical report:
219 *
220 * http://www.unicode.org/reports/tr35/#Likely_Subtags
221 *
222 * If localeID is already in the minimal form, or there is no data available
223 * for minimization, it will be copied to the output sink. Since the
224 * minimization algorithm relies on proper maximization, see the comments
225 * for ulocimp_addLikelySubtags for reasons why there might not be any data.
226 *
227 * Examples:
228 *
229 * "en_Latn_US" minimizes to "en"
230 *
231 * "de_Latn_DE" minimizes to "de"
232 *
233 * "sr_Cyrl_RS" minimizes to "sr"
234 *
235 * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
236 * script, and minimizing to "zh" would imply "zh_Hans_CN".)
237 *
238 * @param localeID The locale to minimize
239 * @param sink The output sink receiving the minimized locale
240 * @param err Error information if minimizing the locale failed. If the length
241 * of the localeID and the null-terminator is greater than the maximum allowed size,
242 * or the localeID is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
243 * @internal ICU 64
244 */
Victor Changce4bf3c2021-01-19 16:34:24 +0000245U_CAPI void U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +0100246ulocimp_minimizeSubtags(const char* localeID,
247 icu::ByteSink& sink,
248 UErrorCode* err);
249
/**
 * Takes a locale ID and returns a const char pointer.
 * NOTE(review): judging by the name, presumably returns a pointer to where
 * the keywords begin inside localeID; what is returned when the ID has no
 * keywords is not visible here - confirm against the implementation.
 */
250U_CAPI const char * U_EXPORT2
251locale_getKeywordsStart(const char *localeID);
252
/*
 * ultag_is* predicates. Each takes a character pointer s and a length len
 * and returns a UBool.
 * NOTE(review): judging by the names, each presumably reports whether the
 * len bytes at s are well-formed as the named kind of BCP47 subtag (or
 * subtag sequence, for the plural forms); whether len may be negative to
 * mean "NUL-terminated" is not visible here - confirm against the
 * implementation.
 */
253U_CFUNC UBool
254ultag_isExtensionSubtags(const char* s, int32_t len);
255
256U_CFUNC UBool
257ultag_isLanguageSubtag(const char* s, int32_t len);
258
259U_CFUNC UBool
260ultag_isPrivateuseValueSubtags(const char* s, int32_t len);
261
262U_CFUNC UBool
263ultag_isRegionSubtag(const char* s, int32_t len);
264
265U_CFUNC UBool
266ultag_isScriptSubtag(const char* s, int32_t len);
267
268U_CFUNC UBool
269ultag_isTransformedExtensionSubtags(const char* s, int32_t len);
270
271U_CFUNC UBool
272ultag_isUnicodeExtensionSubtags(const char* s, int32_t len);
273
274U_CFUNC UBool
275ultag_isUnicodeLocaleAttribute(const char* s, int32_t len);
276
277U_CFUNC UBool
278ultag_isUnicodeLocaleAttributes(const char* s, int32_t len);
279
280U_CFUNC UBool
281ultag_isUnicodeLocaleKey(const char* s, int32_t len);
282
283U_CFUNC UBool
284ultag_isUnicodeLocaleType(const char* s, int32_t len);
285
286U_CFUNC UBool
287ultag_isVariantSubtags(const char* s, int32_t len);
288
/*
 * Keyword key/type conversion helpers. Each returns a const char pointer.
 * NOTE(review): judging by the names, the toBcp* functions presumably map a
 * legacy keyword key/type to its BCP47 form and the toLegacy* functions do
 * the reverse; the return value for an unknown key or type, and the exact
 * meaning of the isKnownKey / isSpecialType outputs, are not visible here -
 * confirm against the implementation.
 */
289U_CFUNC const char*
290ulocimp_toBcpKey(const char* key);
291
292U_CFUNC const char*
293ulocimp_toLegacyKey(const char* key);
294
295U_CFUNC const char*
296ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
297
298U_CFUNC const char*
299ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
300
Victor Changce4bf3c2021-01-19 16:34:24 +0000301/* Functions for testing purposes. NOTE(review): the first presumably returns an array of known canonicalized locale names and stores its element count in *length - confirm. */
302U_CAPI const char* const* ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length);
303
304// Returns true if localeName is already canonicalized.
305U_CAPI bool ulocimp_isCanonicalizedLocaleForTest(const char* localeName);
306
Victor Chang73229502020-09-17 13:39:19 +0100307#endif