blob: 9b378a57a407fa02bdaeda64443c70dc76947632 [file] [log] [blame]
Fredrik Roubert64339d32016-10-21 19:43:16 +02001// Copyright (C) 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
claireho50294ea2010-05-03 15:44:48 -07003/*
4*******************************************************************************
5*
Fredrik Roubert8de051c2016-03-10 13:13:27 +01006* Copyright (C) 1997-2016, International Business Machines
claireho50294ea2010-05-03 15:44:48 -07007* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: loclikely.cpp
11* encoding: US-ASCII
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2010feb25
16* created by: Markus W. Scherer
17*
18* Code for likely and minimized locale subtags, separated out from other .cpp files
19* that then do not depend on resource bundle code and likely-subtags data.
20*/
21
22#include "unicode/utypes.h"
ccorneliusf9878a22014-11-20 18:09:39 -080023#include "unicode/locid.h"
claireho50294ea2010-05-03 15:44:48 -070024#include "unicode/putil.h"
25#include "unicode/uloc.h"
26#include "unicode/ures.h"
ccorneliusf9878a22014-11-20 18:09:39 -080027#include "unicode/uscript.h"
claireho50294ea2010-05-03 15:44:48 -070028#include "cmemory.h"
29#include "cstring.h"
30#include "ulocimp.h"
31#include "ustr_imp.h"
32
33/**
34 * This function looks for the localeID in the likelySubtags resource.
35 *
36 * @param localeID The tag to find.
37 * @param buffer A buffer to hold the matching entry
38 * @param bufferLength The length of the output buffer
39 * @return A pointer to "buffer" if found, or a null pointer if not.
40 */
41static const char* U_CALLCONV
42findLikelySubtags(const char* localeID,
43 char* buffer,
44 int32_t bufferLength,
45 UErrorCode* err) {
46 const char* result = NULL;
47
48 if (!U_FAILURE(*err)) {
49 int32_t resLen = 0;
50 const UChar* s = NULL;
51 UErrorCode tmpErr = U_ZERO_ERROR;
52 UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr);
53 if (U_SUCCESS(tmpErr)) {
54 s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr);
55
56 if (U_FAILURE(tmpErr)) {
57 /*
58 * If a resource is missing, it's not really an error, it's
59 * just that we don't have any data for that particular locale ID.
60 */
61 if (tmpErr != U_MISSING_RESOURCE_ERROR) {
62 *err = tmpErr;
63 }
64 }
65 else if (resLen >= bufferLength) {
66 /* The buffer should never overflow. */
67 *err = U_INTERNAL_PROGRAM_ERROR;
68 }
69 else {
70 u_UCharsToChars(s, buffer, resLen + 1);
71 result = buffer;
72 }
73
74 ures_close(subtags);
75 } else {
76 *err = tmpErr;
77 }
78 }
79
80 return result;
81}
82
83/**
84 * Append a tag to a buffer, adding the separator if necessary. The buffer
85 * must be large enough to contain the resulting tag plus any separator
86 * necessary. The tag must not be a zero-length string.
87 *
88 * @param tag The tag to add.
89 * @param tagLength The length of the tag.
90 * @param buffer The output buffer.
91 * @param bufferLength The length of the output buffer. This is an input/ouput parameter.
92 **/
93static void U_CALLCONV
94appendTag(
95 const char* tag,
96 int32_t tagLength,
97 char* buffer,
98 int32_t* bufferLength) {
99
100 if (*bufferLength > 0) {
101 buffer[*bufferLength] = '_';
102 ++(*bufferLength);
103 }
104
105 uprv_memmove(
106 &buffer[*bufferLength],
107 tag,
108 tagLength);
109
110 *bufferLength += tagLength;
111}
112
/**
 * Canonical subtag strings that stand for "unknown".
 **/

/* Language subtag used when no language is available. */
static const char* const unknownLanguage = "und";

/* Script subtag that is treated as "no script". */
static const char* const unknownScript = "Zzzz";

/* Region subtag that is treated as "no region". */
static const char* const unknownRegion = "ZZ";
119
120/**
121 * Create a tag string from the supplied parameters. The lang, script and region
122 * parameters may be NULL pointers. If they are, their corresponding length parameters
123 * must be less than or equal to 0.
124 *
125 * If any of the language, script or region parameters are empty, and the alternateTags
126 * parameter is not NULL, it will be parsed for potential language, script and region tags
127 * to be used when constructing the new tag. If the alternateTags parameter is NULL, or
128 * it contains no language tag, the default tag for the unknown language is used.
129 *
130 * If the length of the new string exceeds the capacity of the output buffer,
131 * the function copies as many bytes to the output buffer as it can, and returns
132 * the error U_BUFFER_OVERFLOW_ERROR.
133 *
134 * If an illegal argument is provided, the function returns the error
135 * U_ILLEGAL_ARGUMENT_ERROR.
136 *
137 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
138 * the tag string fits in the output buffer, but the null terminator doesn't.
139 *
140 * @param lang The language tag to use.
141 * @param langLength The length of the language tag.
142 * @param script The script tag to use.
143 * @param scriptLength The length of the script tag.
144 * @param region The region tag to use.
145 * @param regionLength The length of the region tag.
146 * @param trailing Any trailing data to append to the new tag.
147 * @param trailingLength The length of the trailing data.
148 * @param alternateTags A string containing any alternate tags.
149 * @param tag The output buffer.
150 * @param tagCapacity The capacity of the output buffer.
151 * @param err A pointer to a UErrorCode for error reporting.
152 * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
153 **/
154static int32_t U_CALLCONV
155createTagStringWithAlternates(
156 const char* lang,
157 int32_t langLength,
158 const char* script,
159 int32_t scriptLength,
160 const char* region,
161 int32_t regionLength,
162 const char* trailing,
163 int32_t trailingLength,
164 const char* alternateTags,
165 char* tag,
166 int32_t tagCapacity,
167 UErrorCode* err) {
168
169 if (U_FAILURE(*err)) {
170 goto error;
171 }
172 else if (tag == NULL ||
173 tagCapacity <= 0 ||
174 langLength >= ULOC_LANG_CAPACITY ||
175 scriptLength >= ULOC_SCRIPT_CAPACITY ||
176 regionLength >= ULOC_COUNTRY_CAPACITY) {
177 goto error;
178 }
179 else {
180 /**
181 * ULOC_FULLNAME_CAPACITY will provide enough capacity
182 * that we can build a string that contains the language,
183 * script and region code without worrying about overrunning
184 * the user-supplied buffer.
185 **/
186 char tagBuffer[ULOC_FULLNAME_CAPACITY];
187 int32_t tagLength = 0;
188 int32_t capacityRemaining = tagCapacity;
189 UBool regionAppended = FALSE;
190
191 if (langLength > 0) {
192 appendTag(
193 lang,
194 langLength,
195 tagBuffer,
196 &tagLength);
197 }
198 else if (alternateTags == NULL) {
199 /*
200 * Append the value for an unknown language, if
201 * we found no language.
202 */
203 appendTag(
204 unknownLanguage,
205 (int32_t)uprv_strlen(unknownLanguage),
206 tagBuffer,
207 &tagLength);
208 }
209 else {
210 /*
211 * Parse the alternateTags string for the language.
212 */
213 char alternateLang[ULOC_LANG_CAPACITY];
214 int32_t alternateLangLength = sizeof(alternateLang);
215
216 alternateLangLength =
217 uloc_getLanguage(
218 alternateTags,
219 alternateLang,
220 alternateLangLength,
221 err);
222 if(U_FAILURE(*err) ||
223 alternateLangLength >= ULOC_LANG_CAPACITY) {
224 goto error;
225 }
226 else if (alternateLangLength == 0) {
227 /*
228 * Append the value for an unknown language, if
229 * we found no language.
230 */
231 appendTag(
232 unknownLanguage,
233 (int32_t)uprv_strlen(unknownLanguage),
234 tagBuffer,
235 &tagLength);
236 }
237 else {
238 appendTag(
239 alternateLang,
240 alternateLangLength,
241 tagBuffer,
242 &tagLength);
243 }
244 }
245
246 if (scriptLength > 0) {
247 appendTag(
248 script,
249 scriptLength,
250 tagBuffer,
251 &tagLength);
252 }
253 else if (alternateTags != NULL) {
254 /*
255 * Parse the alternateTags string for the script.
256 */
257 char alternateScript[ULOC_SCRIPT_CAPACITY];
258
259 const int32_t alternateScriptLength =
260 uloc_getScript(
261 alternateTags,
262 alternateScript,
263 sizeof(alternateScript),
264 err);
265
266 if (U_FAILURE(*err) ||
267 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
268 goto error;
269 }
270 else if (alternateScriptLength > 0) {
271 appendTag(
272 alternateScript,
273 alternateScriptLength,
274 tagBuffer,
275 &tagLength);
276 }
277 }
278
279 if (regionLength > 0) {
280 appendTag(
281 region,
282 regionLength,
283 tagBuffer,
284 &tagLength);
285
286 regionAppended = TRUE;
287 }
288 else if (alternateTags != NULL) {
289 /*
290 * Parse the alternateTags string for the region.
291 */
292 char alternateRegion[ULOC_COUNTRY_CAPACITY];
293
294 const int32_t alternateRegionLength =
295 uloc_getCountry(
296 alternateTags,
297 alternateRegion,
298 sizeof(alternateRegion),
299 err);
300 if (U_FAILURE(*err) ||
301 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
302 goto error;
303 }
304 else if (alternateRegionLength > 0) {
305 appendTag(
306 alternateRegion,
307 alternateRegionLength,
308 tagBuffer,
309 &tagLength);
310
311 regionAppended = TRUE;
312 }
313 }
314
315 {
316 const int32_t toCopy =
317 tagLength >= tagCapacity ? tagCapacity : tagLength;
318
319 /**
320 * Copy the partial tag from our internal buffer to the supplied
321 * target.
322 **/
323 uprv_memcpy(
324 tag,
325 tagBuffer,
326 toCopy);
327
328 capacityRemaining -= toCopy;
329 }
330
331 if (trailingLength > 0) {
clairehob26ce3a2012-01-10 17:54:41 -0800332 if (*trailing != '@' && capacityRemaining > 0) {
claireho50294ea2010-05-03 15:44:48 -0700333 tag[tagLength++] = '_';
334 --capacityRemaining;
clairehob26ce3a2012-01-10 17:54:41 -0800335 if (capacityRemaining > 0 && !regionAppended) {
336 /* extra separator is required */
337 tag[tagLength++] = '_';
338 --capacityRemaining;
339 }
claireho50294ea2010-05-03 15:44:48 -0700340 }
341
342 if (capacityRemaining > 0) {
343 /*
344 * Copy the trailing data into the supplied buffer. Use uprv_memmove, since we
345 * don't know if the user-supplied buffers overlap.
346 */
347 const int32_t toCopy =
348 trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
349
350 uprv_memmove(
351 &tag[tagLength],
352 trailing,
353 toCopy);
354 }
355 }
356
357 tagLength += trailingLength;
358
359 return u_terminateChars(
360 tag,
361 tagCapacity,
362 tagLength,
363 err);
364 }
365
366error:
367
368 /**
369 * An overflow indicates the locale ID passed in
370 * is ill-formed. If we got here, and there was
371 * no previous error, it's an implicit overflow.
372 **/
373 if (*err == U_BUFFER_OVERFLOW_ERROR ||
374 U_SUCCESS(*err)) {
375 *err = U_ILLEGAL_ARGUMENT_ERROR;
376 }
377
378 return -1;
379}
380
381/**
382 * Create a tag string from the supplied parameters. The lang, script and region
383 * parameters may be NULL pointers. If they are, their corresponding length parameters
384 * must be less than or equal to 0. If the lang parameter is an empty string, the
385 * default value for an unknown language is written to the output buffer.
386 *
387 * If the length of the new string exceeds the capacity of the output buffer,
388 * the function copies as many bytes to the output buffer as it can, and returns
389 * the error U_BUFFER_OVERFLOW_ERROR.
390 *
391 * If an illegal argument is provided, the function returns the error
392 * U_ILLEGAL_ARGUMENT_ERROR.
393 *
394 * @param lang The language tag to use.
395 * @param langLength The length of the language tag.
396 * @param script The script tag to use.
397 * @param scriptLength The length of the script tag.
398 * @param region The region tag to use.
399 * @param regionLength The length of the region tag.
400 * @param trailing Any trailing data to append to the new tag.
401 * @param trailingLength The length of the trailing data.
402 * @param tag The output buffer.
403 * @param tagCapacity The capacity of the output buffer.
404 * @param err A pointer to a UErrorCode for error reporting.
405 * @return The length of the tag string, which may be greater than tagCapacity.
406 **/
407static int32_t U_CALLCONV
408createTagString(
409 const char* lang,
410 int32_t langLength,
411 const char* script,
412 int32_t scriptLength,
413 const char* region,
414 int32_t regionLength,
415 const char* trailing,
416 int32_t trailingLength,
417 char* tag,
418 int32_t tagCapacity,
419 UErrorCode* err)
420{
421 return createTagStringWithAlternates(
422 lang,
423 langLength,
424 script,
425 scriptLength,
426 region,
427 regionLength,
428 trailing,
429 trailingLength,
430 NULL,
431 tag,
432 tagCapacity,
433 err);
434}
435
436/**
437 * Parse the language, script, and region subtags from a tag string, and copy the
438 * results into the corresponding output parameters. The buffers are null-terminated,
439 * unless overflow occurs.
440 *
441 * The langLength, scriptLength, and regionLength parameters are input/output
442 * parameters, and must contain the capacity of their corresponding buffers on
443 * input. On output, they will contain the actual length of the buffers, not
444 * including the null terminator.
445 *
446 * If the length of any of the output subtags exceeds the capacity of the corresponding
447 * buffer, the function copies as many bytes to the output buffer as it can, and returns
448 * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow
449 * occurs.
450 *
451 * If an illegal argument is provided, the function returns the error
452 * U_ILLEGAL_ARGUMENT_ERROR.
453 *
454 * @param localeID The locale ID to parse.
455 * @param lang The language tag buffer.
456 * @param langLength The length of the language tag.
457 * @param script The script tag buffer.
458 * @param scriptLength The length of the script tag.
459 * @param region The region tag buffer.
460 * @param regionLength The length of the region tag.
461 * @param err A pointer to a UErrorCode for error reporting.
462 * @return The number of chars of the localeID parameter consumed.
463 **/
464static int32_t U_CALLCONV
465parseTagString(
466 const char* localeID,
467 char* lang,
468 int32_t* langLength,
469 char* script,
470 int32_t* scriptLength,
471 char* region,
472 int32_t* regionLength,
473 UErrorCode* err)
474{
475 const char* position = localeID;
476 int32_t subtagLength = 0;
477
478 if(U_FAILURE(*err) ||
479 localeID == NULL ||
480 lang == NULL ||
481 langLength == NULL ||
482 script == NULL ||
483 scriptLength == NULL ||
484 region == NULL ||
485 regionLength == NULL) {
486 goto error;
487 }
488
489 subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
490 u_terminateChars(lang, *langLength, subtagLength, err);
491
492 /*
493 * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
494 * to be an error, because it indicates the user-supplied tag is
495 * not well-formed.
496 */
497 if(U_FAILURE(*err)) {
498 goto error;
499 }
500
501 *langLength = subtagLength;
502
503 /*
504 * If no language was present, use the value of unknownLanguage
505 * instead. Otherwise, move past any separator.
506 */
507 if (*langLength == 0) {
508 uprv_strcpy(
509 lang,
510 unknownLanguage);
511 *langLength = (int32_t)uprv_strlen(lang);
512 }
Fredrik Roubertc0d7fcc2017-04-26 20:12:01 +0000513 if (_isIDSeparator(*position)) {
claireho50294ea2010-05-03 15:44:48 -0700514 ++position;
515 }
516
517 subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
518 u_terminateChars(script, *scriptLength, subtagLength, err);
519
520 if(U_FAILURE(*err)) {
521 goto error;
522 }
523
524 *scriptLength = subtagLength;
525
526 if (*scriptLength > 0) {
527 if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
528 /**
529 * If the script part is the "unknown" script, then don't return it.
530 **/
531 *scriptLength = 0;
532 }
533
534 /*
535 * Move past any separator.
536 */
537 if (_isIDSeparator(*position)) {
538 ++position;
539 }
540 }
541
542 subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
543 u_terminateChars(region, *regionLength, subtagLength, err);
544
545 if(U_FAILURE(*err)) {
546 goto error;
547 }
548
549 *regionLength = subtagLength;
550
551 if (*regionLength > 0) {
552 if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
553 /**
554 * If the region part is the "unknown" region, then don't return it.
555 **/
556 *regionLength = 0;
557 }
clairehob26ce3a2012-01-10 17:54:41 -0800558 } else if (*position != 0 && *position != '@') {
559 /* back up over consumed trailing separator */
560 --position;
claireho50294ea2010-05-03 15:44:48 -0700561 }
562
563exit:
564
565 return (int32_t)(position - localeID);
566
567error:
568
569 /**
570 * If we get here, we have no explicit error, it's the result of an
571 * illegal argument.
572 **/
573 if (!U_FAILURE(*err)) {
574 *err = U_ILLEGAL_ARGUMENT_ERROR;
575 }
576
577 goto exit;
578}
579
580static int32_t U_CALLCONV
581createLikelySubtagsString(
582 const char* lang,
583 int32_t langLength,
584 const char* script,
585 int32_t scriptLength,
586 const char* region,
587 int32_t regionLength,
588 const char* variants,
589 int32_t variantsLength,
590 char* tag,
591 int32_t tagCapacity,
592 UErrorCode* err)
593{
594 /**
595 * ULOC_FULLNAME_CAPACITY will provide enough capacity
596 * that we can build a string that contains the language,
597 * script and region code without worrying about overrunning
598 * the user-supplied buffer.
599 **/
600 char tagBuffer[ULOC_FULLNAME_CAPACITY];
601 char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
claireho50294ea2010-05-03 15:44:48 -0700602
603 if(U_FAILURE(*err)) {
604 goto error;
605 }
606
607 /**
608 * Try the language with the script and region first.
609 **/
610 if (scriptLength > 0 && regionLength > 0) {
611
612 const char* likelySubtags = NULL;
613
Craig Cornelius103e9ff2012-10-09 17:03:29 -0700614 createTagString(
claireho50294ea2010-05-03 15:44:48 -0700615 lang,
616 langLength,
617 script,
618 scriptLength,
619 region,
620 regionLength,
621 NULL,
622 0,
623 tagBuffer,
624 sizeof(tagBuffer),
625 err);
626 if(U_FAILURE(*err)) {
627 goto error;
628 }
629
630 likelySubtags =
631 findLikelySubtags(
632 tagBuffer,
633 likelySubtagsBuffer,
634 sizeof(likelySubtagsBuffer),
635 err);
636 if(U_FAILURE(*err)) {
637 goto error;
638 }
639
640 if (likelySubtags != NULL) {
641 /* Always use the language tag from the
642 maximal string, since it may be more
643 specific than the one provided. */
644 return createTagStringWithAlternates(
645 NULL,
646 0,
647 NULL,
648 0,
649 NULL,
650 0,
651 variants,
652 variantsLength,
653 likelySubtags,
654 tag,
655 tagCapacity,
656 err);
657 }
658 }
659
660 /**
661 * Try the language with just the script.
662 **/
663 if (scriptLength > 0) {
664
665 const char* likelySubtags = NULL;
666
Craig Cornelius103e9ff2012-10-09 17:03:29 -0700667 createTagString(
claireho50294ea2010-05-03 15:44:48 -0700668 lang,
669 langLength,
670 script,
671 scriptLength,
672 NULL,
673 0,
674 NULL,
675 0,
676 tagBuffer,
677 sizeof(tagBuffer),
678 err);
679 if(U_FAILURE(*err)) {
680 goto error;
681 }
682
683 likelySubtags =
684 findLikelySubtags(
685 tagBuffer,
686 likelySubtagsBuffer,
687 sizeof(likelySubtagsBuffer),
688 err);
689 if(U_FAILURE(*err)) {
690 goto error;
691 }
692
693 if (likelySubtags != NULL) {
694 /* Always use the language tag from the
695 maximal string, since it may be more
696 specific than the one provided. */
697 return createTagStringWithAlternates(
698 NULL,
699 0,
700 NULL,
701 0,
702 region,
703 regionLength,
704 variants,
705 variantsLength,
706 likelySubtags,
707 tag,
708 tagCapacity,
709 err);
710 }
711 }
712
713 /**
714 * Try the language with just the region.
715 **/
716 if (regionLength > 0) {
717
718 const char* likelySubtags = NULL;
719
720 createTagString(
721 lang,
722 langLength,
723 NULL,
724 0,
725 region,
726 regionLength,
727 NULL,
728 0,
729 tagBuffer,
730 sizeof(tagBuffer),
731 err);
732 if(U_FAILURE(*err)) {
733 goto error;
734 }
735
736 likelySubtags =
737 findLikelySubtags(
738 tagBuffer,
739 likelySubtagsBuffer,
740 sizeof(likelySubtagsBuffer),
741 err);
742 if(U_FAILURE(*err)) {
743 goto error;
744 }
745
746 if (likelySubtags != NULL) {
747 /* Always use the language tag from the
748 maximal string, since it may be more
749 specific than the one provided. */
750 return createTagStringWithAlternates(
751 NULL,
752 0,
753 script,
754 scriptLength,
755 NULL,
756 0,
757 variants,
758 variantsLength,
759 likelySubtags,
760 tag,
761 tagCapacity,
762 err);
763 }
764 }
765
766 /**
767 * Finally, try just the language.
768 **/
769 {
770 const char* likelySubtags = NULL;
771
772 createTagString(
773 lang,
774 langLength,
775 NULL,
776 0,
777 NULL,
778 0,
779 NULL,
780 0,
781 tagBuffer,
782 sizeof(tagBuffer),
783 err);
784 if(U_FAILURE(*err)) {
785 goto error;
786 }
787
788 likelySubtags =
789 findLikelySubtags(
790 tagBuffer,
791 likelySubtagsBuffer,
792 sizeof(likelySubtagsBuffer),
793 err);
794 if(U_FAILURE(*err)) {
795 goto error;
796 }
797
798 if (likelySubtags != NULL) {
799 /* Always use the language tag from the
800 maximal string, since it may be more
801 specific than the one provided. */
802 return createTagStringWithAlternates(
803 NULL,
804 0,
805 script,
806 scriptLength,
807 region,
808 regionLength,
809 variants,
810 variantsLength,
811 likelySubtags,
812 tag,
813 tagCapacity,
814 err);
815 }
816 }
817
818 return u_terminateChars(
819 tag,
820 tagCapacity,
821 0,
822 err);
823
824error:
825
826 if (!U_FAILURE(*err)) {
827 *err = U_ILLEGAL_ARGUMENT_ERROR;
828 }
829
830 return -1;
831}
832
/*
 * Scan the trailing part of a locale ID and jump to the enclosing function's
 * "error" label when a segment between '-' / '_' separators is too long.
 * Scanning stops at the first '@'; anything after it is not checked.
 *
 * The length test runs before the current character is counted, so a
 * segment of up to 9 characters is accepted and the 10th character of a
 * segment triggers the jump.
 *
 * The enclosing function must define a label named "error".
 */
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
    do { \
        int32_t count = 0; \
        int32_t i; \
        for (i = 0; i < (trailingLength); i++) { \
            if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
                count = 0; \
            } else if ((trailing)[i] == '@') { \
                break; \
            } else if (count > 8) { \
                goto error; \
            } else { \
                count++; \
            } \
        } \
    } while (0)
851
852static int32_t
853_uloc_addLikelySubtags(const char* localeID,
854 char* maximizedLocaleID,
855 int32_t maximizedLocaleIDCapacity,
856 UErrorCode* err)
857{
858 char lang[ULOC_LANG_CAPACITY];
859 int32_t langLength = sizeof(lang);
860 char script[ULOC_SCRIPT_CAPACITY];
861 int32_t scriptLength = sizeof(script);
862 char region[ULOC_COUNTRY_CAPACITY];
863 int32_t regionLength = sizeof(region);
864 const char* trailing = "";
865 int32_t trailingLength = 0;
866 int32_t trailingIndex = 0;
867 int32_t resultLength = 0;
868
869 if(U_FAILURE(*err)) {
870 goto error;
871 }
872 else if (localeID == NULL ||
873 maximizedLocaleID == NULL ||
874 maximizedLocaleIDCapacity <= 0) {
875 goto error;
876 }
877
878 trailingIndex = parseTagString(
879 localeID,
880 lang,
881 &langLength,
882 script,
883 &scriptLength,
884 region,
885 &regionLength,
886 err);
887 if(U_FAILURE(*err)) {
888 /* Overflow indicates an illegal argument error */
889 if (*err == U_BUFFER_OVERFLOW_ERROR) {
890 *err = U_ILLEGAL_ARGUMENT_ERROR;
891 }
892
893 goto error;
894 }
895
896 /* Find the length of the trailing portion. */
clairehob26ce3a2012-01-10 17:54:41 -0800897 while (_isIDSeparator(localeID[trailingIndex])) {
898 trailingIndex++;
899 }
claireho50294ea2010-05-03 15:44:48 -0700900 trailing = &localeID[trailingIndex];
901 trailingLength = (int32_t)uprv_strlen(trailing);
902
903 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
904
905 resultLength =
906 createLikelySubtagsString(
907 lang,
908 langLength,
909 script,
910 scriptLength,
911 region,
912 regionLength,
913 trailing,
914 trailingLength,
915 maximizedLocaleID,
916 maximizedLocaleIDCapacity,
917 err);
918
919 if (resultLength == 0) {
920 const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
921
922 /*
923 * If we get here, we need to return localeID.
924 */
925 uprv_memcpy(
926 maximizedLocaleID,
927 localeID,
928 localIDLength <= maximizedLocaleIDCapacity ?
929 localIDLength : maximizedLocaleIDCapacity);
930
931 resultLength =
932 u_terminateChars(
933 maximizedLocaleID,
934 maximizedLocaleIDCapacity,
935 localIDLength,
936 err);
937 }
938
939 return resultLength;
940
941error:
942
943 if (!U_FAILURE(*err)) {
944 *err = U_ILLEGAL_ARGUMENT_ERROR;
945 }
946
947 return -1;
948}
949
950static int32_t
951_uloc_minimizeSubtags(const char* localeID,
952 char* minimizedLocaleID,
953 int32_t minimizedLocaleIDCapacity,
954 UErrorCode* err)
955{
956 /**
957 * ULOC_FULLNAME_CAPACITY will provide enough capacity
958 * that we can build a string that contains the language,
959 * script and region code without worrying about overrunning
960 * the user-supplied buffer.
961 **/
962 char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
963 int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
964
965 char lang[ULOC_LANG_CAPACITY];
966 int32_t langLength = sizeof(lang);
967 char script[ULOC_SCRIPT_CAPACITY];
968 int32_t scriptLength = sizeof(script);
969 char region[ULOC_COUNTRY_CAPACITY];
970 int32_t regionLength = sizeof(region);
971 const char* trailing = "";
972 int32_t trailingLength = 0;
973 int32_t trailingIndex = 0;
974
975 if(U_FAILURE(*err)) {
976 goto error;
977 }
978 else if (localeID == NULL ||
979 minimizedLocaleID == NULL ||
980 minimizedLocaleIDCapacity <= 0) {
981 goto error;
982 }
983
984 trailingIndex =
985 parseTagString(
986 localeID,
987 lang,
988 &langLength,
989 script,
990 &scriptLength,
991 region,
992 &regionLength,
993 err);
994 if(U_FAILURE(*err)) {
995
996 /* Overflow indicates an illegal argument error */
997 if (*err == U_BUFFER_OVERFLOW_ERROR) {
998 *err = U_ILLEGAL_ARGUMENT_ERROR;
999 }
1000
1001 goto error;
1002 }
1003
clairehob26ce3a2012-01-10 17:54:41 -08001004 /* Find the spot where the variants or the keywords begin, if any. */
1005 while (_isIDSeparator(localeID[trailingIndex])) {
1006 trailingIndex++;
1007 }
claireho50294ea2010-05-03 15:44:48 -07001008 trailing = &localeID[trailingIndex];
1009 trailingLength = (int32_t)uprv_strlen(trailing);
1010
1011 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
1012
1013 createTagString(
1014 lang,
1015 langLength,
1016 script,
1017 scriptLength,
1018 region,
1019 regionLength,
1020 NULL,
1021 0,
1022 maximizedTagBuffer,
1023 maximizedTagBufferLength,
1024 err);
1025 if(U_FAILURE(*err)) {
1026 goto error;
1027 }
1028
1029 /**
1030 * First, we need to first get the maximization
1031 * from AddLikelySubtags.
1032 **/
1033 maximizedTagBufferLength =
1034 uloc_addLikelySubtags(
1035 maximizedTagBuffer,
1036 maximizedTagBuffer,
1037 maximizedTagBufferLength,
1038 err);
1039
1040 if(U_FAILURE(*err)) {
1041 goto error;
1042 }
1043
1044 /**
1045 * Start first with just the language.
1046 **/
1047 {
1048 char tagBuffer[ULOC_FULLNAME_CAPACITY];
1049
1050 const int32_t tagBufferLength =
1051 createLikelySubtagsString(
1052 lang,
1053 langLength,
1054 NULL,
1055 0,
1056 NULL,
1057 0,
1058 NULL,
1059 0,
1060 tagBuffer,
1061 sizeof(tagBuffer),
1062 err);
1063
1064 if(U_FAILURE(*err)) {
1065 goto error;
1066 }
1067 else if (uprv_strnicmp(
1068 maximizedTagBuffer,
1069 tagBuffer,
1070 tagBufferLength) == 0) {
1071
1072 return createTagString(
1073 lang,
1074 langLength,
1075 NULL,
1076 0,
1077 NULL,
1078 0,
1079 trailing,
1080 trailingLength,
1081 minimizedLocaleID,
1082 minimizedLocaleIDCapacity,
1083 err);
1084 }
1085 }
1086
1087 /**
1088 * Next, try the language and region.
1089 **/
1090 if (regionLength > 0) {
1091
1092 char tagBuffer[ULOC_FULLNAME_CAPACITY];
1093
1094 const int32_t tagBufferLength =
1095 createLikelySubtagsString(
1096 lang,
1097 langLength,
1098 NULL,
1099 0,
1100 region,
1101 regionLength,
1102 NULL,
1103 0,
1104 tagBuffer,
1105 sizeof(tagBuffer),
1106 err);
1107
1108 if(U_FAILURE(*err)) {
1109 goto error;
1110 }
1111 else if (uprv_strnicmp(
1112 maximizedTagBuffer,
1113 tagBuffer,
1114 tagBufferLength) == 0) {
1115
1116 return createTagString(
1117 lang,
1118 langLength,
1119 NULL,
1120 0,
1121 region,
1122 regionLength,
1123 trailing,
1124 trailingLength,
1125 minimizedLocaleID,
1126 minimizedLocaleIDCapacity,
1127 err);
1128 }
1129 }
1130
1131 /**
1132 * Finally, try the language and script. This is our last chance,
1133 * since trying with all three subtags would only yield the
1134 * maximal version that we already have.
1135 **/
1136 if (scriptLength > 0 && regionLength > 0) {
1137 char tagBuffer[ULOC_FULLNAME_CAPACITY];
1138
1139 const int32_t tagBufferLength =
1140 createLikelySubtagsString(
1141 lang,
1142 langLength,
1143 script,
1144 scriptLength,
1145 NULL,
1146 0,
1147 NULL,
1148 0,
1149 tagBuffer,
1150 sizeof(tagBuffer),
1151 err);
1152
1153 if(U_FAILURE(*err)) {
1154 goto error;
1155 }
1156 else if (uprv_strnicmp(
1157 maximizedTagBuffer,
1158 tagBuffer,
1159 tagBufferLength) == 0) {
1160
1161 return createTagString(
1162 lang,
1163 langLength,
1164 script,
1165 scriptLength,
1166 NULL,
1167 0,
1168 trailing,
1169 trailingLength,
1170 minimizedLocaleID,
1171 minimizedLocaleIDCapacity,
1172 err);
1173 }
1174 }
1175
1176 {
1177 /**
1178 * If we got here, return the locale ID parameter.
1179 **/
1180 const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
1181
1182 uprv_memcpy(
1183 minimizedLocaleID,
1184 localeID,
1185 localeIDLength <= minimizedLocaleIDCapacity ?
1186 localeIDLength : minimizedLocaleIDCapacity);
1187
1188 return u_terminateChars(
1189 minimizedLocaleID,
1190 minimizedLocaleIDCapacity,
1191 localeIDLength,
1192 err);
1193 }
1194
1195error:
1196
1197 if (!U_FAILURE(*err)) {
1198 *err = U_ILLEGAL_ARGUMENT_ERROR;
1199 }
1200
1201 return -1;
1202
1203
1204}
1205
1206static UBool
1207do_canonicalize(const char* localeID,
1208 char* buffer,
1209 int32_t bufferCapacity,
1210 UErrorCode* err)
1211{
1212 uloc_canonicalize(
1213 localeID,
1214 buffer,
1215 bufferCapacity,
1216 err);
1217
1218 if (*err == U_STRING_NOT_TERMINATED_WARNING ||
1219 *err == U_BUFFER_OVERFLOW_ERROR) {
1220 *err = U_ILLEGAL_ARGUMENT_ERROR;
1221
1222 return FALSE;
1223 }
1224 else if (U_FAILURE(*err)) {
1225
1226 return FALSE;
1227 }
1228 else {
1229 return TRUE;
1230 }
1231}
1232
Craig Cornelius54dcd9b2013-02-15 14:03:14 -08001233U_CAPI int32_t U_EXPORT2
claireho50294ea2010-05-03 15:44:48 -07001234uloc_addLikelySubtags(const char* localeID,
1235 char* maximizedLocaleID,
1236 int32_t maximizedLocaleIDCapacity,
1237 UErrorCode* err)
1238{
1239 char localeBuffer[ULOC_FULLNAME_CAPACITY];
1240
1241 if (!do_canonicalize(
1242 localeID,
1243 localeBuffer,
1244 sizeof(localeBuffer),
1245 err)) {
1246 return -1;
1247 }
1248 else {
1249 return _uloc_addLikelySubtags(
1250 localeBuffer,
1251 maximizedLocaleID,
1252 maximizedLocaleIDCapacity,
1253 err);
1254 }
1255}
1256
Craig Cornelius54dcd9b2013-02-15 14:03:14 -08001257U_CAPI int32_t U_EXPORT2
claireho50294ea2010-05-03 15:44:48 -07001258uloc_minimizeSubtags(const char* localeID,
1259 char* minimizedLocaleID,
1260 int32_t minimizedLocaleIDCapacity,
1261 UErrorCode* err)
1262{
1263 char localeBuffer[ULOC_FULLNAME_CAPACITY];
1264
1265 if (!do_canonicalize(
1266 localeID,
1267 localeBuffer,
1268 sizeof(localeBuffer),
1269 err)) {
1270 return -1;
1271 }
1272 else {
1273 return _uloc_minimizeSubtags(
1274 localeBuffer,
1275 minimizedLocaleID,
1276 minimizedLocaleIDCapacity,
1277 err);
1278 }
1279}
ccorneliusf9878a22014-11-20 18:09:39 -08001280
1281// Pairs of (language subtag, + or -) for finding out fast if common languages
1282// are LTR (minus) or RTL (plus).
1283static const char* LANG_DIR_STRING =
1284 "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
1285
1286// Implemented here because this calls uloc_addLikelySubtags().
1287U_CAPI UBool U_EXPORT2
1288uloc_isRightToLeft(const char *locale) {
1289 UErrorCode errorCode = U_ZERO_ERROR;
1290 char script[8];
1291 int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
1292 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1293 scriptLength == 0) {
1294 // Fastpath: We know the likely scripts and their writing direction
1295 // for some common languages.
1296 errorCode = U_ZERO_ERROR;
1297 char lang[8];
1298 int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
1299 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1300 langLength == 0) {
1301 return FALSE;
1302 }
1303 const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
1304 if (langPtr != NULL) {
1305 switch (langPtr[langLength]) {
1306 case '-': return FALSE;
1307 case '+': return TRUE;
1308 default: break; // partial match of a longer code
1309 }
1310 }
1311 // Otherwise, find the likely script.
1312 errorCode = U_ZERO_ERROR;
1313 char likely[ULOC_FULLNAME_CAPACITY];
1314 (void)uloc_addLikelySubtags(locale, likely, UPRV_LENGTHOF(likely), &errorCode);
1315 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
1316 return FALSE;
1317 }
1318 scriptLength = uloc_getScript(likely, script, UPRV_LENGTHOF(script), &errorCode);
1319 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1320 scriptLength == 0) {
1321 return FALSE;
1322 }
1323 }
1324 UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
1325 return uscript_isRightToLeft(scriptCode);
1326}
1327
1328U_NAMESPACE_BEGIN
1329
1330UBool
1331Locale::isRightToLeft() const {
1332 return uloc_isRightToLeft(getBaseName());
1333}
1334
Fredrik Roubert64339d32016-10-21 19:43:16 +02001335U_NAMESPACE_END
1336
Fredrik Roubert8de051c2016-03-10 13:13:27 +01001337// The following must at least allow for rg key value (6) plus terminator (1).
1338#define ULOC_RG_BUFLEN 8
1339
1340U_CAPI int32_t U_EXPORT2
1341ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
1342 char *region, int32_t regionCapacity, UErrorCode* status) {
1343 if (U_FAILURE(*status)) {
1344 return 0;
1345 }
1346 char rgBuf[ULOC_RG_BUFLEN];
1347 UErrorCode rgStatus = U_ZERO_ERROR;
1348
1349 // First check for rg keyword value
1350 int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus);
1351 if (U_FAILURE(rgStatus) || rgLen != 6) {
1352 rgLen = 0;
1353 } else {
1354 // rgBuf guaranteed to be zero terminated here, with text len 6
1355 char *rgPtr = rgBuf;
1356 for (; *rgPtr!= 0; rgPtr++) {
1357 *rgPtr = uprv_toupper(*rgPtr);
1358 }
1359 rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0;
1360 }
1361
1362 if (rgLen == 0) {
1363 // No valid rg keyword value, try for unicode_region_subtag
1364 rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
1365 if (U_FAILURE(*status)) {
1366 rgLen = 0;
1367 } else if (rgLen == 0 && inferRegion) {
1368 // no unicode_region_subtag but inferRegion TRUE, try likely subtags
1369 char locBuf[ULOC_FULLNAME_CAPACITY];
1370 rgStatus = U_ZERO_ERROR;
1371 (void)uloc_addLikelySubtags(localeID, locBuf, ULOC_FULLNAME_CAPACITY, &rgStatus);
1372 if (U_SUCCESS(rgStatus)) {
1373 rgLen = uloc_getCountry(locBuf, rgBuf, ULOC_RG_BUFLEN, status);
1374 if (U_FAILURE(*status)) {
1375 rgLen = 0;
1376 }
1377 }
1378 }
1379 }
1380
1381 rgBuf[rgLen] = 0;
1382 uprv_strncpy(region, rgBuf, regionCapacity);
1383 return u_terminateChars(region, regionCapacity, rgLen, status);
1384}
1385