blob: 14974e8ac44195fcd90b7d133d3d50e439bfb7b7 [file] [log] [blame]
// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5
6#ifdef V8_I18N_SUPPORT
Emily Bernierd0a1eb72015-03-24 16:35:39 -04007#include "src/runtime/runtime-utils.h"
8
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00009#include "src/api.h"
10#include "src/api-natives.h"
11#include "src/arguments.h"
12#include "src/factory.h"
13#include "src/i18n.h"
14#include "src/isolate-inl.h"
15#include "src/messages.h"
16
Emily Bernierd0a1eb72015-03-24 16:35:39 -040017#include "unicode/brkiter.h"
18#include "unicode/calendar.h"
19#include "unicode/coll.h"
20#include "unicode/curramt.h"
21#include "unicode/datefmt.h"
22#include "unicode/dcfmtsym.h"
23#include "unicode/decimfmt.h"
24#include "unicode/dtfmtsym.h"
25#include "unicode/dtptngen.h"
26#include "unicode/locid.h"
Ben Murdochc5610432016-08-08 18:44:38 +010027#include "unicode/normalizer2.h"
Emily Bernierd0a1eb72015-03-24 16:35:39 -040028#include "unicode/numfmt.h"
29#include "unicode/numsys.h"
30#include "unicode/rbbi.h"
31#include "unicode/smpdtfmt.h"
32#include "unicode/timezone.h"
Ben Murdochc5610432016-08-08 18:44:38 +010033#include "unicode/translit.h"
Emily Bernierd0a1eb72015-03-24 16:35:39 -040034#include "unicode/uchar.h"
35#include "unicode/ucol.h"
36#include "unicode/ucurr.h"
37#include "unicode/uloc.h"
Ben Murdochc5610432016-08-08 18:44:38 +010038#include "unicode/unistr.h"
Emily Bernierd0a1eb72015-03-24 16:35:39 -040039#include "unicode/unum.h"
40#include "unicode/uversion.h"
41
42
43namespace v8 {
44namespace internal {
Ben Murdochc5610432016-08-08 18:44:38 +010045namespace {
46
47const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat,
48 base::SmartArrayPointer<uc16>* dest,
49 int32_t length) {
50 DCHECK(flat.IsFlat());
51 if (flat.IsOneByte()) {
52 if (dest->is_empty()) {
53 dest->Reset(NewArray<uc16>(length));
54 CopyChars(dest->get(), flat.ToOneByteVector().start(), length);
55 }
56 return reinterpret_cast<const UChar*>(dest->get());
57 } else {
58 return reinterpret_cast<const UChar*>(flat.ToUC16Vector().start());
59 }
60}
61
62} // namespace
Emily Bernierd0a1eb72015-03-24 16:35:39 -040063
64RUNTIME_FUNCTION(Runtime_CanonicalizeLanguageTag) {
65 HandleScope scope(isolate);
66 Factory* factory = isolate->factory();
67
68 DCHECK(args.length() == 1);
69 CONVERT_ARG_HANDLE_CHECKED(String, locale_id_str, 0);
70
71 v8::String::Utf8Value locale_id(v8::Utils::ToLocal(locale_id_str));
72
73 // Return value which denotes invalid language tag.
74 const char* const kInvalidTag = "invalid-tag";
75
76 UErrorCode error = U_ZERO_ERROR;
77 char icu_result[ULOC_FULLNAME_CAPACITY];
78 int icu_length = 0;
79
80 uloc_forLanguageTag(*locale_id, icu_result, ULOC_FULLNAME_CAPACITY,
81 &icu_length, &error);
82 if (U_FAILURE(error) || icu_length == 0) {
83 return *factory->NewStringFromAsciiChecked(kInvalidTag);
84 }
85
86 char result[ULOC_FULLNAME_CAPACITY];
87
88 // Force strict BCP47 rules.
89 uloc_toLanguageTag(icu_result, result, ULOC_FULLNAME_CAPACITY, TRUE, &error);
90
91 if (U_FAILURE(error)) {
92 return *factory->NewStringFromAsciiChecked(kInvalidTag);
93 }
94
95 return *factory->NewStringFromAsciiChecked(result);
96}
97
98
99RUNTIME_FUNCTION(Runtime_AvailableLocalesOf) {
100 HandleScope scope(isolate);
101 Factory* factory = isolate->factory();
102
103 DCHECK(args.length() == 1);
104 CONVERT_ARG_HANDLE_CHECKED(String, service, 0);
105
106 const icu::Locale* available_locales = NULL;
107 int32_t count = 0;
108
109 if (service->IsUtf8EqualTo(CStrVector("collator"))) {
110 available_locales = icu::Collator::getAvailableLocales(count);
111 } else if (service->IsUtf8EqualTo(CStrVector("numberformat"))) {
112 available_locales = icu::NumberFormat::getAvailableLocales(count);
113 } else if (service->IsUtf8EqualTo(CStrVector("dateformat"))) {
114 available_locales = icu::DateFormat::getAvailableLocales(count);
115 } else if (service->IsUtf8EqualTo(CStrVector("breakiterator"))) {
116 available_locales = icu::BreakIterator::getAvailableLocales(count);
117 }
118
119 UErrorCode error = U_ZERO_ERROR;
120 char result[ULOC_FULLNAME_CAPACITY];
121 Handle<JSObject> locales = factory->NewJSObject(isolate->object_function());
122
123 for (int32_t i = 0; i < count; ++i) {
124 const char* icu_name = available_locales[i].getName();
125
126 error = U_ZERO_ERROR;
127 // No need to force strict BCP47 rules.
128 uloc_toLanguageTag(icu_name, result, ULOC_FULLNAME_CAPACITY, FALSE, &error);
129 if (U_FAILURE(error)) {
130 // This shouldn't happen, but lets not break the user.
131 continue;
132 }
133
134 RETURN_FAILURE_ON_EXCEPTION(
135 isolate, JSObject::SetOwnPropertyIgnoreAttributes(
136 locales, factory->NewStringFromAsciiChecked(result),
137 factory->NewNumber(i), NONE));
138 }
139
140 return *locales;
141}
142
143
144RUNTIME_FUNCTION(Runtime_GetDefaultICULocale) {
145 HandleScope scope(isolate);
146 Factory* factory = isolate->factory();
147
148 DCHECK(args.length() == 0);
149
150 icu::Locale default_locale;
151
152 // Set the locale
153 char result[ULOC_FULLNAME_CAPACITY];
154 UErrorCode status = U_ZERO_ERROR;
155 uloc_toLanguageTag(default_locale.getName(), result, ULOC_FULLNAME_CAPACITY,
156 FALSE, &status);
157 if (U_SUCCESS(status)) {
158 return *factory->NewStringFromAsciiChecked(result);
159 }
160
161 return *factory->NewStringFromStaticChars("und");
162}
163
164
165RUNTIME_FUNCTION(Runtime_GetLanguageTagVariants) {
166 HandleScope scope(isolate);
167 Factory* factory = isolate->factory();
168
169 DCHECK(args.length() == 1);
170
171 CONVERT_ARG_HANDLE_CHECKED(JSArray, input, 0);
172
173 uint32_t length = static_cast<uint32_t>(input->length()->Number());
174 // Set some limit to prevent fuzz tests from going OOM.
175 // Can be bumped when callers' requirements change.
176 RUNTIME_ASSERT(length < 100);
177 Handle<FixedArray> output = factory->NewFixedArray(length);
178 Handle<Name> maximized = factory->NewStringFromStaticChars("maximized");
179 Handle<Name> base = factory->NewStringFromStaticChars("base");
180 for (unsigned int i = 0; i < length; ++i) {
181 Handle<Object> locale_id;
Ben Murdochda12d292016-06-02 14:46:10 +0100182 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
183 isolate, locale_id, JSReceiver::GetElement(isolate, input, i));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400184 if (!locale_id->IsString()) {
185 return isolate->Throw(*factory->illegal_argument_string());
186 }
187
188 v8::String::Utf8Value utf8_locale_id(
189 v8::Utils::ToLocal(Handle<String>::cast(locale_id)));
190
191 UErrorCode error = U_ZERO_ERROR;
192
193 // Convert from BCP47 to ICU format.
194 // de-DE-u-co-phonebk -> de_DE@collation=phonebook
195 char icu_locale[ULOC_FULLNAME_CAPACITY];
196 int icu_locale_length = 0;
197 uloc_forLanguageTag(*utf8_locale_id, icu_locale, ULOC_FULLNAME_CAPACITY,
198 &icu_locale_length, &error);
199 if (U_FAILURE(error) || icu_locale_length == 0) {
200 return isolate->Throw(*factory->illegal_argument_string());
201 }
202
203 // Maximize the locale.
204 // de_DE@collation=phonebook -> de_Latn_DE@collation=phonebook
205 char icu_max_locale[ULOC_FULLNAME_CAPACITY];
206 uloc_addLikelySubtags(icu_locale, icu_max_locale, ULOC_FULLNAME_CAPACITY,
207 &error);
208
209 // Remove extensions from maximized locale.
210 // de_Latn_DE@collation=phonebook -> de_Latn_DE
211 char icu_base_max_locale[ULOC_FULLNAME_CAPACITY];
212 uloc_getBaseName(icu_max_locale, icu_base_max_locale,
213 ULOC_FULLNAME_CAPACITY, &error);
214
215 // Get original name without extensions.
216 // de_DE@collation=phonebook -> de_DE
217 char icu_base_locale[ULOC_FULLNAME_CAPACITY];
218 uloc_getBaseName(icu_locale, icu_base_locale, ULOC_FULLNAME_CAPACITY,
219 &error);
220
221 // Convert from ICU locale format to BCP47 format.
222 // de_Latn_DE -> de-Latn-DE
223 char base_max_locale[ULOC_FULLNAME_CAPACITY];
224 uloc_toLanguageTag(icu_base_max_locale, base_max_locale,
225 ULOC_FULLNAME_CAPACITY, FALSE, &error);
226
227 // de_DE -> de-DE
228 char base_locale[ULOC_FULLNAME_CAPACITY];
229 uloc_toLanguageTag(icu_base_locale, base_locale, ULOC_FULLNAME_CAPACITY,
230 FALSE, &error);
231
232 if (U_FAILURE(error)) {
233 return isolate->Throw(*factory->illegal_argument_string());
234 }
235
236 Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
237 Handle<String> value = factory->NewStringFromAsciiChecked(base_max_locale);
238 JSObject::AddProperty(result, maximized, value, NONE);
239 value = factory->NewStringFromAsciiChecked(base_locale);
240 JSObject::AddProperty(result, base, value, NONE);
241 output->set(i, *result);
242 }
243
244 Handle<JSArray> result = factory->NewJSArrayWithElements(output);
245 result->set_length(Smi::FromInt(length));
246 return *result;
247}
248
249
250RUNTIME_FUNCTION(Runtime_IsInitializedIntlObject) {
251 HandleScope scope(isolate);
252
253 DCHECK(args.length() == 1);
254
255 CONVERT_ARG_HANDLE_CHECKED(Object, input, 0);
256
257 if (!input->IsJSObject()) return isolate->heap()->false_value();
258 Handle<JSObject> obj = Handle<JSObject>::cast(input);
259
260 Handle<Symbol> marker = isolate->factory()->intl_initialized_marker_symbol();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000261 Handle<Object> tag = JSReceiver::GetDataProperty(obj, marker);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400262 return isolate->heap()->ToBoolean(!tag->IsUndefined());
263}
264
265
266RUNTIME_FUNCTION(Runtime_IsInitializedIntlObjectOfType) {
267 HandleScope scope(isolate);
268
269 DCHECK(args.length() == 2);
270
271 CONVERT_ARG_HANDLE_CHECKED(Object, input, 0);
272 CONVERT_ARG_HANDLE_CHECKED(String, expected_type, 1);
273
274 if (!input->IsJSObject()) return isolate->heap()->false_value();
275 Handle<JSObject> obj = Handle<JSObject>::cast(input);
276
277 Handle<Symbol> marker = isolate->factory()->intl_initialized_marker_symbol();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000278 Handle<Object> tag = JSReceiver::GetDataProperty(obj, marker);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400279 return isolate->heap()->ToBoolean(tag->IsString() &&
280 String::cast(*tag)->Equals(*expected_type));
281}
282
283
284RUNTIME_FUNCTION(Runtime_MarkAsInitializedIntlObjectOfType) {
285 HandleScope scope(isolate);
286
287 DCHECK(args.length() == 3);
288
289 CONVERT_ARG_HANDLE_CHECKED(JSObject, input, 0);
290 CONVERT_ARG_HANDLE_CHECKED(String, type, 1);
291 CONVERT_ARG_HANDLE_CHECKED(JSObject, impl, 2);
292
293 Handle<Symbol> marker = isolate->factory()->intl_initialized_marker_symbol();
294 JSObject::SetProperty(input, marker, type, STRICT).Assert();
295
296 marker = isolate->factory()->intl_impl_object_symbol();
297 JSObject::SetProperty(input, marker, impl, STRICT).Assert();
298
299 return isolate->heap()->undefined_value();
300}
301
302
303RUNTIME_FUNCTION(Runtime_GetImplFromInitializedIntlObject) {
304 HandleScope scope(isolate);
305
306 DCHECK(args.length() == 1);
307
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000308 CONVERT_ARG_HANDLE_CHECKED(JSObject, input, 0);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400309
310 if (!input->IsJSObject()) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000311 THROW_NEW_ERROR_RETURN_FAILURE(
312 isolate, NewTypeError(MessageTemplate::kNotIntlObject, input));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400313 }
314
315 Handle<JSObject> obj = Handle<JSObject>::cast(input);
316
317 Handle<Symbol> marker = isolate->factory()->intl_impl_object_symbol();
318
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000319 Handle<Object> impl = JSReceiver::GetDataProperty(obj, marker);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400320 if (impl->IsTheHole()) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000321 THROW_NEW_ERROR_RETURN_FAILURE(
322 isolate, NewTypeError(MessageTemplate::kNotIntlObject, obj));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400323 }
324 return *impl;
325}
326
327
328RUNTIME_FUNCTION(Runtime_CreateDateTimeFormat) {
329 HandleScope scope(isolate);
330
331 DCHECK(args.length() == 3);
332
333 CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
334 CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
335 CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);
336
337 Handle<ObjectTemplateInfo> date_format_template = I18N::GetTemplate(isolate);
338
339 // Create an empty object wrapper.
340 Handle<JSObject> local_object;
341 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
342 isolate, local_object,
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000343 ApiNatives::InstantiateObject(date_format_template));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400344
345 // Set date time formatter as internal field of the resulting JS object.
346 icu::SimpleDateFormat* date_format =
347 DateFormat::InitializeDateTimeFormat(isolate, locale, options, resolved);
348
349 if (!date_format) return isolate->ThrowIllegalOperation();
350
351 local_object->SetInternalField(0, reinterpret_cast<Smi*>(date_format));
352
353 Factory* factory = isolate->factory();
354 Handle<String> key = factory->NewStringFromStaticChars("dateFormat");
355 Handle<String> value = factory->NewStringFromStaticChars("valid");
356 JSObject::AddProperty(local_object, key, value, NONE);
357
358 // Make object handle weak so we can delete the data format once GC kicks in.
359 Handle<Object> wrapper = isolate->global_handles()->Create(*local_object);
Ben Murdochc5610432016-08-08 18:44:38 +0100360 GlobalHandles::MakeWeak(wrapper.location(), wrapper.location(),
361 DateFormat::DeleteDateFormat,
362 WeakCallbackType::kInternalFields);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400363 return *local_object;
364}
365
366
367RUNTIME_FUNCTION(Runtime_InternalDateFormat) {
368 HandleScope scope(isolate);
369
370 DCHECK(args.length() == 2);
371
372 CONVERT_ARG_HANDLE_CHECKED(JSObject, date_format_holder, 0);
373 CONVERT_ARG_HANDLE_CHECKED(JSDate, date, 1);
374
375 Handle<Object> value;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000376 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, value, Object::ToNumber(date));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400377
378 icu::SimpleDateFormat* date_format =
379 DateFormat::UnpackDateFormat(isolate, date_format_holder);
380 if (!date_format) return isolate->ThrowIllegalOperation();
381
382 icu::UnicodeString result;
383 date_format->format(value->Number(), result);
384
385 Handle<String> result_str;
386 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
387 isolate, result_str,
388 isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
389 reinterpret_cast<const uint16_t*>(result.getBuffer()),
390 result.length())));
391 return *result_str;
392}
393
394
395RUNTIME_FUNCTION(Runtime_InternalDateParse) {
396 HandleScope scope(isolate);
397
398 DCHECK(args.length() == 2);
399
400 CONVERT_ARG_HANDLE_CHECKED(JSObject, date_format_holder, 0);
401 CONVERT_ARG_HANDLE_CHECKED(String, date_string, 1);
402
403 v8::String::Utf8Value utf8_date(v8::Utils::ToLocal(date_string));
404 icu::UnicodeString u_date(icu::UnicodeString::fromUTF8(*utf8_date));
405 icu::SimpleDateFormat* date_format =
406 DateFormat::UnpackDateFormat(isolate, date_format_holder);
407 if (!date_format) return isolate->ThrowIllegalOperation();
408
409 UErrorCode status = U_ZERO_ERROR;
410 UDate date = date_format->parse(u_date, status);
411 if (U_FAILURE(status)) return isolate->heap()->undefined_value();
412
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000413 Handle<JSDate> result;
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400414 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000415 isolate, result,
416 JSDate::New(isolate->date_function(), isolate->date_function(),
417 static_cast<double>(date)));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400418 return *result;
419}
420
421
422RUNTIME_FUNCTION(Runtime_CreateNumberFormat) {
423 HandleScope scope(isolate);
424
425 DCHECK(args.length() == 3);
426
427 CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
428 CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
429 CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);
430
431 Handle<ObjectTemplateInfo> number_format_template =
432 I18N::GetTemplate(isolate);
433
434 // Create an empty object wrapper.
435 Handle<JSObject> local_object;
436 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
437 isolate, local_object,
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000438 ApiNatives::InstantiateObject(number_format_template));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400439
440 // Set number formatter as internal field of the resulting JS object.
441 icu::DecimalFormat* number_format =
442 NumberFormat::InitializeNumberFormat(isolate, locale, options, resolved);
443
444 if (!number_format) return isolate->ThrowIllegalOperation();
445
446 local_object->SetInternalField(0, reinterpret_cast<Smi*>(number_format));
447
448 Factory* factory = isolate->factory();
449 Handle<String> key = factory->NewStringFromStaticChars("numberFormat");
450 Handle<String> value = factory->NewStringFromStaticChars("valid");
451 JSObject::AddProperty(local_object, key, value, NONE);
452
453 Handle<Object> wrapper = isolate->global_handles()->Create(*local_object);
Ben Murdochc5610432016-08-08 18:44:38 +0100454 GlobalHandles::MakeWeak(wrapper.location(), wrapper.location(),
455 NumberFormat::DeleteNumberFormat,
456 WeakCallbackType::kInternalFields);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400457 return *local_object;
458}
459
460
461RUNTIME_FUNCTION(Runtime_InternalNumberFormat) {
462 HandleScope scope(isolate);
463
464 DCHECK(args.length() == 2);
465
466 CONVERT_ARG_HANDLE_CHECKED(JSObject, number_format_holder, 0);
467 CONVERT_ARG_HANDLE_CHECKED(Object, number, 1);
468
469 Handle<Object> value;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000470 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, value, Object::ToNumber(number));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400471
472 icu::DecimalFormat* number_format =
473 NumberFormat::UnpackNumberFormat(isolate, number_format_holder);
474 if (!number_format) return isolate->ThrowIllegalOperation();
475
476 icu::UnicodeString result;
477 number_format->format(value->Number(), result);
478
479 Handle<String> result_str;
480 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
481 isolate, result_str,
482 isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
483 reinterpret_cast<const uint16_t*>(result.getBuffer()),
484 result.length())));
485 return *result_str;
486}
487
488
489RUNTIME_FUNCTION(Runtime_InternalNumberParse) {
490 HandleScope scope(isolate);
491
492 DCHECK(args.length() == 2);
493
494 CONVERT_ARG_HANDLE_CHECKED(JSObject, number_format_holder, 0);
495 CONVERT_ARG_HANDLE_CHECKED(String, number_string, 1);
496
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000497 isolate->CountUsage(v8::Isolate::UseCounterFeature::kIntlV8Parse);
498
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400499 v8::String::Utf8Value utf8_number(v8::Utils::ToLocal(number_string));
500 icu::UnicodeString u_number(icu::UnicodeString::fromUTF8(*utf8_number));
501 icu::DecimalFormat* number_format =
502 NumberFormat::UnpackNumberFormat(isolate, number_format_holder);
503 if (!number_format) return isolate->ThrowIllegalOperation();
504
505 UErrorCode status = U_ZERO_ERROR;
506 icu::Formattable result;
507 // ICU 4.6 doesn't support parseCurrency call. We need to wait for ICU49
508 // to be part of Chrome.
509 // TODO(cira): Include currency parsing code using parseCurrency call.
510 // We need to check if the formatter parses all currencies or only the
511 // one it was constructed with (it will impact the API - how to return ISO
512 // code and the value).
513 number_format->parse(u_number, result, status);
514 if (U_FAILURE(status)) return isolate->heap()->undefined_value();
515
516 switch (result.getType()) {
517 case icu::Formattable::kDouble:
518 return *isolate->factory()->NewNumber(result.getDouble());
519 case icu::Formattable::kLong:
520 return *isolate->factory()->NewNumberFromInt(result.getLong());
521 case icu::Formattable::kInt64:
522 return *isolate->factory()->NewNumber(
523 static_cast<double>(result.getInt64()));
524 default:
525 return isolate->heap()->undefined_value();
526 }
527}
528
529
530RUNTIME_FUNCTION(Runtime_CreateCollator) {
531 HandleScope scope(isolate);
532
533 DCHECK(args.length() == 3);
534
535 CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
536 CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
537 CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);
538
539 Handle<ObjectTemplateInfo> collator_template = I18N::GetTemplate(isolate);
540
541 // Create an empty object wrapper.
542 Handle<JSObject> local_object;
543 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000544 isolate, local_object, ApiNatives::InstantiateObject(collator_template));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400545
546 // Set collator as internal field of the resulting JS object.
547 icu::Collator* collator =
548 Collator::InitializeCollator(isolate, locale, options, resolved);
549
550 if (!collator) return isolate->ThrowIllegalOperation();
551
552 local_object->SetInternalField(0, reinterpret_cast<Smi*>(collator));
553
554 Factory* factory = isolate->factory();
555 Handle<String> key = factory->NewStringFromStaticChars("collator");
556 Handle<String> value = factory->NewStringFromStaticChars("valid");
557 JSObject::AddProperty(local_object, key, value, NONE);
558
559 Handle<Object> wrapper = isolate->global_handles()->Create(*local_object);
Ben Murdochc5610432016-08-08 18:44:38 +0100560 GlobalHandles::MakeWeak(wrapper.location(), wrapper.location(),
561 Collator::DeleteCollator,
562 WeakCallbackType::kInternalFields);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400563 return *local_object;
564}
565
566
567RUNTIME_FUNCTION(Runtime_InternalCompare) {
568 HandleScope scope(isolate);
569
570 DCHECK(args.length() == 3);
571
572 CONVERT_ARG_HANDLE_CHECKED(JSObject, collator_holder, 0);
573 CONVERT_ARG_HANDLE_CHECKED(String, string1, 1);
574 CONVERT_ARG_HANDLE_CHECKED(String, string2, 2);
575
576 icu::Collator* collator = Collator::UnpackCollator(isolate, collator_holder);
577 if (!collator) return isolate->ThrowIllegalOperation();
578
Ben Murdochc5610432016-08-08 18:44:38 +0100579 string1 = String::Flatten(string1);
580 string2 = String::Flatten(string2);
581 DisallowHeapAllocation no_gc;
582 int32_t length1 = string1->length();
583 int32_t length2 = string2->length();
584 String::FlatContent flat1 = string1->GetFlatContent();
585 String::FlatContent flat2 = string2->GetFlatContent();
586 base::SmartArrayPointer<uc16> sap1;
587 base::SmartArrayPointer<uc16> sap2;
588 const UChar* string_val1 = GetUCharBufferFromFlat(flat1, &sap1, length1);
589 const UChar* string_val2 = GetUCharBufferFromFlat(flat2, &sap2, length2);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400590 UErrorCode status = U_ZERO_ERROR;
591 UCollationResult result =
Ben Murdochc5610432016-08-08 18:44:38 +0100592 collator->compare(string_val1, length1, string_val2, length2, status);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400593 if (U_FAILURE(status)) return isolate->ThrowIllegalOperation();
594
595 return *isolate->factory()->NewNumberFromInt(result);
596}
597
598
599RUNTIME_FUNCTION(Runtime_StringNormalize) {
600 HandleScope scope(isolate);
Ben Murdochc5610432016-08-08 18:44:38 +0100601 static const struct {
602 const char* name;
603 UNormalization2Mode mode;
604 } normalizationForms[] = {
605 {"nfc", UNORM2_COMPOSE},
606 {"nfc", UNORM2_DECOMPOSE},
607 {"nfkc", UNORM2_COMPOSE},
608 {"nfkc", UNORM2_DECOMPOSE},
609 };
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400610
611 DCHECK(args.length() == 2);
612
Ben Murdochc5610432016-08-08 18:44:38 +0100613 CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400614 CONVERT_NUMBER_CHECKED(int, form_id, Int32, args[1]);
615 RUNTIME_ASSERT(form_id >= 0 &&
616 static_cast<size_t>(form_id) < arraysize(normalizationForms));
617
Ben Murdochc5610432016-08-08 18:44:38 +0100618 int length = s->length();
619 s = String::Flatten(s);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400620 icu::UnicodeString result;
Ben Murdochc5610432016-08-08 18:44:38 +0100621 base::SmartArrayPointer<uc16> sap;
622 UErrorCode status = U_ZERO_ERROR;
623 {
624 DisallowHeapAllocation no_gc;
625 String::FlatContent flat = s->GetFlatContent();
626 const UChar* src = GetUCharBufferFromFlat(flat, &sap, length);
627 icu::UnicodeString input(false, src, length);
628 // Getting a singleton. Should not free it.
629 const icu::Normalizer2* normalizer =
630 icu::Normalizer2::getInstance(nullptr, normalizationForms[form_id].name,
631 normalizationForms[form_id].mode, status);
632 DCHECK(U_SUCCESS(status));
633 RUNTIME_ASSERT(normalizer != nullptr);
634 int32_t normalized_prefix_length =
635 normalizer->spanQuickCheckYes(input, status);
636 // Quick return if the input is already normalized.
637 if (length == normalized_prefix_length) return *s;
638 icu::UnicodeString unnormalized =
639 input.tempSubString(normalized_prefix_length);
640 // Read-only alias of the normalized prefix.
641 result.setTo(false, input.getBuffer(), normalized_prefix_length);
642 // copy-on-write; normalize the suffix and append to |result|.
643 normalizer->normalizeSecondAndAppend(result, unnormalized, status);
644 }
645
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400646 if (U_FAILURE(status)) {
647 return isolate->heap()->undefined_value();
648 }
649
650 Handle<String> result_str;
651 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
652 isolate, result_str,
653 isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
654 reinterpret_cast<const uint16_t*>(result.getBuffer()),
655 result.length())));
656 return *result_str;
657}
658
659
660RUNTIME_FUNCTION(Runtime_CreateBreakIterator) {
661 HandleScope scope(isolate);
662
663 DCHECK(args.length() == 3);
664
665 CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
666 CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
667 CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);
668
669 Handle<ObjectTemplateInfo> break_iterator_template =
670 I18N::GetTemplate2(isolate);
671
672 // Create an empty object wrapper.
673 Handle<JSObject> local_object;
674 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
675 isolate, local_object,
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000676 ApiNatives::InstantiateObject(break_iterator_template));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400677
678 // Set break iterator as internal field of the resulting JS object.
679 icu::BreakIterator* break_iterator = BreakIterator::InitializeBreakIterator(
680 isolate, locale, options, resolved);
681
682 if (!break_iterator) return isolate->ThrowIllegalOperation();
683
684 local_object->SetInternalField(0, reinterpret_cast<Smi*>(break_iterator));
685 // Make sure that the pointer to adopted text is NULL.
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000686 local_object->SetInternalField(1, static_cast<Smi*>(nullptr));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400687
688 Factory* factory = isolate->factory();
689 Handle<String> key = factory->NewStringFromStaticChars("breakIterator");
690 Handle<String> value = factory->NewStringFromStaticChars("valid");
691 JSObject::AddProperty(local_object, key, value, NONE);
692
693 // Make object handle weak so we can delete the break iterator once GC kicks
694 // in.
695 Handle<Object> wrapper = isolate->global_handles()->Create(*local_object);
Ben Murdochc5610432016-08-08 18:44:38 +0100696 GlobalHandles::MakeWeak(wrapper.location(), wrapper.location(),
697 BreakIterator::DeleteBreakIterator,
698 WeakCallbackType::kInternalFields);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400699 return *local_object;
700}
701
702
703RUNTIME_FUNCTION(Runtime_BreakIteratorAdoptText) {
704 HandleScope scope(isolate);
705
706 DCHECK(args.length() == 2);
707
708 CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);
709 CONVERT_ARG_HANDLE_CHECKED(String, text, 1);
710
711 icu::BreakIterator* break_iterator =
712 BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
713 if (!break_iterator) return isolate->ThrowIllegalOperation();
714
715 icu::UnicodeString* u_text = reinterpret_cast<icu::UnicodeString*>(
716 break_iterator_holder->GetInternalField(1));
717 delete u_text;
718
Ben Murdochc5610432016-08-08 18:44:38 +0100719 int length = text->length();
720 text = String::Flatten(text);
721 DisallowHeapAllocation no_gc;
722 String::FlatContent flat = text->GetFlatContent();
723 base::SmartArrayPointer<uc16> sap;
724 const UChar* text_value = GetUCharBufferFromFlat(flat, &sap, length);
725 u_text = new icu::UnicodeString(text_value, length);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400726 break_iterator_holder->SetInternalField(1, reinterpret_cast<Smi*>(u_text));
727
728 break_iterator->setText(*u_text);
729
730 return isolate->heap()->undefined_value();
731}
732
733
734RUNTIME_FUNCTION(Runtime_BreakIteratorFirst) {
735 HandleScope scope(isolate);
736
737 DCHECK(args.length() == 1);
738
739 CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);
740
741 icu::BreakIterator* break_iterator =
742 BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
743 if (!break_iterator) return isolate->ThrowIllegalOperation();
744
745 return *isolate->factory()->NewNumberFromInt(break_iterator->first());
746}
747
748
749RUNTIME_FUNCTION(Runtime_BreakIteratorNext) {
750 HandleScope scope(isolate);
751
752 DCHECK(args.length() == 1);
753
754 CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);
755
756 icu::BreakIterator* break_iterator =
757 BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
758 if (!break_iterator) return isolate->ThrowIllegalOperation();
759
760 return *isolate->factory()->NewNumberFromInt(break_iterator->next());
761}
762
763
764RUNTIME_FUNCTION(Runtime_BreakIteratorCurrent) {
765 HandleScope scope(isolate);
766
767 DCHECK(args.length() == 1);
768
769 CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);
770
771 icu::BreakIterator* break_iterator =
772 BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
773 if (!break_iterator) return isolate->ThrowIllegalOperation();
774
775 return *isolate->factory()->NewNumberFromInt(break_iterator->current());
776}
777
778
779RUNTIME_FUNCTION(Runtime_BreakIteratorBreakType) {
780 HandleScope scope(isolate);
781
782 DCHECK(args.length() == 1);
783
784 CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);
785
786 icu::BreakIterator* break_iterator =
787 BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
788 if (!break_iterator) return isolate->ThrowIllegalOperation();
789
790 // TODO(cira): Remove cast once ICU fixes base BreakIterator class.
791 icu::RuleBasedBreakIterator* rule_based_iterator =
792 static_cast<icu::RuleBasedBreakIterator*>(break_iterator);
793 int32_t status = rule_based_iterator->getRuleStatus();
794 // Keep return values in sync with JavaScript BreakType enum.
795 if (status >= UBRK_WORD_NONE && status < UBRK_WORD_NONE_LIMIT) {
796 return *isolate->factory()->NewStringFromStaticChars("none");
797 } else if (status >= UBRK_WORD_NUMBER && status < UBRK_WORD_NUMBER_LIMIT) {
798 return *isolate->factory()->number_string();
799 } else if (status >= UBRK_WORD_LETTER && status < UBRK_WORD_LETTER_LIMIT) {
800 return *isolate->factory()->NewStringFromStaticChars("letter");
801 } else if (status >= UBRK_WORD_KANA && status < UBRK_WORD_KANA_LIMIT) {
802 return *isolate->factory()->NewStringFromStaticChars("kana");
803 } else if (status >= UBRK_WORD_IDEO && status < UBRK_WORD_IDEO_LIMIT) {
804 return *isolate->factory()->NewStringFromStaticChars("ideo");
805 } else {
806 return *isolate->factory()->NewStringFromStaticChars("unknown");
807 }
808}
Ben Murdochc5610432016-08-08 18:44:38 +0100809
810namespace {
811void ConvertCaseWithTransliterator(icu::UnicodeString* input,
812 const char* transliterator_id) {
813 UErrorCode status = U_ZERO_ERROR;
814 base::SmartPointer<icu::Transliterator> translit(
815 icu::Transliterator::createInstance(
816 icu::UnicodeString(transliterator_id, -1, US_INV), UTRANS_FORWARD,
817 status));
818 if (U_FAILURE(status)) return;
819 translit->transliterate(*input);
820}
821
822MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate,
823 bool is_to_upper, const char* lang) {
824 int32_t src_length = s->length();
825
826 // Greek uppercasing has to be done via transliteration.
827 // TODO(jshin): Drop this special-casing once ICU's regular case conversion
828 // API supports Greek uppercasing. See
829 // http://bugs.icu-project.org/trac/ticket/10582 .
830 // In the meantime, if there's no Greek character in |s|, call this
831 // function again with the root locale (lang="").
832 // ICU's C API for transliteration is nasty and we just use C++ API.
833 if (V8_UNLIKELY(is_to_upper && lang[0] == 'e' && lang[1] == 'l')) {
834 icu::UnicodeString converted;
835 base::SmartArrayPointer<uc16> sap;
836 {
837 DisallowHeapAllocation no_gc;
838 String::FlatContent flat = s->GetFlatContent();
839 const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length);
840 // Starts with the source string (read-only alias with copy-on-write
841 // semantics) and will be modified to contain the converted result.
842 // Using read-only alias at first saves one copy operation if
843 // transliteration does not change the input, which is rather rare.
844 // Moreover, transliteration takes rather long so that saving one copy
845 // helps only a little bit.
846 converted.setTo(false, src, src_length);
847 ConvertCaseWithTransliterator(&converted, "el-Upper");
848 // If no change is made, just return |s|.
849 if (converted.getBuffer() == src) return *s;
850 }
851 Handle<String> result;
852 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
853 isolate, result,
854 isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
855 reinterpret_cast<const uint16_t*>(converted.getBuffer()),
856 converted.length())));
857 return *result;
858 }
859
860 auto case_converter = is_to_upper ? u_strToUpper : u_strToLower;
861
862 int32_t dest_length = src_length;
863 UErrorCode status;
864 Handle<SeqTwoByteString> result;
865 base::SmartArrayPointer<uc16> sap;
866
867 // This is not a real loop. It'll be executed only once (no overflow) or
868 // twice (overflow).
869 for (int i = 0; i < 2; ++i) {
870 result =
871 isolate->factory()->NewRawTwoByteString(dest_length).ToHandleChecked();
872 DisallowHeapAllocation no_gc;
873 String::FlatContent flat = s->GetFlatContent();
874 const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length);
875 status = U_ZERO_ERROR;
876 dest_length = case_converter(reinterpret_cast<UChar*>(result->GetChars()),
877 dest_length, src, src_length, lang, &status);
878 if (status != U_BUFFER_OVERFLOW_ERROR) break;
879 }
880
881 // In most cases, the output will fill the destination buffer completely
882 // leading to an unterminated string (U_STRING_NOT_TERMINATED_WARNING).
883 // Only in rare cases, it'll be shorter than the destination buffer and
884 // |result| has to be truncated.
885 DCHECK(U_SUCCESS(status));
886 if (V8_LIKELY(status == U_STRING_NOT_TERMINATED_WARNING)) {
887 DCHECK(dest_length == result->length());
888 return *result;
889 }
890 if (U_SUCCESS(status)) {
891 DCHECK(dest_length < result->length());
892 return *Handle<SeqTwoByteString>::cast(
893 SeqString::Truncate(result, dest_length));
894 }
895 return *s;
896}
897
// True when |ch| is one of the ASCII capital letters 'A' through 'Z'.
inline bool IsASCIIUpper(uint16_t ch) {
  return !(ch < 'A' || ch > 'Z');
}
899
// Lowercase mapping for every Latin-1 code unit, indexed by the code unit.
// Each row covers 16 consecutive code units. 'A'..'Z' and 0xC0..0xDE map to
// the value 0x20 higher, except 0xD7 which maps to itself; every other entry
// maps to itself.
const uint8_t kToLower[256] = {
    // 0x00 - 0x0F
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    // 0x10 - 0x1F
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
    // 0x20 - 0x2F
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    // 0x30 - 0x3F
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
    // 0x40 - 0x4F ('@', 'A'..'O')
    0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    // 0x50 - 0x5F ('P'..'Z', then punctuation)
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
    // 0x60 - 0x6F
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    // 0x70 - 0x7F
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    // 0x80 - 0x8F
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
    // 0x90 - 0x9F
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
    // 0xA0 - 0xAF
    0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
    0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
    // 0xB0 - 0xBF
    0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
    0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
    // 0xC0 - 0xCF
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    // 0xD0 - 0xDF (0xD7 and 0xDF map to themselves)
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xD7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
    // 0xE0 - 0xEF
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    // 0xF0 - 0xFF
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
};
924
925inline uint16_t ToLatin1Lower(uint16_t ch) {
926 return static_cast<uint16_t>(kToLower[ch]);
927}
928
// Uppercases ASCII 'a'..'z' by clearing bit 5; every other value is returned
// unchanged.
inline uint16_t ToASCIIUpper(uint16_t ch) {
  const bool is_ascii_lower = ch >= 'a' && ch <= 'z';
  return is_ascii_lower ? static_cast<uint16_t>(ch & ~0x20) : ch;
}
932
933// Does not work for U+00DF (sharp-s), U+00B5 (micron), U+00FF.
934inline uint16_t ToLatin1Upper(uint16_t ch) {
935 DCHECK(ch != 0xDF && ch != 0xB5 && ch != 0xFF);
936 return ch &
937 ~(((ch >= 'a' && ch <= 'z') || (((ch & 0xE0) == 0xE0) && ch != 0xE7))
938 << 5);
939}
940
941template <typename Char>
942bool ToUpperFastASCII(const Vector<const Char>& src,
943 Handle<SeqOneByteString> result) {
944 // Do a faster loop for the case where all the characters are ASCII.
945 uint16_t ored = 0;
946 int32_t index = 0;
947 for (auto it = src.begin(); it != src.end(); ++it) {
948 uint16_t ch = static_cast<uint16_t>(*it);
949 ored |= ch;
950 result->SeqOneByteStringSet(index++, ToASCIIUpper(ch));
951 }
952 return !(ored & ~0x7F);
953}
954
// U+00DF (lowercase sharp-s). Its uppercase form is the two characters "SS",
// so the one-byte uppercasing helpers below treat it separately.
const uint16_t sharp_s = 0xDF;
956
957template <typename Char>
958bool ToUpperOneByte(const Vector<const Char>& src,
959 Handle<SeqOneByteString> result, int* sharp_s_count) {
960 // Still pretty-fast path for the input with non-ASCII Latin-1 characters.
961
962 // There are two special cases.
963 // 1. U+00B5 and U+00FF are mapped to a character beyond U+00FF.
964 // 2. Lower case sharp-S converts to "SS" (two characters)
965 *sharp_s_count = 0;
966 int32_t index = 0;
967 for (auto it = src.begin(); it != src.end(); ++it) {
968 uint16_t ch = static_cast<uint16_t>(*it);
969 if (V8_UNLIKELY(ch == sharp_s)) {
970 ++(*sharp_s_count);
971 continue;
972 }
973 if (V8_UNLIKELY(ch == 0xB5 || ch == 0xFF)) {
974 // Since this upper-cased character does not fit in an 8-bit string, we
975 // need to take the 16-bit path.
976 return false;
977 }
978 result->SeqOneByteStringSet(index++, ToLatin1Upper(ch));
979 }
980
981 return true;
982}
983
984template <typename Char>
985void ToUpperWithSharpS(const Vector<const Char>& src,
986 Handle<SeqOneByteString> result) {
987 int32_t dest_index = 0;
988 for (auto it = src.begin(); it != src.end(); ++it) {
989 uint16_t ch = static_cast<uint16_t>(*it);
990 if (ch == sharp_s) {
991 result->SeqOneByteStringSet(dest_index++, 'S');
992 result->SeqOneByteStringSet(dest_index++, 'S');
993 } else {
994 result->SeqOneByteStringSet(dest_index++, ToLatin1Upper(ch));
995 }
996 }
997}
998
999} // namespace
1000
1001RUNTIME_FUNCTION(Runtime_StringToLowerCaseI18N) {
1002 HandleScope scope(isolate);
1003 DCHECK_EQ(args.length(), 1);
1004 CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
1005
1006 int length = s->length();
1007 s = String::Flatten(s);
1008 // First scan the string for uppercase and non-ASCII characters:
1009 if (s->HasOnlyOneByteChars()) {
1010 unsigned first_index_to_lower = length;
1011 for (int index = 0; index < length; ++index) {
1012 // Blink specializes this path for one-byte strings, so it
1013 // does not need to do a generic get, but can do the equivalent
1014 // of SeqOneByteStringGet.
1015 uint16_t ch = s->Get(index);
1016 if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) {
1017 first_index_to_lower = index;
1018 break;
1019 }
1020 }
1021
1022 // Nothing to do if the string is all ASCII with no uppercase.
1023 if (first_index_to_lower == length) return *s;
1024
1025 // We depend here on the invariant that the length of a Latin1
1026 // string is invariant under ToLowerCase, and the result always
1027 // fits in the Latin1 range in the *root locale*. It does not hold
1028 // for ToUpperCase even in the root locale.
1029 Handle<SeqOneByteString> result;
1030 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1031 isolate, result, isolate->factory()->NewRawOneByteString(length));
1032
1033 DisallowHeapAllocation no_gc;
1034 String::FlatContent flat = s->GetFlatContent();
1035 if (flat.IsOneByte()) {
1036 const uint8_t* src = flat.ToOneByteVector().start();
1037 CopyChars(result->GetChars(), src, first_index_to_lower);
1038 for (int index = first_index_to_lower; index < length; ++index) {
1039 uint16_t ch = static_cast<uint16_t>(src[index]);
1040 result->SeqOneByteStringSet(index, ToLatin1Lower(ch));
1041 }
1042 } else {
1043 const uint16_t* src = flat.ToUC16Vector().start();
1044 CopyChars(result->GetChars(), src, first_index_to_lower);
1045 for (int index = first_index_to_lower; index < length; ++index) {
1046 uint16_t ch = src[index];
1047 result->SeqOneByteStringSet(index, ToLatin1Lower(ch));
1048 }
1049 }
1050
1051 return *result;
1052 }
1053
1054 // Blink had an additional case here for ASCII 2-byte strings, but
1055 // that is subsumed by the above code (assuming there isn't a false
1056 // negative for HasOnlyOneByteChars).
1057
1058 // Do a slower implementation for cases that include non-ASCII characters.
1059 return LocaleConvertCase(s, isolate, false, "");
1060}
1061
1062RUNTIME_FUNCTION(Runtime_StringToUpperCaseI18N) {
1063 HandleScope scope(isolate);
1064 DCHECK_EQ(args.length(), 1);
1065 CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
1066
1067 // This function could be optimized for no-op cases the way lowercase
1068 // counterpart is, but in empirical testing, few actual calls to upper()
1069 // are no-ops. So, it wouldn't be worth the extra time for pre-scanning.
1070
1071 int32_t length = s->length();
1072 s = String::Flatten(s);
1073
1074 if (s->HasOnlyOneByteChars()) {
1075 Handle<SeqOneByteString> result;
1076 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1077 isolate, result, isolate->factory()->NewRawOneByteString(length));
1078
1079 int sharp_s_count;
1080 bool is_result_single_byte;
1081 {
1082 DisallowHeapAllocation no_gc;
1083 String::FlatContent flat = s->GetFlatContent();
1084 // If it was ok to slow down ASCII-only input slightly, ToUpperFastASCII
1085 // could be removed because ToUpperOneByte is pretty fast now (it
1086 // does not call ICU API any more.).
1087 if (flat.IsOneByte()) {
1088 Vector<const uint8_t> src = flat.ToOneByteVector();
1089 if (ToUpperFastASCII(src, result)) return *result;
1090 is_result_single_byte = ToUpperOneByte(src, result, &sharp_s_count);
1091 } else {
1092 DCHECK(flat.IsTwoByte());
1093 Vector<const uint16_t> src = flat.ToUC16Vector();
1094 if (ToUpperFastASCII(src, result)) return *result;
1095 is_result_single_byte = ToUpperOneByte(src, result, &sharp_s_count);
1096 }
1097 }
1098
1099 // Go to the full Unicode path if there are characters whose uppercase
1100 // is beyond the Latin-1 range (cannot be represented in OneByteString).
1101 if (V8_UNLIKELY(!is_result_single_byte)) {
1102 return LocaleConvertCase(s, isolate, true, "");
1103 }
1104
1105 if (sharp_s_count == 0) return *result;
1106
1107 // We have sharp_s_count sharp-s characters, but the result is still
1108 // in the Latin-1 range.
1109 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1110 isolate, result,
1111 isolate->factory()->NewRawOneByteString(length + sharp_s_count));
1112 DisallowHeapAllocation no_gc;
1113 String::FlatContent flat = s->GetFlatContent();
1114 if (flat.IsOneByte()) {
1115 ToUpperWithSharpS(flat.ToOneByteVector(), result);
1116 } else {
1117 ToUpperWithSharpS(flat.ToUC16Vector(), result);
1118 }
1119
1120 return *result;
1121 }
1122
1123 return LocaleConvertCase(s, isolate, true, "");
1124}
1125
1126RUNTIME_FUNCTION(Runtime_StringLocaleConvertCase) {
1127 HandleScope scope(isolate);
1128 DCHECK_EQ(args.length(), 3);
1129 CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
1130 CONVERT_BOOLEAN_ARG_CHECKED(is_upper, 1);
1131 CONVERT_ARG_HANDLE_CHECKED(SeqOneByteString, lang, 2);
1132
1133 // All the languages requiring special handling ("az", "el", "lt", "tr")
1134 // have a 2-letter language code.
1135 DCHECK(lang->length() == 2);
1136 uint8_t lang_str[3];
1137 memcpy(lang_str, lang->GetChars(), 2);
1138 lang_str[2] = 0;
1139 s = String::Flatten(s);
1140 // TODO(jshin): Consider adding a fast path for ASCII or Latin-1. The fastpath
1141 // in the root locale needs to be adjusted for az, lt and tr because even case
1142 // mapping of ASCII range characters are different in those locales.
1143 // Greek (el) does not require any adjustment, though.
1144 return LocaleConvertCase(s, isolate, is_upper,
1145 reinterpret_cast<const char*>(lang_str));
1146}
1147
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001148} // namespace internal
1149} // namespace v8
Emily Bernierd0a1eb72015-03-24 16:35:39 -04001150
1151#endif // V8_I18N_SUPPORT