blob: d4c6034166e3fd471a5e064bf49fe1b8550b095f [file] [log] [blame]
// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5
6#ifdef V8_I18N_SUPPORT
Emily Bernierd0a1eb72015-03-24 16:35:39 -04007#include "src/runtime/runtime-utils.h"
8
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00009#include "src/api.h"
10#include "src/api-natives.h"
11#include "src/arguments.h"
12#include "src/factory.h"
13#include "src/i18n.h"
14#include "src/isolate-inl.h"
15#include "src/messages.h"
16
Emily Bernierd0a1eb72015-03-24 16:35:39 -040017#include "unicode/brkiter.h"
18#include "unicode/calendar.h"
19#include "unicode/coll.h"
20#include "unicode/curramt.h"
21#include "unicode/datefmt.h"
22#include "unicode/dcfmtsym.h"
23#include "unicode/decimfmt.h"
24#include "unicode/dtfmtsym.h"
25#include "unicode/dtptngen.h"
26#include "unicode/locid.h"
Ben Murdochc5610432016-08-08 18:44:38 +010027#include "unicode/normalizer2.h"
Emily Bernierd0a1eb72015-03-24 16:35:39 -040028#include "unicode/numfmt.h"
29#include "unicode/numsys.h"
30#include "unicode/rbbi.h"
31#include "unicode/smpdtfmt.h"
32#include "unicode/timezone.h"
Ben Murdochc5610432016-08-08 18:44:38 +010033#include "unicode/translit.h"
Emily Bernierd0a1eb72015-03-24 16:35:39 -040034#include "unicode/uchar.h"
35#include "unicode/ucol.h"
36#include "unicode/ucurr.h"
37#include "unicode/uloc.h"
Ben Murdochc5610432016-08-08 18:44:38 +010038#include "unicode/unistr.h"
Emily Bernierd0a1eb72015-03-24 16:35:39 -040039#include "unicode/unum.h"
40#include "unicode/uversion.h"
41
42
43namespace v8 {
44namespace internal {
Ben Murdochc5610432016-08-08 18:44:38 +010045namespace {
46
47const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat,
48 base::SmartArrayPointer<uc16>* dest,
49 int32_t length) {
50 DCHECK(flat.IsFlat());
51 if (flat.IsOneByte()) {
52 if (dest->is_empty()) {
53 dest->Reset(NewArray<uc16>(length));
54 CopyChars(dest->get(), flat.ToOneByteVector().start(), length);
55 }
56 return reinterpret_cast<const UChar*>(dest->get());
57 } else {
58 return reinterpret_cast<const UChar*>(flat.ToUC16Vector().start());
59 }
60}
61
62} // namespace
Emily Bernierd0a1eb72015-03-24 16:35:39 -040063
64RUNTIME_FUNCTION(Runtime_CanonicalizeLanguageTag) {
65 HandleScope scope(isolate);
66 Factory* factory = isolate->factory();
67
68 DCHECK(args.length() == 1);
69 CONVERT_ARG_HANDLE_CHECKED(String, locale_id_str, 0);
70
71 v8::String::Utf8Value locale_id(v8::Utils::ToLocal(locale_id_str));
72
73 // Return value which denotes invalid language tag.
74 const char* const kInvalidTag = "invalid-tag";
75
76 UErrorCode error = U_ZERO_ERROR;
77 char icu_result[ULOC_FULLNAME_CAPACITY];
78 int icu_length = 0;
79
80 uloc_forLanguageTag(*locale_id, icu_result, ULOC_FULLNAME_CAPACITY,
81 &icu_length, &error);
82 if (U_FAILURE(error) || icu_length == 0) {
83 return *factory->NewStringFromAsciiChecked(kInvalidTag);
84 }
85
86 char result[ULOC_FULLNAME_CAPACITY];
87
88 // Force strict BCP47 rules.
89 uloc_toLanguageTag(icu_result, result, ULOC_FULLNAME_CAPACITY, TRUE, &error);
90
91 if (U_FAILURE(error)) {
92 return *factory->NewStringFromAsciiChecked(kInvalidTag);
93 }
94
95 return *factory->NewStringFromAsciiChecked(result);
96}
97
98
99RUNTIME_FUNCTION(Runtime_AvailableLocalesOf) {
100 HandleScope scope(isolate);
101 Factory* factory = isolate->factory();
102
103 DCHECK(args.length() == 1);
104 CONVERT_ARG_HANDLE_CHECKED(String, service, 0);
105
106 const icu::Locale* available_locales = NULL;
107 int32_t count = 0;
108
109 if (service->IsUtf8EqualTo(CStrVector("collator"))) {
110 available_locales = icu::Collator::getAvailableLocales(count);
111 } else if (service->IsUtf8EqualTo(CStrVector("numberformat"))) {
112 available_locales = icu::NumberFormat::getAvailableLocales(count);
113 } else if (service->IsUtf8EqualTo(CStrVector("dateformat"))) {
114 available_locales = icu::DateFormat::getAvailableLocales(count);
115 } else if (service->IsUtf8EqualTo(CStrVector("breakiterator"))) {
116 available_locales = icu::BreakIterator::getAvailableLocales(count);
117 }
118
119 UErrorCode error = U_ZERO_ERROR;
120 char result[ULOC_FULLNAME_CAPACITY];
121 Handle<JSObject> locales = factory->NewJSObject(isolate->object_function());
122
123 for (int32_t i = 0; i < count; ++i) {
124 const char* icu_name = available_locales[i].getName();
125
126 error = U_ZERO_ERROR;
127 // No need to force strict BCP47 rules.
128 uloc_toLanguageTag(icu_name, result, ULOC_FULLNAME_CAPACITY, FALSE, &error);
129 if (U_FAILURE(error)) {
130 // This shouldn't happen, but lets not break the user.
131 continue;
132 }
133
134 RETURN_FAILURE_ON_EXCEPTION(
135 isolate, JSObject::SetOwnPropertyIgnoreAttributes(
136 locales, factory->NewStringFromAsciiChecked(result),
137 factory->NewNumber(i), NONE));
138 }
139
140 return *locales;
141}
142
143
144RUNTIME_FUNCTION(Runtime_GetDefaultICULocale) {
145 HandleScope scope(isolate);
146 Factory* factory = isolate->factory();
147
148 DCHECK(args.length() == 0);
149
150 icu::Locale default_locale;
151
152 // Set the locale
153 char result[ULOC_FULLNAME_CAPACITY];
154 UErrorCode status = U_ZERO_ERROR;
155 uloc_toLanguageTag(default_locale.getName(), result, ULOC_FULLNAME_CAPACITY,
156 FALSE, &status);
157 if (U_SUCCESS(status)) {
158 return *factory->NewStringFromAsciiChecked(result);
159 }
160
161 return *factory->NewStringFromStaticChars("und");
162}
163
164
165RUNTIME_FUNCTION(Runtime_GetLanguageTagVariants) {
166 HandleScope scope(isolate);
167 Factory* factory = isolate->factory();
168
169 DCHECK(args.length() == 1);
170
171 CONVERT_ARG_HANDLE_CHECKED(JSArray, input, 0);
172
173 uint32_t length = static_cast<uint32_t>(input->length()->Number());
174 // Set some limit to prevent fuzz tests from going OOM.
175 // Can be bumped when callers' requirements change.
176 RUNTIME_ASSERT(length < 100);
177 Handle<FixedArray> output = factory->NewFixedArray(length);
178 Handle<Name> maximized = factory->NewStringFromStaticChars("maximized");
179 Handle<Name> base = factory->NewStringFromStaticChars("base");
180 for (unsigned int i = 0; i < length; ++i) {
181 Handle<Object> locale_id;
Ben Murdochda12d292016-06-02 14:46:10 +0100182 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
183 isolate, locale_id, JSReceiver::GetElement(isolate, input, i));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400184 if (!locale_id->IsString()) {
185 return isolate->Throw(*factory->illegal_argument_string());
186 }
187
188 v8::String::Utf8Value utf8_locale_id(
189 v8::Utils::ToLocal(Handle<String>::cast(locale_id)));
190
191 UErrorCode error = U_ZERO_ERROR;
192
193 // Convert from BCP47 to ICU format.
194 // de-DE-u-co-phonebk -> de_DE@collation=phonebook
195 char icu_locale[ULOC_FULLNAME_CAPACITY];
196 int icu_locale_length = 0;
197 uloc_forLanguageTag(*utf8_locale_id, icu_locale, ULOC_FULLNAME_CAPACITY,
198 &icu_locale_length, &error);
199 if (U_FAILURE(error) || icu_locale_length == 0) {
200 return isolate->Throw(*factory->illegal_argument_string());
201 }
202
203 // Maximize the locale.
204 // de_DE@collation=phonebook -> de_Latn_DE@collation=phonebook
205 char icu_max_locale[ULOC_FULLNAME_CAPACITY];
206 uloc_addLikelySubtags(icu_locale, icu_max_locale, ULOC_FULLNAME_CAPACITY,
207 &error);
208
209 // Remove extensions from maximized locale.
210 // de_Latn_DE@collation=phonebook -> de_Latn_DE
211 char icu_base_max_locale[ULOC_FULLNAME_CAPACITY];
212 uloc_getBaseName(icu_max_locale, icu_base_max_locale,
213 ULOC_FULLNAME_CAPACITY, &error);
214
215 // Get original name without extensions.
216 // de_DE@collation=phonebook -> de_DE
217 char icu_base_locale[ULOC_FULLNAME_CAPACITY];
218 uloc_getBaseName(icu_locale, icu_base_locale, ULOC_FULLNAME_CAPACITY,
219 &error);
220
221 // Convert from ICU locale format to BCP47 format.
222 // de_Latn_DE -> de-Latn-DE
223 char base_max_locale[ULOC_FULLNAME_CAPACITY];
224 uloc_toLanguageTag(icu_base_max_locale, base_max_locale,
225 ULOC_FULLNAME_CAPACITY, FALSE, &error);
226
227 // de_DE -> de-DE
228 char base_locale[ULOC_FULLNAME_CAPACITY];
229 uloc_toLanguageTag(icu_base_locale, base_locale, ULOC_FULLNAME_CAPACITY,
230 FALSE, &error);
231
232 if (U_FAILURE(error)) {
233 return isolate->Throw(*factory->illegal_argument_string());
234 }
235
236 Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
237 Handle<String> value = factory->NewStringFromAsciiChecked(base_max_locale);
238 JSObject::AddProperty(result, maximized, value, NONE);
239 value = factory->NewStringFromAsciiChecked(base_locale);
240 JSObject::AddProperty(result, base, value, NONE);
241 output->set(i, *result);
242 }
243
244 Handle<JSArray> result = factory->NewJSArrayWithElements(output);
245 result->set_length(Smi::FromInt(length));
246 return *result;
247}
248
249
250RUNTIME_FUNCTION(Runtime_IsInitializedIntlObject) {
251 HandleScope scope(isolate);
252
253 DCHECK(args.length() == 1);
254
255 CONVERT_ARG_HANDLE_CHECKED(Object, input, 0);
256
257 if (!input->IsJSObject()) return isolate->heap()->false_value();
258 Handle<JSObject> obj = Handle<JSObject>::cast(input);
259
260 Handle<Symbol> marker = isolate->factory()->intl_initialized_marker_symbol();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000261 Handle<Object> tag = JSReceiver::GetDataProperty(obj, marker);
Ben Murdoch61f157c2016-09-16 13:49:30 +0100262 return isolate->heap()->ToBoolean(!tag->IsUndefined(isolate));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400263}
264
265
266RUNTIME_FUNCTION(Runtime_IsInitializedIntlObjectOfType) {
267 HandleScope scope(isolate);
268
269 DCHECK(args.length() == 2);
270
271 CONVERT_ARG_HANDLE_CHECKED(Object, input, 0);
272 CONVERT_ARG_HANDLE_CHECKED(String, expected_type, 1);
273
274 if (!input->IsJSObject()) return isolate->heap()->false_value();
275 Handle<JSObject> obj = Handle<JSObject>::cast(input);
276
277 Handle<Symbol> marker = isolate->factory()->intl_initialized_marker_symbol();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000278 Handle<Object> tag = JSReceiver::GetDataProperty(obj, marker);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400279 return isolate->heap()->ToBoolean(tag->IsString() &&
280 String::cast(*tag)->Equals(*expected_type));
281}
282
283
284RUNTIME_FUNCTION(Runtime_MarkAsInitializedIntlObjectOfType) {
285 HandleScope scope(isolate);
286
287 DCHECK(args.length() == 3);
288
289 CONVERT_ARG_HANDLE_CHECKED(JSObject, input, 0);
290 CONVERT_ARG_HANDLE_CHECKED(String, type, 1);
291 CONVERT_ARG_HANDLE_CHECKED(JSObject, impl, 2);
292
293 Handle<Symbol> marker = isolate->factory()->intl_initialized_marker_symbol();
294 JSObject::SetProperty(input, marker, type, STRICT).Assert();
295
296 marker = isolate->factory()->intl_impl_object_symbol();
297 JSObject::SetProperty(input, marker, impl, STRICT).Assert();
298
299 return isolate->heap()->undefined_value();
300}
301
302
303RUNTIME_FUNCTION(Runtime_GetImplFromInitializedIntlObject) {
304 HandleScope scope(isolate);
305
306 DCHECK(args.length() == 1);
307
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000308 CONVERT_ARG_HANDLE_CHECKED(JSObject, input, 0);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400309
310 if (!input->IsJSObject()) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000311 THROW_NEW_ERROR_RETURN_FAILURE(
312 isolate, NewTypeError(MessageTemplate::kNotIntlObject, input));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400313 }
314
315 Handle<JSObject> obj = Handle<JSObject>::cast(input);
316
317 Handle<Symbol> marker = isolate->factory()->intl_impl_object_symbol();
318
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000319 Handle<Object> impl = JSReceiver::GetDataProperty(obj, marker);
Ben Murdoch61f157c2016-09-16 13:49:30 +0100320 if (impl->IsTheHole(isolate)) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000321 THROW_NEW_ERROR_RETURN_FAILURE(
322 isolate, NewTypeError(MessageTemplate::kNotIntlObject, obj));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400323 }
324 return *impl;
325}
326
327
328RUNTIME_FUNCTION(Runtime_CreateDateTimeFormat) {
329 HandleScope scope(isolate);
330
331 DCHECK(args.length() == 3);
332
333 CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
334 CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
335 CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);
336
337 Handle<ObjectTemplateInfo> date_format_template = I18N::GetTemplate(isolate);
338
339 // Create an empty object wrapper.
340 Handle<JSObject> local_object;
341 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
342 isolate, local_object,
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000343 ApiNatives::InstantiateObject(date_format_template));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400344
345 // Set date time formatter as internal field of the resulting JS object.
346 icu::SimpleDateFormat* date_format =
347 DateFormat::InitializeDateTimeFormat(isolate, locale, options, resolved);
348
349 if (!date_format) return isolate->ThrowIllegalOperation();
350
351 local_object->SetInternalField(0, reinterpret_cast<Smi*>(date_format));
352
353 Factory* factory = isolate->factory();
354 Handle<String> key = factory->NewStringFromStaticChars("dateFormat");
355 Handle<String> value = factory->NewStringFromStaticChars("valid");
356 JSObject::AddProperty(local_object, key, value, NONE);
357
358 // Make object handle weak so we can delete the data format once GC kicks in.
359 Handle<Object> wrapper = isolate->global_handles()->Create(*local_object);
Ben Murdochc5610432016-08-08 18:44:38 +0100360 GlobalHandles::MakeWeak(wrapper.location(), wrapper.location(),
361 DateFormat::DeleteDateFormat,
362 WeakCallbackType::kInternalFields);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400363 return *local_object;
364}
365
366
367RUNTIME_FUNCTION(Runtime_InternalDateFormat) {
368 HandleScope scope(isolate);
369
370 DCHECK(args.length() == 2);
371
372 CONVERT_ARG_HANDLE_CHECKED(JSObject, date_format_holder, 0);
373 CONVERT_ARG_HANDLE_CHECKED(JSDate, date, 1);
374
375 Handle<Object> value;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000376 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, value, Object::ToNumber(date));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400377
378 icu::SimpleDateFormat* date_format =
379 DateFormat::UnpackDateFormat(isolate, date_format_holder);
380 if (!date_format) return isolate->ThrowIllegalOperation();
381
382 icu::UnicodeString result;
383 date_format->format(value->Number(), result);
384
Ben Murdoch61f157c2016-09-16 13:49:30 +0100385 RETURN_RESULT_OR_FAILURE(
386 isolate, isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
387 reinterpret_cast<const uint16_t*>(result.getBuffer()),
388 result.length())));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400389}
390
391
392RUNTIME_FUNCTION(Runtime_InternalDateParse) {
393 HandleScope scope(isolate);
394
395 DCHECK(args.length() == 2);
396
397 CONVERT_ARG_HANDLE_CHECKED(JSObject, date_format_holder, 0);
398 CONVERT_ARG_HANDLE_CHECKED(String, date_string, 1);
399
400 v8::String::Utf8Value utf8_date(v8::Utils::ToLocal(date_string));
401 icu::UnicodeString u_date(icu::UnicodeString::fromUTF8(*utf8_date));
402 icu::SimpleDateFormat* date_format =
403 DateFormat::UnpackDateFormat(isolate, date_format_holder);
404 if (!date_format) return isolate->ThrowIllegalOperation();
405
406 UErrorCode status = U_ZERO_ERROR;
407 UDate date = date_format->parse(u_date, status);
408 if (U_FAILURE(status)) return isolate->heap()->undefined_value();
409
Ben Murdoch61f157c2016-09-16 13:49:30 +0100410 RETURN_RESULT_OR_FAILURE(
411 isolate, JSDate::New(isolate->date_function(), isolate->date_function(),
412 static_cast<double>(date)));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400413}
414
415
416RUNTIME_FUNCTION(Runtime_CreateNumberFormat) {
417 HandleScope scope(isolate);
418
419 DCHECK(args.length() == 3);
420
421 CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
422 CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
423 CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);
424
425 Handle<ObjectTemplateInfo> number_format_template =
426 I18N::GetTemplate(isolate);
427
428 // Create an empty object wrapper.
429 Handle<JSObject> local_object;
430 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
431 isolate, local_object,
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000432 ApiNatives::InstantiateObject(number_format_template));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400433
434 // Set number formatter as internal field of the resulting JS object.
435 icu::DecimalFormat* number_format =
436 NumberFormat::InitializeNumberFormat(isolate, locale, options, resolved);
437
438 if (!number_format) return isolate->ThrowIllegalOperation();
439
440 local_object->SetInternalField(0, reinterpret_cast<Smi*>(number_format));
441
442 Factory* factory = isolate->factory();
443 Handle<String> key = factory->NewStringFromStaticChars("numberFormat");
444 Handle<String> value = factory->NewStringFromStaticChars("valid");
445 JSObject::AddProperty(local_object, key, value, NONE);
446
447 Handle<Object> wrapper = isolate->global_handles()->Create(*local_object);
Ben Murdochc5610432016-08-08 18:44:38 +0100448 GlobalHandles::MakeWeak(wrapper.location(), wrapper.location(),
449 NumberFormat::DeleteNumberFormat,
450 WeakCallbackType::kInternalFields);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400451 return *local_object;
452}
453
454
455RUNTIME_FUNCTION(Runtime_InternalNumberFormat) {
456 HandleScope scope(isolate);
457
458 DCHECK(args.length() == 2);
459
460 CONVERT_ARG_HANDLE_CHECKED(JSObject, number_format_holder, 0);
461 CONVERT_ARG_HANDLE_CHECKED(Object, number, 1);
462
463 Handle<Object> value;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000464 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, value, Object::ToNumber(number));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400465
466 icu::DecimalFormat* number_format =
467 NumberFormat::UnpackNumberFormat(isolate, number_format_holder);
468 if (!number_format) return isolate->ThrowIllegalOperation();
469
470 icu::UnicodeString result;
471 number_format->format(value->Number(), result);
472
Ben Murdoch61f157c2016-09-16 13:49:30 +0100473 RETURN_RESULT_OR_FAILURE(
474 isolate, isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
475 reinterpret_cast<const uint16_t*>(result.getBuffer()),
476 result.length())));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400477}
478
479
480RUNTIME_FUNCTION(Runtime_InternalNumberParse) {
481 HandleScope scope(isolate);
482
483 DCHECK(args.length() == 2);
484
485 CONVERT_ARG_HANDLE_CHECKED(JSObject, number_format_holder, 0);
486 CONVERT_ARG_HANDLE_CHECKED(String, number_string, 1);
487
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000488 isolate->CountUsage(v8::Isolate::UseCounterFeature::kIntlV8Parse);
489
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400490 v8::String::Utf8Value utf8_number(v8::Utils::ToLocal(number_string));
491 icu::UnicodeString u_number(icu::UnicodeString::fromUTF8(*utf8_number));
492 icu::DecimalFormat* number_format =
493 NumberFormat::UnpackNumberFormat(isolate, number_format_holder);
494 if (!number_format) return isolate->ThrowIllegalOperation();
495
496 UErrorCode status = U_ZERO_ERROR;
497 icu::Formattable result;
498 // ICU 4.6 doesn't support parseCurrency call. We need to wait for ICU49
499 // to be part of Chrome.
500 // TODO(cira): Include currency parsing code using parseCurrency call.
501 // We need to check if the formatter parses all currencies or only the
502 // one it was constructed with (it will impact the API - how to return ISO
503 // code and the value).
504 number_format->parse(u_number, result, status);
505 if (U_FAILURE(status)) return isolate->heap()->undefined_value();
506
507 switch (result.getType()) {
508 case icu::Formattable::kDouble:
509 return *isolate->factory()->NewNumber(result.getDouble());
510 case icu::Formattable::kLong:
511 return *isolate->factory()->NewNumberFromInt(result.getLong());
512 case icu::Formattable::kInt64:
513 return *isolate->factory()->NewNumber(
514 static_cast<double>(result.getInt64()));
515 default:
516 return isolate->heap()->undefined_value();
517 }
518}
519
520
521RUNTIME_FUNCTION(Runtime_CreateCollator) {
522 HandleScope scope(isolate);
523
524 DCHECK(args.length() == 3);
525
526 CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
527 CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
528 CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);
529
530 Handle<ObjectTemplateInfo> collator_template = I18N::GetTemplate(isolate);
531
532 // Create an empty object wrapper.
533 Handle<JSObject> local_object;
534 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000535 isolate, local_object, ApiNatives::InstantiateObject(collator_template));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400536
537 // Set collator as internal field of the resulting JS object.
538 icu::Collator* collator =
539 Collator::InitializeCollator(isolate, locale, options, resolved);
540
541 if (!collator) return isolate->ThrowIllegalOperation();
542
543 local_object->SetInternalField(0, reinterpret_cast<Smi*>(collator));
544
545 Factory* factory = isolate->factory();
546 Handle<String> key = factory->NewStringFromStaticChars("collator");
547 Handle<String> value = factory->NewStringFromStaticChars("valid");
548 JSObject::AddProperty(local_object, key, value, NONE);
549
550 Handle<Object> wrapper = isolate->global_handles()->Create(*local_object);
Ben Murdochc5610432016-08-08 18:44:38 +0100551 GlobalHandles::MakeWeak(wrapper.location(), wrapper.location(),
552 Collator::DeleteCollator,
553 WeakCallbackType::kInternalFields);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400554 return *local_object;
555}
556
557
558RUNTIME_FUNCTION(Runtime_InternalCompare) {
559 HandleScope scope(isolate);
560
561 DCHECK(args.length() == 3);
562
563 CONVERT_ARG_HANDLE_CHECKED(JSObject, collator_holder, 0);
564 CONVERT_ARG_HANDLE_CHECKED(String, string1, 1);
565 CONVERT_ARG_HANDLE_CHECKED(String, string2, 2);
566
567 icu::Collator* collator = Collator::UnpackCollator(isolate, collator_holder);
568 if (!collator) return isolate->ThrowIllegalOperation();
569
Ben Murdochc5610432016-08-08 18:44:38 +0100570 string1 = String::Flatten(string1);
571 string2 = String::Flatten(string2);
572 DisallowHeapAllocation no_gc;
573 int32_t length1 = string1->length();
574 int32_t length2 = string2->length();
575 String::FlatContent flat1 = string1->GetFlatContent();
576 String::FlatContent flat2 = string2->GetFlatContent();
577 base::SmartArrayPointer<uc16> sap1;
578 base::SmartArrayPointer<uc16> sap2;
579 const UChar* string_val1 = GetUCharBufferFromFlat(flat1, &sap1, length1);
580 const UChar* string_val2 = GetUCharBufferFromFlat(flat2, &sap2, length2);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400581 UErrorCode status = U_ZERO_ERROR;
582 UCollationResult result =
Ben Murdochc5610432016-08-08 18:44:38 +0100583 collator->compare(string_val1, length1, string_val2, length2, status);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400584 if (U_FAILURE(status)) return isolate->ThrowIllegalOperation();
585
586 return *isolate->factory()->NewNumberFromInt(result);
587}
588
589
590RUNTIME_FUNCTION(Runtime_StringNormalize) {
591 HandleScope scope(isolate);
Ben Murdochc5610432016-08-08 18:44:38 +0100592 static const struct {
593 const char* name;
594 UNormalization2Mode mode;
595 } normalizationForms[] = {
596 {"nfc", UNORM2_COMPOSE},
597 {"nfc", UNORM2_DECOMPOSE},
598 {"nfkc", UNORM2_COMPOSE},
599 {"nfkc", UNORM2_DECOMPOSE},
600 };
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400601
602 DCHECK(args.length() == 2);
603
Ben Murdochc5610432016-08-08 18:44:38 +0100604 CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400605 CONVERT_NUMBER_CHECKED(int, form_id, Int32, args[1]);
606 RUNTIME_ASSERT(form_id >= 0 &&
607 static_cast<size_t>(form_id) < arraysize(normalizationForms));
608
Ben Murdochc5610432016-08-08 18:44:38 +0100609 int length = s->length();
610 s = String::Flatten(s);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400611 icu::UnicodeString result;
Ben Murdochc5610432016-08-08 18:44:38 +0100612 base::SmartArrayPointer<uc16> sap;
613 UErrorCode status = U_ZERO_ERROR;
614 {
615 DisallowHeapAllocation no_gc;
616 String::FlatContent flat = s->GetFlatContent();
617 const UChar* src = GetUCharBufferFromFlat(flat, &sap, length);
618 icu::UnicodeString input(false, src, length);
619 // Getting a singleton. Should not free it.
620 const icu::Normalizer2* normalizer =
621 icu::Normalizer2::getInstance(nullptr, normalizationForms[form_id].name,
622 normalizationForms[form_id].mode, status);
623 DCHECK(U_SUCCESS(status));
624 RUNTIME_ASSERT(normalizer != nullptr);
625 int32_t normalized_prefix_length =
626 normalizer->spanQuickCheckYes(input, status);
627 // Quick return if the input is already normalized.
628 if (length == normalized_prefix_length) return *s;
629 icu::UnicodeString unnormalized =
630 input.tempSubString(normalized_prefix_length);
631 // Read-only alias of the normalized prefix.
632 result.setTo(false, input.getBuffer(), normalized_prefix_length);
633 // copy-on-write; normalize the suffix and append to |result|.
634 normalizer->normalizeSecondAndAppend(result, unnormalized, status);
635 }
636
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400637 if (U_FAILURE(status)) {
638 return isolate->heap()->undefined_value();
639 }
640
Ben Murdoch61f157c2016-09-16 13:49:30 +0100641 RETURN_RESULT_OR_FAILURE(
642 isolate, isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
643 reinterpret_cast<const uint16_t*>(result.getBuffer()),
644 result.length())));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400645}
646
647
648RUNTIME_FUNCTION(Runtime_CreateBreakIterator) {
649 HandleScope scope(isolate);
650
651 DCHECK(args.length() == 3);
652
653 CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
654 CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
655 CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);
656
657 Handle<ObjectTemplateInfo> break_iterator_template =
658 I18N::GetTemplate2(isolate);
659
660 // Create an empty object wrapper.
661 Handle<JSObject> local_object;
662 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
663 isolate, local_object,
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000664 ApiNatives::InstantiateObject(break_iterator_template));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400665
666 // Set break iterator as internal field of the resulting JS object.
667 icu::BreakIterator* break_iterator = BreakIterator::InitializeBreakIterator(
668 isolate, locale, options, resolved);
669
670 if (!break_iterator) return isolate->ThrowIllegalOperation();
671
672 local_object->SetInternalField(0, reinterpret_cast<Smi*>(break_iterator));
673 // Make sure that the pointer to adopted text is NULL.
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000674 local_object->SetInternalField(1, static_cast<Smi*>(nullptr));
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400675
676 Factory* factory = isolate->factory();
677 Handle<String> key = factory->NewStringFromStaticChars("breakIterator");
678 Handle<String> value = factory->NewStringFromStaticChars("valid");
679 JSObject::AddProperty(local_object, key, value, NONE);
680
681 // Make object handle weak so we can delete the break iterator once GC kicks
682 // in.
683 Handle<Object> wrapper = isolate->global_handles()->Create(*local_object);
Ben Murdochc5610432016-08-08 18:44:38 +0100684 GlobalHandles::MakeWeak(wrapper.location(), wrapper.location(),
685 BreakIterator::DeleteBreakIterator,
686 WeakCallbackType::kInternalFields);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400687 return *local_object;
688}
689
690
691RUNTIME_FUNCTION(Runtime_BreakIteratorAdoptText) {
692 HandleScope scope(isolate);
693
694 DCHECK(args.length() == 2);
695
696 CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);
697 CONVERT_ARG_HANDLE_CHECKED(String, text, 1);
698
699 icu::BreakIterator* break_iterator =
700 BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
701 if (!break_iterator) return isolate->ThrowIllegalOperation();
702
703 icu::UnicodeString* u_text = reinterpret_cast<icu::UnicodeString*>(
704 break_iterator_holder->GetInternalField(1));
705 delete u_text;
706
Ben Murdochc5610432016-08-08 18:44:38 +0100707 int length = text->length();
708 text = String::Flatten(text);
709 DisallowHeapAllocation no_gc;
710 String::FlatContent flat = text->GetFlatContent();
711 base::SmartArrayPointer<uc16> sap;
712 const UChar* text_value = GetUCharBufferFromFlat(flat, &sap, length);
713 u_text = new icu::UnicodeString(text_value, length);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400714 break_iterator_holder->SetInternalField(1, reinterpret_cast<Smi*>(u_text));
715
716 break_iterator->setText(*u_text);
717
718 return isolate->heap()->undefined_value();
719}
720
721
722RUNTIME_FUNCTION(Runtime_BreakIteratorFirst) {
723 HandleScope scope(isolate);
724
725 DCHECK(args.length() == 1);
726
727 CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);
728
729 icu::BreakIterator* break_iterator =
730 BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
731 if (!break_iterator) return isolate->ThrowIllegalOperation();
732
733 return *isolate->factory()->NewNumberFromInt(break_iterator->first());
734}
735
736
737RUNTIME_FUNCTION(Runtime_BreakIteratorNext) {
738 HandleScope scope(isolate);
739
740 DCHECK(args.length() == 1);
741
742 CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);
743
744 icu::BreakIterator* break_iterator =
745 BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
746 if (!break_iterator) return isolate->ThrowIllegalOperation();
747
748 return *isolate->factory()->NewNumberFromInt(break_iterator->next());
749}
750
751
752RUNTIME_FUNCTION(Runtime_BreakIteratorCurrent) {
753 HandleScope scope(isolate);
754
755 DCHECK(args.length() == 1);
756
757 CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);
758
759 icu::BreakIterator* break_iterator =
760 BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
761 if (!break_iterator) return isolate->ThrowIllegalOperation();
762
763 return *isolate->factory()->NewNumberFromInt(break_iterator->current());
764}
765
766
767RUNTIME_FUNCTION(Runtime_BreakIteratorBreakType) {
768 HandleScope scope(isolate);
769
770 DCHECK(args.length() == 1);
771
772 CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);
773
774 icu::BreakIterator* break_iterator =
775 BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
776 if (!break_iterator) return isolate->ThrowIllegalOperation();
777
778 // TODO(cira): Remove cast once ICU fixes base BreakIterator class.
779 icu::RuleBasedBreakIterator* rule_based_iterator =
780 static_cast<icu::RuleBasedBreakIterator*>(break_iterator);
781 int32_t status = rule_based_iterator->getRuleStatus();
782 // Keep return values in sync with JavaScript BreakType enum.
783 if (status >= UBRK_WORD_NONE && status < UBRK_WORD_NONE_LIMIT) {
784 return *isolate->factory()->NewStringFromStaticChars("none");
785 } else if (status >= UBRK_WORD_NUMBER && status < UBRK_WORD_NUMBER_LIMIT) {
786 return *isolate->factory()->number_string();
787 } else if (status >= UBRK_WORD_LETTER && status < UBRK_WORD_LETTER_LIMIT) {
788 return *isolate->factory()->NewStringFromStaticChars("letter");
789 } else if (status >= UBRK_WORD_KANA && status < UBRK_WORD_KANA_LIMIT) {
790 return *isolate->factory()->NewStringFromStaticChars("kana");
791 } else if (status >= UBRK_WORD_IDEO && status < UBRK_WORD_IDEO_LIMIT) {
792 return *isolate->factory()->NewStringFromStaticChars("ideo");
793 } else {
794 return *isolate->factory()->NewStringFromStaticChars("unknown");
795 }
796}
Ben Murdochc5610432016-08-08 18:44:38 +0100797
798namespace {
799void ConvertCaseWithTransliterator(icu::UnicodeString* input,
800 const char* transliterator_id) {
801 UErrorCode status = U_ZERO_ERROR;
802 base::SmartPointer<icu::Transliterator> translit(
803 icu::Transliterator::createInstance(
804 icu::UnicodeString(transliterator_id, -1, US_INV), UTRANS_FORWARD,
805 status));
806 if (U_FAILURE(status)) return;
807 translit->transliterate(*input);
808}
809
810MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate,
811 bool is_to_upper, const char* lang) {
812 int32_t src_length = s->length();
813
814 // Greek uppercasing has to be done via transliteration.
815 // TODO(jshin): Drop this special-casing once ICU's regular case conversion
816 // API supports Greek uppercasing. See
817 // http://bugs.icu-project.org/trac/ticket/10582 .
818 // In the meantime, if there's no Greek character in |s|, call this
819 // function again with the root locale (lang="").
820 // ICU's C API for transliteration is nasty and we just use C++ API.
821 if (V8_UNLIKELY(is_to_upper && lang[0] == 'e' && lang[1] == 'l')) {
822 icu::UnicodeString converted;
823 base::SmartArrayPointer<uc16> sap;
824 {
825 DisallowHeapAllocation no_gc;
826 String::FlatContent flat = s->GetFlatContent();
827 const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length);
828 // Starts with the source string (read-only alias with copy-on-write
829 // semantics) and will be modified to contain the converted result.
830 // Using read-only alias at first saves one copy operation if
831 // transliteration does not change the input, which is rather rare.
832 // Moreover, transliteration takes rather long so that saving one copy
833 // helps only a little bit.
834 converted.setTo(false, src, src_length);
835 ConvertCaseWithTransliterator(&converted, "el-Upper");
836 // If no change is made, just return |s|.
837 if (converted.getBuffer() == src) return *s;
838 }
Ben Murdoch61f157c2016-09-16 13:49:30 +0100839 RETURN_RESULT_OR_FAILURE(
840 isolate,
Ben Murdochc5610432016-08-08 18:44:38 +0100841 isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
842 reinterpret_cast<const uint16_t*>(converted.getBuffer()),
843 converted.length())));
Ben Murdochc5610432016-08-08 18:44:38 +0100844 }
845
846 auto case_converter = is_to_upper ? u_strToUpper : u_strToLower;
847
848 int32_t dest_length = src_length;
849 UErrorCode status;
850 Handle<SeqTwoByteString> result;
851 base::SmartArrayPointer<uc16> sap;
852
853 // This is not a real loop. It'll be executed only once (no overflow) or
854 // twice (overflow).
855 for (int i = 0; i < 2; ++i) {
856 result =
857 isolate->factory()->NewRawTwoByteString(dest_length).ToHandleChecked();
858 DisallowHeapAllocation no_gc;
859 String::FlatContent flat = s->GetFlatContent();
860 const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length);
861 status = U_ZERO_ERROR;
862 dest_length = case_converter(reinterpret_cast<UChar*>(result->GetChars()),
863 dest_length, src, src_length, lang, &status);
864 if (status != U_BUFFER_OVERFLOW_ERROR) break;
865 }
866
867 // In most cases, the output will fill the destination buffer completely
868 // leading to an unterminated string (U_STRING_NOT_TERMINATED_WARNING).
869 // Only in rare cases, it'll be shorter than the destination buffer and
870 // |result| has to be truncated.
871 DCHECK(U_SUCCESS(status));
872 if (V8_LIKELY(status == U_STRING_NOT_TERMINATED_WARNING)) {
873 DCHECK(dest_length == result->length());
874 return *result;
875 }
876 if (U_SUCCESS(status)) {
877 DCHECK(dest_length < result->length());
878 return *Handle<SeqTwoByteString>::cast(
879 SeqString::Truncate(result, dest_length));
880 }
881 return *s;
882}
883
// Returns true iff |ch| is one of the ASCII upper case letters 'A'..'Z'.
inline bool IsASCIIUpper(uint16_t ch) {
  return 'A' <= ch && ch <= 'Z';
}
885
// Lower case mapping for every Latin-1 code point, indexed by the code point.
// Every entry is the identity except for 'A'..'Z' (0x41..0x5A) and
// 0xC0..0xDE other than 0xD7, which map to the code point 0x20 above them.
// The trailing comment on each row is the index of its first entry.
const uint8_t kToLower[256] = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,  // 0x00
    0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,  // 0x08
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,  // 0x10
    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,  // 0x18
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,  // 0x20
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,  // 0x28
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,  // 0x30
    0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,  // 0x38
    0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,  // 0x40
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,  // 0x48
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,  // 0x50
    0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,  // 0x58
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,  // 0x60
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,  // 0x68
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,  // 0x70
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,  // 0x78
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,  // 0x80
    0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,  // 0x88
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,  // 0x90
    0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,  // 0x98
    0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,  // 0xA0
    0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,  // 0xA8
    0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,  // 0xB0
    0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,  // 0xB8
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,  // 0xC0
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,  // 0xC8
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xD7,  // 0xD0
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,  // 0xD8
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,  // 0xE0
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,  // 0xE8
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,  // 0xF0
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,  // 0xF8
};
910
911inline uint16_t ToLatin1Lower(uint16_t ch) {
912 return static_cast<uint16_t>(kToLower[ch]);
913}
914
// Upper-cases |ch| if it is an ASCII lower case letter ('a'..'z') by clearing
// bit 5 (0x20); any other value is returned unchanged.
inline uint16_t ToASCIIUpper(uint16_t ch) {
  const int is_ascii_lower = (ch >= 'a' && ch <= 'z') ? 1 : 0;
  const int case_bit = is_ascii_lower << 5;
  return ch & ~case_bit;
}
918
// Upper-cases the Latin-1 code point |ch| by clearing bit 5 (0x20) when |ch|
// is 'a'..'z' or lies in 0xE0..0xFE. U+00F7 (division sign) is excluded
// because it has no case mapping; clearing the bit would turn it into U+00D7
// (multiplication sign).
// Does not work for U+00DF (sharp-s), U+00B5 (micro sign), U+00FF.
inline uint16_t ToLatin1Upper(uint16_t ch) {
  DCHECK(ch != 0xDF && ch != 0xB5 && ch != 0xFF);
  return ch &
         ~(((ch >= 'a' && ch <= 'z') || (((ch & 0xE0) == 0xE0) && ch != 0xF7))
           << 5);
}
926
927template <typename Char>
928bool ToUpperFastASCII(const Vector<const Char>& src,
929 Handle<SeqOneByteString> result) {
930 // Do a faster loop for the case where all the characters are ASCII.
931 uint16_t ored = 0;
932 int32_t index = 0;
933 for (auto it = src.begin(); it != src.end(); ++it) {
934 uint16_t ch = static_cast<uint16_t>(*it);
935 ored |= ch;
936 result->SeqOneByteStringSet(index++, ToASCIIUpper(ch));
937 }
938 return !(ored & ~0x7F);
939}
940
941const uint16_t sharp_s = 0xDF;
942
943template <typename Char>
944bool ToUpperOneByte(const Vector<const Char>& src,
945 Handle<SeqOneByteString> result, int* sharp_s_count) {
946 // Still pretty-fast path for the input with non-ASCII Latin-1 characters.
947
948 // There are two special cases.
949 // 1. U+00B5 and U+00FF are mapped to a character beyond U+00FF.
950 // 2. Lower case sharp-S converts to "SS" (two characters)
951 *sharp_s_count = 0;
952 int32_t index = 0;
953 for (auto it = src.begin(); it != src.end(); ++it) {
954 uint16_t ch = static_cast<uint16_t>(*it);
955 if (V8_UNLIKELY(ch == sharp_s)) {
956 ++(*sharp_s_count);
957 continue;
958 }
959 if (V8_UNLIKELY(ch == 0xB5 || ch == 0xFF)) {
960 // Since this upper-cased character does not fit in an 8-bit string, we
961 // need to take the 16-bit path.
962 return false;
963 }
964 result->SeqOneByteStringSet(index++, ToLatin1Upper(ch));
965 }
966
967 return true;
968}
969
970template <typename Char>
971void ToUpperWithSharpS(const Vector<const Char>& src,
972 Handle<SeqOneByteString> result) {
973 int32_t dest_index = 0;
974 for (auto it = src.begin(); it != src.end(); ++it) {
975 uint16_t ch = static_cast<uint16_t>(*it);
976 if (ch == sharp_s) {
977 result->SeqOneByteStringSet(dest_index++, 'S');
978 result->SeqOneByteStringSet(dest_index++, 'S');
979 } else {
980 result->SeqOneByteStringSet(dest_index++, ToLatin1Upper(ch));
981 }
982 }
983}
984
985} // namespace
986
987RUNTIME_FUNCTION(Runtime_StringToLowerCaseI18N) {
988 HandleScope scope(isolate);
989 DCHECK_EQ(args.length(), 1);
990 CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
991
992 int length = s->length();
993 s = String::Flatten(s);
994 // First scan the string for uppercase and non-ASCII characters:
995 if (s->HasOnlyOneByteChars()) {
996 unsigned first_index_to_lower = length;
997 for (int index = 0; index < length; ++index) {
998 // Blink specializes this path for one-byte strings, so it
999 // does not need to do a generic get, but can do the equivalent
1000 // of SeqOneByteStringGet.
1001 uint16_t ch = s->Get(index);
1002 if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) {
1003 first_index_to_lower = index;
1004 break;
1005 }
1006 }
1007
1008 // Nothing to do if the string is all ASCII with no uppercase.
1009 if (first_index_to_lower == length) return *s;
1010
1011 // We depend here on the invariant that the length of a Latin1
1012 // string is invariant under ToLowerCase, and the result always
1013 // fits in the Latin1 range in the *root locale*. It does not hold
1014 // for ToUpperCase even in the root locale.
1015 Handle<SeqOneByteString> result;
1016 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1017 isolate, result, isolate->factory()->NewRawOneByteString(length));
1018
1019 DisallowHeapAllocation no_gc;
1020 String::FlatContent flat = s->GetFlatContent();
1021 if (flat.IsOneByte()) {
1022 const uint8_t* src = flat.ToOneByteVector().start();
1023 CopyChars(result->GetChars(), src, first_index_to_lower);
1024 for (int index = first_index_to_lower; index < length; ++index) {
1025 uint16_t ch = static_cast<uint16_t>(src[index]);
1026 result->SeqOneByteStringSet(index, ToLatin1Lower(ch));
1027 }
1028 } else {
1029 const uint16_t* src = flat.ToUC16Vector().start();
1030 CopyChars(result->GetChars(), src, first_index_to_lower);
1031 for (int index = first_index_to_lower; index < length; ++index) {
1032 uint16_t ch = src[index];
1033 result->SeqOneByteStringSet(index, ToLatin1Lower(ch));
1034 }
1035 }
1036
1037 return *result;
1038 }
1039
1040 // Blink had an additional case here for ASCII 2-byte strings, but
1041 // that is subsumed by the above code (assuming there isn't a false
1042 // negative for HasOnlyOneByteChars).
1043
1044 // Do a slower implementation for cases that include non-ASCII characters.
1045 return LocaleConvertCase(s, isolate, false, "");
1046}
1047
1048RUNTIME_FUNCTION(Runtime_StringToUpperCaseI18N) {
1049 HandleScope scope(isolate);
1050 DCHECK_EQ(args.length(), 1);
1051 CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
1052
1053 // This function could be optimized for no-op cases the way lowercase
1054 // counterpart is, but in empirical testing, few actual calls to upper()
1055 // are no-ops. So, it wouldn't be worth the extra time for pre-scanning.
1056
1057 int32_t length = s->length();
1058 s = String::Flatten(s);
1059
1060 if (s->HasOnlyOneByteChars()) {
1061 Handle<SeqOneByteString> result;
1062 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1063 isolate, result, isolate->factory()->NewRawOneByteString(length));
1064
1065 int sharp_s_count;
1066 bool is_result_single_byte;
1067 {
1068 DisallowHeapAllocation no_gc;
1069 String::FlatContent flat = s->GetFlatContent();
1070 // If it was ok to slow down ASCII-only input slightly, ToUpperFastASCII
1071 // could be removed because ToUpperOneByte is pretty fast now (it
1072 // does not call ICU API any more.).
1073 if (flat.IsOneByte()) {
1074 Vector<const uint8_t> src = flat.ToOneByteVector();
1075 if (ToUpperFastASCII(src, result)) return *result;
1076 is_result_single_byte = ToUpperOneByte(src, result, &sharp_s_count);
1077 } else {
1078 DCHECK(flat.IsTwoByte());
1079 Vector<const uint16_t> src = flat.ToUC16Vector();
1080 if (ToUpperFastASCII(src, result)) return *result;
1081 is_result_single_byte = ToUpperOneByte(src, result, &sharp_s_count);
1082 }
1083 }
1084
1085 // Go to the full Unicode path if there are characters whose uppercase
1086 // is beyond the Latin-1 range (cannot be represented in OneByteString).
1087 if (V8_UNLIKELY(!is_result_single_byte)) {
1088 return LocaleConvertCase(s, isolate, true, "");
1089 }
1090
1091 if (sharp_s_count == 0) return *result;
1092
1093 // We have sharp_s_count sharp-s characters, but the result is still
1094 // in the Latin-1 range.
1095 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1096 isolate, result,
1097 isolate->factory()->NewRawOneByteString(length + sharp_s_count));
1098 DisallowHeapAllocation no_gc;
1099 String::FlatContent flat = s->GetFlatContent();
1100 if (flat.IsOneByte()) {
1101 ToUpperWithSharpS(flat.ToOneByteVector(), result);
1102 } else {
1103 ToUpperWithSharpS(flat.ToUC16Vector(), result);
1104 }
1105
1106 return *result;
1107 }
1108
1109 return LocaleConvertCase(s, isolate, true, "");
1110}
1111
1112RUNTIME_FUNCTION(Runtime_StringLocaleConvertCase) {
1113 HandleScope scope(isolate);
1114 DCHECK_EQ(args.length(), 3);
1115 CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
1116 CONVERT_BOOLEAN_ARG_CHECKED(is_upper, 1);
1117 CONVERT_ARG_HANDLE_CHECKED(SeqOneByteString, lang, 2);
1118
1119 // All the languages requiring special handling ("az", "el", "lt", "tr")
1120 // have a 2-letter language code.
1121 DCHECK(lang->length() == 2);
1122 uint8_t lang_str[3];
1123 memcpy(lang_str, lang->GetChars(), 2);
1124 lang_str[2] = 0;
1125 s = String::Flatten(s);
1126 // TODO(jshin): Consider adding a fast path for ASCII or Latin-1. The fastpath
1127 // in the root locale needs to be adjusted for az, lt and tr because even case
1128 // mapping of ASCII range characters are different in those locales.
1129 // Greek (el) does not require any adjustment, though.
1130 return LocaleConvertCase(s, isolate, is_upper,
1131 reinterpret_cast<const char*>(lang_str));
1132}
1133
Ben Murdoch61f157c2016-09-16 13:49:30 +01001134RUNTIME_FUNCTION(Runtime_DateCacheVersion) {
1135 HandleScope scope(isolate);
1136 DCHECK_EQ(0, args.length());
1137 if (isolate->serializer_enabled()) return isolate->heap()->undefined_value();
1138 if (!isolate->eternal_handles()->Exists(EternalHandles::DATE_CACHE_VERSION)) {
1139 Handle<FixedArray> date_cache_version =
1140 isolate->factory()->NewFixedArray(1, TENURED);
1141 date_cache_version->set(0, Smi::FromInt(0));
1142 isolate->eternal_handles()->CreateSingleton(
1143 isolate, *date_cache_version, EternalHandles::DATE_CACHE_VERSION);
1144 }
1145 Handle<FixedArray> date_cache_version =
1146 Handle<FixedArray>::cast(isolate->eternal_handles()->GetSingleton(
1147 EternalHandles::DATE_CACHE_VERSION));
1148 return date_cache_version->get(0);
1149}
1150
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001151} // namespace internal
1152} // namespace v8
Emily Bernierd0a1eb72015-03-24 16:35:39 -04001153
1154#endif // V8_I18N_SUPPORT