Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 1 | // Copyright 2014 the V8 project authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | |
| 6 | #ifdef V8_I18N_SUPPORT |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 7 | #include "src/runtime/runtime-utils.h" |
| 8 | |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 9 | #include "src/api.h" |
| 10 | #include "src/api-natives.h" |
| 11 | #include "src/arguments.h" |
| 12 | #include "src/factory.h" |
| 13 | #include "src/i18n.h" |
| 14 | #include "src/isolate-inl.h" |
| 15 | #include "src/messages.h" |
| 16 | |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 17 | #include "unicode/brkiter.h" |
| 18 | #include "unicode/calendar.h" |
| 19 | #include "unicode/coll.h" |
| 20 | #include "unicode/curramt.h" |
| 21 | #include "unicode/datefmt.h" |
| 22 | #include "unicode/dcfmtsym.h" |
| 23 | #include "unicode/decimfmt.h" |
| 24 | #include "unicode/dtfmtsym.h" |
| 25 | #include "unicode/dtptngen.h" |
| 26 | #include "unicode/locid.h" |
Ben Murdoch | c561043 | 2016-08-08 18:44:38 +0100 | [diff] [blame] | 27 | #include "unicode/normalizer2.h" |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 28 | #include "unicode/numfmt.h" |
| 29 | #include "unicode/numsys.h" |
| 30 | #include "unicode/rbbi.h" |
| 31 | #include "unicode/smpdtfmt.h" |
| 32 | #include "unicode/timezone.h" |
Ben Murdoch | c561043 | 2016-08-08 18:44:38 +0100 | [diff] [blame] | 33 | #include "unicode/translit.h" |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 34 | #include "unicode/uchar.h" |
| 35 | #include "unicode/ucol.h" |
| 36 | #include "unicode/ucurr.h" |
| 37 | #include "unicode/uloc.h" |
Ben Murdoch | c561043 | 2016-08-08 18:44:38 +0100 | [diff] [blame] | 38 | #include "unicode/unistr.h" |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 39 | #include "unicode/unum.h" |
| 40 | #include "unicode/uversion.h" |
| 41 | |
| 42 | |
| 43 | namespace v8 { |
| 44 | namespace internal { |
Ben Murdoch | c561043 | 2016-08-08 18:44:38 +0100 | [diff] [blame] | 45 | namespace { |
| 46 | |
| 47 | const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat, |
| 48 | base::SmartArrayPointer<uc16>* dest, |
| 49 | int32_t length) { |
| 50 | DCHECK(flat.IsFlat()); |
| 51 | if (flat.IsOneByte()) { |
| 52 | if (dest->is_empty()) { |
| 53 | dest->Reset(NewArray<uc16>(length)); |
| 54 | CopyChars(dest->get(), flat.ToOneByteVector().start(), length); |
| 55 | } |
| 56 | return reinterpret_cast<const UChar*>(dest->get()); |
| 57 | } else { |
| 58 | return reinterpret_cast<const UChar*>(flat.ToUC16Vector().start()); |
| 59 | } |
| 60 | } |
| 61 | |
| 62 | } // namespace |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 63 | |
| 64 | RUNTIME_FUNCTION(Runtime_CanonicalizeLanguageTag) { |
| 65 | HandleScope scope(isolate); |
| 66 | Factory* factory = isolate->factory(); |
| 67 | |
| 68 | DCHECK(args.length() == 1); |
| 69 | CONVERT_ARG_HANDLE_CHECKED(String, locale_id_str, 0); |
| 70 | |
| 71 | v8::String::Utf8Value locale_id(v8::Utils::ToLocal(locale_id_str)); |
| 72 | |
| 73 | // Return value which denotes invalid language tag. |
| 74 | const char* const kInvalidTag = "invalid-tag"; |
| 75 | |
| 76 | UErrorCode error = U_ZERO_ERROR; |
| 77 | char icu_result[ULOC_FULLNAME_CAPACITY]; |
| 78 | int icu_length = 0; |
| 79 | |
| 80 | uloc_forLanguageTag(*locale_id, icu_result, ULOC_FULLNAME_CAPACITY, |
| 81 | &icu_length, &error); |
| 82 | if (U_FAILURE(error) || icu_length == 0) { |
| 83 | return *factory->NewStringFromAsciiChecked(kInvalidTag); |
| 84 | } |
| 85 | |
| 86 | char result[ULOC_FULLNAME_CAPACITY]; |
| 87 | |
| 88 | // Force strict BCP47 rules. |
| 89 | uloc_toLanguageTag(icu_result, result, ULOC_FULLNAME_CAPACITY, TRUE, &error); |
| 90 | |
| 91 | if (U_FAILURE(error)) { |
| 92 | return *factory->NewStringFromAsciiChecked(kInvalidTag); |
| 93 | } |
| 94 | |
| 95 | return *factory->NewStringFromAsciiChecked(result); |
| 96 | } |
| 97 | |
| 98 | |
| 99 | RUNTIME_FUNCTION(Runtime_AvailableLocalesOf) { |
| 100 | HandleScope scope(isolate); |
| 101 | Factory* factory = isolate->factory(); |
| 102 | |
| 103 | DCHECK(args.length() == 1); |
| 104 | CONVERT_ARG_HANDLE_CHECKED(String, service, 0); |
| 105 | |
| 106 | const icu::Locale* available_locales = NULL; |
| 107 | int32_t count = 0; |
| 108 | |
| 109 | if (service->IsUtf8EqualTo(CStrVector("collator"))) { |
| 110 | available_locales = icu::Collator::getAvailableLocales(count); |
| 111 | } else if (service->IsUtf8EqualTo(CStrVector("numberformat"))) { |
| 112 | available_locales = icu::NumberFormat::getAvailableLocales(count); |
| 113 | } else if (service->IsUtf8EqualTo(CStrVector("dateformat"))) { |
| 114 | available_locales = icu::DateFormat::getAvailableLocales(count); |
| 115 | } else if (service->IsUtf8EqualTo(CStrVector("breakiterator"))) { |
| 116 | available_locales = icu::BreakIterator::getAvailableLocales(count); |
| 117 | } |
| 118 | |
| 119 | UErrorCode error = U_ZERO_ERROR; |
| 120 | char result[ULOC_FULLNAME_CAPACITY]; |
| 121 | Handle<JSObject> locales = factory->NewJSObject(isolate->object_function()); |
| 122 | |
| 123 | for (int32_t i = 0; i < count; ++i) { |
| 124 | const char* icu_name = available_locales[i].getName(); |
| 125 | |
| 126 | error = U_ZERO_ERROR; |
| 127 | // No need to force strict BCP47 rules. |
| 128 | uloc_toLanguageTag(icu_name, result, ULOC_FULLNAME_CAPACITY, FALSE, &error); |
| 129 | if (U_FAILURE(error)) { |
| 130 | // This shouldn't happen, but lets not break the user. |
| 131 | continue; |
| 132 | } |
| 133 | |
| 134 | RETURN_FAILURE_ON_EXCEPTION( |
| 135 | isolate, JSObject::SetOwnPropertyIgnoreAttributes( |
| 136 | locales, factory->NewStringFromAsciiChecked(result), |
| 137 | factory->NewNumber(i), NONE)); |
| 138 | } |
| 139 | |
| 140 | return *locales; |
| 141 | } |
| 142 | |
| 143 | |
| 144 | RUNTIME_FUNCTION(Runtime_GetDefaultICULocale) { |
| 145 | HandleScope scope(isolate); |
| 146 | Factory* factory = isolate->factory(); |
| 147 | |
| 148 | DCHECK(args.length() == 0); |
| 149 | |
| 150 | icu::Locale default_locale; |
| 151 | |
| 152 | // Set the locale |
| 153 | char result[ULOC_FULLNAME_CAPACITY]; |
| 154 | UErrorCode status = U_ZERO_ERROR; |
| 155 | uloc_toLanguageTag(default_locale.getName(), result, ULOC_FULLNAME_CAPACITY, |
| 156 | FALSE, &status); |
| 157 | if (U_SUCCESS(status)) { |
| 158 | return *factory->NewStringFromAsciiChecked(result); |
| 159 | } |
| 160 | |
| 161 | return *factory->NewStringFromStaticChars("und"); |
| 162 | } |
| 163 | |
| 164 | |
| 165 | RUNTIME_FUNCTION(Runtime_GetLanguageTagVariants) { |
| 166 | HandleScope scope(isolate); |
| 167 | Factory* factory = isolate->factory(); |
| 168 | |
| 169 | DCHECK(args.length() == 1); |
| 170 | |
| 171 | CONVERT_ARG_HANDLE_CHECKED(JSArray, input, 0); |
| 172 | |
| 173 | uint32_t length = static_cast<uint32_t>(input->length()->Number()); |
| 174 | // Set some limit to prevent fuzz tests from going OOM. |
| 175 | // Can be bumped when callers' requirements change. |
| 176 | RUNTIME_ASSERT(length < 100); |
| 177 | Handle<FixedArray> output = factory->NewFixedArray(length); |
| 178 | Handle<Name> maximized = factory->NewStringFromStaticChars("maximized"); |
| 179 | Handle<Name> base = factory->NewStringFromStaticChars("base"); |
| 180 | for (unsigned int i = 0; i < length; ++i) { |
| 181 | Handle<Object> locale_id; |
Ben Murdoch | da12d29 | 2016-06-02 14:46:10 +0100 | [diff] [blame] | 182 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
| 183 | isolate, locale_id, JSReceiver::GetElement(isolate, input, i)); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 184 | if (!locale_id->IsString()) { |
| 185 | return isolate->Throw(*factory->illegal_argument_string()); |
| 186 | } |
| 187 | |
| 188 | v8::String::Utf8Value utf8_locale_id( |
| 189 | v8::Utils::ToLocal(Handle<String>::cast(locale_id))); |
| 190 | |
| 191 | UErrorCode error = U_ZERO_ERROR; |
| 192 | |
| 193 | // Convert from BCP47 to ICU format. |
| 194 | // de-DE-u-co-phonebk -> de_DE@collation=phonebook |
| 195 | char icu_locale[ULOC_FULLNAME_CAPACITY]; |
| 196 | int icu_locale_length = 0; |
| 197 | uloc_forLanguageTag(*utf8_locale_id, icu_locale, ULOC_FULLNAME_CAPACITY, |
| 198 | &icu_locale_length, &error); |
| 199 | if (U_FAILURE(error) || icu_locale_length == 0) { |
| 200 | return isolate->Throw(*factory->illegal_argument_string()); |
| 201 | } |
| 202 | |
| 203 | // Maximize the locale. |
| 204 | // de_DE@collation=phonebook -> de_Latn_DE@collation=phonebook |
| 205 | char icu_max_locale[ULOC_FULLNAME_CAPACITY]; |
| 206 | uloc_addLikelySubtags(icu_locale, icu_max_locale, ULOC_FULLNAME_CAPACITY, |
| 207 | &error); |
| 208 | |
| 209 | // Remove extensions from maximized locale. |
| 210 | // de_Latn_DE@collation=phonebook -> de_Latn_DE |
| 211 | char icu_base_max_locale[ULOC_FULLNAME_CAPACITY]; |
| 212 | uloc_getBaseName(icu_max_locale, icu_base_max_locale, |
| 213 | ULOC_FULLNAME_CAPACITY, &error); |
| 214 | |
| 215 | // Get original name without extensions. |
| 216 | // de_DE@collation=phonebook -> de_DE |
| 217 | char icu_base_locale[ULOC_FULLNAME_CAPACITY]; |
| 218 | uloc_getBaseName(icu_locale, icu_base_locale, ULOC_FULLNAME_CAPACITY, |
| 219 | &error); |
| 220 | |
| 221 | // Convert from ICU locale format to BCP47 format. |
| 222 | // de_Latn_DE -> de-Latn-DE |
| 223 | char base_max_locale[ULOC_FULLNAME_CAPACITY]; |
| 224 | uloc_toLanguageTag(icu_base_max_locale, base_max_locale, |
| 225 | ULOC_FULLNAME_CAPACITY, FALSE, &error); |
| 226 | |
| 227 | // de_DE -> de-DE |
| 228 | char base_locale[ULOC_FULLNAME_CAPACITY]; |
| 229 | uloc_toLanguageTag(icu_base_locale, base_locale, ULOC_FULLNAME_CAPACITY, |
| 230 | FALSE, &error); |
| 231 | |
| 232 | if (U_FAILURE(error)) { |
| 233 | return isolate->Throw(*factory->illegal_argument_string()); |
| 234 | } |
| 235 | |
| 236 | Handle<JSObject> result = factory->NewJSObject(isolate->object_function()); |
| 237 | Handle<String> value = factory->NewStringFromAsciiChecked(base_max_locale); |
| 238 | JSObject::AddProperty(result, maximized, value, NONE); |
| 239 | value = factory->NewStringFromAsciiChecked(base_locale); |
| 240 | JSObject::AddProperty(result, base, value, NONE); |
| 241 | output->set(i, *result); |
| 242 | } |
| 243 | |
| 244 | Handle<JSArray> result = factory->NewJSArrayWithElements(output); |
| 245 | result->set_length(Smi::FromInt(length)); |
| 246 | return *result; |
| 247 | } |
| 248 | |
| 249 | |
| 250 | RUNTIME_FUNCTION(Runtime_IsInitializedIntlObject) { |
| 251 | HandleScope scope(isolate); |
| 252 | |
| 253 | DCHECK(args.length() == 1); |
| 254 | |
| 255 | CONVERT_ARG_HANDLE_CHECKED(Object, input, 0); |
| 256 | |
| 257 | if (!input->IsJSObject()) return isolate->heap()->false_value(); |
| 258 | Handle<JSObject> obj = Handle<JSObject>::cast(input); |
| 259 | |
| 260 | Handle<Symbol> marker = isolate->factory()->intl_initialized_marker_symbol(); |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 261 | Handle<Object> tag = JSReceiver::GetDataProperty(obj, marker); |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 262 | return isolate->heap()->ToBoolean(!tag->IsUndefined(isolate)); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 263 | } |
| 264 | |
| 265 | |
| 266 | RUNTIME_FUNCTION(Runtime_IsInitializedIntlObjectOfType) { |
| 267 | HandleScope scope(isolate); |
| 268 | |
| 269 | DCHECK(args.length() == 2); |
| 270 | |
| 271 | CONVERT_ARG_HANDLE_CHECKED(Object, input, 0); |
| 272 | CONVERT_ARG_HANDLE_CHECKED(String, expected_type, 1); |
| 273 | |
| 274 | if (!input->IsJSObject()) return isolate->heap()->false_value(); |
| 275 | Handle<JSObject> obj = Handle<JSObject>::cast(input); |
| 276 | |
| 277 | Handle<Symbol> marker = isolate->factory()->intl_initialized_marker_symbol(); |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 278 | Handle<Object> tag = JSReceiver::GetDataProperty(obj, marker); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 279 | return isolate->heap()->ToBoolean(tag->IsString() && |
| 280 | String::cast(*tag)->Equals(*expected_type)); |
| 281 | } |
| 282 | |
| 283 | |
| 284 | RUNTIME_FUNCTION(Runtime_MarkAsInitializedIntlObjectOfType) { |
| 285 | HandleScope scope(isolate); |
| 286 | |
| 287 | DCHECK(args.length() == 3); |
| 288 | |
| 289 | CONVERT_ARG_HANDLE_CHECKED(JSObject, input, 0); |
| 290 | CONVERT_ARG_HANDLE_CHECKED(String, type, 1); |
| 291 | CONVERT_ARG_HANDLE_CHECKED(JSObject, impl, 2); |
| 292 | |
| 293 | Handle<Symbol> marker = isolate->factory()->intl_initialized_marker_symbol(); |
| 294 | JSObject::SetProperty(input, marker, type, STRICT).Assert(); |
| 295 | |
| 296 | marker = isolate->factory()->intl_impl_object_symbol(); |
| 297 | JSObject::SetProperty(input, marker, impl, STRICT).Assert(); |
| 298 | |
| 299 | return isolate->heap()->undefined_value(); |
| 300 | } |
| 301 | |
| 302 | |
| 303 | RUNTIME_FUNCTION(Runtime_GetImplFromInitializedIntlObject) { |
| 304 | HandleScope scope(isolate); |
| 305 | |
| 306 | DCHECK(args.length() == 1); |
| 307 | |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 308 | CONVERT_ARG_HANDLE_CHECKED(JSObject, input, 0); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 309 | |
| 310 | if (!input->IsJSObject()) { |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 311 | THROW_NEW_ERROR_RETURN_FAILURE( |
| 312 | isolate, NewTypeError(MessageTemplate::kNotIntlObject, input)); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 313 | } |
| 314 | |
| 315 | Handle<JSObject> obj = Handle<JSObject>::cast(input); |
| 316 | |
| 317 | Handle<Symbol> marker = isolate->factory()->intl_impl_object_symbol(); |
| 318 | |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 319 | Handle<Object> impl = JSReceiver::GetDataProperty(obj, marker); |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 320 | if (impl->IsTheHole(isolate)) { |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 321 | THROW_NEW_ERROR_RETURN_FAILURE( |
| 322 | isolate, NewTypeError(MessageTemplate::kNotIntlObject, obj)); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 323 | } |
| 324 | return *impl; |
| 325 | } |
| 326 | |
| 327 | |
| 328 | RUNTIME_FUNCTION(Runtime_CreateDateTimeFormat) { |
| 329 | HandleScope scope(isolate); |
| 330 | |
| 331 | DCHECK(args.length() == 3); |
| 332 | |
| 333 | CONVERT_ARG_HANDLE_CHECKED(String, locale, 0); |
| 334 | CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1); |
| 335 | CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2); |
| 336 | |
| 337 | Handle<ObjectTemplateInfo> date_format_template = I18N::GetTemplate(isolate); |
| 338 | |
| 339 | // Create an empty object wrapper. |
| 340 | Handle<JSObject> local_object; |
| 341 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
| 342 | isolate, local_object, |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 343 | ApiNatives::InstantiateObject(date_format_template)); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 344 | |
| 345 | // Set date time formatter as internal field of the resulting JS object. |
| 346 | icu::SimpleDateFormat* date_format = |
| 347 | DateFormat::InitializeDateTimeFormat(isolate, locale, options, resolved); |
| 348 | |
| 349 | if (!date_format) return isolate->ThrowIllegalOperation(); |
| 350 | |
| 351 | local_object->SetInternalField(0, reinterpret_cast<Smi*>(date_format)); |
| 352 | |
| 353 | Factory* factory = isolate->factory(); |
| 354 | Handle<String> key = factory->NewStringFromStaticChars("dateFormat"); |
| 355 | Handle<String> value = factory->NewStringFromStaticChars("valid"); |
| 356 | JSObject::AddProperty(local_object, key, value, NONE); |
| 357 | |
| 358 | // Make object handle weak so we can delete the data format once GC kicks in. |
| 359 | Handle<Object> wrapper = isolate->global_handles()->Create(*local_object); |
Ben Murdoch | c561043 | 2016-08-08 18:44:38 +0100 | [diff] [blame] | 360 | GlobalHandles::MakeWeak(wrapper.location(), wrapper.location(), |
| 361 | DateFormat::DeleteDateFormat, |
| 362 | WeakCallbackType::kInternalFields); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 363 | return *local_object; |
| 364 | } |
| 365 | |
| 366 | |
| 367 | RUNTIME_FUNCTION(Runtime_InternalDateFormat) { |
| 368 | HandleScope scope(isolate); |
| 369 | |
| 370 | DCHECK(args.length() == 2); |
| 371 | |
| 372 | CONVERT_ARG_HANDLE_CHECKED(JSObject, date_format_holder, 0); |
| 373 | CONVERT_ARG_HANDLE_CHECKED(JSDate, date, 1); |
| 374 | |
| 375 | Handle<Object> value; |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 376 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, value, Object::ToNumber(date)); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 377 | |
| 378 | icu::SimpleDateFormat* date_format = |
| 379 | DateFormat::UnpackDateFormat(isolate, date_format_holder); |
| 380 | if (!date_format) return isolate->ThrowIllegalOperation(); |
| 381 | |
| 382 | icu::UnicodeString result; |
| 383 | date_format->format(value->Number(), result); |
| 384 | |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 385 | RETURN_RESULT_OR_FAILURE( |
| 386 | isolate, isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>( |
| 387 | reinterpret_cast<const uint16_t*>(result.getBuffer()), |
| 388 | result.length()))); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 389 | } |
| 390 | |
| 391 | |
| 392 | RUNTIME_FUNCTION(Runtime_InternalDateParse) { |
| 393 | HandleScope scope(isolate); |
| 394 | |
| 395 | DCHECK(args.length() == 2); |
| 396 | |
| 397 | CONVERT_ARG_HANDLE_CHECKED(JSObject, date_format_holder, 0); |
| 398 | CONVERT_ARG_HANDLE_CHECKED(String, date_string, 1); |
| 399 | |
| 400 | v8::String::Utf8Value utf8_date(v8::Utils::ToLocal(date_string)); |
| 401 | icu::UnicodeString u_date(icu::UnicodeString::fromUTF8(*utf8_date)); |
| 402 | icu::SimpleDateFormat* date_format = |
| 403 | DateFormat::UnpackDateFormat(isolate, date_format_holder); |
| 404 | if (!date_format) return isolate->ThrowIllegalOperation(); |
| 405 | |
| 406 | UErrorCode status = U_ZERO_ERROR; |
| 407 | UDate date = date_format->parse(u_date, status); |
| 408 | if (U_FAILURE(status)) return isolate->heap()->undefined_value(); |
| 409 | |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 410 | RETURN_RESULT_OR_FAILURE( |
| 411 | isolate, JSDate::New(isolate->date_function(), isolate->date_function(), |
| 412 | static_cast<double>(date))); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 413 | } |
| 414 | |
| 415 | |
| 416 | RUNTIME_FUNCTION(Runtime_CreateNumberFormat) { |
| 417 | HandleScope scope(isolate); |
| 418 | |
| 419 | DCHECK(args.length() == 3); |
| 420 | |
| 421 | CONVERT_ARG_HANDLE_CHECKED(String, locale, 0); |
| 422 | CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1); |
| 423 | CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2); |
| 424 | |
| 425 | Handle<ObjectTemplateInfo> number_format_template = |
| 426 | I18N::GetTemplate(isolate); |
| 427 | |
| 428 | // Create an empty object wrapper. |
| 429 | Handle<JSObject> local_object; |
| 430 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
| 431 | isolate, local_object, |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 432 | ApiNatives::InstantiateObject(number_format_template)); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 433 | |
| 434 | // Set number formatter as internal field of the resulting JS object. |
| 435 | icu::DecimalFormat* number_format = |
| 436 | NumberFormat::InitializeNumberFormat(isolate, locale, options, resolved); |
| 437 | |
| 438 | if (!number_format) return isolate->ThrowIllegalOperation(); |
| 439 | |
| 440 | local_object->SetInternalField(0, reinterpret_cast<Smi*>(number_format)); |
| 441 | |
| 442 | Factory* factory = isolate->factory(); |
| 443 | Handle<String> key = factory->NewStringFromStaticChars("numberFormat"); |
| 444 | Handle<String> value = factory->NewStringFromStaticChars("valid"); |
| 445 | JSObject::AddProperty(local_object, key, value, NONE); |
| 446 | |
| 447 | Handle<Object> wrapper = isolate->global_handles()->Create(*local_object); |
Ben Murdoch | c561043 | 2016-08-08 18:44:38 +0100 | [diff] [blame] | 448 | GlobalHandles::MakeWeak(wrapper.location(), wrapper.location(), |
| 449 | NumberFormat::DeleteNumberFormat, |
| 450 | WeakCallbackType::kInternalFields); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 451 | return *local_object; |
| 452 | } |
| 453 | |
| 454 | |
| 455 | RUNTIME_FUNCTION(Runtime_InternalNumberFormat) { |
| 456 | HandleScope scope(isolate); |
| 457 | |
| 458 | DCHECK(args.length() == 2); |
| 459 | |
| 460 | CONVERT_ARG_HANDLE_CHECKED(JSObject, number_format_holder, 0); |
| 461 | CONVERT_ARG_HANDLE_CHECKED(Object, number, 1); |
| 462 | |
| 463 | Handle<Object> value; |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 464 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, value, Object::ToNumber(number)); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 465 | |
| 466 | icu::DecimalFormat* number_format = |
| 467 | NumberFormat::UnpackNumberFormat(isolate, number_format_holder); |
| 468 | if (!number_format) return isolate->ThrowIllegalOperation(); |
| 469 | |
| 470 | icu::UnicodeString result; |
| 471 | number_format->format(value->Number(), result); |
| 472 | |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 473 | RETURN_RESULT_OR_FAILURE( |
| 474 | isolate, isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>( |
| 475 | reinterpret_cast<const uint16_t*>(result.getBuffer()), |
| 476 | result.length()))); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 477 | } |
| 478 | |
| 479 | |
| 480 | RUNTIME_FUNCTION(Runtime_InternalNumberParse) { |
| 481 | HandleScope scope(isolate); |
| 482 | |
| 483 | DCHECK(args.length() == 2); |
| 484 | |
| 485 | CONVERT_ARG_HANDLE_CHECKED(JSObject, number_format_holder, 0); |
| 486 | CONVERT_ARG_HANDLE_CHECKED(String, number_string, 1); |
| 487 | |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 488 | isolate->CountUsage(v8::Isolate::UseCounterFeature::kIntlV8Parse); |
| 489 | |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 490 | v8::String::Utf8Value utf8_number(v8::Utils::ToLocal(number_string)); |
| 491 | icu::UnicodeString u_number(icu::UnicodeString::fromUTF8(*utf8_number)); |
| 492 | icu::DecimalFormat* number_format = |
| 493 | NumberFormat::UnpackNumberFormat(isolate, number_format_holder); |
| 494 | if (!number_format) return isolate->ThrowIllegalOperation(); |
| 495 | |
| 496 | UErrorCode status = U_ZERO_ERROR; |
| 497 | icu::Formattable result; |
| 498 | // ICU 4.6 doesn't support parseCurrency call. We need to wait for ICU49 |
| 499 | // to be part of Chrome. |
| 500 | // TODO(cira): Include currency parsing code using parseCurrency call. |
| 501 | // We need to check if the formatter parses all currencies or only the |
| 502 | // one it was constructed with (it will impact the API - how to return ISO |
| 503 | // code and the value). |
| 504 | number_format->parse(u_number, result, status); |
| 505 | if (U_FAILURE(status)) return isolate->heap()->undefined_value(); |
| 506 | |
| 507 | switch (result.getType()) { |
| 508 | case icu::Formattable::kDouble: |
| 509 | return *isolate->factory()->NewNumber(result.getDouble()); |
| 510 | case icu::Formattable::kLong: |
| 511 | return *isolate->factory()->NewNumberFromInt(result.getLong()); |
| 512 | case icu::Formattable::kInt64: |
| 513 | return *isolate->factory()->NewNumber( |
| 514 | static_cast<double>(result.getInt64())); |
| 515 | default: |
| 516 | return isolate->heap()->undefined_value(); |
| 517 | } |
| 518 | } |
| 519 | |
| 520 | |
| 521 | RUNTIME_FUNCTION(Runtime_CreateCollator) { |
| 522 | HandleScope scope(isolate); |
| 523 | |
| 524 | DCHECK(args.length() == 3); |
| 525 | |
| 526 | CONVERT_ARG_HANDLE_CHECKED(String, locale, 0); |
| 527 | CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1); |
| 528 | CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2); |
| 529 | |
| 530 | Handle<ObjectTemplateInfo> collator_template = I18N::GetTemplate(isolate); |
| 531 | |
| 532 | // Create an empty object wrapper. |
| 533 | Handle<JSObject> local_object; |
| 534 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 535 | isolate, local_object, ApiNatives::InstantiateObject(collator_template)); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 536 | |
| 537 | // Set collator as internal field of the resulting JS object. |
| 538 | icu::Collator* collator = |
| 539 | Collator::InitializeCollator(isolate, locale, options, resolved); |
| 540 | |
| 541 | if (!collator) return isolate->ThrowIllegalOperation(); |
| 542 | |
| 543 | local_object->SetInternalField(0, reinterpret_cast<Smi*>(collator)); |
| 544 | |
| 545 | Factory* factory = isolate->factory(); |
| 546 | Handle<String> key = factory->NewStringFromStaticChars("collator"); |
| 547 | Handle<String> value = factory->NewStringFromStaticChars("valid"); |
| 548 | JSObject::AddProperty(local_object, key, value, NONE); |
| 549 | |
| 550 | Handle<Object> wrapper = isolate->global_handles()->Create(*local_object); |
Ben Murdoch | c561043 | 2016-08-08 18:44:38 +0100 | [diff] [blame] | 551 | GlobalHandles::MakeWeak(wrapper.location(), wrapper.location(), |
| 552 | Collator::DeleteCollator, |
| 553 | WeakCallbackType::kInternalFields); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 554 | return *local_object; |
| 555 | } |
| 556 | |
| 557 | |
| 558 | RUNTIME_FUNCTION(Runtime_InternalCompare) { |
| 559 | HandleScope scope(isolate); |
| 560 | |
| 561 | DCHECK(args.length() == 3); |
| 562 | |
| 563 | CONVERT_ARG_HANDLE_CHECKED(JSObject, collator_holder, 0); |
| 564 | CONVERT_ARG_HANDLE_CHECKED(String, string1, 1); |
| 565 | CONVERT_ARG_HANDLE_CHECKED(String, string2, 2); |
| 566 | |
| 567 | icu::Collator* collator = Collator::UnpackCollator(isolate, collator_holder); |
| 568 | if (!collator) return isolate->ThrowIllegalOperation(); |
| 569 | |
Ben Murdoch | c561043 | 2016-08-08 18:44:38 +0100 | [diff] [blame] | 570 | string1 = String::Flatten(string1); |
| 571 | string2 = String::Flatten(string2); |
| 572 | DisallowHeapAllocation no_gc; |
| 573 | int32_t length1 = string1->length(); |
| 574 | int32_t length2 = string2->length(); |
| 575 | String::FlatContent flat1 = string1->GetFlatContent(); |
| 576 | String::FlatContent flat2 = string2->GetFlatContent(); |
| 577 | base::SmartArrayPointer<uc16> sap1; |
| 578 | base::SmartArrayPointer<uc16> sap2; |
| 579 | const UChar* string_val1 = GetUCharBufferFromFlat(flat1, &sap1, length1); |
| 580 | const UChar* string_val2 = GetUCharBufferFromFlat(flat2, &sap2, length2); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 581 | UErrorCode status = U_ZERO_ERROR; |
| 582 | UCollationResult result = |
Ben Murdoch | c561043 | 2016-08-08 18:44:38 +0100 | [diff] [blame] | 583 | collator->compare(string_val1, length1, string_val2, length2, status); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 584 | if (U_FAILURE(status)) return isolate->ThrowIllegalOperation(); |
| 585 | |
| 586 | return *isolate->factory()->NewNumberFromInt(result); |
| 587 | } |
| 588 | |
| 589 | |
| 590 | RUNTIME_FUNCTION(Runtime_StringNormalize) { |
| 591 | HandleScope scope(isolate); |
Ben Murdoch | c561043 | 2016-08-08 18:44:38 +0100 | [diff] [blame] | 592 | static const struct { |
| 593 | const char* name; |
| 594 | UNormalization2Mode mode; |
| 595 | } normalizationForms[] = { |
| 596 | {"nfc", UNORM2_COMPOSE}, |
| 597 | {"nfc", UNORM2_DECOMPOSE}, |
| 598 | {"nfkc", UNORM2_COMPOSE}, |
| 599 | {"nfkc", UNORM2_DECOMPOSE}, |
| 600 | }; |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 601 | |
| 602 | DCHECK(args.length() == 2); |
| 603 | |
Ben Murdoch | c561043 | 2016-08-08 18:44:38 +0100 | [diff] [blame] | 604 | CONVERT_ARG_HANDLE_CHECKED(String, s, 0); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 605 | CONVERT_NUMBER_CHECKED(int, form_id, Int32, args[1]); |
| 606 | RUNTIME_ASSERT(form_id >= 0 && |
| 607 | static_cast<size_t>(form_id) < arraysize(normalizationForms)); |
| 608 | |
Ben Murdoch | c561043 | 2016-08-08 18:44:38 +0100 | [diff] [blame] | 609 | int length = s->length(); |
| 610 | s = String::Flatten(s); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 611 | icu::UnicodeString result; |
Ben Murdoch | c561043 | 2016-08-08 18:44:38 +0100 | [diff] [blame] | 612 | base::SmartArrayPointer<uc16> sap; |
| 613 | UErrorCode status = U_ZERO_ERROR; |
| 614 | { |
| 615 | DisallowHeapAllocation no_gc; |
| 616 | String::FlatContent flat = s->GetFlatContent(); |
| 617 | const UChar* src = GetUCharBufferFromFlat(flat, &sap, length); |
| 618 | icu::UnicodeString input(false, src, length); |
| 619 | // Getting a singleton. Should not free it. |
| 620 | const icu::Normalizer2* normalizer = |
| 621 | icu::Normalizer2::getInstance(nullptr, normalizationForms[form_id].name, |
| 622 | normalizationForms[form_id].mode, status); |
| 623 | DCHECK(U_SUCCESS(status)); |
| 624 | RUNTIME_ASSERT(normalizer != nullptr); |
| 625 | int32_t normalized_prefix_length = |
| 626 | normalizer->spanQuickCheckYes(input, status); |
| 627 | // Quick return if the input is already normalized. |
| 628 | if (length == normalized_prefix_length) return *s; |
| 629 | icu::UnicodeString unnormalized = |
| 630 | input.tempSubString(normalized_prefix_length); |
| 631 | // Read-only alias of the normalized prefix. |
| 632 | result.setTo(false, input.getBuffer(), normalized_prefix_length); |
| 633 | // copy-on-write; normalize the suffix and append to |result|. |
| 634 | normalizer->normalizeSecondAndAppend(result, unnormalized, status); |
| 635 | } |
| 636 | |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 637 | if (U_FAILURE(status)) { |
| 638 | return isolate->heap()->undefined_value(); |
| 639 | } |
| 640 | |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 641 | RETURN_RESULT_OR_FAILURE( |
| 642 | isolate, isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>( |
| 643 | reinterpret_cast<const uint16_t*>(result.getBuffer()), |
| 644 | result.length()))); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 645 | } |
| 646 | |
| 647 | |
| 648 | RUNTIME_FUNCTION(Runtime_CreateBreakIterator) { |
| 649 | HandleScope scope(isolate); |
| 650 | |
| 651 | DCHECK(args.length() == 3); |
| 652 | |
| 653 | CONVERT_ARG_HANDLE_CHECKED(String, locale, 0); |
| 654 | CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1); |
| 655 | CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2); |
| 656 | |
| 657 | Handle<ObjectTemplateInfo> break_iterator_template = |
| 658 | I18N::GetTemplate2(isolate); |
| 659 | |
| 660 | // Create an empty object wrapper. |
| 661 | Handle<JSObject> local_object; |
| 662 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
| 663 | isolate, local_object, |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 664 | ApiNatives::InstantiateObject(break_iterator_template)); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 665 | |
| 666 | // Set break iterator as internal field of the resulting JS object. |
| 667 | icu::BreakIterator* break_iterator = BreakIterator::InitializeBreakIterator( |
| 668 | isolate, locale, options, resolved); |
| 669 | |
| 670 | if (!break_iterator) return isolate->ThrowIllegalOperation(); |
| 671 | |
| 672 | local_object->SetInternalField(0, reinterpret_cast<Smi*>(break_iterator)); |
| 673 | // Make sure that the pointer to adopted text is NULL. |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 674 | local_object->SetInternalField(1, static_cast<Smi*>(nullptr)); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 675 | |
| 676 | Factory* factory = isolate->factory(); |
| 677 | Handle<String> key = factory->NewStringFromStaticChars("breakIterator"); |
| 678 | Handle<String> value = factory->NewStringFromStaticChars("valid"); |
| 679 | JSObject::AddProperty(local_object, key, value, NONE); |
| 680 | |
| 681 | // Make object handle weak so we can delete the break iterator once GC kicks |
| 682 | // in. |
| 683 | Handle<Object> wrapper = isolate->global_handles()->Create(*local_object); |
Ben Murdoch | c561043 | 2016-08-08 18:44:38 +0100 | [diff] [blame] | 684 | GlobalHandles::MakeWeak(wrapper.location(), wrapper.location(), |
| 685 | BreakIterator::DeleteBreakIterator, |
| 686 | WeakCallbackType::kInternalFields); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 687 | return *local_object; |
| 688 | } |
| 689 | |
| 690 | |
| 691 | RUNTIME_FUNCTION(Runtime_BreakIteratorAdoptText) { |
| 692 | HandleScope scope(isolate); |
| 693 | |
| 694 | DCHECK(args.length() == 2); |
| 695 | |
| 696 | CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0); |
| 697 | CONVERT_ARG_HANDLE_CHECKED(String, text, 1); |
| 698 | |
| 699 | icu::BreakIterator* break_iterator = |
| 700 | BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder); |
| 701 | if (!break_iterator) return isolate->ThrowIllegalOperation(); |
| 702 | |
| 703 | icu::UnicodeString* u_text = reinterpret_cast<icu::UnicodeString*>( |
| 704 | break_iterator_holder->GetInternalField(1)); |
| 705 | delete u_text; |
| 706 | |
Ben Murdoch | c561043 | 2016-08-08 18:44:38 +0100 | [diff] [blame] | 707 | int length = text->length(); |
| 708 | text = String::Flatten(text); |
| 709 | DisallowHeapAllocation no_gc; |
| 710 | String::FlatContent flat = text->GetFlatContent(); |
| 711 | base::SmartArrayPointer<uc16> sap; |
| 712 | const UChar* text_value = GetUCharBufferFromFlat(flat, &sap, length); |
| 713 | u_text = new icu::UnicodeString(text_value, length); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 714 | break_iterator_holder->SetInternalField(1, reinterpret_cast<Smi*>(u_text)); |
| 715 | |
| 716 | break_iterator->setText(*u_text); |
| 717 | |
| 718 | return isolate->heap()->undefined_value(); |
| 719 | } |
| 720 | |
| 721 | |
| 722 | RUNTIME_FUNCTION(Runtime_BreakIteratorFirst) { |
| 723 | HandleScope scope(isolate); |
| 724 | |
| 725 | DCHECK(args.length() == 1); |
| 726 | |
| 727 | CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0); |
| 728 | |
| 729 | icu::BreakIterator* break_iterator = |
| 730 | BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder); |
| 731 | if (!break_iterator) return isolate->ThrowIllegalOperation(); |
| 732 | |
| 733 | return *isolate->factory()->NewNumberFromInt(break_iterator->first()); |
| 734 | } |
| 735 | |
| 736 | |
| 737 | RUNTIME_FUNCTION(Runtime_BreakIteratorNext) { |
| 738 | HandleScope scope(isolate); |
| 739 | |
| 740 | DCHECK(args.length() == 1); |
| 741 | |
| 742 | CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0); |
| 743 | |
| 744 | icu::BreakIterator* break_iterator = |
| 745 | BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder); |
| 746 | if (!break_iterator) return isolate->ThrowIllegalOperation(); |
| 747 | |
| 748 | return *isolate->factory()->NewNumberFromInt(break_iterator->next()); |
| 749 | } |
| 750 | |
| 751 | |
| 752 | RUNTIME_FUNCTION(Runtime_BreakIteratorCurrent) { |
| 753 | HandleScope scope(isolate); |
| 754 | |
| 755 | DCHECK(args.length() == 1); |
| 756 | |
| 757 | CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0); |
| 758 | |
| 759 | icu::BreakIterator* break_iterator = |
| 760 | BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder); |
| 761 | if (!break_iterator) return isolate->ThrowIllegalOperation(); |
| 762 | |
| 763 | return *isolate->factory()->NewNumberFromInt(break_iterator->current()); |
| 764 | } |
| 765 | |
| 766 | |
| 767 | RUNTIME_FUNCTION(Runtime_BreakIteratorBreakType) { |
| 768 | HandleScope scope(isolate); |
| 769 | |
| 770 | DCHECK(args.length() == 1); |
| 771 | |
| 772 | CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0); |
| 773 | |
| 774 | icu::BreakIterator* break_iterator = |
| 775 | BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder); |
| 776 | if (!break_iterator) return isolate->ThrowIllegalOperation(); |
| 777 | |
| 778 | // TODO(cira): Remove cast once ICU fixes base BreakIterator class. |
| 779 | icu::RuleBasedBreakIterator* rule_based_iterator = |
| 780 | static_cast<icu::RuleBasedBreakIterator*>(break_iterator); |
| 781 | int32_t status = rule_based_iterator->getRuleStatus(); |
| 782 | // Keep return values in sync with JavaScript BreakType enum. |
| 783 | if (status >= UBRK_WORD_NONE && status < UBRK_WORD_NONE_LIMIT) { |
| 784 | return *isolate->factory()->NewStringFromStaticChars("none"); |
| 785 | } else if (status >= UBRK_WORD_NUMBER && status < UBRK_WORD_NUMBER_LIMIT) { |
| 786 | return *isolate->factory()->number_string(); |
| 787 | } else if (status >= UBRK_WORD_LETTER && status < UBRK_WORD_LETTER_LIMIT) { |
| 788 | return *isolate->factory()->NewStringFromStaticChars("letter"); |
| 789 | } else if (status >= UBRK_WORD_KANA && status < UBRK_WORD_KANA_LIMIT) { |
| 790 | return *isolate->factory()->NewStringFromStaticChars("kana"); |
| 791 | } else if (status >= UBRK_WORD_IDEO && status < UBRK_WORD_IDEO_LIMIT) { |
| 792 | return *isolate->factory()->NewStringFromStaticChars("ideo"); |
| 793 | } else { |
| 794 | return *isolate->factory()->NewStringFromStaticChars("unknown"); |
| 795 | } |
| 796 | } |
Ben Murdoch | c561043 | 2016-08-08 18:44:38 +0100 | [diff] [blame] | 797 | |
| 798 | namespace { |
| 799 | void ConvertCaseWithTransliterator(icu::UnicodeString* input, |
| 800 | const char* transliterator_id) { |
| 801 | UErrorCode status = U_ZERO_ERROR; |
| 802 | base::SmartPointer<icu::Transliterator> translit( |
| 803 | icu::Transliterator::createInstance( |
| 804 | icu::UnicodeString(transliterator_id, -1, US_INV), UTRANS_FORWARD, |
| 805 | status)); |
| 806 | if (U_FAILURE(status)) return; |
| 807 | translit->transliterate(*input); |
| 808 | } |
| 809 | |
| 810 | MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate, |
| 811 | bool is_to_upper, const char* lang) { |
| 812 | int32_t src_length = s->length(); |
| 813 | |
| 814 | // Greek uppercasing has to be done via transliteration. |
| 815 | // TODO(jshin): Drop this special-casing once ICU's regular case conversion |
| 816 | // API supports Greek uppercasing. See |
| 817 | // http://bugs.icu-project.org/trac/ticket/10582 . |
| 818 | // In the meantime, if there's no Greek character in |s|, call this |
| 819 | // function again with the root locale (lang=""). |
| 820 | // ICU's C API for transliteration is nasty and we just use C++ API. |
| 821 | if (V8_UNLIKELY(is_to_upper && lang[0] == 'e' && lang[1] == 'l')) { |
| 822 | icu::UnicodeString converted; |
| 823 | base::SmartArrayPointer<uc16> sap; |
| 824 | { |
| 825 | DisallowHeapAllocation no_gc; |
| 826 | String::FlatContent flat = s->GetFlatContent(); |
| 827 | const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length); |
| 828 | // Starts with the source string (read-only alias with copy-on-write |
| 829 | // semantics) and will be modified to contain the converted result. |
| 830 | // Using read-only alias at first saves one copy operation if |
| 831 | // transliteration does not change the input, which is rather rare. |
| 832 | // Moreover, transliteration takes rather long so that saving one copy |
| 833 | // helps only a little bit. |
| 834 | converted.setTo(false, src, src_length); |
| 835 | ConvertCaseWithTransliterator(&converted, "el-Upper"); |
| 836 | // If no change is made, just return |s|. |
| 837 | if (converted.getBuffer() == src) return *s; |
| 838 | } |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 839 | RETURN_RESULT_OR_FAILURE( |
| 840 | isolate, |
Ben Murdoch | c561043 | 2016-08-08 18:44:38 +0100 | [diff] [blame] | 841 | isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>( |
| 842 | reinterpret_cast<const uint16_t*>(converted.getBuffer()), |
| 843 | converted.length()))); |
Ben Murdoch | c561043 | 2016-08-08 18:44:38 +0100 | [diff] [blame] | 844 | } |
| 845 | |
| 846 | auto case_converter = is_to_upper ? u_strToUpper : u_strToLower; |
| 847 | |
| 848 | int32_t dest_length = src_length; |
| 849 | UErrorCode status; |
| 850 | Handle<SeqTwoByteString> result; |
| 851 | base::SmartArrayPointer<uc16> sap; |
| 852 | |
| 853 | // This is not a real loop. It'll be executed only once (no overflow) or |
| 854 | // twice (overflow). |
| 855 | for (int i = 0; i < 2; ++i) { |
| 856 | result = |
| 857 | isolate->factory()->NewRawTwoByteString(dest_length).ToHandleChecked(); |
| 858 | DisallowHeapAllocation no_gc; |
| 859 | String::FlatContent flat = s->GetFlatContent(); |
| 860 | const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length); |
| 861 | status = U_ZERO_ERROR; |
| 862 | dest_length = case_converter(reinterpret_cast<UChar*>(result->GetChars()), |
| 863 | dest_length, src, src_length, lang, &status); |
| 864 | if (status != U_BUFFER_OVERFLOW_ERROR) break; |
| 865 | } |
| 866 | |
| 867 | // In most cases, the output will fill the destination buffer completely |
| 868 | // leading to an unterminated string (U_STRING_NOT_TERMINATED_WARNING). |
| 869 | // Only in rare cases, it'll be shorter than the destination buffer and |
| 870 | // |result| has to be truncated. |
| 871 | DCHECK(U_SUCCESS(status)); |
| 872 | if (V8_LIKELY(status == U_STRING_NOT_TERMINATED_WARNING)) { |
| 873 | DCHECK(dest_length == result->length()); |
| 874 | return *result; |
| 875 | } |
| 876 | if (U_SUCCESS(status)) { |
| 877 | DCHECK(dest_length < result->length()); |
| 878 | return *Handle<SeqTwoByteString>::cast( |
| 879 | SeqString::Truncate(result, dest_length)); |
| 880 | } |
| 881 | return *s; |
| 882 | } |
| 883 | |
// True iff |ch| is one of the ASCII capital letters 'A'..'Z'.
inline bool IsASCIIUpper(uint16_t ch) {
  return 'A' <= ch && ch <= 'Z';
}
| 885 | |
// Lower-case lookup table indexed by a Latin-1 code unit (0x00..0xFF).
// 'A'..'Z' and 0xC0..0xDE (except 0xD7) map to the code unit 0x20 above them;
// every other entry maps to itself. Laid out 16 entries per row, so the row
// comment gives the index of the first entry in that row.
const uint8_t kToLower[256] = {
    // 0x00
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    // 0x10
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
    // 0x20
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    // 0x30
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
    // 0x40
    0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    // 0x50
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
    // 0x60
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    // 0x70
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    // 0x80
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
    // 0x90
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
    // 0xA0
    0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
    0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
    // 0xB0
    0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
    0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
    // 0xC0
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    // 0xD0
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xD7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
    // 0xE0
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    // 0xF0
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
};
| 910 | |
| 911 | inline uint16_t ToLatin1Lower(uint16_t ch) { |
| 912 | return static_cast<uint16_t>(kToLower[ch]); |
| 913 | } |
| 914 | |
// Upper-cases |ch| if it is an ASCII letter 'a'..'z' by clearing bit 5 (0x20);
// any other value is returned unchanged. Written without a branch.
inline uint16_t ToASCIIUpper(uint16_t ch) {
  const bool is_ascii_lower = ch >= 'a' && ch <= 'z';
  const int case_bit = is_ascii_lower << 5;
  return ch & ~case_bit;
}
| 918 | |
| 919 | // Does not work for U+00DF (sharp-s), U+00B5 (micron), U+00FF. |
| 920 | inline uint16_t ToLatin1Upper(uint16_t ch) { |
| 921 | DCHECK(ch != 0xDF && ch != 0xB5 && ch != 0xFF); |
| 922 | return ch & |
| 923 | ~(((ch >= 'a' && ch <= 'z') || (((ch & 0xE0) == 0xE0) && ch != 0xE7)) |
| 924 | << 5); |
| 925 | } |
| 926 | |
| 927 | template <typename Char> |
| 928 | bool ToUpperFastASCII(const Vector<const Char>& src, |
| 929 | Handle<SeqOneByteString> result) { |
| 930 | // Do a faster loop for the case where all the characters are ASCII. |
| 931 | uint16_t ored = 0; |
| 932 | int32_t index = 0; |
| 933 | for (auto it = src.begin(); it != src.end(); ++it) { |
| 934 | uint16_t ch = static_cast<uint16_t>(*it); |
| 935 | ored |= ch; |
| 936 | result->SeqOneByteStringSet(index++, ToASCIIUpper(ch)); |
| 937 | } |
| 938 | return !(ored & ~0x7F); |
| 939 | } |
| 940 | |
// U+00DF, the lower case sharp-S.
const uint16_t sharp_s = 0x00DF;
| 942 | |
| 943 | template <typename Char> |
| 944 | bool ToUpperOneByte(const Vector<const Char>& src, |
| 945 | Handle<SeqOneByteString> result, int* sharp_s_count) { |
| 946 | // Still pretty-fast path for the input with non-ASCII Latin-1 characters. |
| 947 | |
| 948 | // There are two special cases. |
| 949 | // 1. U+00B5 and U+00FF are mapped to a character beyond U+00FF. |
| 950 | // 2. Lower case sharp-S converts to "SS" (two characters) |
| 951 | *sharp_s_count = 0; |
| 952 | int32_t index = 0; |
| 953 | for (auto it = src.begin(); it != src.end(); ++it) { |
| 954 | uint16_t ch = static_cast<uint16_t>(*it); |
| 955 | if (V8_UNLIKELY(ch == sharp_s)) { |
| 956 | ++(*sharp_s_count); |
| 957 | continue; |
| 958 | } |
| 959 | if (V8_UNLIKELY(ch == 0xB5 || ch == 0xFF)) { |
| 960 | // Since this upper-cased character does not fit in an 8-bit string, we |
| 961 | // need to take the 16-bit path. |
| 962 | return false; |
| 963 | } |
| 964 | result->SeqOneByteStringSet(index++, ToLatin1Upper(ch)); |
| 965 | } |
| 966 | |
| 967 | return true; |
| 968 | } |
| 969 | |
| 970 | template <typename Char> |
| 971 | void ToUpperWithSharpS(const Vector<const Char>& src, |
| 972 | Handle<SeqOneByteString> result) { |
| 973 | int32_t dest_index = 0; |
| 974 | for (auto it = src.begin(); it != src.end(); ++it) { |
| 975 | uint16_t ch = static_cast<uint16_t>(*it); |
| 976 | if (ch == sharp_s) { |
| 977 | result->SeqOneByteStringSet(dest_index++, 'S'); |
| 978 | result->SeqOneByteStringSet(dest_index++, 'S'); |
| 979 | } else { |
| 980 | result->SeqOneByteStringSet(dest_index++, ToLatin1Upper(ch)); |
| 981 | } |
| 982 | } |
| 983 | } |
| 984 | |
| 985 | } // namespace |
| 986 | |
| 987 | RUNTIME_FUNCTION(Runtime_StringToLowerCaseI18N) { |
| 988 | HandleScope scope(isolate); |
| 989 | DCHECK_EQ(args.length(), 1); |
| 990 | CONVERT_ARG_HANDLE_CHECKED(String, s, 0); |
| 991 | |
| 992 | int length = s->length(); |
| 993 | s = String::Flatten(s); |
| 994 | // First scan the string for uppercase and non-ASCII characters: |
| 995 | if (s->HasOnlyOneByteChars()) { |
| 996 | unsigned first_index_to_lower = length; |
| 997 | for (int index = 0; index < length; ++index) { |
| 998 | // Blink specializes this path for one-byte strings, so it |
| 999 | // does not need to do a generic get, but can do the equivalent |
| 1000 | // of SeqOneByteStringGet. |
| 1001 | uint16_t ch = s->Get(index); |
| 1002 | if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) { |
| 1003 | first_index_to_lower = index; |
| 1004 | break; |
| 1005 | } |
| 1006 | } |
| 1007 | |
| 1008 | // Nothing to do if the string is all ASCII with no uppercase. |
| 1009 | if (first_index_to_lower == length) return *s; |
| 1010 | |
| 1011 | // We depend here on the invariant that the length of a Latin1 |
| 1012 | // string is invariant under ToLowerCase, and the result always |
| 1013 | // fits in the Latin1 range in the *root locale*. It does not hold |
| 1014 | // for ToUpperCase even in the root locale. |
| 1015 | Handle<SeqOneByteString> result; |
| 1016 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
| 1017 | isolate, result, isolate->factory()->NewRawOneByteString(length)); |
| 1018 | |
| 1019 | DisallowHeapAllocation no_gc; |
| 1020 | String::FlatContent flat = s->GetFlatContent(); |
| 1021 | if (flat.IsOneByte()) { |
| 1022 | const uint8_t* src = flat.ToOneByteVector().start(); |
| 1023 | CopyChars(result->GetChars(), src, first_index_to_lower); |
| 1024 | for (int index = first_index_to_lower; index < length; ++index) { |
| 1025 | uint16_t ch = static_cast<uint16_t>(src[index]); |
| 1026 | result->SeqOneByteStringSet(index, ToLatin1Lower(ch)); |
| 1027 | } |
| 1028 | } else { |
| 1029 | const uint16_t* src = flat.ToUC16Vector().start(); |
| 1030 | CopyChars(result->GetChars(), src, first_index_to_lower); |
| 1031 | for (int index = first_index_to_lower; index < length; ++index) { |
| 1032 | uint16_t ch = src[index]; |
| 1033 | result->SeqOneByteStringSet(index, ToLatin1Lower(ch)); |
| 1034 | } |
| 1035 | } |
| 1036 | |
| 1037 | return *result; |
| 1038 | } |
| 1039 | |
| 1040 | // Blink had an additional case here for ASCII 2-byte strings, but |
| 1041 | // that is subsumed by the above code (assuming there isn't a false |
| 1042 | // negative for HasOnlyOneByteChars). |
| 1043 | |
| 1044 | // Do a slower implementation for cases that include non-ASCII characters. |
| 1045 | return LocaleConvertCase(s, isolate, false, ""); |
| 1046 | } |
| 1047 | |
| 1048 | RUNTIME_FUNCTION(Runtime_StringToUpperCaseI18N) { |
| 1049 | HandleScope scope(isolate); |
| 1050 | DCHECK_EQ(args.length(), 1); |
| 1051 | CONVERT_ARG_HANDLE_CHECKED(String, s, 0); |
| 1052 | |
| 1053 | // This function could be optimized for no-op cases the way lowercase |
| 1054 | // counterpart is, but in empirical testing, few actual calls to upper() |
| 1055 | // are no-ops. So, it wouldn't be worth the extra time for pre-scanning. |
| 1056 | |
| 1057 | int32_t length = s->length(); |
| 1058 | s = String::Flatten(s); |
| 1059 | |
| 1060 | if (s->HasOnlyOneByteChars()) { |
| 1061 | Handle<SeqOneByteString> result; |
| 1062 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
| 1063 | isolate, result, isolate->factory()->NewRawOneByteString(length)); |
| 1064 | |
| 1065 | int sharp_s_count; |
| 1066 | bool is_result_single_byte; |
| 1067 | { |
| 1068 | DisallowHeapAllocation no_gc; |
| 1069 | String::FlatContent flat = s->GetFlatContent(); |
| 1070 | // If it was ok to slow down ASCII-only input slightly, ToUpperFastASCII |
| 1071 | // could be removed because ToUpperOneByte is pretty fast now (it |
| 1072 | // does not call ICU API any more.). |
| 1073 | if (flat.IsOneByte()) { |
| 1074 | Vector<const uint8_t> src = flat.ToOneByteVector(); |
| 1075 | if (ToUpperFastASCII(src, result)) return *result; |
| 1076 | is_result_single_byte = ToUpperOneByte(src, result, &sharp_s_count); |
| 1077 | } else { |
| 1078 | DCHECK(flat.IsTwoByte()); |
| 1079 | Vector<const uint16_t> src = flat.ToUC16Vector(); |
| 1080 | if (ToUpperFastASCII(src, result)) return *result; |
| 1081 | is_result_single_byte = ToUpperOneByte(src, result, &sharp_s_count); |
| 1082 | } |
| 1083 | } |
| 1084 | |
| 1085 | // Go to the full Unicode path if there are characters whose uppercase |
| 1086 | // is beyond the Latin-1 range (cannot be represented in OneByteString). |
| 1087 | if (V8_UNLIKELY(!is_result_single_byte)) { |
| 1088 | return LocaleConvertCase(s, isolate, true, ""); |
| 1089 | } |
| 1090 | |
| 1091 | if (sharp_s_count == 0) return *result; |
| 1092 | |
| 1093 | // We have sharp_s_count sharp-s characters, but the result is still |
| 1094 | // in the Latin-1 range. |
| 1095 | ASSIGN_RETURN_FAILURE_ON_EXCEPTION( |
| 1096 | isolate, result, |
| 1097 | isolate->factory()->NewRawOneByteString(length + sharp_s_count)); |
| 1098 | DisallowHeapAllocation no_gc; |
| 1099 | String::FlatContent flat = s->GetFlatContent(); |
| 1100 | if (flat.IsOneByte()) { |
| 1101 | ToUpperWithSharpS(flat.ToOneByteVector(), result); |
| 1102 | } else { |
| 1103 | ToUpperWithSharpS(flat.ToUC16Vector(), result); |
| 1104 | } |
| 1105 | |
| 1106 | return *result; |
| 1107 | } |
| 1108 | |
| 1109 | return LocaleConvertCase(s, isolate, true, ""); |
| 1110 | } |
| 1111 | |
| 1112 | RUNTIME_FUNCTION(Runtime_StringLocaleConvertCase) { |
| 1113 | HandleScope scope(isolate); |
| 1114 | DCHECK_EQ(args.length(), 3); |
| 1115 | CONVERT_ARG_HANDLE_CHECKED(String, s, 0); |
| 1116 | CONVERT_BOOLEAN_ARG_CHECKED(is_upper, 1); |
| 1117 | CONVERT_ARG_HANDLE_CHECKED(SeqOneByteString, lang, 2); |
| 1118 | |
| 1119 | // All the languages requiring special handling ("az", "el", "lt", "tr") |
| 1120 | // have a 2-letter language code. |
| 1121 | DCHECK(lang->length() == 2); |
| 1122 | uint8_t lang_str[3]; |
| 1123 | memcpy(lang_str, lang->GetChars(), 2); |
| 1124 | lang_str[2] = 0; |
| 1125 | s = String::Flatten(s); |
| 1126 | // TODO(jshin): Consider adding a fast path for ASCII or Latin-1. The fastpath |
| 1127 | // in the root locale needs to be adjusted for az, lt and tr because even case |
| 1128 | // mapping of ASCII range characters are different in those locales. |
| 1129 | // Greek (el) does not require any adjustment, though. |
| 1130 | return LocaleConvertCase(s, isolate, is_upper, |
| 1131 | reinterpret_cast<const char*>(lang_str)); |
| 1132 | } |
| 1133 | |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 1134 | RUNTIME_FUNCTION(Runtime_DateCacheVersion) { |
| 1135 | HandleScope scope(isolate); |
| 1136 | DCHECK_EQ(0, args.length()); |
| 1137 | if (isolate->serializer_enabled()) return isolate->heap()->undefined_value(); |
| 1138 | if (!isolate->eternal_handles()->Exists(EternalHandles::DATE_CACHE_VERSION)) { |
| 1139 | Handle<FixedArray> date_cache_version = |
| 1140 | isolate->factory()->NewFixedArray(1, TENURED); |
| 1141 | date_cache_version->set(0, Smi::FromInt(0)); |
| 1142 | isolate->eternal_handles()->CreateSingleton( |
| 1143 | isolate, *date_cache_version, EternalHandles::DATE_CACHE_VERSION); |
| 1144 | } |
| 1145 | Handle<FixedArray> date_cache_version = |
| 1146 | Handle<FixedArray>::cast(isolate->eternal_handles()->GetSingleton( |
| 1147 | EternalHandles::DATE_CACHE_VERSION)); |
| 1148 | return date_cache_version->get(0); |
| 1149 | } |
| 1150 | |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 1151 | } // namespace internal |
| 1152 | } // namespace v8 |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 1153 | |
| 1154 | #endif // V8_I18N_SUPPORT |