| // © 2016 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| /* |
| ******************************************************************** |
| * COPYRIGHT: |
| * Copyright (c) 1996-2016, International Business Machines Corporation and |
| * others. All Rights Reserved. |
| ******************************************************************** |
| * |
| * ucnv_bld.cpp: |
| * |
| * Defines functions that are used in the creation/initialization/deletion |
| * of converters and related structures. |
| * uses uconv_io.h routines to access disk information |
| * is used by ucnv.h to implement public API create/delete/flushCache routines |
| * Modification History: |
| * |
| * Date Name Description |
| * |
| * 06/20/2000 helena OS/400 port changes; mostly typecast. |
| * 06/29/2000 helena Major rewrite of the callback interface. |
| */ |
| |
| #include "unicode/utypes.h" |
| |
| #if !UCONFIG_NO_CONVERSION |
| |
| #include "unicode/putil.h" |
| #include "unicode/udata.h" |
| #include "unicode/ucnv.h" |
| #include "unicode/uloc.h" |
| #include "mutex.h" |
| #include "putilimp.h" |
| #include "uassert.h" |
| #include "utracimp.h" |
| #include "ucnv_io.h" |
| #include "ucnv_bld.h" |
| #include "ucnvmbcs.h" |
| #include "ucnv_ext.h" |
| #include "ucnv_cnv.h" |
| #include "ucnv_imp.h" |
| #include "uhash.h" |
| #include "umutex.h" |
| #include "cstring.h" |
| #include "cmemory.h" |
| #include "ucln_cmn.h" |
| #include "ustr_cnv.h" |
| |
| |
/*
 * Debug logging hook for the converter cache.
 * The "#if 0" branch is disabled: if switched on, it declares an external
 * four-argument logger and wraps it in a three-argument macro that appends
 * __LINE__. In the active "#else" branch the macro expands to nothing, so
 * every UCNV_DEBUG_LOG(...) call site compiles away.
 */
#if 0
#include <stdio.h>
extern void UCNV_DEBUG_LOG(char *what, char *who, void *p, int l);
#define UCNV_DEBUG_LOG(x,y,z) UCNV_DEBUG_LOG(x,y,z,__LINE__)
#else
# define UCNV_DEBUG_LOG(x,y,z)
#endif
| |
/*
 * Built-in shared-data prototypes, indexed by UConverterType value
 * (see the converterData[type] lookups in this file).
 * A NULL slot means this build has no implementation for that type; the
 * UCONFIG_* switches below replace compiled-out converters with NULL so that
 * the remaining entries keep their positions. Code in this file treats a NULL
 * slot as "unsupported type".
 * The initializer order must therefore stay in step with the UConverterType
 * numbering.
 */
static const UConverterSharedData * const
converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={
    NULL, NULL,

#if UCONFIG_NO_LEGACY_CONVERSION
    NULL,
#else
    &_MBCSData,
#endif

    &_Latin1Data,
    &_UTF8Data, &_UTF16BEData, &_UTF16LEData,
#if UCONFIG_ONLY_HTML_CONVERSION
    NULL, NULL,
#else
    &_UTF32BEData, &_UTF32LEData,
#endif
    NULL,

#if UCONFIG_NO_LEGACY_CONVERSION
    NULL,
#else
    &_ISO2022Data,
#endif

#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
    NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL,
    NULL,
#else
    &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6,
    &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19,
    &_HZData,
#endif

#if UCONFIG_ONLY_HTML_CONVERSION
    NULL,
#else
    &_SCSUData,
#endif


#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
    NULL,
#else
    &_ISCIIData,
#endif

    &_ASCIIData,
#if UCONFIG_ONLY_HTML_CONVERSION
    NULL, NULL, &_UTF16Data, NULL, NULL, NULL,
#else
    &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData,
#endif

#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
    NULL,
#else
    &_CompoundTextData
#endif
};
| |
| /* Please keep this in binary sorted order for getAlgorithmicTypeFromName. |
| Also the name should be in lower case and all spaces, dashes and underscores |
| removed |
| */ |
| static struct { |
| const char *name; |
| const UConverterType type; |
| } const cnvNameType[] = { |
| #if !UCONFIG_ONLY_HTML_CONVERSION |
| { "bocu1", UCNV_BOCU1 }, |
| { "cesu8", UCNV_CESU8 }, |
| #endif |
| #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION |
| { "hz",UCNV_HZ }, |
| #endif |
| #if !UCONFIG_ONLY_HTML_CONVERSION |
| { "imapmailboxname", UCNV_IMAP_MAILBOX }, |
| #endif |
| #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION |
| { "iscii", UCNV_ISCII }, |
| #endif |
| #if !UCONFIG_NO_LEGACY_CONVERSION |
| { "iso2022", UCNV_ISO_2022 }, |
| #endif |
| { "iso88591", UCNV_LATIN_1 }, |
| #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION |
| { "lmbcs1", UCNV_LMBCS_1 }, |
| { "lmbcs11",UCNV_LMBCS_11 }, |
| { "lmbcs16",UCNV_LMBCS_16 }, |
| { "lmbcs17",UCNV_LMBCS_17 }, |
| { "lmbcs18",UCNV_LMBCS_18 }, |
| { "lmbcs19",UCNV_LMBCS_19 }, |
| { "lmbcs2", UCNV_LMBCS_2 }, |
| { "lmbcs3", UCNV_LMBCS_3 }, |
| { "lmbcs4", UCNV_LMBCS_4 }, |
| { "lmbcs5", UCNV_LMBCS_5 }, |
| { "lmbcs6", UCNV_LMBCS_6 }, |
| { "lmbcs8", UCNV_LMBCS_8 }, |
| #endif |
| #if !UCONFIG_ONLY_HTML_CONVERSION |
| { "scsu", UCNV_SCSU }, |
| #endif |
| { "usascii", UCNV_US_ASCII }, |
| { "utf16", UCNV_UTF16 }, |
| { "utf16be", UCNV_UTF16_BigEndian }, |
| { "utf16le", UCNV_UTF16_LittleEndian }, |
| #if U_IS_BIG_ENDIAN |
| { "utf16oppositeendian", UCNV_UTF16_LittleEndian }, |
| { "utf16platformendian", UCNV_UTF16_BigEndian }, |
| #else |
| { "utf16oppositeendian", UCNV_UTF16_BigEndian}, |
| { "utf16platformendian", UCNV_UTF16_LittleEndian }, |
| #endif |
| #if !UCONFIG_ONLY_HTML_CONVERSION |
| { "utf32", UCNV_UTF32 }, |
| { "utf32be", UCNV_UTF32_BigEndian }, |
| { "utf32le", UCNV_UTF32_LittleEndian }, |
| #if U_IS_BIG_ENDIAN |
| { "utf32oppositeendian", UCNV_UTF32_LittleEndian }, |
| { "utf32platformendian", UCNV_UTF32_BigEndian }, |
| #else |
| { "utf32oppositeendian", UCNV_UTF32_BigEndian }, |
| { "utf32platformendian", UCNV_UTF32_LittleEndian }, |
| #endif |
| #endif |
| #if !UCONFIG_ONLY_HTML_CONVERSION |
| { "utf7", UCNV_UTF7 }, |
| #endif |
| { "utf8", UCNV_UTF8 }, |
| #if !UCONFIG_ONLY_HTML_CONVERSION |
| { "x11compoundtext", UCNV_COMPOUND_TEXT} |
| #endif |
| }; |
| |
| |
/* File-level state of the converter cache. */

/* Cache of shareable converter data, keyed by converter name. */
/* Created lazily by ucnv_shareConverterData().                */
static UHashtable *SHARED_DATA_HASHTABLE = NULL;
static UMutex cnvCacheMutex = U_MUTEX_INITIALIZER; /* Mutex for synchronizing cnv cache access. */
/* Note: in this file the same mutex is also held around */
/* the shared-data reference count updates.              */

/* Cached list of available converter names, its length, and the init-once guard for building it. */
static const char **gAvailableConverters = NULL;
static uint16_t gAvailableConverterCount = 0;
static icu::UInitOnce gAvailableConvertersInitOnce = U_INITONCE_INITIALIZER;

#if !U_CHARSET_IS_UTF8

/* This contains the resolved converter name. So no further alias lookup is needed again. */
static char gDefaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for NULL */
static const char *gDefaultConverterName = NULL;

/*
If the default converter is an algorithmic converter, this is the cached value.
We don't cache a full UConverter and clone it because ucnv_clone doesn't have
less overhead than an algorithmic open. We don't cache non-algorithmic converters
because ucnv_flushCache must be able to unload the default converter and its table.
*/
static const UConverterSharedData *gDefaultAlgorithmicSharedData = NULL;

/* Does gDefaultConverterName have a converter option and require extra parsing? */
static UBool gDefaultConverterContainsOption;

#endif /* !U_CHARSET_IS_UTF8 */

/* Data item type passed to udata_openChoice() when loading converter files. */
static const char DATA_TYPE[] = "cnv";
| |
| /* ucnv_flushAvailableConverterCache. This is only called from ucnv_cleanup(). |
| * If it is ever to be called from elsewhere, synchronization |
| * will need to be considered. |
| */ |
| static void |
| ucnv_flushAvailableConverterCache() { |
| gAvailableConverterCount = 0; |
| if (gAvailableConverters) { |
| uprv_free((char **)gAvailableConverters); |
| gAvailableConverters = NULL; |
| } |
| gAvailableConvertersInitOnce.reset(); |
| } |
| |
| /* ucnv_cleanup - delete all storage held by the converter cache, except any */ |
| /* in use by open converters. */ |
| /* Not thread safe. */ |
| /* Not supported API. */ |
| static UBool U_CALLCONV ucnv_cleanup(void) { |
| ucnv_flushCache(); |
| if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) { |
| uhash_close(SHARED_DATA_HASHTABLE); |
| SHARED_DATA_HASHTABLE = NULL; |
| } |
| |
| /* Isn't called from flushCache because other threads may have preexisting references to the table. */ |
| ucnv_flushAvailableConverterCache(); |
| |
| #if !U_CHARSET_IS_UTF8 |
| gDefaultConverterName = NULL; |
| gDefaultConverterNameBuffer[0] = 0; |
| gDefaultConverterContainsOption = FALSE; |
| gDefaultAlgorithmicSharedData = NULL; |
| #endif |
| |
| return (SHARED_DATA_HASHTABLE == NULL); |
| } |
| |
| static UBool U_CALLCONV |
| isCnvAcceptable(void * /*context*/, |
| const char * /*type*/, const char * /*name*/, |
| const UDataInfo *pInfo) { |
| return (UBool)( |
| pInfo->size>=20 && |
| pInfo->isBigEndian==U_IS_BIG_ENDIAN && |
| pInfo->charsetFamily==U_CHARSET_FAMILY && |
| pInfo->sizeofUChar==U_SIZEOF_UCHAR && |
| pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ |
| pInfo->dataFormat[1]==0x6e && |
| pInfo->dataFormat[2]==0x76 && |
| pInfo->dataFormat[3]==0x74 && |
| pInfo->formatVersion[0]==6); /* Everything will be version 6 */ |
| } |
| |
| /** |
| * Un flatten shared data from a UDATA.. |
| */ |
| static UConverterSharedData* |
| ucnv_data_unFlattenClone(UConverterLoadArgs *pArgs, UDataMemory *pData, UErrorCode *status) |
| { |
| /* UDataInfo info; -- necessary only if some converters have different formatVersion */ |
| const uint8_t *raw = (const uint8_t *)udata_getMemory(pData); |
| const UConverterStaticData *source = (const UConverterStaticData *) raw; |
| UConverterSharedData *data; |
| UConverterType type = (UConverterType)source->conversionType; |
| |
| if(U_FAILURE(*status)) |
| return NULL; |
| |
| if( (uint16_t)type >= UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES || |
| converterData[type] == NULL || |
| !converterData[type]->isReferenceCounted || |
| converterData[type]->referenceCounter != 1 || |
| source->structSize != sizeof(UConverterStaticData)) |
| { |
| *status = U_INVALID_TABLE_FORMAT; |
| return NULL; |
| } |
| |
| data = (UConverterSharedData *)uprv_malloc(sizeof(UConverterSharedData)); |
| if(data == NULL) { |
| *status = U_MEMORY_ALLOCATION_ERROR; |
| return NULL; |
| } |
| |
| /* copy initial values from the static structure for this type */ |
| uprv_memcpy(data, converterData[type], sizeof(UConverterSharedData)); |
| |
| data->staticData = source; |
| |
| data->sharedDataCached = FALSE; |
| |
| /* fill in fields from the loaded data */ |
| data->dataMemory = (void*)pData; /* for future use */ |
| |
| if(data->impl->load != NULL) { |
| data->impl->load(data, pArgs, raw + source->structSize, status); |
| if(U_FAILURE(*status)) { |
| uprv_free(data); |
| return NULL; |
| } |
| } |
| return data; |
| } |
| |
| /*Takes an alias name gets an actual converter file name |
| *goes to disk and opens it. |
| *allocates the memory and returns a new UConverter object |
| */ |
| static UConverterSharedData *createConverterFromFile(UConverterLoadArgs *pArgs, UErrorCode * err) |
| { |
| UDataMemory *data; |
| UConverterSharedData *sharedData; |
| |
| UTRACE_ENTRY_OC(UTRACE_UCNV_LOAD); |
| |
| if (U_FAILURE (*err)) { |
| UTRACE_EXIT_STATUS(*err); |
| return NULL; |
| } |
| |
| UTRACE_DATA2(UTRACE_OPEN_CLOSE, "load converter %s from package %s", pArgs->name, pArgs->pkg); |
| |
| data = udata_openChoice(pArgs->pkg, DATA_TYPE, pArgs->name, isCnvAcceptable, NULL, err); |
| if(U_FAILURE(*err)) |
| { |
| UTRACE_EXIT_STATUS(*err); |
| return NULL; |
| } |
| |
| sharedData = ucnv_data_unFlattenClone(pArgs, data, err); |
| if(U_FAILURE(*err)) |
| { |
| udata_close(data); |
| UTRACE_EXIT_STATUS(*err); |
| return NULL; |
| } |
| |
| /* |
| * TODO Store pkg in a field in the shared data so that delta-only converters |
| * can load base converters from the same package. |
| * If the pkg name is longer than the field, then either do not load the converter |
| * in the first place, or just set the pkg field to "". |
| */ |
| |
| UTRACE_EXIT_PTR_STATUS(sharedData, *err); |
| return sharedData; |
| } |
| |
| /*returns a converter type from a string |
| */ |
| static const UConverterSharedData * |
| getAlgorithmicTypeFromName(const char *realName) |
| { |
| uint32_t mid, start, limit; |
| uint32_t lastMid; |
| int result; |
| char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH]; |
| |
| /* Lower case and remove ignoreable characters. */ |
| ucnv_io_stripForCompare(strippedName, realName); |
| |
| /* do a binary search for the alias */ |
| start = 0; |
| limit = UPRV_LENGTHOF(cnvNameType); |
| mid = limit; |
| lastMid = UINT32_MAX; |
| |
| for (;;) { |
| mid = (uint32_t)((start + limit) / 2); |
| if (lastMid == mid) { /* Have we moved? */ |
| break; /* We haven't moved, and it wasn't found. */ |
| } |
| lastMid = mid; |
| result = uprv_strcmp(strippedName, cnvNameType[mid].name); |
| |
| if (result < 0) { |
| limit = mid; |
| } else if (result > 0) { |
| start = mid; |
| } else { |
| return converterData[cnvNameType[mid].type]; |
| } |
| } |
| |
| return NULL; |
| } |
| |
/*
 * Size multiplier for the shared data hash table: the table is opened with a
 * requested size of (number of known converters) * UCNV_CACHE_LOAD_FACTOR.
 * When on small platforms, or just a couple of converters are used, this
 * number should be 2. When memory is plentiful, or when ucnv_countAvailable
 * is ever used with a lot of available converters, this should be 4.
 * Larger numbers reduce the number of hash collisions, but use more memory.
 */
#define UCNV_CACHE_LOAD_FACTOR 2
| |
| /* Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */ |
| /* Will always be called with the cnvCacheMutex alrady being held */ |
| /* by the calling function. */ |
| /* Stores the shared data in the SHARED_DATA_HASHTABLE |
| * @param data The shared data |
| */ |
| static void |
| ucnv_shareConverterData(UConverterSharedData * data) |
| { |
| UErrorCode err = U_ZERO_ERROR; |
| /*Lazy evaluates the Hashtable itself */ |
| /*void *sanity = NULL;*/ |
| |
| if (SHARED_DATA_HASHTABLE == NULL) |
| { |
| SHARED_DATA_HASHTABLE = uhash_openSize(uhash_hashChars, uhash_compareChars, NULL, |
| ucnv_io_countKnownConverters(&err)*UCNV_CACHE_LOAD_FACTOR, |
| &err); |
| ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); |
| |
| if (U_FAILURE(err)) |
| return; |
| } |
| |
| /* ### check to see if the element is not already there! */ |
| |
| /* |
| sanity = ucnv_getSharedConverterData (data->staticData->name); |
| if(sanity != NULL) |
| { |
| UCNV_DEBUG_LOG("put:overwrite!",data->staticData->name,sanity); |
| } |
| UCNV_DEBUG_LOG("put:chk",data->staticData->name,sanity); |
| */ |
| |
| /* Mark it shared */ |
| data->sharedDataCached = TRUE; |
| |
| uhash_put(SHARED_DATA_HASHTABLE, |
| (void*) data->staticData->name, /* Okay to cast away const as long as |
| keyDeleter == NULL */ |
| data, |
| &err); |
| UCNV_DEBUG_LOG("put", data->staticData->name,data); |
| |
| } |
| |
| /* Look up a converter name in the shared data cache. */ |
| /* cnvCacheMutex must be held by the caller to protect the hash table. */ |
| /* gets the shared data from the SHARED_DATA_HASHTABLE (might return NULL if it isn't there) |
| * @param name The name of the shared data |
| * @return the shared data from the SHARED_DATA_HASHTABLE |
| */ |
| static UConverterSharedData * |
| ucnv_getSharedConverterData(const char *name) |
| { |
| /*special case when no Table has yet been created we return NULL */ |
| if (SHARED_DATA_HASHTABLE == NULL) |
| { |
| return NULL; |
| } |
| else |
| { |
| UConverterSharedData *rc; |
| |
| rc = (UConverterSharedData*)uhash_get(SHARED_DATA_HASHTABLE, name); |
| UCNV_DEBUG_LOG("get",name,rc); |
| return rc; |
| } |
| } |
| |
| /*frees the string of memory blocks associates with a sharedConverter |
| *if and only if the referenceCounter == 0 |
| */ |
| /* Deletes (frees) the Shared data it's passed. first it checks the referenceCounter to |
| * see if anyone is using it, if not it frees all the memory stemming from sharedConverterData and |
| * returns TRUE, |
| * otherwise returns FALSE |
| * @param sharedConverterData The shared data |
| * @return if not it frees all the memory stemming from sharedConverterData and |
| * returns TRUE, otherwise returns FALSE |
| */ |
| static UBool |
| ucnv_deleteSharedConverterData(UConverterSharedData * deadSharedData) |
| { |
| UTRACE_ENTRY_OC(UTRACE_UCNV_UNLOAD); |
| UTRACE_DATA2(UTRACE_OPEN_CLOSE, "unload converter %s shared data %p", deadSharedData->staticData->name, deadSharedData); |
| |
| if (deadSharedData->referenceCounter > 0) { |
| UTRACE_EXIT_VALUE((int32_t)FALSE); |
| return FALSE; |
| } |
| |
| if (deadSharedData->impl->unload != NULL) { |
| deadSharedData->impl->unload(deadSharedData); |
| } |
| |
| if(deadSharedData->dataMemory != NULL) |
| { |
| UDataMemory *data = (UDataMemory*)deadSharedData->dataMemory; |
| udata_close(data); |
| } |
| |
| uprv_free(deadSharedData); |
| |
| UTRACE_EXIT_VALUE((int32_t)TRUE); |
| return TRUE; |
| } |
| |
| /** |
| * Load a non-algorithmic converter. |
| * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex). |
| */ |
| UConverterSharedData * |
| ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err) { |
| UConverterSharedData *mySharedConverterData; |
| |
| if(err == NULL || U_FAILURE(*err)) { |
| return NULL; |
| } |
| |
| if(pArgs->pkg != NULL && *pArgs->pkg != 0) { |
| /* application-provided converters are not currently cached */ |
| return createConverterFromFile(pArgs, err); |
| } |
| |
| mySharedConverterData = ucnv_getSharedConverterData(pArgs->name); |
| if (mySharedConverterData == NULL) |
| { |
| /*Not cached, we need to stream it in from file */ |
| mySharedConverterData = createConverterFromFile(pArgs, err); |
| if (U_FAILURE (*err) || (mySharedConverterData == NULL)) |
| { |
| return NULL; |
| } |
| else if (!pArgs->onlyTestIsLoadable) |
| { |
| /* share it with other library clients */ |
| ucnv_shareConverterData(mySharedConverterData); |
| } |
| } |
| else |
| { |
| /* The data for this converter was already in the cache. */ |
| /* Update the reference counter on the shared data: one more client */ |
| mySharedConverterData->referenceCounter++; |
| } |
| |
| return mySharedConverterData; |
| } |
| |
| /** |
| * Unload a non-algorithmic converter. |
| * It must be sharedData->isReferenceCounted |
| * and this function must be called inside umtx_lock(&cnvCacheMutex). |
| */ |
| U_CAPI void |
| ucnv_unload(UConverterSharedData *sharedData) { |
| if(sharedData != NULL) { |
| if (sharedData->referenceCounter > 0) { |
| sharedData->referenceCounter--; |
| } |
| |
| if((sharedData->referenceCounter <= 0)&&(sharedData->sharedDataCached == FALSE)) { |
| ucnv_deleteSharedConverterData(sharedData); |
| } |
| } |
| } |
| |
| U_CFUNC void |
| ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData) |
| { |
| if(sharedData != NULL && sharedData->isReferenceCounted) { |
| umtx_lock(&cnvCacheMutex); |
| ucnv_unload(sharedData); |
| umtx_unlock(&cnvCacheMutex); |
| } |
| } |
| |
| U_CFUNC void |
| ucnv_incrementRefCount(UConverterSharedData *sharedData) |
| { |
| if(sharedData != NULL && sharedData->isReferenceCounted) { |
| umtx_lock(&cnvCacheMutex); |
| sharedData->referenceCounter++; |
| umtx_unlock(&cnvCacheMutex); |
| } |
| } |
| |
| /* |
| * *pPieces must be initialized. |
| * The name without options will be copied to pPieces->cnvName. |
| * The locale and options will be copied to pPieces only if present in inName, |
| * otherwise the existing values in pPieces remain. |
| * *pArgs will be set to the pPieces values. |
| */ |
| static void |
| parseConverterOptions(const char *inName, |
| UConverterNamePieces *pPieces, |
| UConverterLoadArgs *pArgs, |
| UErrorCode *err) |
| { |
| char *cnvName = pPieces->cnvName; |
| char c; |
| int32_t len = 0; |
| |
| pArgs->name=inName; |
| pArgs->locale=pPieces->locale; |
| pArgs->options=pPieces->options; |
| |
| /* copy the converter name itself to cnvName */ |
| while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { |
| if (++len>=UCNV_MAX_CONVERTER_NAME_LENGTH) { |
| *err = U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ |
| pPieces->cnvName[0]=0; |
| return; |
| } |
| *cnvName++=c; |
| inName++; |
| } |
| *cnvName=0; |
| pArgs->name=pPieces->cnvName; |
| |
| /* parse options. No more name copying should occur. */ |
| while((c=*inName)!=0) { |
| if(c==UCNV_OPTION_SEP_CHAR) { |
| ++inName; |
| } |
| |
| /* inName is behind an option separator */ |
| if(uprv_strncmp(inName, "locale=", 7)==0) { |
| /* do not modify locale itself in case we have multiple locale options */ |
| char *dest=pPieces->locale; |
| |
| /* copy the locale option value */ |
| inName+=7; |
| len=0; |
| while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { |
| ++inName; |
| |
| if(++len>=ULOC_FULLNAME_CAPACITY) { |
| *err=U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ |
| pPieces->locale[0]=0; |
| return; |
| } |
| |
| *dest++=c; |
| } |
| *dest=0; |
| } else if(uprv_strncmp(inName, "version=", 8)==0) { |
| /* copy the version option value into bits 3..0 of pPieces->options */ |
| inName+=8; |
| c=*inName; |
| if(c==0) { |
| pArgs->options=(pPieces->options&=~UCNV_OPTION_VERSION); |
| return; |
| } else if((uint8_t)(c-'0')<10) { |
| pArgs->options=pPieces->options=(pPieces->options&~UCNV_OPTION_VERSION)|(uint32_t)(c-'0'); |
| ++inName; |
| } |
| } else if(uprv_strncmp(inName, "swaplfnl", 8)==0) { |
| inName+=8; |
| pArgs->options=(pPieces->options|=UCNV_OPTION_SWAP_LFNL); |
| /* add processing for new options here with another } else if(uprv_strncmp(inName, "option-name=", XX)==0) { */ |
| } else { |
| /* ignore any other options until we define some */ |
| while(((c = *inName++) != 0) && (c != UCNV_OPTION_SEP_CHAR)) { |
| } |
| if(c==0) { |
| return; |
| } |
| } |
| } |
| } |
| |
| /*Logic determines if the converter is Algorithmic AND/OR cached |
| *depending on that: |
| * -we either go to get data from disk and cache it (Data=TRUE, Cached=False) |
| * -Get it from a Hashtable (Data=X, Cached=TRUE) |
| * -Call dataConverter initializer (Data=TRUE, Cached=TRUE) |
| * -Call AlgorithmicConverter initializer (Data=FALSE, Cached=TRUE) |
| */ |
| U_CFUNC UConverterSharedData * |
| ucnv_loadSharedData(const char *converterName, |
| UConverterNamePieces *pPieces, |
| UConverterLoadArgs *pArgs, |
| UErrorCode * err) { |
| UConverterNamePieces stackPieces; |
| UConverterLoadArgs stackArgs; |
| UConverterSharedData *mySharedConverterData = NULL; |
| UErrorCode internalErrorCode = U_ZERO_ERROR; |
| UBool mayContainOption = TRUE; |
| UBool checkForAlgorithmic = TRUE; |
| |
| if (U_FAILURE (*err)) { |
| return NULL; |
| } |
| |
| if(pPieces == NULL) { |
| if(pArgs != NULL) { |
| /* |
| * Bad: We may set pArgs pointers to stackPieces fields |
| * which will be invalid after this function returns. |
| */ |
| *err = U_INTERNAL_PROGRAM_ERROR; |
| return NULL; |
| } |
| pPieces = &stackPieces; |
| } |
| if(pArgs == NULL) { |
| uprv_memset(&stackArgs, 0, sizeof(stackArgs)); |
| stackArgs.size = (int32_t)sizeof(stackArgs); |
| pArgs = &stackArgs; |
| } |
| |
| pPieces->cnvName[0] = 0; |
| pPieces->locale[0] = 0; |
| pPieces->options = 0; |
| |
| pArgs->name = converterName; |
| pArgs->locale = pPieces->locale; |
| pArgs->options = pPieces->options; |
| |
| /* In case "name" is NULL we want to open the default converter. */ |
| if (converterName == NULL) { |
| #if U_CHARSET_IS_UTF8 |
| pArgs->name = "UTF-8"; |
| return (UConverterSharedData *)converterData[UCNV_UTF8]; |
| #else |
| /* Call ucnv_getDefaultName first to query the name from the OS. */ |
| pArgs->name = ucnv_getDefaultName(); |
| if (pArgs->name == NULL) { |
| *err = U_MISSING_RESOURCE_ERROR; |
| return NULL; |
| } |
| mySharedConverterData = (UConverterSharedData *)gDefaultAlgorithmicSharedData; |
| checkForAlgorithmic = FALSE; |
| mayContainOption = gDefaultConverterContainsOption; |
| /* the default converter name is already canonical */ |
| #endif |
| } |
| else if(UCNV_FAST_IS_UTF8(converterName)) { |
| /* fastpath for UTF-8 */ |
| pArgs->name = "UTF-8"; |
| return (UConverterSharedData *)converterData[UCNV_UTF8]; |
| } |
| else { |
| /* separate the converter name from the options */ |
| parseConverterOptions(converterName, pPieces, pArgs, err); |
| if (U_FAILURE(*err)) { |
| /* Very bad name used. */ |
| return NULL; |
| } |
| |
| /* get the canonical converter name */ |
| pArgs->name = ucnv_io_getConverterName(pArgs->name, &mayContainOption, &internalErrorCode); |
| if (U_FAILURE(internalErrorCode) || pArgs->name == NULL) { |
| /* |
| * set the input name in case the converter was added |
| * without updating the alias table, or when there is no alias table |
| */ |
| pArgs->name = pPieces->cnvName; |
| } else if (internalErrorCode == U_AMBIGUOUS_ALIAS_WARNING) { |
| *err = U_AMBIGUOUS_ALIAS_WARNING; |
| } |
| } |
| |
| /* separate the converter name from the options */ |
| if(mayContainOption && pArgs->name != pPieces->cnvName) { |
| parseConverterOptions(pArgs->name, pPieces, pArgs, err); |
| } |
| |
| /* get the shared data for an algorithmic converter, if it is one */ |
| if (checkForAlgorithmic) { |
| mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName(pArgs->name); |
| } |
| if (mySharedConverterData == NULL) |
| { |
| /* it is a data-based converter, get its shared data. */ |
| /* Hold the cnvCacheMutex through the whole process of checking the */ |
| /* converter data cache, and adding new entries to the cache */ |
| /* to prevent other threads from modifying the cache during the */ |
| /* process. */ |
| pArgs->nestedLoads=1; |
| pArgs->pkg=NULL; |
| |
| umtx_lock(&cnvCacheMutex); |
| mySharedConverterData = ucnv_load(pArgs, err); |
| umtx_unlock(&cnvCacheMutex); |
| if (U_FAILURE (*err) || (mySharedConverterData == NULL)) |
| { |
| return NULL; |
| } |
| } |
| |
| return mySharedConverterData; |
| } |
| |
| U_CAPI UConverter * |
| ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err) |
| { |
| UConverterNamePieces stackPieces; |
| UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; |
| UConverterSharedData *mySharedConverterData; |
| |
| UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); |
| |
| if(U_SUCCESS(*err)) { |
| UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open converter %s", converterName); |
| |
| mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); |
| |
| myUConverter = ucnv_createConverterFromSharedData( |
| myUConverter, mySharedConverterData, |
| &stackArgs, |
| err); |
| |
| if(U_SUCCESS(*err)) { |
| UTRACE_EXIT_PTR_STATUS(myUConverter, *err); |
| return myUConverter; |
| } |
| } |
| |
| /* exit with error */ |
| UTRACE_EXIT_STATUS(*err); |
| return NULL; |
| } |
| |
| U_CFUNC UBool |
| ucnv_canCreateConverter(const char *converterName, UErrorCode *err) { |
| UConverter myUConverter; |
| UConverterNamePieces stackPieces; |
| UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; |
| UConverterSharedData *mySharedConverterData; |
| |
| UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); |
| |
| if(U_SUCCESS(*err)) { |
| UTRACE_DATA1(UTRACE_OPEN_CLOSE, "test if can open converter %s", converterName); |
| |
| stackArgs.onlyTestIsLoadable=TRUE; |
| mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); |
| ucnv_createConverterFromSharedData( |
| &myUConverter, mySharedConverterData, |
| &stackArgs, |
| err); |
| ucnv_unloadSharedDataIfReady(mySharedConverterData); |
| } |
| |
| UTRACE_EXIT_STATUS(*err); |
| return U_SUCCESS(*err); |
| } |
| |
| UConverter * |
| ucnv_createAlgorithmicConverter(UConverter *myUConverter, |
| UConverterType type, |
| const char *locale, uint32_t options, |
| UErrorCode *err) { |
| UConverter *cnv; |
| const UConverterSharedData *sharedData; |
| UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; |
| |
| UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_ALGORITHMIC); |
| UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open algorithmic converter type %d", (int32_t)type); |
| |
| if(type<0 || UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES<=type) { |
| *err = U_ILLEGAL_ARGUMENT_ERROR; |
| UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); |
| return NULL; |
| } |
| |
| sharedData = converterData[type]; |
| if(sharedData == NULL || sharedData->isReferenceCounted) { |
| /* not a valid type, or not an algorithmic converter */ |
| *err = U_ILLEGAL_ARGUMENT_ERROR; |
| UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); |
| return NULL; |
| } |
| |
| stackArgs.name = ""; |
| stackArgs.options = options; |
| stackArgs.locale=locale; |
| cnv = ucnv_createConverterFromSharedData( |
| myUConverter, (UConverterSharedData *)sharedData, |
| &stackArgs, err); |
| |
| UTRACE_EXIT_PTR_STATUS(cnv, *err); |
| return cnv; |
| } |
| |
| U_CFUNC UConverter* |
| ucnv_createConverterFromPackage(const char *packageName, const char *converterName, UErrorCode * err) |
| { |
| UConverter *myUConverter; |
| UConverterSharedData *mySharedConverterData; |
| UConverterNamePieces stackPieces; |
| UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; |
| |
| UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_PACKAGE); |
| |
| if(U_FAILURE(*err)) { |
| UTRACE_EXIT_STATUS(*err); |
| return NULL; |
| } |
| |
| UTRACE_DATA2(UTRACE_OPEN_CLOSE, "open converter %s from package %s", converterName, packageName); |
| |
| /* first, get the options out of the converterName string */ |
| stackPieces.cnvName[0] = 0; |
| stackPieces.locale[0] = 0; |
| stackPieces.options = 0; |
| parseConverterOptions(converterName, &stackPieces, &stackArgs, err); |
| if (U_FAILURE(*err)) { |
| /* Very bad name used. */ |
| UTRACE_EXIT_STATUS(*err); |
| return NULL; |
| } |
| stackArgs.nestedLoads=1; |
| stackArgs.pkg=packageName; |
| |
| /* open the data, unflatten the shared structure */ |
| mySharedConverterData = createConverterFromFile(&stackArgs, err); |
| |
| if (U_FAILURE(*err)) { |
| UTRACE_EXIT_STATUS(*err); |
| return NULL; |
| } |
| |
| /* create the actual converter */ |
| myUConverter = ucnv_createConverterFromSharedData(NULL, mySharedConverterData, &stackArgs, err); |
| |
| if (U_FAILURE(*err)) { |
| ucnv_close(myUConverter); |
| UTRACE_EXIT_STATUS(*err); |
| return NULL; |
| } |
| |
| UTRACE_EXIT_PTR_STATUS(myUConverter, *err); |
| return myUConverter; |
| } |
| |
| |
| U_CFUNC UConverter* |
| ucnv_createConverterFromSharedData(UConverter *myUConverter, |
| UConverterSharedData *mySharedConverterData, |
| UConverterLoadArgs *pArgs, |
| UErrorCode *err) |
| { |
| UBool isCopyLocal; |
| |
| if(U_FAILURE(*err)) { |
| ucnv_unloadSharedDataIfReady(mySharedConverterData); |
| return myUConverter; |
| } |
| if(myUConverter == NULL) |
| { |
| myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter)); |
| if(myUConverter == NULL) |
| { |
| *err = U_MEMORY_ALLOCATION_ERROR; |
| ucnv_unloadSharedDataIfReady(mySharedConverterData); |
| return NULL; |
| } |
| isCopyLocal = FALSE; |
| } else { |
| isCopyLocal = TRUE; |
| } |
| |
| /* initialize the converter */ |
| uprv_memset(myUConverter, 0, sizeof(UConverter)); |
| myUConverter->isCopyLocal = isCopyLocal; |
| /*myUConverter->isExtraLocal = FALSE;*/ /* Set by the memset call */ |
| myUConverter->sharedData = mySharedConverterData; |
| myUConverter->options = pArgs->options; |
| if(!pArgs->onlyTestIsLoadable) { |
| myUConverter->preFromUFirstCP = U_SENTINEL; |
| myUConverter->fromCharErrorBehaviour = UCNV_TO_U_DEFAULT_CALLBACK; |
| myUConverter->fromUCharErrorBehaviour = UCNV_FROM_U_DEFAULT_CALLBACK; |
| myUConverter->toUnicodeStatus = mySharedConverterData->toUnicodeStatus; |
| myUConverter->maxBytesPerUChar = mySharedConverterData->staticData->maxBytesPerChar; |
| myUConverter->subChar1 = mySharedConverterData->staticData->subChar1; |
| myUConverter->subCharLen = mySharedConverterData->staticData->subCharLen; |
| myUConverter->subChars = (uint8_t *)myUConverter->subUChars; |
| uprv_memcpy(myUConverter->subChars, mySharedConverterData->staticData->subChar, myUConverter->subCharLen); |
| myUConverter->toUCallbackReason = UCNV_ILLEGAL; /* default reason to invoke (*fromCharErrorBehaviour) */ |
| } |
| |
| if(mySharedConverterData->impl->open != NULL) { |
| mySharedConverterData->impl->open(myUConverter, pArgs, err); |
| if(U_FAILURE(*err) && !pArgs->onlyTestIsLoadable) { |
| /* don't ucnv_close() if onlyTestIsLoadable because not fully initialized */ |
| ucnv_close(myUConverter); |
| return NULL; |
| } |
| } |
| |
| return myUConverter; |
| } |
| |
| /*Frees all shared immutable objects that aren't referred to (reference count = 0) |
| */ |
| U_CAPI int32_t U_EXPORT2 |
| ucnv_flushCache () |
| { |
| UConverterSharedData *mySharedData = NULL; |
| int32_t pos; |
| int32_t tableDeletedNum = 0; |
| const UHashElement *e; |
| /*UErrorCode status = U_ILLEGAL_ARGUMENT_ERROR;*/ |
| int32_t i, remaining; |
| |
| UTRACE_ENTRY_OC(UTRACE_UCNV_FLUSH_CACHE); |
| |
| /* Close the default converter without creating a new one so that everything will be flushed. */ |
| u_flushDefaultConverter(); |
| |
| /*if shared data hasn't even been lazy evaluated yet |
| * return 0 |
| */ |
| if (SHARED_DATA_HASHTABLE == NULL) { |
| UTRACE_EXIT_VALUE((int32_t)0); |
| return 0; |
| } |
| |
| /*creates an enumeration to iterate through every element in the |
| * table |
| * |
| * Synchronization: holding cnvCacheMutex will prevent any other thread from |
| * accessing or modifying the hash table during the iteration. |
| * The reference count of an entry may be decremented by |
| * ucnv_close while the iteration is in process, but this is |
| * benign. It can't be incremented (in ucnv_createConverter()) |
| * because the sequence of looking up in the cache + incrementing |
| * is protected by cnvCacheMutex. |
| */ |
| umtx_lock(&cnvCacheMutex); |
| /* |
| * double loop: A delta/extension-only converter has a pointer to its base table's |
| * shared data; the first iteration of the outer loop may see the delta converter |
| * before the base converter, and unloading the delta converter may get the base |
| * converter's reference counter down to 0. |
| */ |
| i = 0; |
| do { |
| remaining = 0; |
| pos = UHASH_FIRST; |
| while ((e = uhash_nextElement (SHARED_DATA_HASHTABLE, &pos)) != NULL) |
| { |
| mySharedData = (UConverterSharedData *) e->value.pointer; |
| /*deletes only if reference counter == 0 */ |
| if (mySharedData->referenceCounter == 0) |
| { |
| tableDeletedNum++; |
| |
| UCNV_DEBUG_LOG("del",mySharedData->staticData->name,mySharedData); |
| |
| uhash_removeElement(SHARED_DATA_HASHTABLE, e); |
| mySharedData->sharedDataCached = FALSE; |
| ucnv_deleteSharedConverterData (mySharedData); |
| } else { |
| ++remaining; |
| } |
| } |
| } while(++i == 1 && remaining > 0); |
| umtx_unlock(&cnvCacheMutex); |
| |
| UTRACE_DATA1(UTRACE_INFO, "ucnv_flushCache() exits with %d converters remaining", remaining); |
| |
| UTRACE_EXIT_VALUE(tableDeletedNum); |
| return tableDeletedNum; |
| } |
| |
| /* available converters list --------------------------------------------------- */ |
| |
| static void U_CALLCONV initAvailableConvertersList(UErrorCode &errCode) { |
| U_ASSERT(gAvailableConverterCount == 0); |
| U_ASSERT(gAvailableConverters == NULL); |
| |
| ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); |
| UEnumeration *allConvEnum = ucnv_openAllNames(&errCode); |
| int32_t allConverterCount = uenum_count(allConvEnum, &errCode); |
| if (U_FAILURE(errCode)) { |
| return; |
| } |
| |
| /* We can't have more than "*converterTable" converters to open */ |
| gAvailableConverters = (const char **) uprv_malloc(allConverterCount * sizeof(char*)); |
| if (!gAvailableConverters) { |
| errCode = U_MEMORY_ALLOCATION_ERROR; |
| return; |
| } |
| |
| /* Open the default converter to make sure that it has first dibs in the hash table. */ |
| UErrorCode localStatus = U_ZERO_ERROR; |
| UConverter tempConverter; |
| ucnv_close(ucnv_createConverter(&tempConverter, NULL, &localStatus)); |
| |
| gAvailableConverterCount = 0; |
| |
| for (int32_t idx = 0; idx < allConverterCount; idx++) { |
| localStatus = U_ZERO_ERROR; |
| const char *converterName = uenum_next(allConvEnum, NULL, &localStatus); |
| if (ucnv_canCreateConverter(converterName, &localStatus)) { |
| gAvailableConverters[gAvailableConverterCount++] = converterName; |
| } |
| } |
| |
| uenum_close(allConvEnum); |
| } |
| |
| |
| static UBool haveAvailableConverterList(UErrorCode *pErrorCode) { |
| umtx_initOnce(gAvailableConvertersInitOnce, &initAvailableConvertersList, *pErrorCode); |
| return U_SUCCESS(*pErrorCode); |
| } |
| |
| U_CFUNC uint16_t |
| ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode) { |
| if (haveAvailableConverterList(pErrorCode)) { |
| return gAvailableConverterCount; |
| } |
| return 0; |
| } |
| |
| U_CFUNC const char * |
| ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) { |
| if (haveAvailableConverterList(pErrorCode)) { |
| if (n < gAvailableConverterCount) { |
| return gAvailableConverters[n]; |
| } |
| *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; |
| } |
| return NULL; |
| } |
| |
| /* default converter name --------------------------------------------------- */ |
| |
| #if !U_CHARSET_IS_UTF8 |
| /* |
| Copy the canonical converter name. |
| ucnv_getDefaultName must be thread safe, which can call this function. |
| |
| ucnv_setDefaultName calls this function and it doesn't have to be |
| thread safe because there is no reliable/safe way to reset the |
| converter in use in all threads. If you did reset the converter, you |
| would not be sure that retrieving a default converter for one string |
| would be the same type of default converter for a successive string. |
| Since the name is returned via ucnv_getDefaultName without copying, |
| you shouldn't be modifying or deleting the string from a separate thread. |
| */ |
| static inline void |
| internalSetName(const char *name, UErrorCode *status) { |
| UConverterNamePieces stackPieces; |
| UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; |
| int32_t length=(int32_t)(uprv_strlen(name)); |
| UBool containsOption = (UBool)(uprv_strchr(name, UCNV_OPTION_SEP_CHAR) != NULL); |
| const UConverterSharedData *algorithmicSharedData; |
| |
| stackArgs.name = name; |
| if(containsOption) { |
| stackPieces.cnvName[0] = 0; |
| stackPieces.locale[0] = 0; |
| stackPieces.options = 0; |
| parseConverterOptions(name, &stackPieces, &stackArgs, status); |
| if(U_FAILURE(*status)) { |
| return; |
| } |
| } |
| algorithmicSharedData = getAlgorithmicTypeFromName(stackArgs.name); |
| |
| umtx_lock(&cnvCacheMutex); |
| |
| gDefaultAlgorithmicSharedData = algorithmicSharedData; |
| gDefaultConverterContainsOption = containsOption; |
| uprv_memcpy(gDefaultConverterNameBuffer, name, length); |
| gDefaultConverterNameBuffer[length]=0; |
| |
| /* gDefaultConverterName MUST be the last global var set by this function. */ |
| /* It is the variable checked in ucnv_getDefaultName() to see if initialization is required. */ |
| // But there is nothing here preventing that from being reordered, either by the compiler |
| // or hardware. I'm adding the mutex to ucnv_getDefaultName for now. UMTX_CHECK is not enough. |
| // -- Andy |
| gDefaultConverterName = gDefaultConverterNameBuffer; |
| |
| ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); |
| |
| umtx_unlock(&cnvCacheMutex); |
| } |
| #endif |
| |
| /* |
| * In order to be really thread-safe, the get function would have to take |
| * a buffer parameter and copy the current string inside a mutex block. |
| * This implementation only tries to be really thread-safe while |
| * setting the name. |
| * It assumes that setting a pointer is atomic. |
| */ |
| |
| U_CAPI const char* U_EXPORT2 |
| ucnv_getDefaultName() { |
| #if U_CHARSET_IS_UTF8 |
| return "UTF-8"; |
| #else |
| /* local variable to be thread-safe */ |
| const char *name; |
| |
| /* |
| Concurrent calls to ucnv_getDefaultName must be thread safe, |
| but ucnv_setDefaultName is not thread safe. |
| */ |
| { |
| icu::Mutex lock(&cnvCacheMutex); |
| name = gDefaultConverterName; |
| } |
| if(name==NULL) { |
| UErrorCode errorCode = U_ZERO_ERROR; |
| UConverter *cnv = NULL; |
| |
| name = uprv_getDefaultCodepage(); |
| |
| /* if the name is there, test it out and get the canonical name with options */ |
| if(name != NULL) { |
| cnv = ucnv_open(name, &errorCode); |
| if(U_SUCCESS(errorCode) && cnv != NULL) { |
| name = ucnv_getName(cnv, &errorCode); |
| } |
| } |
| |
| if(name == NULL || name[0] == 0 |
| || U_FAILURE(errorCode) || cnv == NULL |
| || uprv_strlen(name)>=sizeof(gDefaultConverterNameBuffer)) |
| { |
| /* Panic time, let's use a fallback. */ |
| #if (U_CHARSET_FAMILY == U_ASCII_FAMILY) |
| name = "US-ASCII"; |
| /* there is no 'algorithmic' converter for EBCDIC */ |
| #elif U_PLATFORM == U_PF_OS390 |
| name = "ibm-1047_P100-1995" UCNV_SWAP_LFNL_OPTION_STRING; |
| #else |
| name = "ibm-37_P100-1995"; |
| #endif |
| } |
| |
| internalSetName(name, &errorCode); |
| |
| /* The close may make the current name go away. */ |
| ucnv_close(cnv); |
| } |
| |
| return name; |
| #endif |
| } |
| |
| #if U_CHARSET_IS_UTF8 |
| U_CAPI void U_EXPORT2 ucnv_setDefaultName(const char *) {} |
| #else |
| /* |
| This function is not thread safe, and it can't be thread safe. |
| See internalSetName or the API reference for details. |
| */ |
| U_CAPI void U_EXPORT2 |
| ucnv_setDefaultName(const char *converterName) { |
| if(converterName==NULL) { |
| /* reset to the default codepage */ |
| gDefaultConverterName=NULL; |
| } else { |
| UErrorCode errorCode = U_ZERO_ERROR; |
| UConverter *cnv = NULL; |
| const char *name = NULL; |
| |
| /* if the name is there, test it out and get the canonical name with options */ |
| cnv = ucnv_open(converterName, &errorCode); |
| if(U_SUCCESS(errorCode) && cnv != NULL) { |
| name = ucnv_getName(cnv, &errorCode); |
| } |
| |
| if(U_SUCCESS(errorCode) && name!=NULL) { |
| internalSetName(name, &errorCode); |
| } |
| /* else this converter is bad to use. Don't change it to a bad value. */ |
| |
| /* The close may make the current name go away. */ |
| ucnv_close(cnv); |
| |
| /* reset the converter cache */ |
| u_flushDefaultConverter(); |
| } |
| } |
| #endif |
| |
| /* data swapping ------------------------------------------------------------ */ |
| |
| /* most of this might belong more properly into ucnvmbcs.c, but that is so large */ |
| |
| #if !UCONFIG_NO_LEGACY_CONVERSION |
| /* |
| * Converts an ICU .cnv conversion table (data format "cnvt", format version 6.2 or |
| * higher) from inData to outData by applying the UDataSwapper callbacks |
| * (swapArray16, swapArray32, swapInvChars) to each section of the file. |
| * Only tables whose conversionType is UCNV_MBCS are supported. |
| * |
| * ds: swapper that supplies readUInt32 and the swap callbacks. |
| * inData: input, starting with the data header. |
| * length: number of input bytes; if negative, this function only computes |
| * sizes and performs none of its own writes to outData. |
| * outData: output; the copy steps are skipped where the input and output |
| * pointers are equal. |
| * pErrorCode: set to U_UNSUPPORTED_ERROR for an unrecognized data format, |
| * _MBCSHeader version, MBCS output type or conversion type, and to |
| * U_INDEX_OUTOFBOUNDS_ERROR if length is too small for the table. |
| * |
| * Returns headerSize + static data size + MBCS data size, or 0 on failure. |
| */ |
| U_CAPI int32_t U_EXPORT2 |
| ucnv_swap(const UDataSwapper *ds, |
| const void *inData, int32_t length, void *outData, |
| UErrorCode *pErrorCode) { |
| const UDataInfo *pInfo; |
| int32_t headerSize; |
| |
| const uint8_t *inBytes; |
| uint8_t *outBytes; |
| |
| uint32_t offset, count, staticDataSize; |
| int32_t size; |
| |
| const UConverterStaticData *inStaticData; |
| UConverterStaticData *outStaticData; |
| |
| const _MBCSHeader *inMBCSHeader; |
| _MBCSHeader *outMBCSHeader; |
| _MBCSHeader mbcsHeader; |
| uint32_t mbcsHeaderLength; |
| UBool noFromU=FALSE; |
| |
| uint8_t outputType; |
| |
| int32_t maxFastUChar, mbcsIndexLength; |
| |
| const int32_t *inExtIndexes; |
| int32_t extOffset; |
| |
| /* udata_swapDataHeader checks the arguments */ |
| headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); |
| if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
| return 0; |
| } |
| |
| /* check data format and format version */ |
| pInfo=(const UDataInfo *)((const char *)inData+4); |
| if(!( |
| pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ |
| pInfo->dataFormat[1]==0x6e && |
| pInfo->dataFormat[2]==0x76 && |
| pInfo->dataFormat[3]==0x74 && |
| pInfo->formatVersion[0]==6 && |
| pInfo->formatVersion[1]>=2 |
| )) { |
| udata_printError(ds, "ucnv_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not recognized as an ICU .cnv conversion table\n", |
| pInfo->dataFormat[0], pInfo->dataFormat[1], |
| pInfo->dataFormat[2], pInfo->dataFormat[3], |
| pInfo->formatVersion[0], pInfo->formatVersion[1]); |
| *pErrorCode=U_UNSUPPORTED_ERROR; |
| return 0; |
| } |
| |
| inBytes=(const uint8_t *)inData+headerSize; |
| outBytes=(uint8_t *)outData+headerSize; |
| |
| /* read the initial UConverterStaticData structure after the UDataInfo header */ |
| inStaticData=(const UConverterStaticData *)inBytes; |
| outStaticData=(UConverterStaticData *)outBytes; |
| /* length<0: only read structSize; length>=0: also verify that the static data fits into the input */ |
| if(length<0) { |
| staticDataSize=ds->readUInt32(inStaticData->structSize); |
| } else { |
| length-=headerSize; |
| if( length<(int32_t)sizeof(UConverterStaticData) || |
| (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize)) |
| ) { |
| udata_printError(ds, "ucnv_swap(): too few bytes (%d after header) for an ICU .cnv conversion table\n", |
| length); |
| *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
| return 0; |
| } |
| } |
| /* from here on, every write to outData in this function is guarded by length>=0 */ |
| if(length>=0) { |
| /* swap the static data */ |
| if(inStaticData!=outStaticData) { |
| uprv_memcpy(outStaticData, inStaticData, staticDataSize); |
| } |
| |
| ds->swapArray32(ds, &inStaticData->structSize, 4, |
| &outStaticData->structSize, pErrorCode); |
| ds->swapArray32(ds, &inStaticData->codepage, 4, |
| &outStaticData->codepage, pErrorCode); |
| |
| ds->swapInvChars(ds, inStaticData->name, (int32_t)uprv_strlen(inStaticData->name), |
| outStaticData->name, pErrorCode); |
| if(U_FAILURE(*pErrorCode)) { |
| udata_printError(ds, "ucnv_swap(): error swapping converter name\n"); |
| return 0; |
| } |
| } |
| |
| inBytes+=staticDataSize; |
| outBytes+=staticDataSize; |
| if(length>=0) { |
| length-=(int32_t)staticDataSize; |
| } |
| |
| /* check for supported conversionType values */ |
| if(inStaticData->conversionType==UCNV_MBCS) { |
| /* swap MBCS data */ |
| inMBCSHeader=(const _MBCSHeader *)inBytes; |
| outMBCSHeader=(_MBCSHeader *)outBytes; |
| |
| if(0<=length && length<(int32_t)sizeof(_MBCSHeader)) { |
| udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", |
| length); |
| *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
| return 0; |
| } |
| if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) { |
| mbcsHeaderLength=MBCS_HEADER_V4_LENGTH; |
| } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 && |
| ((mbcsHeader.options=ds->readUInt32(inMBCSHeader->options))& |
| MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0 |
| ) { |
| mbcsHeaderLength=mbcsHeader.options&MBCS_OPT_LENGTH_MASK; |
| noFromU=(UBool)((mbcsHeader.options&MBCS_OPT_NO_FROM_U)!=0); |
| } else { |
| udata_printError(ds, "ucnv_swap(): unsupported _MBCSHeader.version %d.%d\n", |
| inMBCSHeader->version[0], inMBCSHeader->version[1]); |
| *pErrorCode=U_UNSUPPORTED_ERROR; |
| return 0; |
| } |
| /* build a local copy of the header in this platform's byte order; the swapping below uses these values */ |
| uprv_memcpy(mbcsHeader.version, inMBCSHeader->version, 4); |
| mbcsHeader.countStates= ds->readUInt32(inMBCSHeader->countStates); |
| mbcsHeader.countToUFallbacks= ds->readUInt32(inMBCSHeader->countToUFallbacks); |
| mbcsHeader.offsetToUCodeUnits= ds->readUInt32(inMBCSHeader->offsetToUCodeUnits); |
| mbcsHeader.offsetFromUTable= ds->readUInt32(inMBCSHeader->offsetFromUTable); |
| mbcsHeader.offsetFromUBytes= ds->readUInt32(inMBCSHeader->offsetFromUBytes); |
| mbcsHeader.flags= ds->readUInt32(inMBCSHeader->flags); |
| mbcsHeader.fromUBytesLength= ds->readUInt32(inMBCSHeader->fromUBytesLength); |
| /* mbcsHeader.options have been read above */ |
| /* flags: the low byte is the output type, the remaining bits are the offset of the extension data */ |
| extOffset=(int32_t)(mbcsHeader.flags>>8); |
| outputType=(uint8_t)mbcsHeader.flags; |
| if(noFromU && outputType==MBCS_OUTPUT_1) { |
| udata_printError(ds, "ucnv_swap(): unsupported combination of makeconv --small with SBCS\n"); |
| *pErrorCode=U_UNSUPPORTED_ERROR; |
| return 0; |
| } |
| |
| /* make sure that the output type is known */ |
| switch(outputType) { |
| case MBCS_OUTPUT_1: |
| case MBCS_OUTPUT_2: |
| case MBCS_OUTPUT_3: |
| case MBCS_OUTPUT_4: |
| case MBCS_OUTPUT_3_EUC: |
| case MBCS_OUTPUT_4_EUC: |
| case MBCS_OUTPUT_2_SISO: |
| case MBCS_OUTPUT_EXT_ONLY: |
| /* OK */ |
| break; |
| default: |
| udata_printError(ds, "ucnv_swap(): unsupported MBCS output type 0x%x\n", |
| outputType); |
| *pErrorCode=U_UNSUPPORTED_ERROR; |
| return 0; |
| } |
| |
| /* calculate the length of the MBCS data */ |
| |
| /* |
| * utf8Friendly MBCS files (mbcsHeader.version 4.3) |
| * contain an additional mbcsIndex table: |
| * uint16_t[(maxFastUChar+1)>>6]; |
| * where maxFastUChar=((mbcsHeader.version[2]<<8)|0xff). |
| */ |
| maxFastUChar=0; |
| mbcsIndexLength=0; |
| if( outputType!=MBCS_OUTPUT_EXT_ONLY && outputType!=MBCS_OUTPUT_1 && |
| mbcsHeader.version[1]>=3 && (maxFastUChar=mbcsHeader.version[2])!=0 |
| ) { |
| maxFastUChar=(maxFastUChar<<8)|0xff; |
| mbcsIndexLength=((maxFastUChar+1)>>6)*2; /* number of bytes */ |
| } |
| |
| if(extOffset==0) { |
| size=(int32_t)(mbcsHeader.offsetFromUBytes+mbcsIndexLength); |
| if(!noFromU) { |
| size+=(int32_t)mbcsHeader.fromUBytesLength; |
| } |
| |
| /* avoid compiler warnings - not otherwise necessary, and the value does not matter */ |
| inExtIndexes=NULL; |
| } else { |
| /* there is extension data after the base data, see ucnv_ext.h */ |
| if(length>=0 && length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) { |
| udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n", |
| length); |
| *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
| return 0; |
| } |
| |
| inExtIndexes=(const int32_t *)(inBytes+extOffset); |
| size=extOffset+udata_readInt32(ds, inExtIndexes[UCNV_EXT_SIZE]); |
| } |
| /* size is now the number of bytes after the static data: the base table data, or extOffset plus the size stored in the extension indexes */ |
| if(length>=0) { |
| if(length<size) { |
| udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", |
| length); |
| *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
| return 0; |
| } |
| |
| /* copy the data for inaccessible bytes */ |
| if(inBytes!=outBytes) { |
| uprv_memcpy(outBytes, inBytes, size); |
| } |
| |
| /* swap the MBCSHeader, except for the version field */ |
| count=mbcsHeaderLength*4; |
| ds->swapArray32(ds, &inMBCSHeader->countStates, count-4, |
| &outMBCSHeader->countStates, pErrorCode); |
| |
| if(outputType==MBCS_OUTPUT_EXT_ONLY) { |
| /* |
| * extension-only file, |
| * contains a base name instead of normal base table data |
| */ |
| |
| /* swap the base name, between the header and the extension data */ |
| const char *inBaseName=(const char *)inBytes+count; |
| char *outBaseName=(char *)outBytes+count; |
| ds->swapInvChars(ds, inBaseName, (int32_t)uprv_strlen(inBaseName), |
| outBaseName, pErrorCode); |
| } else { |
| /* normal file with base table data */ |
| |
| /* swap the state table, 1kB per state */ |
| offset=count; |
| count=mbcsHeader.countStates*1024; |
| ds->swapArray32(ds, inBytes+offset, (int32_t)count, |
| outBytes+offset, pErrorCode); |
| |
| /* swap the toUFallbacks[] */ |
| offset+=count; |
| count=mbcsHeader.countToUFallbacks*8; |
| ds->swapArray32(ds, inBytes+offset, (int32_t)count, |
| outBytes+offset, pErrorCode); |
| |
| /* swap the unicodeCodeUnits[] */ |
| offset=mbcsHeader.offsetToUCodeUnits; |
| count=mbcsHeader.offsetFromUTable-offset; |
| ds->swapArray16(ds, inBytes+offset, (int32_t)count, |
| outBytes+offset, pErrorCode); |
| |
| /* offset to the stage 1 table, independent of the outputType */ |
| offset=mbcsHeader.offsetFromUTable; |
| |
| if(outputType==MBCS_OUTPUT_1) { |
| /* SBCS: swap the fromU tables, all 16 bits wide */ |
| count=(mbcsHeader.offsetFromUBytes-offset)+mbcsHeader.fromUBytesLength; |
| ds->swapArray16(ds, inBytes+offset, (int32_t)count, |
| outBytes+offset, pErrorCode); |
| } else { |
| /* otherwise: swap the stage tables separately */ |
| |
| /* stage 1 table: uint16_t[0x440 or 0x40] */ |
| if(inStaticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) { |
| count=0x440*2; /* for all of Unicode */ |
| } else { |
| count=0x40*2; /* only BMP */ |
| } |
| ds->swapArray16(ds, inBytes+offset, (int32_t)count, |
| outBytes+offset, pErrorCode); |
| |
| /* stage 2 table: uint32_t[] */ |
| offset+=count; |
| count=mbcsHeader.offsetFromUBytes-offset; |
| ds->swapArray32(ds, inBytes+offset, (int32_t)count, |
| outBytes+offset, pErrorCode); |
| |
| /* stage 3/result bytes: sometimes uint16_t[] or uint32_t[] */ |
| offset=mbcsHeader.offsetFromUBytes; |
| count= noFromU ? 0 : mbcsHeader.fromUBytesLength; |
| switch(outputType) { |
| case MBCS_OUTPUT_2: |
| case MBCS_OUTPUT_3_EUC: |
| case MBCS_OUTPUT_2_SISO: |
| ds->swapArray16(ds, inBytes+offset, (int32_t)count, |
| outBytes+offset, pErrorCode); |
| break; |
| case MBCS_OUTPUT_4: |
| ds->swapArray32(ds, inBytes+offset, (int32_t)count, |
| outBytes+offset, pErrorCode); |
| break; |
| default: |
| /* just uint8_t[], nothing to swap */ |
| break; |
| } |
| /* the mbcsIndex table, if present, directly follows the stage 3/result bytes */ |
| if(mbcsIndexLength!=0) { |
| offset+=count; |
| count=mbcsIndexLength; |
| ds->swapArray16(ds, inBytes+offset, (int32_t)count, |
| outBytes+offset, pErrorCode); |
| } |
| } |
| } |
| |
| if(extOffset!=0) { |
| /* swap the extension data */ |
| inBytes+=extOffset; |
| outBytes+=extOffset; |
| |
| /* swap toUTable[] */ |
| offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_INDEX]); |
| length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_LENGTH]); /* length is reused as an element count from here on */ |
| ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); |
| |
| /* swap toUUChars[] */ |
| offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_INDEX]); |
| length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_LENGTH]); |
| ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); |
| |
| /* swap fromUTableUChars[] */ |
| offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_UCHARS_INDEX]); |
| length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_LENGTH]); |
| ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); |
| |
| /* swap fromUTableValues[] */ |
| offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_VALUES_INDEX]); |
| /* same length as for fromUTableUChars[] */ |
| ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); |
| |
| /* no need to swap fromUBytes[] */ |
| |
| /* swap fromUStage12[] */ |
| offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]); |
| length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]); |
| ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); |
| |
| /* swap fromUStage3[] */ |
| offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]); |
| length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]); |
| ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); |
| |
| /* swap fromUStage3b[] */ |
| offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]); |
| length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]); |
| ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); |
| |
| /* swap indexes[] last: all the reads above go through inExtIndexes, which this call may overwrite when input and output are the same buffer */ |
| length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_INDEXES_LENGTH]); |
| ds->swapArray32(ds, inBytes, length*4, outBytes, pErrorCode); |
| } |
| } |
| } else { |
| udata_printError(ds, "ucnv_swap(): unknown conversionType=%d!=UCNV_MBCS\n", |
| inStaticData->conversionType); |
| *pErrorCode=U_UNSUPPORTED_ERROR; |
| return 0; |
| } |
| |
| return headerSize+(int32_t)staticDataSize+size; |
| } |
| |
| #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |
| |
| #endif |