Blame - libicu/cts_headers/unicode/ucnv.h - platform/external/icu

blob: 58f271cfb5adf2cb34e9c52811e17de38ba2108a [file] [log] [blame]

Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1	// © 2016 and later: Unicode, Inc. and others.
				2	// License & terms of use: http://www.unicode.org/copyright.html
				3	/*
				4	**********************************************************************
				5	* Copyright (C) 1999-2014, International Business Machines
				6	* Corporation and others. All Rights Reserved.
				7	**********************************************************************
				8	* ucnv.h:
				9	* External APIs for the ICU's codeset conversion library
				10	* Bertrand A. Damiba
				11	*
				12	* Modification History:
				13	*
				14	* Date Name Description
				15	* 04/04/99 helena Fixed internal header inclusion.
				16	* 05/11/00 helena Added setFallback and usesFallback APIs.
				17	* 06/29/2000 helena Major rewrite of the callback APIs.
				18	* 12/07/2000 srl Update of documentation
				19	*/
				20
				21	/**
				22	* \file
				23	* \brief C API: Character conversion
				24	*
				25	* <h2>Character Conversion C API</h2>
				26	*
				27	* <p>This API is used to convert codepage or character encoded data to and
				28	* from UTF-16. You can open a converter with {@link ucnv_open() }. With that
				29	* converter, you can get its properties, set options, convert your data and
				30	* close the converter.</p>
				31	*
				32	* <p>Since many software programs recognize different converter names for
				33	* different types of converters, there are other functions in this API to
				34	* iterate over the converter aliases. The functions {@link ucnv_getAvailableName() },
				35	* {@link ucnv_getAlias() } and {@link ucnv_getStandardName() } are some of the
				36	* more frequently used alias functions to get this information.</p>
				37	*
				38	* <p>When a converter encounters an illegal, irregular, invalid or unmappable character
				39	* its default behavior is to use a substitution character to replace the
				40	* bad byte sequence. This behavior can be changed by using {@link ucnv_setFromUCallBack() }
				41	* or {@link ucnv_setToUCallBack() } on the converter. The header ucnv_err.h defines
				42	* many other callback actions that can be used instead of a character substitution.</p>
				43	*
				44	* <p>More information about this API can be found in our
				45	* <a href="http://icu-project.org/userguide/conversion.html">User's
				46	* Guide</a>.</p>
				47	*/
				48
				49	#ifndef UCNV_H
				50	#define UCNV_H
				51
				52	#include "unicode/ucnv_err.h"
				53	#include "unicode/uenum.h"
				54
				55	#if U_SHOW_CPLUSPLUS_API
				56	#include "unicode/localpointer.h"
				57	#endif // U_SHOW_CPLUSPLUS_API
				58
				59	#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN)
				60
				61	#define USET_DEFINED
				62
				63	/**
				64	* USet is the C API type corresponding to C++ class UnicodeSet.
				65	* It is forward-declared here to avoid including unicode/uset.h file if related
				66	* conversion APIs are not used.
				67	*
				68	* @see ucnv_getUnicodeSet
				69	* @stable ICU 2.4
				70	*/
				71	typedef struct USet USet;
				72
				73	#endif
				74
				75	#if !UCONFIG_NO_CONVERSION
				76
				77	U_CDECL_BEGIN
				78
				79	/** Maximum length of a converter name including the terminating NULL @stable ICU 2.0 */
				80	#define UCNV_MAX_CONVERTER_NAME_LENGTH 60
				81	/** Maximum length of a converter name including path and terminating NULL @stable ICU 2.0 */
				82	#define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH)
				83
				84	/** Shift in for EBCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */
				85	#define UCNV_SI 0x0F
				86	/** Shift out for EBCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */
				87	#define UCNV_SO 0x0E
				88
				89	/**
				90	* Enum for specifying basic types of converters
				91	* @see ucnv_getType
				92	* @stable ICU 2.0
				93	*/
				94	typedef enum {
				95	/** @stable ICU 2.0 */
				96	UCNV_UNSUPPORTED_CONVERTER = -1,
				97	/** @stable ICU 2.0 */
				98	UCNV_SBCS = 0,
				99	/** @stable ICU 2.0 */
				100	UCNV_DBCS = 1,
				101	/** @stable ICU 2.0 */
				102	UCNV_MBCS = 2,
				103	/** @stable ICU 2.0 */
				104	UCNV_LATIN_1 = 3,
				105	/** @stable ICU 2.0 */
				106	UCNV_UTF8 = 4,
				107	/** @stable ICU 2.0 */
				108	UCNV_UTF16_BigEndian = 5,
				109	/** @stable ICU 2.0 */
				110	UCNV_UTF16_LittleEndian = 6,
				111	/** @stable ICU 2.0 */
				112	UCNV_UTF32_BigEndian = 7,
				113	/** @stable ICU 2.0 */
				114	UCNV_UTF32_LittleEndian = 8,
				115	/** @stable ICU 2.0 */
				116	UCNV_EBCDIC_STATEFUL = 9,
				117	/** @stable ICU 2.0 */
				118	UCNV_ISO_2022 = 10,
				119
				120	/** @stable ICU 2.0 */
				121	UCNV_LMBCS_1 = 11,
				122	/** @stable ICU 2.0 */
				123	UCNV_LMBCS_2,
				124	/** @stable ICU 2.0 */
				125	UCNV_LMBCS_3,
				126	/** @stable ICU 2.0 */
				127	UCNV_LMBCS_4,
				128	/** @stable ICU 2.0 */
				129	UCNV_LMBCS_5,
				130	/** @stable ICU 2.0 */
				131	UCNV_LMBCS_6,
				132	/** @stable ICU 2.0 */
				133	UCNV_LMBCS_8,
				134	/** @stable ICU 2.0 */
				135	UCNV_LMBCS_11,
				136	/** @stable ICU 2.0 */
				137	UCNV_LMBCS_16,
				138	/** @stable ICU 2.0 */
				139	UCNV_LMBCS_17,
				140	/** @stable ICU 2.0 */
				141	UCNV_LMBCS_18,
				142	/** @stable ICU 2.0 */
				143	UCNV_LMBCS_19,
				144	/** @stable ICU 2.0 */
				145	UCNV_LMBCS_LAST = UCNV_LMBCS_19,
				146	/** @stable ICU 2.0 */
				147	UCNV_HZ,
				148	/** @stable ICU 2.0 */
				149	UCNV_SCSU,
				150	/** @stable ICU 2.0 */
				151	UCNV_ISCII,
				152	/** @stable ICU 2.0 */
				153	UCNV_US_ASCII,
				154	/** @stable ICU 2.0 */
				155	UCNV_UTF7,
				156	/** @stable ICU 2.2 */
				157	UCNV_BOCU1,
				158	/** @stable ICU 2.2 */
				159	UCNV_UTF16,
				160	/** @stable ICU 2.2 */
				161	UCNV_UTF32,
				162	/** @stable ICU 2.2 */
				163	UCNV_CESU8,
				164	/** @stable ICU 2.4 */
				165	UCNV_IMAP_MAILBOX,
				166	/** @stable ICU 4.8 */
				167	UCNV_COMPOUND_TEXT,
				168
				169	/* Number of converter types for which we have conversion routines. */
				170	UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES
				171	} UConverterType;
				172
				173	/**
				174	* Enum for specifying which platform a converter ID refers to.
				175	* The use of platform/CCSID is not recommended. See ucnv_openCCSID().
				176	*
				177	* @see ucnv_getPlatform
				178	* @see ucnv_openCCSID
				179	* @see ucnv_getCCSID
				180	* @stable ICU 2.0
				181	*/
				182	typedef enum {
				183	UCNV_UNKNOWN = -1,
				184	UCNV_IBM = 0
				185	} UConverterPlatform;
				186
				187	/**
				188	* Function pointer for error callback in the codepage to unicode direction.
				189	* Called when an error has occurred in conversion to unicode, or on open/close of the callback (see reason).
				190	* @param context Pointer to the callback's private data
				191	* @param args Information about the conversion in progress
				192	* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
				193	* @param length Size (in bytes) of the concerned codepage sequence
				194	* @param reason Defines the reason the callback was invoked
				195	* @param pErrorCode ICU error code in/out parameter.
				196	* For converter callback functions, set to a conversion error
				197	* before the call, and the callback may reset it to U_ZERO_ERROR.
				198	* @see ucnv_setToUCallBack
				199	* @see UConverterToUnicodeArgs
				200	* @stable ICU 2.0
				201	*/
				202	typedef void (U_EXPORT2 *UConverterToUCallback) (
				203	const void* context,
				204	UConverterToUnicodeArgs *args,
				205	const char *codeUnits,
				206	int32_t length,
				207	UConverterCallbackReason reason,
				208	UErrorCode *pErrorCode);
				209
				210	/**
				211	* Function pointer for error callback in the unicode to codepage direction.
				212	* Called when an error has occurred in conversion from unicode, or on open/close of the callback (see reason).
				213	* @param context Pointer to the callback's private data
				214	* @param args Information about the conversion in progress
				215	* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
				216	* @param length Number of UChars in the concerned Unicode sequence pointed to by codeUnits
				217	* @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
				218	* @param reason Defines the reason the callback was invoked
				219	* @param pErrorCode ICU error code in/out parameter.
				220	* For converter callback functions, set to a conversion error
				221	* before the call, and the callback may reset it to U_ZERO_ERROR.
				222	* @see ucnv_setFromUCallBack
				223	* @stable ICU 2.0
				224	*/
				225	typedef void (U_EXPORT2 *UConverterFromUCallback) (
				226	const void* context,
				227	UConverterFromUnicodeArgs *args,
				228	const UChar* codeUnits,
				229	int32_t length,
				230	UChar32 codePoint,
				231	UConverterCallbackReason reason,
				232	UErrorCode *pErrorCode);
				233
				234	U_CDECL_END
				235
				236	/**
				237	* Character that separates converter names from options and options from each other.
				238	* @see ucnv_open
				239	* @stable ICU 2.0
				240	*/
				241	#define UCNV_OPTION_SEP_CHAR ','
				242
				243	/**
				244	* String version of UCNV_OPTION_SEP_CHAR.
				245	* @see ucnv_open
				246	* @stable ICU 2.0
				247	*/
				248	#define UCNV_OPTION_SEP_STRING ","
				249
				250	/**
				251	* Character that separates a converter option from its value.
				252	* @see ucnv_open
				253	* @stable ICU 2.0
				254	*/
				255	#define UCNV_VALUE_SEP_CHAR '='
				256
				257	/**
				258	* String version of UCNV_VALUE_SEP_CHAR.
				259	* @see ucnv_open
				260	* @stable ICU 2.0
				261	*/
				262	#define UCNV_VALUE_SEP_STRING "="
				263
				264	/**
				265	* Converter option for specifying a locale.
				266	* For example, ucnv_open("SCSU,locale=ja", &errorCode);
				267	* See convrtrs.txt.
				268	*
				269	* @see ucnv_open
				270	* @stable ICU 2.0
				271	*/
				272	#define UCNV_LOCALE_OPTION_STRING ",locale="
				273
				274	/**
				275	* Converter option for specifying a version selector (0..9) for some converters.
				276	* For example,
				277	* \code
				278	* ucnv_open("UTF-7,version=1", &errorCode);
				279	* \endcode
				280	* See convrtrs.txt.
				281	*
				282	* @see ucnv_open
				283	* @stable ICU 2.4
				284	*/
				285	#define UCNV_VERSION_OPTION_STRING ",version="
				286
				287	/**
				288	* Converter option for EBCDIC SBCS or mixed-SBCS/DBCS (stateful) codepages.
				289	* Swaps Unicode mappings for EBCDIC LF and NL codes, as used on
				290	* S/390 (z/OS) Unix System Services (Open Edition).
				291	* For example, ucnv_open("ibm-1047,swaplfnl", &errorCode);
				292	* See convrtrs.txt.
				293	*
				294	* @see ucnv_open
				295	* @stable ICU 2.4
				296	*/
				297	#define UCNV_SWAP_LFNL_OPTION_STRING ",swaplfnl"
				298
				299	/**
				300	* Do a fuzzy compare of two converter/alias names.
				301	* The comparison is case-insensitive, ignores leading zeroes if they are not
				302	* followed by further digits, and ignores all but letters and digits.
				303	* Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
				304	* See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
				305	* at http://www.unicode.org/reports/tr22/
				306	*
				307	* @param name1 a converter name or alias, zero-terminated
				308	* @param name2 a converter name or alias, zero-terminated
				309	* @return 0 if the names match, or a negative value if the name1
				310	* lexically precedes name2, or a positive value if the name1
				311	* lexically follows name2.
				312	* @stable ICU 2.0
				313	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	314	U_CAPI int U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	315	ucnv_compareNames(const char *name1, const char *name2);
				316
				317
				318	/**
				319	* Creates a UConverter object with the name of a coded character set specified as a C string.
				320	* The actual name will be resolved with the alias file
				321	* using a case-insensitive string comparison that ignores
				322	* leading zeroes and all non-alphanumeric characters.
				323	* E.g., the names "UTF8", "utf-8", "u*T@f08" and "Utf 8" are all equivalent.
				324	* (See also ucnv_compareNames().)
				325	* If <code>NULL</code> is passed for the converter name, it will create one with the
				326	* getDefaultName return value.
				327	*
				328	* <p>A converter name for ICU 1.5 and above may contain options
				329	* like a locale specification to control the specific behavior of
				330	* the newly instantiated converter.
				331	* The meaning of the options depends on the particular converter.
				332	* If an option is not defined for or recognized by a given converter, then it is ignored.</p>
				333	*
				334	* <p>Options are appended to the converter name string, with a
				335	* <code>UCNV_OPTION_SEP_CHAR</code> between the name and the first option and
				336	* also between adjacent options.</p>
				337	*
				338	* <p>If the alias is ambiguous, then the preferred converter is used
				339	* and the status is set to U_AMBIGUOUS_ALIAS_WARNING.</p>
				340	*
				341	* <p>The conversion behavior and names can vary between platforms. ICU may
				342	* convert some characters differently from other platforms. Details on this topic
				343	* are in the <a href="http://icu-project.org/userguide/conversion.html">User's
				344	* Guide</a>. Aliases starting with a "cp" prefix have no specific meaning
				345	* other than being an alias starting with the letters "cp". Please do not
				346	* associate any meaning to these aliases.</p>
				347	*
				348	* \snippet samples/ucnv/convsamp.cpp ucnv_open
				349	*
				350	* @param converterName Name of the coded character set table.
				351	* This may have options appended to the string.
				352	* IANA alias character set names, IBM CCSIDs starting with "ibm-",
				353	* Windows codepage numbers starting with "windows-" are frequently
				354	* used for this parameter. See ucnv_getAvailableName and
				355	* ucnv_getAlias for a complete list that is available.
				356	* If this parameter is NULL, the default converter will be used.
				357	* @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
				358	* @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
				359	* @see ucnv_openU
				360	* @see ucnv_openCCSID
				361	* @see ucnv_getAvailableName
				362	* @see ucnv_getAlias
				363	* @see ucnv_getDefaultName
				364	* @see ucnv_close
				365	* @see ucnv_compareNames
				366	* @stable ICU 2.0
				367	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	368	U_CAPI UConverter* U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	369	ucnv_open(const char *converterName, UErrorCode *err);
				370
				371
				372	/**
				373	* Creates a Unicode converter with the names specified as unicode string.
				374	* The name should be limited to the ASCII-7 alphanumerics range.
				375	* The actual name will be resolved with the alias file
				376	* using a case-insensitive string comparison that ignores
				377	* leading zeroes and all non-alphanumeric characters.
				378	* E.g., the names "UTF8", "utf-8", "u*T@f08" and "Utf 8" are all equivalent.
				379	* (See also ucnv_compareNames().)
				380	* If <TT>NULL</TT> is passed for the converter name, it will create
				381	* one with the ucnv_getDefaultName() return value.
				382	* If the alias is ambiguous, then the preferred converter is used
				383	* and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
				384	*
				385	* <p>See ucnv_open for the complete details</p>
				386	* @param name Name of the UConverter table in a zero terminated
				387	* Unicode string
				388	* @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR,
				389	* U_FILE_ACCESS_ERROR</TT>
				390	* @return the created Unicode converter object, or <TT>NULL</TT> if an
				391	* error occurred
				392	* @see ucnv_open
				393	* @see ucnv_openCCSID
				394	* @see ucnv_close
				395	* @see ucnv_compareNames
				396	* @stable ICU 2.0
				397	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	398	U_CAPI UConverter* U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	399	ucnv_openU(const UChar *name,
				400	UErrorCode *err);
				401
				402	/**
				403	* Creates a UConverter object from a CCSID number and platform pair.
				404	* Note that the usefulness of this function is limited to platforms with numeric
				405	* encoding IDs. Only IBM and Microsoft platforms use numeric (16-bit) identifiers for
				406	* encodings.
				407	*
				408	* In addition, IBM CCSIDs and Unicode conversion tables are not 1:1 related.
				409	* For many IBM CCSIDs there are multiple (up to six) Unicode conversion tables, and
				410	* for some Unicode conversion tables there are multiple CCSIDs.
				411	* Some "alternate" Unicode conversion tables are provided by the
				412	* IBM CDRA conversion table registry.
				413	* The most prominent example of a systematic modification of conversion tables that is
				414	* not provided in the form of conversion table files in the repository is
				415	* that S/390 Unix System Services swaps the codes for Line Feed and New Line in all
				416	* EBCDIC codepages, which requires such a swap in the Unicode conversion tables as well.
				417	*
				418	* Only IBM default conversion tables are accessible with ucnv_openCCSID().
				419	* ucnv_getCCSID() will return the same CCSID for all conversion tables that are associated
				420	* with that CCSID.
				421	*
				422	* Currently, the only "platform" supported in the ICU converter API is UCNV_IBM.
				423	*
				424	* In summary, the use of CCSIDs and the associated API functions is not recommended.
				425	*
				426	* In order to open a converter with the default IBM CDRA Unicode conversion table,
				427	* you can use this function or use the prefix "ibm-":
				428	* \code
				429	* char name[20];
				430	* sprintf(name, "ibm-%hu", ccsid);
				431	* cnv=ucnv_open(name, &errorCode);
				432	* \endcode
				433	*
				434	* In order to open a converter with the IBM S/390 Unix System Services variant
				435	* of a Unicode/EBCDIC conversion table,
				436	* you can use the prefix "ibm-" together with the option string UCNV_SWAP_LFNL_OPTION_STRING:
				437	* \code
				438	* char name[20];
				439	* sprintf(name, "ibm-%hu" UCNV_SWAP_LFNL_OPTION_STRING, ccsid);
				440	* cnv=ucnv_open(name, &errorCode);
				441	* \endcode
				442	*
				443	* In order to open a converter from a Microsoft codepage number, use the prefix "cp":
				444	* \code
				445	* char name[20];
				446	* sprintf(name, "cp%hu", codepageID);
				447	* cnv=ucnv_open(name, &errorCode);
				448	* \endcode
				449	*
				450	* If the alias is ambiguous, then the preferred converter is used
				451	* and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
				452	*
				453	* @param codepage codepage number to create
				454	* @param platform the platform in which the codepage number exists
				455	* @param err error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
				456	* @return the created Unicode converter object, or <TT>NULL</TT> if an error
				457	* occurred.
				458	* @see ucnv_open
				459	* @see ucnv_openU
				460	* @see ucnv_close
				461	* @see ucnv_getCCSID
				462	* @see ucnv_getPlatform
				463	* @see UConverterPlatform
				464	* @stable ICU 2.0
				465	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	466	U_CAPI UConverter* U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	467	ucnv_openCCSID(int32_t codepage,
				468	UConverterPlatform platform,
				469	UErrorCode * err);
				470
				471	/**
				472	* <p>Creates a UConverter object specified from a packageName and a converterName.</p>
				473	*
				474	* <p>The packageName and converterName must point to an ICU udata object, as defined by
				475	* <code> udata_open( packageName, "cnv", converterName, err) </code> or equivalent.
				476	* Typically, packageName will refer to a (.dat) file, or to a package registered with
				477	* udata_setAppData(). Using a full file or directory pathname for packageName is deprecated.</p>
				478	*
				479	* <p>The name will NOT be looked up in the alias mechanism, nor will the converter be
				480	* stored in the converter cache or the alias table. The only way to open further converters
				481	* is call this function multiple times, or use the ucnv_safeClone() function to clone a
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	482	* 'primary' converter.</p>
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	483	*
				484	* <p>A future version of ICU may add alias table lookups and/or caching
				485	* to this function.</p>
				486	*
				487	* <p>Example Use:
				488	* <code>cnv = ucnv_openPackage("myapp", "myconverter", &err);</code>
				489	* </p>
				490	*
				491	* @param packageName name of the package (equivalent to 'path' in udata_open() call)
				492	* @param converterName name of the data item to be used, without suffix.
				493	* @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
				494	* @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
				495	* @see udata_open
				496	* @see ucnv_open
				497	* @see ucnv_safeClone
				498	* @see ucnv_close
				499	* @stable ICU 2.2
				500	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	501	U_CAPI UConverter* U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	502	ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode *err);
				503
				504	/**
				505	* Thread safe converter cloning operation.
				506	* For most efficient operation, pass in a stackBuffer (and a *pBufferSize)
				507	* with at least U_CNV_SAFECLONE_BUFFERSIZE bytes of space.
				508	* If the buffer size is sufficient, then the clone will use the stack buffer;
				509	* otherwise, it will be allocated, and *pBufferSize will indicate
				510	* the actual size. (This should not occur with U_CNV_SAFECLONE_BUFFERSIZE.)
				511	*
				512	* You must ucnv_close() the clone in any case.
				513	*
				514	* If *pBufferSize==0, (regardless of whether stackBuffer==NULL or not)
				515	* then *pBufferSize will be changed to a sufficient size
				516	* for cloning this converter,
				517	* without actually cloning the converter ("pure pre-flighting").
				518	*
				519	* If *pBufferSize is greater than zero but not large enough for a stack-based
				520	* clone, then the converter is cloned using newly allocated memory
				521	* and *pBufferSize is changed to the necessary size.
				522	*
				523	* If the converter clone fits into the stack buffer but the stack buffer is not
				524	* sufficiently aligned for the clone, then the clone will use an
				525	* adjusted pointer and use an accordingly smaller buffer size.
				526	*
				527	* @param cnv converter to be cloned
				528	* @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br>
				529	* user allocated space for the new clone. If NULL new memory will be allocated.
				530	* If buffer is not large enough, new memory will be allocated.
				531	* Clients can use the U_CNV_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations.
				532	* @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br>
				533	* pointer to size of allocated space.
				534	* @param status to indicate whether the operation went on smoothly or there were errors
				535	* An informational status value, U_SAFECLONE_ALLOCATED_WARNING,
				536	* is used if any allocations were necessary.
				537	* However, it is better to check if *pBufferSize grew for checking for
				538	* allocations because warning codes can be overridden by subsequent
				539	* function calls.
				540	* @return pointer to the new clone
				541	* @stable ICU 2.0
				542	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	543	U_CAPI UConverter * U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	544	ucnv_safeClone(const UConverter *cnv,
				545	void *stackBuffer,
				546	int32_t *pBufferSize,
				547	UErrorCode *status);
				548
				549	#ifndef U_HIDE_DEPRECATED_API
				550
				551	/**
				552	* \def U_CNV_SAFECLONE_BUFFERSIZE
				553	* Definition of a buffer size that is designed to be large enough for
				554	* converters to be cloned with ucnv_safeClone().
				555	* @deprecated ICU 52. Do not rely on ucnv_safeClone() cloning into any provided buffer.
				556	*/
				557	#define U_CNV_SAFECLONE_BUFFERSIZE 1024
				558
				559	#endif /* U_HIDE_DEPRECATED_API */
				560
				561	/**
				562	* Deletes the unicode converter and releases resources associated
				563	* with just this instance.
				564	* Does not free up shared converter tables.
				565	*
				566	* @param converter the converter object to be deleted
				567	* @see ucnv_open
				568	* @see ucnv_openU
				569	* @see ucnv_openCCSID
				570	* @stable ICU 2.0
				571	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	572	U_CAPI void U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	573	ucnv_close(UConverter * converter);
				574
				575	#if U_SHOW_CPLUSPLUS_API
				576
				577	U_NAMESPACE_BEGIN
				578
				579	/**
				580	* \class LocalUConverterPointer
				581	* "Smart pointer" class, closes a UConverter via ucnv_close().
				582	* For most methods see the LocalPointerBase base class.
				583	*
				584	* @see LocalPointerBase
				585	* @see LocalPointer
				586	* @stable ICU 4.4
				587	*/
				588	U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterPointer, UConverter, ucnv_close);
				589
				590	U_NAMESPACE_END
				591
				592	#endif
				593
				594	/**
				595	* Fills in the output parameter, subChars, with the substitution characters
				596	* as multiple bytes.
				597	* If ucnv_setSubstString() set a Unicode string because the converter is
				598	* stateful, then subChars will be an empty string.
				599	*
				600	* @param converter the Unicode converter
				601	* @param subChars the substitution characters
				602	* @param len on input the capacity of subChars, on output the number
				603	* of bytes copied to it
				604	* @param err the outgoing error status code.
				605	* If the substitution character array is too small, an
				606	* <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
				607	* @see ucnv_setSubstString
				608	* @see ucnv_setSubstChars
				609	* @stable ICU 2.0
				610	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	611	U_CAPI void U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	612	ucnv_getSubstChars(const UConverter *converter,
				613	char *subChars,
				614	int8_t *len,
				615	UErrorCode *err);
				616
				617	/**
				618	* Sets the substitution chars when converting from unicode to a codepage. The
				619	* substitution is specified as a string of 1-4 bytes, and may contain
				620	* <TT>NULL</TT> bytes.
				621	* The subChars must represent a single character. The caller needs to know the
				622	* byte sequence of a valid character in the converter's charset.
				623	* For some converters, for example some ISO 2022 variants, only single-byte
				624	* substitution characters may be supported.
				625	* The newer ucnv_setSubstString() function relaxes these limitations.
				626	*
				627	* @param converter the Unicode converter
				628	* @param subChars the substitution character byte sequence we want set
				629	* @param len the number of bytes in subChars
				630	* @param err the error status code. <TT>U_INDEX_OUTOFBOUNDS_ERROR </TT> if
				631	* len is bigger than the maximum number of bytes allowed in subchars
				632	* @see ucnv_setSubstString
				633	* @see ucnv_getSubstChars
				634	* @stable ICU 2.0
				635	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	636	U_CAPI void U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	637	ucnv_setSubstChars(UConverter *converter,
				638	const char *subChars,
				639	int8_t len,
				640	UErrorCode *err);
				641
				642	/**
				643	* Set a substitution string for converting from Unicode to a charset.
				644	* The caller need not know the charset byte sequence for each charset.
				645	*
				646	* Unlike ucnv_setSubstChars() which is designed to set a charset byte sequence
				647	* for a single character, this function takes a Unicode string with
				648	* zero, one or more characters, and immediately verifies that the string can be
				649	* converted to the charset.
				650	* If not, or if the result is too long (more than 32 bytes as of ICU 3.6),
				651	* then the function returns with an error accordingly.
				652	*
				653	* Also unlike ucnv_setSubstChars(), this function works for stateful charsets
				654	* by converting on the fly at the point of substitution rather than setting
				655	* a fixed byte sequence.
				656	*
				657	* @param cnv The UConverter object.
				658	* @param s The Unicode string.
				659	* @param length The number of UChars in s, or -1 for a NUL-terminated string.
				660	* @param err Pointer to a standard ICU error code. Its input value must
				661	* pass the U_SUCCESS() test, or else the function returns
				662	* immediately. Check for U_FAILURE() on output or use with
				663	* function chaining. (See User Guide for details.)
				664	*
				665	* @see ucnv_setSubstChars
				666	* @see ucnv_getSubstChars
				667	* @stable ICU 3.6
				668	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	669	U_CAPI void U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	670	ucnv_setSubstString(UConverter *cnv,
				671	const UChar *s,
				672	int32_t length,
				673	UErrorCode *err);
				674
				675	/**
				676	* Fills in the output parameter, errBytes, with the error characters from the
				677	* last failing conversion.
				678	*
				679	* @param converter the Unicode converter
				680	* @param errBytes the codepage bytes which were in error
				681	* @param len on input the capacity of errBytes, on output the number of
				682	* bytes which were copied to it
				683	* @param err the error status code.
				684	* If the errBytes array is too small, an
				685	* <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
				686	* @stable ICU 2.0
				687	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	688	U_CAPI void U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	689	ucnv_getInvalidChars(const UConverter *converter,
				690	char *errBytes,
				691	int8_t *len,
				692	UErrorCode *err);
				693
				694	/**
				695	* Fills in the output parameter, errUChars, with the error characters from the
				696	* last failing conversion.
				697	*
				698	* @param converter the Unicode converter
				699	* @param errUChars the UChars which were in error
				700	* @param len on input the capacity of errUChars, on output the number of
				701	* UChars which were copied to it
				702	* @param err the error status code.
				703	* If the errUChars array is too small, an
				704	* <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
				705	* @stable ICU 2.0
				706	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	707	U_CAPI void U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	708	ucnv_getInvalidUChars(const UConverter *converter,
				709	UChar *errUChars,
				710	int8_t *len,
				711	UErrorCode *err);
				712
				713	/**
				714	* Resets the state of a converter to the default state. This is used
				715	* in the case of an error, to restart a conversion from a known default state.
				716	* It will also empty the internal output buffers.
				717	* @param converter the Unicode converter
				718	* @stable ICU 2.0
				719	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	720	U_CAPI void U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	721	ucnv_reset(UConverter *converter);
				722
				723	/**
				724	* Resets the to-Unicode part of a converter state to the default state.
				725	* This is used in the case of an error to restart a conversion to
				726	* Unicode to a known default state. It will also empty the internal
				727	* output buffers used for the conversion to Unicode codepoints.
				728	* @param converter the Unicode converter
				729	* @stable ICU 2.0
				730	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	731	U_CAPI void U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	732	ucnv_resetToUnicode(UConverter *converter);
				733
				734	/**
				735	* Resets the from-Unicode part of a converter state to the default state.
				736	* This is used in the case of an error to restart a conversion from
				737	* Unicode to a known default state. It will also empty the internal output
				738	* buffers used for the conversion from Unicode codepoints.
				739	* @param converter the Unicode converter
				740	* @stable ICU 2.0
				741	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	742	U_CAPI void U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	743	ucnv_resetFromUnicode(UConverter *converter);
				744
				745	/**
				746	* Returns the maximum number of bytes that are output per UChar in conversion
				747	* from Unicode using this converter.
				748	* The returned number can be used with UCNV_GET_MAX_BYTES_FOR_STRING
				749	* to calculate the size of a target buffer for conversion from Unicode.
				750	*
				751	* Note: Before ICU 2.8, this function did not return reliable numbers for
				752	* some stateful converters (EBCDIC_STATEFUL, ISO-2022) and LMBCS.
				753	*
				754	* This number may not be the same as the maximum number of bytes per
				755	* "conversion unit". In other words, it may not be the intuitively expected
				756	* number of bytes per character that would be published for a charset,
				757	* and may not fulfill any other purpose than the allocation of an output
				758	* buffer of guaranteed sufficient size for a given input length and converter.
				759	*
				760	* Examples for special cases that are taken into account:
				761	* - Supplementary code points may convert to more bytes than BMP code points.
				762	* This function returns bytes per UChar (UTF-16 code unit), not per
				763	* Unicode code point, for efficient buffer allocation.
				764	* - State-shifting output (SI/SO, escapes, etc.) from stateful converters.
				765	* - When m input UChars are converted to n output bytes, then the maximum n/m
				766	* is taken into account.
				767	*
				768	* The number returned here does not take into account
				769	* (see UCNV_GET_MAX_BYTES_FOR_STRING):
				770	* - callbacks which output more than one charset character sequence per call,
				771	* like escape callbacks
				772	* - initial and final non-character bytes that are output by some converters
				773	* (automatic BOMs, initial escape sequence, final SI, etc.)
				774	*
				775	* Examples for returned values:
				776	* - SBCS charsets: 1
				777	* - Shift-JIS: 2
				778	* - UTF-16: 2 (2 per BMP, 4 per surrogate _pair_, BOM not counted)
				779	* - UTF-8: 3 (3 per BMP, 4 per surrogate _pair_)
				780	* - EBCDIC_STATEFUL (EBCDIC mixed SBCS/DBCS): 3 (SO + DBCS)
				781	* - ISO-2022: 3 (always outputs UTF-8)
				782	* - ISO-2022-JP: 6 (4-byte escape sequences + DBCS)
				783	* - ISO-2022-CN: 8 (4-byte designator sequences + 2-byte SS2/SS3 + DBCS)
				784	*
				785	* @param converter The Unicode converter.
				786	* @return The maximum number of bytes per UChar (16 bit code unit)
				787	* that are output by ucnv_fromUnicode(),
				788	* to be used together with UCNV_GET_MAX_BYTES_FOR_STRING
				789	* for buffer allocation.
				790	*
				791	* @see UCNV_GET_MAX_BYTES_FOR_STRING
				792	* @see ucnv_getMinCharSize
				793	* @stable ICU 2.0
				794	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	795	U_CAPI int8_t U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	796	ucnv_getMaxCharSize(const UConverter *converter);
				797
				798	/**
				799	* Calculates the size of a buffer for conversion from Unicode to a charset.
				800	* The calculated size, (length+10)*maxCharSize evaluated in int32_t arithmetic, is guaranteed to be sufficient for this conversion.
				801	*
				802	* It takes into account initial and final non-character bytes that are output
				803	* by some converters.
				804	* It does not take into account callbacks which output more than one charset
				805	* character sequence per call, like escape callbacks.
				806	* The default (substitution) callback only outputs one charset character sequence.
				807	*
				808	* @param length Number of UChars to be converted (cast to int32_t by the macro).
				809	* @param maxCharSize Return value from ucnv_getMaxCharSize() for the converter
				810	* that will be used.
				811	* @return Size of a buffer that will be large enough to hold the output bytes of
				812	* converting length UChars with the converter that returned the maxCharSize.
				813	*
				814	* @see ucnv_getMaxCharSize
				815	* @stable ICU 2.8
				816	*/
				817	#define UCNV_GET_MAX_BYTES_FOR_STRING(length, maxCharSize) \
				818	(((int32_t)(length)+10)*(int32_t)(maxCharSize))
				819
				820	/**
				821	* Returns the minimum byte length (per codepoint) for characters in this codepage.
				822	* This is usually either 1 or 2.
				823	* @param converter the Unicode converter
				824	* @return the minimum number of bytes per codepoint allowed by this particular converter
				825	* @see ucnv_getMaxCharSize
				826	* @stable ICU 2.0
				827	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	828	U_CAPI int8_t U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	829	ucnv_getMinCharSize(const UConverter *converter);
				830
				831	/**
				832	* Returns the display name of the converter passed in based on the Locale
				833	* passed in. If the locale contains no display name, the internal ASCII
				834	* name will be filled in.
				835	*
				836	* @param converter the Unicode converter.
				837	* @param displayLocale is the specific Locale we want to localize for
				838	* @param displayName user provided buffer to be filled in
				839	* @param displayNameCapacity size of displayName Buffer
				840	* @param err error status code
				841	* @return displayNameLength number of UChar needed in displayName
				842	* @see ucnv_getName
				843	* @stable ICU 2.0
				844	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	845	U_CAPI int32_t U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	846	ucnv_getDisplayName(const UConverter *converter,
				847	const char *displayLocale,
				848	UChar *displayName,
				849	int32_t displayNameCapacity,
				850	UErrorCode *err);
				851
				852	/**
				853	* Gets the internal, canonical name of the converter (zero-terminated).
				854	* The lifetime of the returned string will be that of the converter
				855	* passed to this function.
				856	* @param converter the Unicode converter
				857	* @param err UErrorCode status
				858	* @return the internal name of the converter
				859	* @see ucnv_getDisplayName
				860	* @stable ICU 2.0
				861	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	862	U_CAPI const char * U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	863	ucnv_getName(const UConverter *converter, UErrorCode *err);
				864
				865	/**
				866	* Gets a codepage number associated with the converter. This is not guaranteed
				867	* to be the one used to create the converter. Some converters do not represent
				868	* platform registered codepages and return zero for the codepage number.
				869	* The error code fill-in parameter indicates if the codepage number
				870	* is available.
				871	* Does not check if the converter is <TT>NULL</TT> or if converter's data
				872	* table is <TT>NULL</TT>.
				873	*
				874	* Important: The use of CCSIDs is not recommended because it is limited
				875	* to only two platforms in principle and only one (UCNV_IBM) in the current
				876	* ICU converter API.
				877	* Also, CCSIDs are insufficient to identify IBM Unicode conversion tables precisely.
				878	* For more details see ucnv_openCCSID().
				879	*
				880	* @param converter the Unicode converter
				881	* @param err the error status code.
				882	* @return If any error occurs, -1 will be returned; otherwise, the codepage number
				883	* will be returned
				884	* @see ucnv_openCCSID
				885	* @see ucnv_getPlatform
				886	* @stable ICU 2.0
				887	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	888	U_CAPI int32_t U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	889	ucnv_getCCSID(const UConverter *converter,
				890	UErrorCode *err);
				891
				892	/**
				893	* Gets a codepage platform associated with the converter. Currently,
				894	* only <TT>UCNV_IBM</TT> will be returned.
				895	* Does not test if the converter is <TT>NULL</TT> or if converter's data
				896	* table is <TT>NULL</TT>.
				897	* @param converter the Unicode converter
				898	* @param err the error status code.
				899	* @return The codepage platform
				900	* @stable ICU 2.0
				901	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	902	U_CAPI UConverterPlatform U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	903	ucnv_getPlatform(const UConverter *converter,
				904	UErrorCode *err);
				905
				906	/**
				907	* Gets the type of the converter
				908	* e.g. SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022,
				909	* EBCDIC_STATEFUL, LATIN_1
				910	* @param converter a valid, opened converter
				911	* @return the type of the converter
				912	* @stable ICU 2.0
				913	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	914	U_CAPI UConverterType U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	915	ucnv_getType(const UConverter * converter);
				916
				917	/**
				918	* Gets the "starter" (lead) bytes for converters of type MBCS.
				919	* Will fill in an <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if converter passed in
				920	* is not MBCS. Fills in an array of type UBool, with the value of the byte
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	921	* as offset to the array. For example, if (starters[0x20] == true) at return,
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	922	* it means that the byte 0x20 is a starter byte in this converter.
				923	* Context pointers are always owned by the caller.
				924	*
				925	* @param converter a valid, opened converter of type MBCS
				926	* @param starters an array of size 256 to be filled in
				927	* @param err error status, <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if the
				928	* converter is not a type which can return starters.
				929	* @see ucnv_getType
				930	* @stable ICU 2.0
				931	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	932	U_CAPI void U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	933	ucnv_getStarters(const UConverter* converter,
				934	UBool starters[256],
				935	UErrorCode* err);
				936
				937
				938	/**
				939	* Selectors for Unicode sets that can be returned by ucnv_getUnicodeSet().
				940	* @see ucnv_getUnicodeSet
				941	* @stable ICU 2.6
				942	*/
				943	typedef enum UConverterUnicodeSet {
				944	/** Select the set of roundtrippable Unicode code points (excludes code points that have fallback mappings). @stable ICU 2.6 */
				945	UCNV_ROUNDTRIP_SET,
				946	/** Select the set of Unicode code points with roundtrip or fallback mappings (but not reverse fallbacks). @stable ICU 4.0 */
				947	UCNV_ROUNDTRIP_AND_FALLBACK_SET,
				948	#ifndef U_HIDE_DEPRECATED_API
				949	/**
				950	* Number of UConverterUnicodeSet selectors.
				951	* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
				952	*/
				953	UCNV_SET_COUNT
				954	#endif // U_HIDE_DEPRECATED_API
				955	} UConverterUnicodeSet;
				956
				957
				958	/**
				959	* Returns the set of Unicode code points that can be converted by an ICU converter.
				960	*
				961	* Returns one of several kinds of set:
				962	*
				963	* 1. UCNV_ROUNDTRIP_SET
				964	*
				965	* The set of all Unicode code points that can be roundtrip-converted
				966	* (converted without any data loss) with the converter (ucnv_fromUnicode()).
				967	* This set will not include code points that have fallback mappings
				968	* or are only the result of reverse fallback mappings.
				969	* This set will also not include PUA code points with fallbacks, although
				970	* ucnv_fromUnicode() will always use those mappings despite ucnv_setFallback().
				971	* See UTR #22 "Character Mapping Markup Language"
				972	* at http://www.unicode.org/reports/tr22/
				973	*
				974	* This is useful for example for
				975	* - checking that a string or document can be roundtrip-converted with a converter,
				976	* without/before actually performing the conversion
				977	* - testing if a converter can be used for typical text for a certain locale,
				978	* by comparing its roundtrip set with the set of ExemplarCharacters from
				979	* ICU's locale data or other sources
				980	*
				981	* 2. UCNV_ROUNDTRIP_AND_FALLBACK_SET
				982	*
				983	* The set of all Unicode code points that can be converted with the converter (ucnv_fromUnicode())
				984	* when fallbacks are turned on (see ucnv_setFallback()).
				985	* This set includes all code points with roundtrips and fallbacks (but not reverse fallbacks).
				986	*
				987	* In the future, there may be more UConverterUnicodeSet choices to select
				988	* sets with different properties.
				989	*
				990	* @param cnv The converter for which a set is requested.
				991	* @param setFillIn A valid USet *. It will be cleared by this function before
				992	* the converter's specific set is filled into the USet.
				993	* @param whichSet A UConverterUnicodeSet selector;
				994	* UCNV_ROUNDTRIP_SET or UCNV_ROUNDTRIP_AND_FALLBACK_SET.
				995	* @param pErrorCode ICU error code in/out parameter.
				996	* Must fulfill U_SUCCESS before the function call.
				997	*
				998	* @see UConverterUnicodeSet
				999	* @see uset_open
				1000	* @see uset_close
				1001	* @stable ICU 2.6
				1002	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1003	U_CAPI void U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1004	ucnv_getUnicodeSet(const UConverter *cnv,
				1005	USet *setFillIn,
				1006	UConverterUnicodeSet whichSet,
				1007	UErrorCode *pErrorCode);
				1008
				1009	/**
				1010	* Gets the current callback function used by the converter when an illegal
				1011	* or invalid codepage sequence is found.
				1012	* Context pointers are always owned by the caller.
				1013	*
				1014	* @param converter the unicode converter
				1015	* @param action fillin: returns the callback function pointer
				1016	* @param context fillin: returns the callback's private void* context
				1017	* @see ucnv_setToUCallBack
				1018	* @stable ICU 2.0
				1019	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1020	U_CAPI void U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1021	ucnv_getToUCallBack (const UConverter * converter,
				1022	UConverterToUCallback *action,
				1023	const void **context);
				1024
				1025	/**
				1026	* Gets the current callback function used by the converter when an illegal
				1027	* or invalid Unicode sequence is found.
				1028	* Context pointers are always owned by the caller.
				1029	*
				1030	* @param converter the unicode converter
				1031	* @param action fillin: returns the callback function pointer
				1032	* @param context fillin: returns the callback's private void* context
				1033	* @see ucnv_setFromUCallBack
				1034	* @stable ICU 2.0
				1035	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1036	U_CAPI void U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1037	ucnv_getFromUCallBack (const UConverter * converter,
				1038	UConverterFromUCallback *action,
				1039	const void **context);
				1040
				1041	/**
				1042	* Changes the callback function used by the converter when
				1043	* an illegal or invalid sequence is found.
				1044	* Context pointers are always owned by the caller.
				1045	* Predefined actions and contexts can be found in the ucnv_err.h header.
				1046	*
				1047	* @param converter the unicode converter
				1048	* @param newAction the new callback function
				1049	* @param newContext the new toUnicode callback context pointer. This can be NULL.
				1050	* @param oldAction fillin: returns the old callback function pointer. This can be NULL.
				1051	* @param oldContext fillin: returns the old callback's private void* context. This can be NULL.
				1052	* @param err The error code status
				1053	* @see ucnv_getToUCallBack
				1054	* @stable ICU 2.0
				1055	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1056	U_CAPI void U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1057	ucnv_setToUCallBack (UConverter * converter,
				1058	UConverterToUCallback newAction,
				1059	const void* newContext,
				1060	UConverterToUCallback *oldAction,
				1061	const void** oldContext,
				1062	UErrorCode * err);
				1063
				1064	/**
				1065	* Changes the current callback function used by the converter when
				1066	* an illegal or invalid sequence is found.
				1067	* Context pointers are always owned by the caller.
				1068	* Predefined actions and contexts can be found in the ucnv_err.h header.
				1069	*
				1070	* @param converter the unicode converter
				1071	* @param newAction the new callback function
				1072	* @param newContext the new fromUnicode callback context pointer. This can be NULL.
				1073	* @param oldAction fillin: returns the old callback function pointer. This can be NULL.
				1074	* @param oldContext fillin: returns the old callback's private void* context. This can be NULL.
				1075	* @param err The error code status
				1076	* @see ucnv_getFromUCallBack
				1077	* @stable ICU 2.0
				1078	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1079	U_CAPI void U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1080	ucnv_setFromUCallBack (UConverter * converter,
				1081	UConverterFromUCallback newAction,
				1082	const void *newContext,
				1083	UConverterFromUCallback *oldAction,
				1084	const void **oldContext,
				1085	UErrorCode * err);
				1086
				1087	/**
				1088	* Converts an array of unicode characters to an array of codepage
				1089	* characters. This function is optimized for converting a continuous
				1090	* stream of data in buffer-sized chunks, where the entire source and
				1091	* target does not fit in available buffers.
				1092	*
				1093	* The source pointer is an in/out parameter. It starts out pointing where the
				1094	* conversion is to begin, and ends up pointing after the last UChar consumed.
				1095	*
				1096	* Target similarly starts out pointing at the first available byte in the output
				1097	* buffer, and ends up pointing after the last byte written to the output.
				1098	*
				1099	* The converter always attempts to consume the entire source buffer, unless
				1100	* (1.) the target buffer is full, or (2.) a failing error is returned from the
				1101	* current callback function. When a successful error status has been
				1102	* returned, it means that all of the source buffer has been
				1103	* consumed. At that point, the caller should reset the source and
				1104	* sourceLimit pointers to point to the next chunk.
				1105	*
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1106	* At the end of the stream (flush==true), the input is completely consumed
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1107	* when *source==sourceLimit and no error code is set.
				1108	* The converter object is then automatically reset by this function.
				1109	* (This means that a converter need not be reset explicitly between data
				1110	* streams if it finishes the previous stream without errors.)
				1111	*
				1112	* This is a <I>stateful</I> conversion. Additionally, even when all source data has
				1113	* been consumed, some data may be in the converter's internal state.
				1114	* Call this function repeatedly, updating the target pointers with
				1115	* the next empty chunk of target in case of a
				1116	* <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source pointers
				1117	* with the next chunk of source when a successful error status is
				1118	* returned, until there are no more chunks of source data.
				1119	* @param converter the Unicode converter
				1120	* @param target I/O parameter. Input : Points to the beginning of the buffer to copy
				1121	* codepage characters to. Output : points to after the last codepage character copied
				1122	* to <TT>target</TT>.
				1123	* @param targetLimit the pointer just after last of the <TT>target</TT> buffer
				1124	* @param source I/O parameter, pointer to pointer to the source Unicode character buffer.
				1125	* @param sourceLimit the pointer just after the last of the source buffer
				1126	* @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
				1127	* of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
				1128	* e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
				1129	* For output data carried across calls, and other data without a specific source character
				1130	* (such as from escape sequences or callbacks) -1 will be placed for offsets.
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1131	* @param flush set to <TT>true</TT> if the current source buffer is the last available
				1132	* chunk of the source, <TT>false</TT> otherwise. Note that if a failing status is returned,
				1133	* this function may have to be called multiple times with flush set to <TT>true</TT> until
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1134	* the source buffer is consumed.
				1135	* @param err the error status. <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the
				1136	* converter is <TT>NULL</TT>.
				1137	* <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is
				1138	* still data to be written to the target.
				1139	* @see ucnv_fromUChars
				1140	* @see ucnv_convert
				1141	* @see ucnv_getMinCharSize
				1142	* @see ucnv_setFromUCallBack
				1143	* @stable ICU 2.0
				1144	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1145	U_CAPI void U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1146	ucnv_fromUnicode (UConverter * converter,
				1147	char **target,
				1148	const char *targetLimit,
				1149	const UChar ** source,
				1150	const UChar * sourceLimit,
				1151	int32_t* offsets,
				1152	UBool flush,
				1153	UErrorCode * err);
				1154
				1155	/**
				1156	* Converts a buffer of codepage bytes into an array of unicode UChars
				1157	* characters. This function is optimized for converting a continuous
				1158	* stream of data in buffer-sized chunks, where the entire source and
				1159	* target does not fit in available buffers.
				1160	*
				1161	* The source pointer is an in/out parameter. It starts out pointing where the
				1162	* conversion is to begin, and ends up pointing after the last byte of source consumed.
				1163	*
				1164	* Target similarly starts out pointing at the first available UChar in the output
				1165	* buffer, and ends up pointing after the last UChar written to the output.
				1166	* It does NOT necessarily keep UChar sequences together.
				1167	*
				1168	* The converter always attempts to consume the entire source buffer, unless
				1169	* (1.) the target buffer is full, or (2.) a failing error is returned from the
				1170	* current callback function. When a successful error status has been
				1171	* returned, it means that all of the source buffer has been
				1172	* consumed. At that point, the caller should reset the source and
				1173	* sourceLimit pointers to point to the next chunk.
				1174	*
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1175	* At the end of the stream (flush==true), the input is completely consumed
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1176	* when *source==sourceLimit and no error code is set.
				1177	* The converter object is then automatically reset by this function.
				1178	* (This means that a converter need not be reset explicitly between data
				1179	* streams if it finishes the previous stream without errors.)
				1180	*
				1181	* This is a <I>stateful</I> conversion. Additionally, even when all source data has
				1182	* been consumed, some data may be in the converter's internal state.
				1183	* Call this function repeatedly, updating the target pointers with
				1184	* the next empty chunk of target in case of a
				1185	* <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source pointers
				1186	* with the next chunk of source when a successful error status is
				1187	* returned, until there are no more chunks of source data.
				1188	* @param converter the Unicode converter
				1189	* @param target I/O parameter. Input : Points to the beginning of the buffer to copy
				1190	* UChars into. Output : points to after the last UChar copied.
				1191	* @param targetLimit the pointer just after the end of the <TT>target</TT> buffer
				1192	* @param source I/O parameter, pointer to pointer to the source codepage buffer.
				1193	* @param sourceLimit the pointer to the byte after the end of the source buffer
				1194	* @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
				1195	* of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
				1196	* e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
				1197	* For output data carried across calls, and other data without a specific source character
				1198	* (such as from escape sequences or callbacks) -1 will be placed for offsets.
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1199	* @param flush set to <TT>true</TT> if the current source buffer is the last available
				1200	* chunk of the source, <TT>false</TT> otherwise. Note that if a failing status is returned,
				1201	* this function may have to be called multiple times with flush set to <TT>true</TT> until
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1202	* the source buffer is consumed.
				1203	* @param err the error status. <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the
				1204	* converter is <TT>NULL</TT>.
				1205	* <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is
				1206	* still data to be written to the target.
				1207	* @see ucnv_toUChars
				1208	* @see ucnv_convert
				1209	* @see ucnv_getMinCharSize
				1210	* @see ucnv_setToUCallBack
				1211	* @see ucnv_getNextUChar
				1212	* @stable ICU 2.0
				1213	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1214	U_CAPI void U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1215	ucnv_toUnicode(UConverter *converter,
				1216	UChar **target,
				1217	const UChar *targetLimit,
				1218	const char **source,
				1219	const char *sourceLimit,
				1220	int32_t *offsets,
				1221	UBool flush,
				1222	UErrorCode *err);
				1223
				1224	/**
				1225	* Convert the Unicode string into a codepage string using an existing UConverter.
				1226	* The output string is NUL-terminated if possible.
				1227	*
				1228	* This function is a more convenient but less powerful version of ucnv_fromUnicode().
				1229	* It is only useful for whole strings, not for streaming conversion.
				1230	*
				1231	* The maximum output buffer capacity required (barring output from callbacks) will be
				1232	* UCNV_GET_MAX_BYTES_FOR_STRING(srcLength, ucnv_getMaxCharSize(cnv)).
				1233	*
				1234	* @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called)
				1235	* @param src the input Unicode string
				1236	* @param srcLength the input string length, or -1 if NUL-terminated
				1237	* @param dest destination string buffer, can be NULL if destCapacity==0
				1238	* @param destCapacity the number of chars available at dest
				1239	* @param pErrorCode normal ICU error code;
				1240	* common error codes that may be set by this function include
				1241	* U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING,
				1242	* U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
				1243	* @return the length of the output string, not counting the terminating NUL;
				1244	* if the length is greater than destCapacity, then the string will not fit
				1245	* and a buffer of the indicated length would need to be passed in
				1246	* @see ucnv_fromUnicode
				1247	* @see ucnv_convert
				1248	* @see UCNV_GET_MAX_BYTES_FOR_STRING
				1249	* @stable ICU 2.0
				1250	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1251	U_CAPI int32_t U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1252	ucnv_fromUChars(UConverter *cnv,
				1253	char *dest, int32_t destCapacity,
				1254	const UChar *src, int32_t srcLength,
				1255	UErrorCode *pErrorCode);
				1256
				1257	/**
				1258	* Convert the codepage string into a Unicode string using an existing UConverter.
				1259	* The output string is NUL-terminated if possible.
				1260	*
				1261	* This function is a more convenient but less powerful version of ucnv_toUnicode().
				1262	* It is only useful for whole strings, not for streaming conversion.
				1263	*
				1264	* The maximum output buffer capacity required (barring output from callbacks) will be
				1265	* 2*srcLength (each char may be converted into a surrogate pair).
				1266	*
				1267	* @param cnv the converter object to be used (ucnv_resetToUnicode() will be called)
				1268	* @param src the input codepage string
				1269	* @param srcLength the input string length, or -1 if NUL-terminated
				1270	* @param dest destination string buffer, can be NULL if destCapacity==0
				1271	* @param destCapacity the number of UChars available at dest
				1272	* @param pErrorCode normal ICU error code;
				1273	* common error codes that may be set by this function include
				1274	* U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING,
				1275	* U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
				1276	* @return the length of the output string, not counting the terminating NUL;
				1277	* if the length is greater than destCapacity, then the string will not fit
				1278	* and a buffer of the indicated length would need to be passed in
				1279	* @see ucnv_toUnicode
				1280	* @see ucnv_convert
				1281	* @stable ICU 2.0
				1282	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1283	U_CAPI int32_t U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1284	ucnv_toUChars(UConverter *cnv,
				1285	UChar *dest, int32_t destCapacity,
				1286	const char *src, int32_t srcLength,
				1287	UErrorCode *pErrorCode);
				1288
				1289	/**
				1290	* Convert a codepage buffer into Unicode one character at a time.
				1291	* The input is completely consumed when the U_INDEX_OUTOFBOUNDS_ERROR is set.
				1292	*
				1293	* Advantage compared to ucnv_toUnicode() or ucnv_toUChars():
				1294	* - Faster for small amounts of data, for most converters, e.g.,
				1295	* US-ASCII, ISO-8859-1, UTF-8/16/32, and most "normal" charsets.
				1296	* (For complex converters, e.g., SCSU, UTF-7 and ISO 2022 variants,
				1297	* it uses ucnv_toUnicode() internally.)
				1298	* - Convenient.
				1299	*
				1300	* Limitations compared to ucnv_toUnicode():
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1301	* - Always assumes flush=true.
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1302	* This makes ucnv_getNextUChar() unsuitable for "streaming" conversion,
				1303	* that is, for where the input is supplied in multiple buffers,
				1304	* because ucnv_getNextUChar() will assume the end of the input at the end
				1305	* of the first buffer.
				1306	* - Does not provide offset output.
				1307	*
				1308	* It is possible to "mix" ucnv_getNextUChar() and ucnv_toUnicode() because
				1309	* ucnv_getNextUChar() uses the current state of the converter
				1310	* (unlike ucnv_toUChars() which always resets first).
				1311	* However, if ucnv_getNextUChar() is called after ucnv_toUnicode()
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1312	* stopped in the middle of a character sequence (with flush=false),
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1313	* then ucnv_getNextUChar() will always use the slower ucnv_toUnicode()
				1314	* internally until the next character boundary.
				1315	* (This is new in ICU 2.6. In earlier releases, ucnv_getNextUChar() had to
				1316	* start at a character boundary.)
				1317	*
				1318	* Instead of using ucnv_getNextUChar(), it is recommended
				1319	* to convert using ucnv_toUnicode() or ucnv_toUChars()
				1320	* and then iterate over the text using U16_NEXT() or a UCharIterator (uiter.h)
				1321	* or a C++ CharacterIterator or similar.
				1322	* This allows streaming conversion and offset output, for example.
				1323	*
				1324	* <p>Handling of surrogate pairs and supplementary-plane code points:<br>
				1325	* There are two different kinds of codepages that provide mappings for surrogate characters:
				1326	* <ul>
				1327	* <li>Codepages like UTF-8, UTF-32, and GB 18030 provide direct representations for Unicode
				1328	* code points U+10000-U+10ffff as well as for single surrogates U+d800-U+dfff.
				1329	* Each valid sequence will result in exactly one returned code point.
				1330	* If a sequence results in a single surrogate, then that will be returned
				1331	* by itself, even if a neighboring sequence encodes the matching surrogate.</li>
				1332	* <li>Codepages like SCSU and LMBCS (and UTF-16) provide direct representations only for BMP code points
				1333	* including surrogates. Code points in supplementary planes are represented with
				1334	* two sequences, each encoding a surrogate.
				1335	* For these codepages, matching pairs of surrogates will be combined into single
				1336	* code points for returning from this function.
				1337	* (Note that SCSU is actually a mix of these codepage types.)</li>
				1338	* </ul></p>
				1339	*
				1340	* @param converter an open UConverter
				1341	* @param source the address of a pointer to the codepage buffer, will be
				1342	* updated to point after the bytes consumed in the conversion call.
				1343	* @param sourceLimit points to the end of the input buffer
				1344	* @param err fills in error status (see ucnv_toUnicode)
				1345	* <code>U_INDEX_OUTOFBOUNDS_ERROR</code> will be set if the input
				1346	* is empty or does not convert to any output (e.g.: pure state-change
				1347	* codes SI/SO, escape sequences for ISO 2022,
				1348	* or if the callback did not output anything, ...).
				1349	* This function will not set a <code>U_BUFFER_OVERFLOW_ERROR</code> because
				1350	* the "buffer" is the return code. However, there might be subsequent output
				1351	* stored in the converter object
				1352	* that will be returned in following calls to this function.
				1353	* @return a UChar32 resulting from the partial conversion of source
				1354	* @see ucnv_toUnicode
				1355	* @see ucnv_toUChars
				1356	* @see ucnv_convert
				1357	* @stable ICU 2.0
				1358	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1359	U_CAPI UChar32 U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1360	ucnv_getNextUChar(UConverter * converter,
				1361	const char **source,
				1362	const char * sourceLimit,
				1363	UErrorCode * err);
				1364
				1365	/**
				1366	* Convert from one external charset to another using two existing UConverters.
				1367	* Internally, two conversions - ucnv_toUnicode() and ucnv_fromUnicode() -
				1368	* are used, "pivoting" through 16-bit Unicode.
				1369	*
				1370	* Important: For streaming conversion (multiple function calls for successive
				1371	* parts of a text stream), the caller must provide a pivot buffer explicitly,
				1372	* and must preserve the pivot buffer and associated pointers from one
				1373	* call to another. (The buffer may be moved if its contents and the relative
				1374	* pointer positions are preserved.)
				1375	*
				1376	* There is a similar function, ucnv_convert(),
				1377	* which has the following limitations:
				1378	* - it takes charset names, not converter objects, so that
				1379	* - two converters are opened for each call
				1380	* - only single-string conversion is possible, not streaming operation
				1381	* - it does not provide enough information to find out,
				1382	* in case of failure, whether the toUnicode or
				1383	* the fromUnicode conversion failed
				1384	*
				1385	* By contrast, ucnv_convertEx()
				1386	* - takes UConverter parameters instead of charset names
				1387	* - fully exposes the pivot buffer for streaming conversion and complete error handling
				1388	*
				1389	* ucnv_convertEx() also provides further convenience:
				1390	* - an option to reset the converters at the beginning
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1391	* (if reset==true, see parameters;
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1392	* also sets pivotTarget=pivotSource=pivotStart)
				1393	* - allow NUL-terminated input
				1394	* (only a single NUL byte, will not work for charsets with multi-byte NULs)
				1395	* (if sourceLimit==NULL, see parameters)
				1396	* - terminate with a NUL on output
				1397	* (only a single NUL byte, not useful for charsets with multi-byte NULs),
				1398	* or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills
				1399	* the target buffer
				1400	* - the pivot buffer can be provided internally;
				1401	* possible only for whole-string conversion, not streaming conversion;
				1402	* in this case, the caller will not be able to get details about where an
				1403	* error occurred
				1404	* (if pivotStart==NULL, see below)
				1405	*
				1406	* The function returns when one of the following is true:
				1407	* - the entire source text has been converted successfully to the target buffer
				1408	* - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR)
				1409	* - a conversion error occurred
				1410	* (other U_FAILURE(), see description of pErrorCode)
				1411	*
				1412	* Limitation compared to the direct use of
				1413	* ucnv_fromUnicode() and ucnv_toUnicode():
				1414	* ucnv_convertEx() does not provide offset information.
				1415	*
				1416	* Limitation compared to ucnv_fromUChars() and ucnv_toUChars():
				1417	* ucnv_convertEx() does not support preflighting directly.
				1418	*
				1419	* Sample code for converting a single string from
				1420	* one external charset to UTF-8, ignoring the location of errors:
				1421	*
				1422	* \code
				1423	* int32_t
				1424	* myToUTF8(UConverter *cnv,
				1425	* const char *s, int32_t length,
				1426	* char *u8, int32_t capacity,
				1427	* UErrorCode *pErrorCode) {
				1428	* UConverter *utf8Cnv;
				1429	* char *target;
				1430	*
				1431	* if(U_FAILURE(*pErrorCode)) {
				1432	* return 0;
				1433	* }
				1434	*
				1435	* utf8Cnv=myGetCachedUTF8Converter(pErrorCode);
				1436	* if(U_FAILURE(*pErrorCode)) {
				1437	* return 0;
				1438	* }
				1439	*
				1440	* if(length<0) {
				1441	* length=strlen(s);
				1442	* }
				1443	* target=u8;
				1444	* ucnv_convertEx(utf8Cnv, cnv,
				1445	* &target, u8+capacity,
				1446	* &s, s+length,
				1447	* NULL, NULL, NULL, NULL,
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1448	* true, true,
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1449	* pErrorCode);
				1450	*
				1451	* myReleaseCachedUTF8Converter(utf8Cnv);
				1452	*
				1453	* // return the output string length, but without preflighting
				1454	* return (int32_t)(target-u8);
				1455	* }
				1456	* \endcode
				1457	*
				1458	* @param targetCnv Output converter, used to convert from the UTF-16 pivot
				1459	* to the target using ucnv_fromUnicode().
				1460	* @param sourceCnv Input converter, used to convert from the source to
				1461	* the UTF-16 pivot using ucnv_toUnicode().
				1462	* @param target I/O parameter, same as for ucnv_fromUnicode().
				1463	* Input: *target points to the beginning of the target buffer.
				1464	* Output: *target points to the first unit after the last char written.
				1465	* @param targetLimit Pointer to the first unit after the target buffer.
				1466	* @param source I/O parameter, same as for ucnv_toUnicode().
				1467	* Input: *source points to the beginning of the source buffer.
				1468	* Output: *source points to the first unit after the last char read.
				1469	* @param sourceLimit Pointer to the first unit after the source buffer.
				1470	* @param pivotStart Pointer to the UTF-16 pivot buffer. If pivotStart==NULL,
				1471	* then an internal buffer is used and the other pivot
				1472	* arguments are ignored and can be NULL as well.
				1473	* @param pivotSource I/O parameter, same as source in ucnv_fromUnicode() for
				1474	* conversion from the pivot buffer to the target buffer.
				1475	* @param pivotTarget I/O parameter, same as target in ucnv_toUnicode() for
				1476	* conversion from the source buffer to the pivot buffer.
				1477	* It must be pivotStart<=pivotSource<=pivotTarget<=pivotLimit
				1478	* and pivotStart<pivotLimit (unless pivotStart==NULL).
				1479	* @param pivotLimit Pointer to the first unit after the pivot buffer.
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1480	* @param reset If true, then ucnv_resetToUnicode(sourceCnv) and
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1481	* ucnv_resetFromUnicode(targetCnv) are called, and the
				1482	* pivot pointers are reset (pivotTarget=pivotSource=pivotStart).
				1483	* @param flush If true, indicates the end of the input.
				1484	* Passed directly to ucnv_toUnicode(), and carried over to
				1485	* ucnv_fromUnicode() when the source is empty as well.
				1486	* @param pErrorCode ICU error code in/out parameter.
				1487	* Must fulfill U_SUCCESS before the function call.
				1488	* U_BUFFER_OVERFLOW_ERROR always refers to the target buffer
				1489	* because overflows into the pivot buffer are handled internally.
				1490	* Other conversion errors are from the source-to-pivot
				1491	* conversion if *pivotSource==pivotStart, otherwise from
				1492	* the pivot-to-target conversion.
				1493	*
				1494	* @see ucnv_convert
				1495	* @see ucnv_fromAlgorithmic
				1496	* @see ucnv_toAlgorithmic
				1497	* @see ucnv_fromUnicode
				1498	* @see ucnv_toUnicode
				1499	* @see ucnv_fromUChars
				1500	* @see ucnv_toUChars
				1501	* @stable ICU 2.6
				1502	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1503	U_CAPI void U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1504	ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
				1505	char **target, const char *targetLimit,
				1506	const char **source, const char *sourceLimit,
				1507	UChar *pivotStart, UChar **pivotSource,
				1508	UChar **pivotTarget, const UChar *pivotLimit,
				1509	UBool reset, UBool flush,
				1510	UErrorCode *pErrorCode);
				1511
				1512	/**
				1513	* Convert from one external charset to another.
				1514	* Internally, two converters are opened according to the name arguments,
				1515	* then the text is converted to and from the 16-bit Unicode "pivot"
				1516	* using ucnv_convertEx(), then the converters are closed again.
				1517	*
				1518	* This is a convenience function, not an efficient way to convert a lot of text:
				1519	* ucnv_convert()
				1520	* - takes charset names, not converter objects, so that
				1521	* - two converters are opened for each call
				1522	* - only single-string conversion is possible, not streaming operation
				1523	* - does not provide enough information to find out,
				1524	* in case of failure, whether the toUnicode or
				1525	* the fromUnicode conversion failed
				1526	* - allows NUL-terminated input
				1527	* (only a single NUL byte, will not work for charsets with multi-byte NULs)
				1528	* (if sourceLength==-1, see parameters)
				1529	* - terminate with a NUL on output
				1530	* (only a single NUL byte, not useful for charsets with multi-byte NULs),
				1531	* or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills
				1532	* the target buffer
				1533	* - a pivot buffer is provided internally
				1534	*
				1535	* The function returns when one of the following is true:
				1536	* - the entire source text has been converted successfully to the target buffer
				1537	* and either the target buffer is terminated with a single NUL byte
				1538	* or the error code is set to U_STRING_NOT_TERMINATED_WARNING
				1539	* - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR)
				1540	* and the full output string length is returned ("preflighting")
				1541	* - a conversion error occurred
				1542	* (other U_FAILURE(), see description of pErrorCode)
				1543	*
				1544	* @param toConverterName The name of the converter that is used to convert
				1545	* from the UTF-16 pivot buffer to the target.
				1546	* @param fromConverterName The name of the converter that is used to convert
				1547	* from the source to the UTF-16 pivot buffer.
				1548	* @param target Pointer to the output buffer.
				1549	* @param targetCapacity Capacity of the target, in bytes.
				1550	* @param source Pointer to the input buffer.
				1551	* @param sourceLength Length of the input text, in bytes, or -1 for NUL-terminated input.
				1552	* @param pErrorCode ICU error code in/out parameter.
				1553	* Must fulfill U_SUCCESS before the function call.
				1554	* @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
				1555	* and a U_BUFFER_OVERFLOW_ERROR is set.
				1556	*
				1557	* @see ucnv_convertEx
				1558	* @see ucnv_fromAlgorithmic
				1559	* @see ucnv_toAlgorithmic
				1560	* @see ucnv_fromUnicode
				1561	* @see ucnv_toUnicode
				1562	* @see ucnv_fromUChars
				1563	* @see ucnv_toUChars
				1564	* @see ucnv_getNextUChar
				1565	* @stable ICU 2.0
				1566	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1567	U_CAPI int32_t U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1568	ucnv_convert(const char *toConverterName,
				1569	const char *fromConverterName,
				1570	char *target,
				1571	int32_t targetCapacity,
				1572	const char *source,
				1573	int32_t sourceLength,
				1574	UErrorCode *pErrorCode);
				1575
				1576	/**
				1577	* Convert from one external charset to another.
				1578	* Internally, the text is converted to and from the 16-bit Unicode "pivot"
				1579	* using ucnv_convertEx(). ucnv_toAlgorithmic() works exactly like ucnv_convert()
				1580	* except that the two converters need not be looked up and opened completely.
				1581	*
				1582	* The source-to-pivot conversion uses the cnv converter parameter.
				1583	* The pivot-to-target conversion uses a purely algorithmic converter
				1584	* according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter.
				1585	*
				1586	* Internally, the algorithmic converter is opened and closed for each
				1587	* function call, which is more efficient than using the public ucnv_open()
				1588	* but somewhat less efficient than only resetting an existing converter
				1589	* and using ucnv_convertEx().
				1590	*
				1591	* This function is more convenient than ucnv_convertEx() for single-string
				1592	* conversions, especially when "preflighting" is desired (returning the length
				1593	* of the complete output even if it does not fit into the target buffer;
				1594	* see the User Guide Strings chapter). See ucnv_convert() for details.
				1595	*
				1596	* @param algorithmicType UConverterType constant identifying the desired target
				1597	* charset as a purely algorithmic converter.
				1598	* Those are converters for Unicode charsets like
				1599	* UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc.,
				1600	* as well as US-ASCII and ISO-8859-1.
				1601	* @param cnv The converter that is used to convert
				1602	* from the source to the UTF-16 pivot buffer.
				1603	* @param target Pointer to the output buffer.
				1604	* @param targetCapacity Capacity of the target, in bytes.
				1605	* @param source Pointer to the input buffer.
				1606	* @param sourceLength Length of the input text, in bytes
				1607	* @param pErrorCode ICU error code in/out parameter.
				1608	* Must fulfill U_SUCCESS before the function call.
				1609	* @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
				1610	* and a U_BUFFER_OVERFLOW_ERROR is set.
				1611	*
				1612	* @see ucnv_fromAlgorithmic
				1613	* @see ucnv_convert
				1614	* @see ucnv_convertEx
				1615	* @see ucnv_fromUnicode
				1616	* @see ucnv_toUnicode
				1617	* @see ucnv_fromUChars
				1618	* @see ucnv_toUChars
				1619	* @stable ICU 2.6
				1620	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1621	U_CAPI int32_t U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1622	ucnv_toAlgorithmic(UConverterType algorithmicType,
				1623	UConverter *cnv,
				1624	char *target, int32_t targetCapacity,
				1625	const char *source, int32_t sourceLength,
				1626	UErrorCode *pErrorCode);
				1627
				1628	/**
				1629	* Convert from one external charset to another.
				1630	* Internally, the text is converted to and from the 16-bit Unicode "pivot"
				1631	* using ucnv_convertEx(). ucnv_fromAlgorithmic() works exactly like ucnv_convert()
				1632	* except that the two converters need not be looked up and opened completely.
				1633	*
				1634	* The source-to-pivot conversion uses a purely algorithmic converter
				1635	* according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter.
				1636	* The pivot-to-target conversion uses the cnv converter parameter.
				1637	*
				1638	* Internally, the algorithmic converter is opened and closed for each
				1639	* function call, which is more efficient than using the public ucnv_open()
				1640	* but somewhat less efficient than only resetting an existing converter
				1641	* and using ucnv_convertEx().
				1642	*
				1643	* This function is more convenient than ucnv_convertEx() for single-string
				1644	* conversions, especially when "preflighting" is desired (returning the length
				1645	* of the complete output even if it does not fit into the target buffer;
				1646	* see the User Guide Strings chapter). See ucnv_convert() for details.
				1647	*
				1648	* @param cnv The converter that is used to convert
				1649	* from the UTF-16 pivot buffer to the target.
				1650	* @param algorithmicType UConverterType constant identifying the desired source
				1651	* charset as a purely algorithmic converter.
				1652	* Those are converters for Unicode charsets like
				1653	* UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc.,
				1654	* as well as US-ASCII and ISO-8859-1.
				1655	* @param target Pointer to the output buffer.
				1656	* @param targetCapacity Capacity of the target, in bytes.
				1657	* @param source Pointer to the input buffer.
				1658	* @param sourceLength Length of the input text, in bytes
				1659	* @param pErrorCode ICU error code in/out parameter.
				1660	* Must fulfill U_SUCCESS before the function call.
				1661	* @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
				1662	* and a U_BUFFER_OVERFLOW_ERROR is set.
				1663	*
				1664	* @see ucnv_toAlgorithmic
				1665	* @see ucnv_convert
				1666	* @see ucnv_convertEx
				1667	* @see ucnv_fromUnicode
				1668	* @see ucnv_toUnicode
				1669	* @see ucnv_fromUChars
				1670	* @see ucnv_toUChars
				1671	* @stable ICU 2.6
				1672	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1673	U_CAPI int32_t U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1674	ucnv_fromAlgorithmic(UConverter *cnv,
				1675	UConverterType algorithmicType,
				1676	char *target, int32_t targetCapacity,
				1677	const char *source, int32_t sourceLength,
				1678	UErrorCode *pErrorCode);
				1679
				1680	/**
				1681	* Frees up memory occupied by unused, cached converter shared data.
				1682	*
				1683	* @return the number of cached converters successfully deleted
				1684	* @see ucnv_close
				1685	* @stable ICU 2.0
				1686	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1687	U_CAPI int32_t U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1688	ucnv_flushCache(void);
				1689
				1690	/**
				1691	* Returns the number of available converters, as per the alias file.
				1692	*
				1693	* @return the number of available converters
				1694	* @see ucnv_getAvailableName
				1695	* @stable ICU 2.0
				1696	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1697	U_CAPI int32_t U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1698	ucnv_countAvailable(void);
				1699
				1700	/**
				1701	* Gets the canonical converter name of the specified converter from a list of
				1702	* all available converters contained in the alias file. All converters
				1703	* in this list can be opened.
				1704	*
				1705	* @param n the index to a converter available on the system (in the range <TT>[0..ucnv_countAvailable()-1]</TT>)
				1706	* @return a pointer to a string (library owned), or <TT>NULL</TT> if the index is out of bounds.
				1707	* @see ucnv_countAvailable
				1708	* @stable ICU 2.0
				1709	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1710	U_CAPI const char* U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1711	ucnv_getAvailableName(int32_t n);
				1712
				1713	/**
				1714	* Returns a UEnumeration to enumerate all of the canonical converter
				1715	* names, as per the alias file, regardless of the ability to open each
				1716	* converter.
				1717	*
				1718	* @return A UEnumeration object for getting all the recognized canonical
				1719	* converter names.
				1720	* @see ucnv_getAvailableName
				1721	* @see uenum_close
				1722	* @see uenum_next
				1723	* @stable ICU 2.4
				1724	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1725	U_CAPI UEnumeration * U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1726	ucnv_openAllNames(UErrorCode *pErrorCode);
				1727
				1728	/**
				1729	* Gives the number of aliases for a given converter or alias name.
				1730	* If the alias is ambiguous, then the preferred converter is used
				1731	* and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
				1732	* This method only enumerates the listed entries in the alias file.
				1733	* @param alias alias name
				1734	* @param pErrorCode error status
				1735	* @return number of names on alias list for given alias
				1736	* @stable ICU 2.0
				1737	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1738	U_CAPI uint16_t U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1739	ucnv_countAliases(const char *alias, UErrorCode *pErrorCode);
				1740
				1741	/**
				1742	* Gives the name of the alias at given index of alias list.
				1743	* This method only enumerates the listed entries in the alias file.
				1744	* If the alias is ambiguous, then the preferred converter is used
				1745	* and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
				1746	* @param alias alias name
				1747	* @param n index in alias list
				1748	* @param pErrorCode result of operation
				1749	* @return returns the name of the alias at given index
				1750	* @see ucnv_countAliases
				1751	* @stable ICU 2.0
				1752	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1753	U_CAPI const char * U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1754	ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode);
				1755
				1756	/**
				1757	* Fill-up the list of alias names for the given alias.
				1758	* This method only enumerates the listed entries in the alias file.
				1759	* If the alias is ambiguous, then the preferred converter is used
				1760	* and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
				1761	* @param alias alias name
				1762	* @param aliases fill-in list, aliases is a pointer to an array of
				1763	* <code>ucnv_countAliases()</code> string-pointers
				1764	* (<code>const char *</code>) that will be filled in.
				1765	* The strings themselves are owned by the library.
				1766	* @param pErrorCode result of operation
				1767	* @stable ICU 2.0
				1768	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1769	U_CAPI void U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1770	ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode);
				1771
				1772	/**
				1773	* Return a new UEnumeration object for enumerating all the
				1774	* alias names for a given converter that are recognized by a standard.
				1775	* This method only enumerates the listed entries in the alias file.
				1776	* The convrtrs.txt file can be modified to change the results of
				1777	* this function.
				1778	* The first result in this list is the same result given by
				1779	* <code>ucnv_getStandardName</code>, which is the default alias for
				1780	* the specified standard name. The returned object must be closed with
				1781	* <code>uenum_close</code> when you are done with the object.
				1782	*
				1783	* @param convName original converter name
				1784	* @param standard name of the standard governing the names; MIME and IANA
				1785	* are such standards
				1786	* @param pErrorCode The error code
				1787	* @return A UEnumeration object for getting all aliases that are recognized
				1788	* by a standard. If any of the parameters are invalid, NULL
				1789	* is returned.
				1790	* @see ucnv_getStandardName
				1791	* @see uenum_close
				1792	* @see uenum_next
				1793	* @stable ICU 2.2
				1794	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1795	U_CAPI UEnumeration * U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1796	ucnv_openStandardNames(const char *convName,
				1797	const char *standard,
				1798	UErrorCode *pErrorCode);
				1799
				1800	/**
				1801	* Gives the number of standards associated to converter names.
				1802	* @return number of standards
				1803	* @stable ICU 2.0
				1804	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1805	U_CAPI uint16_t U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1806	ucnv_countStandards(void);
				1807
				1808	/**
				1809	* Gives the name of the standard at given index of standard list.
				1810	* @param n index in standard list
				1811	* @param pErrorCode result of operation
				1812	* @return returns the name of the standard at given index. Owned by the library.
				1813	* @stable ICU 2.0
				1814	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1815	U_CAPI const char * U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1816	ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode);
				1817
				1818	/**
				1819	* Returns a standard name for a given converter name.
				1820	* <p>
				1821	* Example alias table:<br>
				1822	* conv alias1 { STANDARD1 } alias2 { STANDARD1* }
				1823	* <p>
				1824	* Result of ucnv_getStandardName("conv", "STANDARD1") from example
				1825	* alias table:<br>
				1826	* <b>"alias2"</b>
				1827	*
				1828	* @param name original converter name
				1829	* @param standard name of the standard governing the names; MIME and IANA
				1830	* are such standards
				1831	* @param pErrorCode result of operation
				1832	* @return returns the standard converter name;
				1833	* if a standard converter name cannot be determined,
				1834	* then <code>NULL</code> is returned. Owned by the library.
				1835	* @stable ICU 2.0
				1836	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1837	U_CAPI const char * U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1838	ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode);
				1839
				1840	/**
				1841	* This function will return the internal canonical converter name of the
				1842	* tagged alias. This is the opposite of ucnv_openStandardNames, which
				1843	* returns the tagged alias given the canonical name.
				1844	* <p>
				1845	* Example alias table:<br>
				1846	* conv alias1 { STANDARD1 } alias2 { STANDARD1* }
				1847	* <p>
				1848	* Result of ucnv_getCanonicalName("alias1", "STANDARD1") from example
				1849	* alias table:<br>
				1850	* <b>"conv"</b>
				1851	*
				1852	* @return returns the canonical converter name;
				1853	* if a standard or alias name cannot be determined,
				1854	* then <code>NULL</code> is returned. The returned string is
				1855	* owned by the library.
				1856	* @see ucnv_getStandardName
				1857	* @stable ICU 2.4
				1858	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1859	U_CAPI const char * U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1860	ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode);
				1861
				1862	/**
				1863	* Returns the current default converter name. If you want to open
				1864	* a default converter, you do not need to use this function.
				1865	* It is faster to pass a NULL argument to ucnv_open() to open the
				1866	* default converter.
				1867	*
				1868	* If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function
				1869	* always returns "UTF-8".
				1870	*
				1871	* @return returns the current default converter name.
				1872	* Storage owned by the library
				1873	* @see ucnv_setDefaultName
				1874	* @stable ICU 2.0
				1875	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1876	U_CAPI const char * U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1877	ucnv_getDefaultName(void);
				1878
				1879	#ifndef U_HIDE_SYSTEM_API
				1880	/**
				1881	* This function is not thread safe. DO NOT call this function when ANY ICU
				1882	* function is being used from more than one thread! This function sets the
				1883	* current default converter name. If this function needs to be called, it
				1884	* should be called during application initialization. Most of the time, the
				1885	* results from ucnv_getDefaultName() or ucnv_open with a NULL string argument
				1886	* are sufficient for your application.
				1887	*
				1888	* If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function
				1889	* does nothing.
				1890	*
				1891	* @param name the converter name to be the default (must be known by ICU).
				1892	* @see ucnv_getDefaultName
				1893	* @system
				1894	* @stable ICU 2.0
				1895	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1896	U_CAPI void U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1897	ucnv_setDefaultName(const char *name);
				1898	#endif /* U_HIDE_SYSTEM_API */
				1899
				1900	/**
				1901	* Fixes the backslash character mismapping. For example, in SJIS, the backslash
				1902	* character in the ASCII portion is also used to represent the yen currency sign.
				1903	* When mapping from Unicode character 0x005C, it's unclear whether to map the
				1904	* character back to yen or backslash in SJIS. This function will take the input
				1905	* buffer and replace all the yen sign characters with backslash. This is necessary
				1906	* when the user tries to open a file with the input buffer on Windows.
				1907	* This function will test the converter to see whether such mapping is
				1908	* required. You can sometimes avoid using this function by using the correct version
				1909	* of Shift-JIS.
				1910	*
				1911	* @param cnv The converter representing the target codepage.
				1912	* @param source the input buffer to be fixed (its contents are replaced in place)
				1913	* @param sourceLen the length of the input buffer
				1914	* @see ucnv_isAmbiguous
				1915	* @stable ICU 2.0
				1916	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1917	U_CAPI void U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1918	ucnv_fixFileSeparator(const UConverter *cnv, UChar *source, int32_t sourceLen);
				1919
				1920	/**
				1921	* Determines if the converter contains ambiguous mappings of the same
				1922	* character or not.
				1923	* @param cnv the converter to be tested
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1924	* @return true if the converter contains ambiguous mappings of the same
				1925	* character, false otherwise.
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1926	* @stable ICU 2.0
				1927	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1928	U_CAPI UBool U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1929	ucnv_isAmbiguous(const UConverter *cnv);
				1930
				1931	/**
				1932	* Sets the converter to use fallback mappings or not.
				1933	* Regardless of this flag, the converter will always use
				1934	* fallbacks from Unicode Private Use code points, as well as
				1935	* reverse fallbacks (to Unicode).
				1936	* For details see ".ucm File Format"
				1937	* in the Conversion Data chapter of the ICU User Guide:
				1938	* http://www.icu-project.org/userguide/conversion-data.html#ucmformat
				1939	*
				1940	* @param cnv The converter to set the fallback mapping usage on.
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1941	* @param usesFallback true if the user wants the converter to take advantage of the fallback
				1942	* mappings, false otherwise.
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1943	* @stable ICU 2.0
				1944	* @see ucnv_usesFallback
				1945	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1946	U_CAPI void U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1947	ucnv_setFallback(UConverter *cnv, UBool usesFallback);
				1948
				1949	/**
				1950	* Determines if the converter uses fallback mappings or not.
				1951	* This flag has restrictions, see ucnv_setFallback().
				1952	*
				1953	* @param cnv The converter to be tested
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1954	* @return true if the converter uses fallback mappings, false otherwise.
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1955	* @stable ICU 2.0
				1956	* @see ucnv_setFallback
				1957	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1958	U_CAPI UBool U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1959	ucnv_usesFallback(const UConverter *cnv);
				1960
				1961	/**
				1962	* Detects Unicode signature byte sequences at the start of the byte stream
				1963	* and returns the charset name of the indicated Unicode charset.
				1964	* NULL is returned when no Unicode signature is recognized.
				1965	* The number of bytes in the signature is output as well.
				1966	*
				1967	* The caller can ucnv_open() a converter using the charset name.
				1968	* The first code unit (UChar) from the start of the stream will be U+FEFF
				1969	* (the Unicode BOM/signature character) and can usually be ignored.
				1970	*
				1971	* For most Unicode charsets it is also possible to ignore the indicated
				1972	* number of initial stream bytes and start converting after them.
				1973	* However, there are stateful Unicode charsets (UTF-7 and BOCU-1) for which
				1974	* this will not work. Therefore, it is best to ignore the first output UChar
				1975	* instead of the input signature bytes.
				1976	* <p>
				1977	* Usage:
				1978	* \snippet samples/ucnv/convsamp.cpp ucnv_detectUnicodeSignature
				1979	*
				1980	* @param source The source string in which the signature should be detected.
				1981	* @param sourceLength Length of the input string, or -1 if terminated with a NUL byte.
				1982	* @param signatureLength A pointer to int32_t to receive the number of bytes that make up the signature
				1983	* of the detected UTF; receives 0 if no signature is detected.
				1984	* Can be a NULL pointer.
				1985	* @param pErrorCode ICU error code in/out parameter.
				1986	* Must fulfill U_SUCCESS before the function call.
				1987	* @return The name of the encoding detected. NULL if encoding is not detected.
				1988	* @stable ICU 2.4
				1989	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	1990	U_CAPI const char* U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1991	ucnv_detectUnicodeSignature(const char* source,
				1992	int32_t sourceLength,
				1993	int32_t *signatureLength,
				1994	UErrorCode *pErrorCode);
				1995
				1996	/**
				1997	* Returns the number of UChars held in the converter's internal state
				1998	* because more input is needed for completing the conversion. This function is
				1999	* useful for mapping semantics of ICU's converter interface to those of iconv,
				2000	* and this information is not needed for normal conversion.
				2001	* @param cnv The converter in which the input is held as internal state
				2002	* @param status ICU error code in/out parameter.
				2003	* Must fulfill U_SUCCESS before the function call.
				2004	* @return The number of UChars in the state, or -1 if an error is encountered.
				2005	* @stable ICU 3.4
				2006	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	2007	U_CAPI int32_t U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	2008	ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status);
				2009
				2010	/**
				2011	* Returns the number of chars held in the converter's internal state
				2012	* because more input is needed for completing the conversion. This function is
				2013	* useful for mapping semantics of ICU's converter interface to those of iconv,
				2014	* and this information is not needed for normal conversion.
				2015	* @param cnv The converter in which the input is held as internal state
				2016	* @param status ICU error code in/out parameter.
				2017	* Must fulfill U_SUCCESS before the function call.
				2018	* @return The number of chars in the state, or -1 if an error is encountered.
				2019	* @stable ICU 3.4
				2020	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	2021	U_CAPI int32_t U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	2022	ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status);
				2023
				2024	/**
				2025	* Returns whether or not the charset of the converter has a fixed number of bytes
				2026	* per charset character.
				2027	* Examples of this are converters that are of the type UCNV_SBCS or UCNV_DBCS.
				2028	* Another example is UTF-32 which is always 4 bytes per character.
				2029	* A Unicode code point may be represented by more than one UTF-8 or UTF-16 code unit
				2030	* but a UTF-32 converter encodes each code point with 4 bytes.
				2031	* Note: This method is not intended to be used to determine whether the charset has a
				2032	* fixed ratio of bytes to Unicode code <i>units</i> for any particular Unicode encoding form.
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	2033	* false is returned with the UErrorCode if an error occurs or cnv is NULL.
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	2034	* @param cnv The converter to be tested
				2035	* @param status ICU error code in/out parameter
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	2036	* @return true if the converter is fixed-width
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	2037	* @stable ICU 4.8
				2038	*/
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	2039	U_CAPI UBool U_EXPORT2
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	2040	ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status);
				2041
				2042	#endif
				2043
				2044	#endif
				2045	/*_UCNV*/