Blame - libicu/cts_headers/unicode/normalizer2.h - platform/external/icu

blob: 4aeb3bb3d8292107c8cb6098e29ad8119c6d86da [file] [log] [blame]

Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1	// © 2016 and later: Unicode, Inc. and others.
				2	// License & terms of use: http://www.unicode.org/copyright.html
				3	/*
				4	*******************************************************************************
				5	*
				6	* Copyright (C) 2009-2013, International Business Machines
				7	* Corporation and others. All Rights Reserved.
				8	*
				9	*******************************************************************************
				10	* file name: normalizer2.h
				11	* encoding: UTF-8
				12	* tab size: 8 (not used)
				13	* indentation:4
				14	*
				15	* created on: 2009nov22
				16	* created by: Markus W. Scherer
				17	*/
				18
				19	#ifndef __NORMALIZER2_H__
				20	#define __NORMALIZER2_H__
				21
				22	/**
				23	* \file
				24	* \brief C++ API: New API for Unicode Normalization.
				25	*/
				26
				27	#include "unicode/utypes.h"
				28
				29	#if U_SHOW_CPLUSPLUS_API
				30
				31	#if !UCONFIG_NO_NORMALIZATION
				32
				33	#include "unicode/stringpiece.h"
				34	#include "unicode/uniset.h"
				35	#include "unicode/unistr.h"
				36	#include "unicode/unorm2.h"
				37
				38	U_NAMESPACE_BEGIN
				39
				40	class ByteSink;
				41
				42	/**
				43	* Unicode normalization functionality for standard Unicode normalization or
				44	* for using custom mapping tables.
				45	* All instances of this class are unmodifiable/immutable.
				46	* Instances returned by getInstance() are singletons that must not be deleted by the caller.
				47	* The Normalizer2 class is not intended for public subclassing.
				48	*
				49	* The primary functions are to produce a normalized string and to detect whether
				50	* a string is already normalized.
				51	* The most commonly used normalization forms are those defined in
				52	* http://www.unicode.org/unicode/reports/tr15/
				53	* However, this API supports additional normalization forms for specialized purposes.
				54	* For example, NFKC_Casefold is provided via getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode)
				55	* and can be used in implementations of UTS #46.
				56	*
				57	* Not only are the standard compose and decompose modes supplied,
				58	* but additional modes are provided as documented in the Mode enum.
				59	*
				60	* Some of the functions in this class identify normalization boundaries.
				61	* At a normalization boundary, the portions of the string
				62	* before it and starting from it do not interact and can be handled independently.
				63	*
				64	* The spanQuickCheckYes() stops at a normalization boundary.
				65	* When the goal is a normalized string, then the text before the boundary
				66	* can be copied, and the remainder can be processed with normalizeSecondAndAppend().
				67	*
				68	* The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether
				69	* a character is guaranteed to be at a normalization boundary,
				70	* regardless of context.
				71	* This is used for moving from one normalization boundary to the next
				72	* or preceding boundary, and for performing iterative normalization.
				73	*
				74	* Iterative normalization is useful when only a small portion of a
				75	* longer string needs to be processed.
				76	* For example, in ICU, iterative normalization is used by the NormalizationTransliterator
				77	* (to avoid replacing already-normalized text) and ucol_nextSortKeyPart()
				78	* (to process only the substring for which sort key bytes are computed).
				79	*
				80	* The set of normalization boundaries returned by these functions may not be
				81	* complete: There may be more boundaries that could be returned.
				82	* Different functions may return different boundaries.
				83	* @stable ICU 4.4
				84	*/
				85	class U_COMMON_API Normalizer2 : public UObject {
				86	public:
				87	/**
				88	* Destructor.
				89	* @stable ICU 4.4
				90	*/
				91	~Normalizer2();
				92
				93	/**
				94	* Returns a Normalizer2 instance for Unicode NFC normalization.
				95	* Same as getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode).
				96	* Returns an unmodifiable singleton instance. Do not delete it.
				97	* @param errorCode Standard ICU error code. Its input value must
				98	* pass the U_SUCCESS() test, or else the function returns
				99	* immediately. Check for U_FAILURE() on output or use with
				100	* function chaining. (See User Guide for details.)
				101	* @return the requested Normalizer2, if successful
				102	* @stable ICU 49
				103	*/
				104	static const Normalizer2 *
				105	getNFCInstance(UErrorCode &errorCode);
				106
				107	/**
				108	* Returns a Normalizer2 instance for Unicode NFD normalization.
				109	* Same as getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode); the name "nfc" is intentional, NFD uses the "nfc" data in decompose mode.
				110	* Returns an unmodifiable singleton instance. Do not delete it.
				111	* @param errorCode Standard ICU error code. Its input value must
				112	* pass the U_SUCCESS() test, or else the function returns
				113	* immediately. Check for U_FAILURE() on output or use with
				114	* function chaining. (See User Guide for details.)
				115	* @return the requested Normalizer2, if successful
				116	* @stable ICU 49
				117	*/
				118	static const Normalizer2 *
				119	getNFDInstance(UErrorCode &errorCode);
				120
				121	/**
				122	* Returns a Normalizer2 instance for Unicode NFKC normalization.
				123	* Same as getInstance(NULL, "nfkc", UNORM2_COMPOSE, errorCode).
				124	* Returns an unmodifiable singleton instance. Do not delete it.
				125	* @param errorCode Standard ICU error code. Its input value must
				126	* pass the U_SUCCESS() test, or else the function returns
				127	* immediately. Check for U_FAILURE() on output or use with
				128	* function chaining. (See User Guide for details.)
				129	* @return the requested Normalizer2, if successful
				130	* @stable ICU 49
				131	*/
				132	static const Normalizer2 *
				133	getNFKCInstance(UErrorCode &errorCode);
				134
				135	/**
				136	* Returns a Normalizer2 instance for Unicode NFKD normalization.
				137	* Same as getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, errorCode); the name "nfkc" is intentional, NFKD uses the "nfkc" data in decompose mode.
				138	* Returns an unmodifiable singleton instance. Do not delete it.
				139	* @param errorCode Standard ICU error code. Its input value must
				140	* pass the U_SUCCESS() test, or else the function returns
				141	* immediately. Check for U_FAILURE() on output or use with
				142	* function chaining. (See User Guide for details.)
				143	* @return the requested Normalizer2, if successful
				144	* @stable ICU 49
				145	*/
				146	static const Normalizer2 *
				147	getNFKDInstance(UErrorCode &errorCode);
				148
				149	/**
				150	* Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization.
				151	* Same as getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode).
				152	* Returns an unmodifiable singleton instance. Do not delete it.
				153	* @param errorCode Standard ICU error code. Its input value must
				154	* pass the U_SUCCESS() test, or else the function returns
				155	* immediately. Check for U_FAILURE() on output or use with
				156	* function chaining. (See User Guide for details.)
				157	* @return the requested Normalizer2, if successful
				158	* @stable ICU 49
				159	*/
				160	static const Normalizer2 *
				161	getNFKCCasefoldInstance(UErrorCode &errorCode);
				162
				163	/**
				164	* Returns a Normalizer2 instance which uses the specified data file
				165	* (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
				166	* and which composes or decomposes text according to the specified mode.
				167	* Returns an unmodifiable singleton instance. Do not delete it.
				168	*
				169	* Use packageName=NULL for data files that are part of ICU's own data.
				170	* Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
				171	* Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
				172	* Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
				173	*
				174	* @param packageName NULL for ICU built-in data, otherwise application data package name
				175	* @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
				176	* @param mode normalization mode (compose or decompose etc.)
				177	* @param errorCode Standard ICU error code. Its input value must
				178	* pass the U_SUCCESS() test, or else the function returns
				179	* immediately. Check for U_FAILURE() on output or use with
				180	* function chaining. (See User Guide for details.)
				181	* @return the requested Normalizer2, if successful
				182	* @stable ICU 4.4
				183	*/
				184	static const Normalizer2 *
				185	getInstance(const char *packageName,
				186	const char *name,
				187	UNormalization2Mode mode,
				188	UErrorCode &errorCode);
				189
				190	/**
				191	* Returns the normalized form of the source string by value: convenience overload that normalizes into a local UnicodeString via normalize(src, dest, errorCode) and returns it.
				192	* @param src source string
				193	* @param errorCode Standard ICU error code. Its input value must
				194	* pass the U_SUCCESS() test, or else the function returns
				195	* immediately. Check for U_FAILURE() on output or use with
				196	* function chaining. (See User Guide for details.)
				197	* @return normalized src
				198	* @stable ICU 4.4
				199	*/
				200	UnicodeString
				201	normalize(const UnicodeString &src, UErrorCode &errorCode) const {
				202	UnicodeString result;
				203	normalize(src, result, errorCode);  // delegates to the pure virtual three-argument overload declared below
				204	return result;
				205	}
				206	/**
				207	* Writes the normalized form of the source string to the destination string
				208	* (replacing its contents) and returns the destination string.
				209	* The source and destination strings must be different objects.
				210	* @param src source string
				211	* @param dest destination string; its contents are replaced with normalized src
				212	* @param errorCode Standard ICU error code. Its input value must
				213	* pass the U_SUCCESS() test, or else the function returns
				214	* immediately. Check for U_FAILURE() on output or use with
				215	* function chaining. (See User Guide for details.)
				216	* @return dest
				217	* @stable ICU 4.4
				218	*/
				219	virtual UnicodeString &
				220	normalize(const UnicodeString &src,
				221	UnicodeString &dest,
				222	UErrorCode &errorCode) const = 0;
				223
				224	/**
				225	* Normalizes a UTF-8 string and optionally records how source substrings
				226	* relate to changed and unchanged result substrings.
				227	*
				228	* Currently implemented completely only for "compose" modes,
				229	* such as for NFC, NFKC, and NFKC_Casefold
				230	* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
				231	* Otherwise currently converts to & from UTF-16 and does not support edits.
				232	*
				233	* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
				234	* @param src Source UTF-8 string.
				235	* @param sink A ByteSink to which the normalized UTF-8 result string is written.
				236	* sink.Flush() is called at the end.
				237	* @param edits Records edits for index mapping, working with styled text,
				238	* and getting only changes (if any).
				239	* The Edits contents are undefined if any error occurs.
				240	* This function calls edits->reset() first unless
				241	* options includes U_EDITS_NO_RESET. edits can be nullptr.
				242	* @param errorCode Standard ICU error code. Its input value must
				243	* pass the U_SUCCESS() test, or else the function returns
				244	* immediately. Check for U_FAILURE() on output or use with
				245	* function chaining. (See User Guide for details.)
				246	* @stable ICU 60
				247	*/
				248	virtual void
				249	normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
				250	Edits *edits, UErrorCode &errorCode) const;
				251
				252	/**
				253	* Appends the normalized form of the second string to the first string
				254	* (merging them at the boundary) and returns the first string.
				255	* The result is normalized if the first string was normalized.
				256	* The first and second strings must be different objects.
				257	* @param first string, should be normalized
				258	* @param second string, will be normalized
				259	* @param errorCode Standard ICU error code. Its input value must
				260	* pass the U_SUCCESS() test, or else the function returns
				261	* immediately. Check for U_FAILURE() on output or use with
				262	* function chaining. (See User Guide for details.)
				263	* @return first
				264	* @stable ICU 4.4
				265	*/
				266	virtual UnicodeString &
				267	normalizeSecondAndAppend(UnicodeString &first,
				268	const UnicodeString &second,
				269	UErrorCode &errorCode) const = 0;
				270	/**
				271	* Appends the second string to the first string
				272	* (merging them at the boundary) and returns the first string.
				273	* The result is normalized if both the strings were normalized.
				274	* The first and second strings must be different objects.
				275	* @param first string, should be normalized
				276	* @param second string, should be normalized
				277	* @param errorCode Standard ICU error code. Its input value must
				278	* pass the U_SUCCESS() test, or else the function returns
				279	* immediately. Check for U_FAILURE() on output or use with
				280	* function chaining. (See User Guide for details.)
				281	* @return first
				282	* @stable ICU 4.4
				283	*/
				284	virtual UnicodeString &
				285	append(UnicodeString &first,
				286	const UnicodeString &second,
				287	UErrorCode &errorCode) const = 0;
				288
				289	/**
				290	* Gets the decomposition mapping of c.
				291	* Roughly equivalent to normalizing the String form of c
				292	* on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster, and except that this function
				293	* returns FALSE and does not write a string
				294	* if c does not have a decomposition mapping in this instance's data.
				295	* This function is independent of the mode of the Normalizer2.
				296	* @param c code point
				297	* @param decomposition String object which will be set to c's
				298	* decomposition mapping, if there is one.
				299	* @return TRUE if c has a decomposition, otherwise FALSE
				300	* @stable ICU 4.6
				301	*/
				302	virtual UBool
				303	getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
				304
				305	/**
				306	* Gets the raw decomposition mapping of c.
				307	*
				308	* This is similar to the getDecomposition() method but returns the
				309	* raw decomposition mapping as specified in UnicodeData.txt or
				310	* (for custom data) in the mapping files processed by the gennorm2 tool.
				311	* By contrast, getDecomposition() returns the processed,
				312	* recursively-decomposed version of this mapping.
				313	*
				314	* When used on a standard NFKC Normalizer2 instance,
				315	* getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
				316	*
				317	* When used on a standard NFC Normalizer2 instance,
				318	* it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
				319	* in this case, the result contains either one or two code points (=1..4 char16_ts).
				320	*
				321	* This function is independent of the mode of the Normalizer2.
				322	* The default implementation returns FALSE.
				323	* @param c code point
				324	* @param decomposition String object which will be set to c's
				325	* raw decomposition mapping, if there is one.
				326	* @return TRUE if c has a decomposition, otherwise FALSE
				327	* @stable ICU 49
				328	*/
				329	virtual UBool
				330	getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
				331
				332	/**
				333	* Performs pairwise composition of a & b and returns the composite if there is one.
				334	*
				335	* Returns a composite code point c only if c has a two-way mapping to a+b.
				336	* In standard Unicode normalization, this means that
				337	* c has a canonical decomposition to a+b
				338	* and c does not have the Full_Composition_Exclusion property.
				339	*
				340	* This function is independent of the mode of the Normalizer2.
				341	* The default implementation returns a negative value.
				342	* @param a A (normalization starter) code point.
				343	* @param b Another code point.
				344	* @return The non-negative composite code point if there is one; otherwise a negative value.
				345	* @stable ICU 49
				346	*/
				347	virtual UChar32
				348	composePair(UChar32 a, UChar32 b) const;
				349
				350	/**
				351	* Gets the combining class of c.
				352	* The default implementation returns 0
				353	* but all standard implementations return the Unicode Canonical_Combining_Class value.
				354	* @param c code point
				355	* @return c's combining class
				356	* @stable ICU 49
				357	*/
				358	virtual uint8_t
				359	getCombiningClass(UChar32 c) const;
				360
				361	/**
				362	* Tests if the string is normalized.
				363	* Internally, in cases where the quickCheck() method would return "maybe"
				364	* (which is only possible for the two COMPOSE modes) this method
				365	* resolves to "yes" or "no" to provide a definitive result,
				366	* at the cost of doing more work in those cases.
				367	* @param s input string
				368	* @param errorCode Standard ICU error code. Its input value must
				369	* pass the U_SUCCESS() test, or else the function returns
				370	* immediately. Check for U_FAILURE() on output or use with
				371	* function chaining. (See User Guide for details.)
				372	* @return TRUE if s is normalized
				373	* @stable ICU 4.4
				374	*/
				375	virtual UBool
				376	isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
				377	/**
				378	* Tests if the UTF-8 string is normalized.
				379	* Internally, in cases where the quickCheck() method would return "maybe"
				380	* (which is only possible for the two COMPOSE modes) this method
				381	* resolves to "yes" or "no" to provide a definitive result,
				382	* at the cost of doing more work in those cases.
				383	*
				384	* This works for all normalization modes,
				385	* but it is currently optimized for UTF-8 only for "compose" modes,
				386	* such as for NFC, NFKC, and NFKC_Casefold
				387	* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
				388	* For other modes it currently converts to UTF-16 and calls isNormalized().
				389	*
				390	* @param s UTF-8 input string
				391	* @param errorCode Standard ICU error code. Its input value must
				392	* pass the U_SUCCESS() test, or else the function returns
				393	* immediately. Check for U_FAILURE() on output or use with
				394	* function chaining. (See User Guide for details.)
				395	* @return TRUE if s is normalized
				396	* @stable ICU 60
				397	*/
				398	virtual UBool
				399	isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const;
				400
				401
				402	/**
				403	* Tests if the string is normalized.
				404	* For the two COMPOSE modes, the result could be "maybe" in cases that
				405	* would take a little more work to resolve definitively.
				406	* Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster
				407	* combination of quick check + normalization, to avoid
				408	* re-checking the "yes" prefix.
				409	* @param s input string
				410	* @param errorCode Standard ICU error code. Its input value must
				411	* pass the U_SUCCESS() test, or else the function returns
				412	* immediately. Check for U_FAILURE() on output or use with
				413	* function chaining. (See User Guide for details.)
				414	* @return UNormalizationCheckResult
				415	* @stable ICU 4.4
				416	*/
				417	virtual UNormalizationCheckResult
				418	quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
				419
				420	/**
				421	* Returns the end of the normalized substring of the input string.
				422	* In other words, with <code>end=spanQuickCheckYes(s, ec);</code>
				423	* the substring <code>UnicodeString(s, 0, end)</code>
				424	* will pass the quick check with a "yes" result.
				425	*
				426	* The returned end index is usually one or more characters before the
				427	* "no" or "maybe" character: The end index is at a normalization boundary.
				428	* (See the class documentation for more about normalization boundaries.)
				429	*
				430	* When the goal is a normalized string and most input strings are expected
				431	* to be normalized already, then call this method,
				432	* and if it returns a prefix shorter than the input string,
				433	* copy that prefix and use normalizeSecondAndAppend() for the remainder.
				434	* @param s input string
				435	* @param errorCode Standard ICU error code. Its input value must
				436	* pass the U_SUCCESS() test, or else the function returns
				437	* immediately. Check for U_FAILURE() on output or use with
				438	* function chaining. (See User Guide for details.)
				439	* @return "yes" span end index
				440	* @stable ICU 4.4
				441	*/
				442	virtual int32_t
				443	spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
				444
				445	/**
				446	* Tests if the character always has a normalization boundary before it,
				447	* regardless of context.
				448	* If true, then the character does not normalization-interact with
				449	* preceding characters.
				450	* In other words, a string containing this character can be normalized
				451	* by processing portions before this character and starting from this
				452	* character independently.
				453	* This is used for iterative normalization. See the class documentation for details.
				454	* @param c character to test
				455	* @return TRUE if c has a normalization boundary before it
				456	* @stable ICU 4.4
				457	*/
				458	virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
				459
				460	/**
				461	* Tests if the character always has a normalization boundary after it,
				462	* regardless of context.
				463	* If true, then the character does not normalization-interact with
				464	* following characters.
				465	* In other words, a string containing this character can be normalized
				466	* by processing portions up to this character and after this
				467	* character independently.
				468	* This is used for iterative normalization. See the class documentation for details.
				469	* Note that this operation may be significantly slower than hasBoundaryBefore().
				470	* @param c character to test
				471	* @return TRUE if c has a normalization boundary after it
				472	* @stable ICU 4.4
				473	*/
				474	virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
				475
				476	/**
				477	* Tests if the character is normalization-inert.
				478	* If true, then the character does not change, nor normalization-interact with
				479	* preceding or following characters.
				480	* In other words, a string containing this character can be normalized
				481	* by processing portions before this character and after this
				482	* character independently.
				483	* This is used for iterative normalization. See the class documentation for details.
				484	* Note that this operation may be significantly slower than hasBoundaryBefore().
				485	* @param c character to test
				486	* @return TRUE if c is normalization-inert
				487	* @stable ICU 4.4
				488	*/
				489	virtual UBool isInert(UChar32 c) const = 0;
				490	};
				491
				492	/**
				493	* Normalization filtered by a UnicodeSet.
				494	* Normalizes portions of the text contained in the filter set and leaves
				495	* portions not contained in the filter set unchanged.
				496	* Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE).
				497	* Not-in-the-filter text is treated as "is normalized" and "quick check yes".
				498	* This class implements all of (and only) the Normalizer2 API.
				499	* An instance of this class is unmodifiable/immutable but is constructed and
				500	* must be destructed by the owner.
				501	* @stable ICU 4.4
				502	*/
				503	class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
				504	public:
				505	/**
				506	* Constructs a filtered normalizer wrapping any Normalizer2 instance
				507	* and a filter set.
				508	* Both are aliased and must not be modified or deleted while this object
				509	* is used.
				510	* The filter set should be frozen; otherwise the performance will suffer greatly.
				511	* @param n2 wrapped Normalizer2 instance
				512	* @param filterSet UnicodeSet which determines the characters to be normalized
				513	* @stable ICU 4.4
				514	*/
				515	FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
				516	norm2(n2), set(filterSet) {}
				517
				518	/**
				519	* Destructor.
				520	* @stable ICU 4.4
				521	*/
				522	~FilteredNormalizer2();
				523
				524	/**
				525	* Writes the normalized form of the source string to the destination string
				526	* (replacing its contents) and returns the destination string.
				527	* The source and destination strings must be different objects.
				528	* @param src source string
				529	* @param dest destination string; its contents are replaced with normalized src
				530	* @param errorCode Standard ICU error code. Its input value must
				531	* pass the U_SUCCESS() test, or else the function returns
				532	* immediately. Check for U_FAILURE() on output or use with
				533	* function chaining. (See User Guide for details.)
				534	* @return dest
				535	* @stable ICU 4.4
				536	*/
				537	virtual UnicodeString &
				538	normalize(const UnicodeString &src,
				539	UnicodeString &dest,
				540	UErrorCode &errorCode) const U_OVERRIDE;
				541
				542	/**
				543	* Normalizes a UTF-8 string and optionally records how source substrings
				544	* relate to changed and unchanged result substrings.
				545	*
				546	* Currently implemented completely only for "compose" modes,
				547	* such as for NFC, NFKC, and NFKC_Casefold
				548	* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
				549	* Otherwise currently converts to & from UTF-16 and does not support edits.
				550	*
				551	* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
				552	* @param src Source UTF-8 string.
				553	* @param sink A ByteSink to which the normalized UTF-8 result string is written.
				554	* sink.Flush() is called at the end.
				555	* @param edits Records edits for index mapping, working with styled text,
				556	* and getting only changes (if any).
				557	* The Edits contents are undefined if any error occurs.
				558	* This function calls edits->reset() first unless
				559	* options includes U_EDITS_NO_RESET. edits can be nullptr.
				560	* @param errorCode Standard ICU error code. Its input value must
				561	* pass the U_SUCCESS() test, or else the function returns
				562	* immediately. Check for U_FAILURE() on output or use with
				563	* function chaining. (See User Guide for details.)
				564	* @stable ICU 60
				565	*/
				566	virtual void
				567	normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
				568	Edits *edits, UErrorCode &errorCode) const U_OVERRIDE;
				569
				570	/**
				571	* Appends the normalized form of the second string to the first string
				572	* (merging them at the boundary) and returns the first string.
				573	* The result is normalized if the first string was normalized.
				574	* The first and second strings must be different objects.
				575	* @param first string, should be normalized
				576	* @param second string, will be normalized
				577	* @param errorCode Standard ICU error code. Its input value must
				578	* pass the U_SUCCESS() test, or else the function returns
				579	* immediately. Check for U_FAILURE() on output or use with
				580	* function chaining. (See User Guide for details.)
				581	* @return first
				582	* @stable ICU 4.4
				583	*/
				584	virtual UnicodeString &
				585	normalizeSecondAndAppend(UnicodeString &first,
				586	const UnicodeString &second,
				587	UErrorCode &errorCode) const U_OVERRIDE;
				588	/**
				589	* Appends the second string to the first string
				590	* (merging them at the boundary) and returns the first string.
				591	* The result is normalized if both the strings were normalized.
				592	* The first and second strings must be different objects.
				593	* @param first string, should be normalized
				594	* @param second string, should be normalized
				595	* @param errorCode Standard ICU error code. Its input value must
				596	* pass the U_SUCCESS() test, or else the function returns
				597	* immediately. Check for U_FAILURE() on output or use with
				598	* function chaining. (See User Guide for details.)
				599	* @return first
				600	* @stable ICU 4.4
				601	*/
				602	virtual UnicodeString &
				603	append(UnicodeString &first,
				604	const UnicodeString &second,
				605	UErrorCode &errorCode) const U_OVERRIDE;
				606
				607	/**
				608	* Gets the decomposition mapping of c.
				609	* For details see the base class documentation.
				610	*
				611	* This function is independent of the mode of the Normalizer2.
				612	* @param c code point
				613	* @param decomposition String object which will be set to c's
				614	* decomposition mapping, if there is one.
				615	* @return TRUE if c has a decomposition, otherwise FALSE
				616	* @stable ICU 4.6
				617	*/
				618	virtual UBool
				619	getDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE;
				620
				621	/**
				622	* Gets the raw decomposition mapping of c.
				623	* For details see the base class documentation.
				624	*
				625	* This function is independent of the mode of the Normalizer2.
				626	* @param c code point
				627	* @param decomposition String object which will be set to c's
				628	* raw decomposition mapping, if there is one.
				629	* @return TRUE if c has a decomposition, otherwise FALSE
				630	* @stable ICU 49
				631	*/
				632	virtual UBool
				633	getRawDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE;
				634
				635	/**
				636	* Performs pairwise composition of a & b and returns the composite if there is one.
				637	* For details see the base class documentation.
				638	*
				639	* This function is independent of the mode of the Normalizer2.
				640	* @param a A (normalization starter) code point.
				641	* @param b Another code point.
				642	* @return The non-negative composite code point if there is one; otherwise a negative value.
				643	* @stable ICU 49
				644	*/
				645	virtual UChar32
				646	composePair(UChar32 a, UChar32 b) const U_OVERRIDE;
				647
				648	/**
				649	* Gets the combining class of c.
				650	* The default implementation returns 0
				651	* but all standard implementations return the Unicode Canonical_Combining_Class value.
				652	* @param c code point
				653	* @return c's combining class
				654	* @stable ICU 49
				655	*/
				656	virtual uint8_t
				657	getCombiningClass(UChar32 c) const U_OVERRIDE;
				658
				659	/**
				660	* Tests if the string is normalized.
				661	* For details see the Normalizer2 base class documentation.
				662	* @param s input string
				663	* @param errorCode Standard ICU error code. Its input value must
				664	* pass the U_SUCCESS() test, or else the function returns
				665	* immediately. Check for U_FAILURE() on output or use with
				666	* function chaining. (See User Guide for details.)
				667	* @return TRUE if s is normalized
				668	* @stable ICU 4.4
				669	*/
				670	virtual UBool
				671	isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
				672	/**
				673	* Tests if the UTF-8 string is normalized.
				674	* Internally, in cases where the quickCheck() method would return "maybe"
				675	* (which is only possible for the two COMPOSE modes), this method
				676	* resolves to "yes" or "no" to provide a definitive result,
				677	* at the cost of doing more work in those cases.
				678	*
				679	* This works for all normalization modes,
				680	* but it is currently optimized for UTF-8 only for "compose" modes,
				681	* such as for NFC, NFKC, and NFKC_Casefold
				682	* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
				683	* For other modes, it currently converts to UTF-16 and calls isNormalized().
				684	*
				685	* @param s UTF-8 input string
				686	* @param errorCode Standard ICU error code. Its input value must
				687	* pass the U_SUCCESS() test, or else the function returns
				688	* immediately. Check for U_FAILURE() on output or use with
				689	* function chaining. (See User Guide for details.)
				690	* @return TRUE if s is normalized
				691	* @stable ICU 60
				692	*/
				693	virtual UBool
				694	isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const U_OVERRIDE;
				695	/**
				696	* Quickly tests if the string is normalized; the result may be "maybe" instead of a definitive "yes" or "no".
				697	* For details see the Normalizer2 base class documentation.
				698	* @param s input string
				699	* @param errorCode Standard ICU error code. Its input value must
				700	* pass the U_SUCCESS() test, or else the function returns
				701	* immediately. Check for U_FAILURE() on output or use with
				702	* function chaining. (See User Guide for details.)
				703	* @return UNormalizationCheckResult ("yes", "no", or "maybe"; "maybe" is only possible for the two COMPOSE modes)
				704	* @stable ICU 4.4
				705	*/
				706	virtual UNormalizationCheckResult
				707	quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
				708	/**
				709	* Returns the end of the normalized substring of the input string, that is, the end of the quick check "yes" span.
				710	* For details see the Normalizer2 base class documentation.
				711	* @param s input string
				712	* @param errorCode Standard ICU error code. Its input value must
				713	* pass the U_SUCCESS() test, or else the function returns
				714	* immediately. Check for U_FAILURE() on output or use with
				715	* function chaining. (See User Guide for details.)
				716	* @return "yes" span end index
				717	* @stable ICU 4.4
				718	*/
				719	virtual int32_t
				720	spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
				721
				722	/**
				723	* Tests if the character always has a normalization boundary before it,
				724	* regardless of context.
				725	* For details see the Normalizer2 base class documentation.
				726	* @param c character (code point) to test
				727	* @return TRUE if c has a normalization boundary before it
				728	* @stable ICU 4.4
				729	*/
				730	virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE;
				731
				732	/**
				733	* Tests if the character always has a normalization boundary after it,
				734	* regardless of context.
				735	* For details see the Normalizer2 base class documentation.
				736	* @param c character (code point) to test
				737	* @return TRUE if c has a normalization boundary after it
				738	* @stable ICU 4.4
				739	*/
				740	virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE;
				741
				742	/**
				743	* Tests if the character is normalization-inert.
				744	* For details see the Normalizer2 base class documentation.
				745	* @param c character (code point) to test
				746	* @return TRUE if c is normalization-inert
				747	* @stable ICU 4.4
				748	*/
				749	virtual UBool isInert(UChar32 c) const U_OVERRIDE;
				750	private:
				751	UnicodeString &
				752	normalize(const UnicodeString &src,
				753	UnicodeString &dest,
				754	USetSpanCondition spanCondition,
				755	UErrorCode &errorCode) const;  // NOTE(review): private helper taking a USetSpanCondition besides src/dest; presumably the shared worker for the public normalization entry points - confirm in the implementation file
				756
				757	void
				758	normalizeUTF8(uint32_t options, const char *src, int32_t length,
				759	ByteSink &sink, Edits *edits,
				760	USetSpanCondition spanCondition,
				761	UErrorCode &errorCode) const;  // NOTE(review): private UTF-8 helper; presumably writes its output to sink, and edits (a pointer) is presumably optional - confirm in the implementation file
				762
				763	UnicodeString &
				764	normalizeSecondAndAppend(UnicodeString &first,
				765	const UnicodeString &second,
				766	UBool doNormalize,
				767	UErrorCode &errorCode) const;  // NOTE(review): private helper; doNormalize presumably selects between normalizing second before appending and appending it as is - confirm in the implementation file
				768
				769	const Normalizer2 &norm2;  // reference member: the referenced object is not owned and must outlive this instance; NOTE(review): presumably the wrapped normalizer - confirm against the constructor
				770	const UnicodeSet &set;  // reference member: the referenced object is not owned and must outlive this instance; NOTE(review): presumably the filter set used with the USetSpanCondition arguments - confirm against the constructor
				771	};
				772
				773	U_NAMESPACE_END
				774
				775	#endif // !UCONFIG_NO_NORMALIZATION
				776
				777	#endif /* U_SHOW_CPLUSPLUS_API */
				778
				779	#endif // __NORMALIZER2_H__