Blame - common/unicode/unistr.h - platform/external/icu

blob: 4ebeb516824d2be9f89f0df757cf6331adc02fdb [file] [log] [blame]

Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	1	/*
				2	**********************************************************************
Craig Cornelius	54dcd9b	2013-02-15 14:03:14 -0800	[diff] [blame^]	3	* Copyright (C) 1998-2012, International Business Machines
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4	* Corporation and others. All Rights Reserved.
				5	**********************************************************************
				6	*
				7	* File unistr.h
				8	*
				9	* Modification History:
				10	*
				11	* Date Name Description
				12	* 09/25/98 stephen Creation.
				13	* 11/11/98 stephen Changed per 11/9 code review.
				14	* 04/20/99 stephen Overhauled per 4/16 code review.
				15	* 11/18/99 aliu Made to inherit from Replaceable. Added method
				16	* handleReplaceBetween(); other methods unchanged.
				17	* 06/25/01 grhoten Remove dependency on iostream.
				18	******************************************************************************
				19	*/
				20
				21	#ifndef UNISTR_H
				22	#define UNISTR_H
				23
				24	/**
				25	* \file
				26	* \brief C++ API: Unicode String
				27	*/
				28
Jean-Baptiste Queru	b0ac937	2009-07-20 15:09:32 -0700	[diff] [blame]	29	#include "unicode/utypes.h"
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	30	#include "unicode/rep.h"
Jean-Baptiste Queru	b0ac937	2009-07-20 15:09:32 -0700	[diff] [blame]	31	#include "unicode/std_string.h"
				32	#include "unicode/stringpiece.h"
				33	#include "unicode/bytestream.h"
Craig Cornelius	103e9ff	2012-10-09 17:03:29 -0700	[diff] [blame]	34	#include "unicode/ucasemap.h"
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	35
				36	struct UConverter; // unicode/ucnv.h
				37	class StringThreadTest;
				38
				39	#ifndef U_COMPARE_CODE_POINT_ORDER
				40	/* see also ustring.h and unorm.h */
				41	/**
				42	* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
				43	* Compare strings in code point order instead of code unit order.
				44	* @stable ICU 2.2
				45	*/
				46	#define U_COMPARE_CODE_POINT_ORDER 0x8000
				47	#endif
				48
				49	#ifndef USTRING_H
				50	/**
				51	* \ingroup ustring_ustrlen
				52	*/
				53	U_STABLE int32_t U_EXPORT2
				54	u_strlen(const UChar *s);
				55	#endif
				56
Craig Cornelius	54dcd9b	2013-02-15 14:03:14 -0800	[diff] [blame^]	57	/**
				58	* \def U_STRING_CASE_MAPPER_DEFINED
				59	* @internal
				60	*/
				61
Craig Cornelius	103e9ff	2012-10-09 17:03:29 -0700	[diff] [blame]	62	#ifndef U_STRING_CASE_MAPPER_DEFINED
				63	#define U_STRING_CASE_MAPPER_DEFINED
				64
				65	/**
				66	* Internal string case mapping function type.
				67	* @internal
				68	*/
				69	typedef int32_t U_CALLCONV
				70	UStringCaseMapper(const UCaseMap *csm,
				71	UChar *dest, int32_t destCapacity,
				72	const UChar *src, int32_t srcLength,
				73	UErrorCode *pErrorCode);
				74
				75	#endif
				76
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	77	U_NAMESPACE_BEGIN
				78
claireho	b26ce3a	2012-01-10 17:54:41 -0800	[diff] [blame]	79	class BreakIterator; // unicode/brkiter.h
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	80	class Locale; // unicode/locid.h
				81	class StringCharacterIterator;
claireho	b26ce3a	2012-01-10 17:54:41 -0800	[diff] [blame]	82	class UnicodeStringAppendable; // unicode/appendable.h
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	83
				84	/* The <iostream> include has been moved to unicode/ustream.h */
				85
				86	/**
				87	* Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
				88	* which constructs a Unicode string from an invariant-character char * string.
				89	* About invariant characters see utypes.h.
				90	* This constructor has no runtime dependency on conversion code and is
				91	* therefore recommended over ones taking a charset name string
				92	* (where the empty string "" indicates invariant-character conversion).
				93	*
				94	* @stable ICU 3.2
				95	*/
Craig Cornelius	103e9ff	2012-10-09 17:03:29 -0700	[diff] [blame]	96	#define US_INV icu::UnicodeString::kInvariant
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	97
				98	/**
				99	* Unicode String literals in C++.
				100	* Dependent on the platform properties, different UnicodeString
				101	* constructors should be used to create a UnicodeString object from
				102	* a string literal.
				103	* The macros are defined for maximum performance.
				104	* They work only for strings that contain "invariant characters", i.e.,
				105	* only latin letters, digits, and some punctuation.
				106	* See utypes.h for details.
				107	*
				108	* The string parameter must be a C string literal.
				109	* The length of the string, not including the terminating
				110	* <code>NUL</code>, must be specified as a constant.
				111	* The U_STRING_DECL macro should be invoked exactly once for one
				112	* such string variable before it is used.
				113	* @stable ICU 2.0
				114	*/
				115	#if defined(U_DECLARE_UTF16)
Craig Cornelius	103e9ff	2012-10-09 17:03:29 -0700	[diff] [blame]	116	# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	117	#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
Craig Cornelius	103e9ff	2012-10-09 17:03:29 -0700	[diff] [blame]	118	# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length)
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	119	#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
Craig Cornelius	103e9ff	2012-10-09 17:03:29 -0700	[diff] [blame]	120	# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length)
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	121	#else
Craig Cornelius	103e9ff	2012-10-09 17:03:29 -0700	[diff] [blame]	122	# define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	123	#endif
				124
				125	/**
				126	* Unicode String literals in C++.
				127	* Dependent on the platform properties, different UnicodeString
				128	* constructors should be used to create a UnicodeString object from
				129	* a string literal.
				130	* The macros are defined for improved performance.
				131	* They work only for strings that contain "invariant characters", i.e.,
				132	* only latin letters, digits, and some punctuation.
				133	* See utypes.h for details.
				134	*
				135	* The string parameter must be a C string literal.
				136	* @stable ICU 2.0
				137	*/
				138	#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
				139
				140	/**
Craig Cornelius	103e9ff	2012-10-09 17:03:29 -0700	[diff] [blame]	141	* \def UNISTR_FROM_CHAR_EXPLICIT
				142	* This can be defined to be empty or "explicit".
				143	* If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32)
				144	* constructors are marked as explicit, preventing their inadvertent use.
				145	* @draft ICU 49
				146	*/
				147	#ifndef UNISTR_FROM_CHAR_EXPLICIT
				148	# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
				149	// Auto-"explicit" in ICU library code.
				150	# define UNISTR_FROM_CHAR_EXPLICIT explicit
				151	# else
				152	// Empty by default for source code compatibility.
				153	# define UNISTR_FROM_CHAR_EXPLICIT
				154	# endif
				155	#endif
				156
				157	/**
				158	* \def UNISTR_FROM_STRING_EXPLICIT
				159	* This can be defined to be empty or "explicit".
				160	* If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *)
				161	* constructors are marked as explicit, preventing their inadvertent use.
				162	*
				163	* In particular, this helps prevent accidentally depending on ICU conversion code
				164	* by passing a string literal into an API with a const UnicodeString & parameter.
				165	* @draft ICU 49
				166	*/
				167	#ifndef UNISTR_FROM_STRING_EXPLICIT
				168	# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
				169	// Auto-"explicit" in ICU library code.
				170	# define UNISTR_FROM_STRING_EXPLICIT explicit
				171	# else
				172	// Empty by default for source code compatibility.
				173	# define UNISTR_FROM_STRING_EXPLICIT
				174	# endif
				175	#endif
				176
				177	/**
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	178	* UnicodeString is a string class that stores Unicode characters directly and provides
				179	* functionality similar to that of the Java String and StringBuffer classes.
				180	* It is a concrete implementation of the abstract class Replaceable (for transliteration).
				181	*
				182	* The UnicodeString class is not suitable for subclassing.
				183	*
				184	* <p>For an overview of Unicode strings in C and C++ see the
				185	* <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
				186	*
				187	* <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
				188	* A Unicode character may be stored with either one code unit
				189	* (the most common case) or with a matched pair of special code units
				190	* ("surrogates"). The data type for code units is UChar.
				191	* For single-character handling, a Unicode character code <em>point</em> is a value
				192	* in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
				193	*
				194	* <p>Indexes and offsets into and lengths of strings always count code units, not code points.
				195	* This is the same as with multi-byte char* strings in traditional string handling.
				196	* Operations on partial strings typically do not test for code point boundaries.
				197	* If necessary, the user needs to take care of such boundaries by testing for the code unit
				198	* values or by using functions like
				199	* UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
				200	* (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
				201	*
				202	* UnicodeString methods are more lenient with regard to input parameter values
				203	* than other ICU APIs. In particular:
				204	* - If indexes are out of bounds for a UnicodeString object
				205	* (<0 or >length()) then they are "pinned" to the nearest boundary.
				206	* - If primitive string pointer values (e.g., const UChar * or char *)
				207	* for input strings are NULL, then those input string parameters are treated
				208	* as if they pointed to an empty string.
				209	* However, this is <em>not</em> the case for char * parameters for charset names
				210	* or other IDs.
				211	* - Most UnicodeString methods do not take a UErrorCode parameter because
				212	* there are usually very few opportunities for failure other than a shortage
				213	* of memory, error codes in low-level C++ string methods would be inconvenient,
				214	* and the error code as the last parameter (ICU convention) would prevent
				215	* the use of default parameter values.
				216	* Instead, such methods set the UnicodeString into a "bogus" state
				217	* (see isBogus()) if an error occurs.
				218	*
				219	* In string comparisons, two UnicodeString objects that are both "bogus"
				220	* compare equal (to be transitive and prevent endless loops in sorting),
				221	* and a "bogus" string compares less than any non-"bogus" one.
				222	*
				223	* Const UnicodeString methods are thread-safe. Multiple threads can use
				224	* const methods on the same UnicodeString object simultaneously,
				225	* but non-const methods must not be called concurrently (in multiple threads)
				226	* with any other (const or non-const) methods.
				227	*
				228	* Similarly, const UnicodeString & parameters are thread-safe.
				229	* One object may be passed in as such a parameter concurrently in multiple threads.
				230	* This includes the const UnicodeString & parameters for
				231	* copy construction, assignment, and cloning.
				232	*
				233	* <p>UnicodeString uses several storage methods.
				234	* String contents can be stored inside the UnicodeString object itself,
				235	* in an allocated and shared buffer, or in an outside buffer that is "aliased".
				236	* Most of this is done transparently, but careful aliasing in particular provides
				237	* significant performance improvements.
				238	* Also, the internal buffer is accessible via special functions.
				239	* For details see the
				240	* <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
				241	*
				242	* @see utf.h
				243	* @see CharacterIterator
				244	* @stable ICU 2.0
				245	*/
				246	class U_COMMON_API UnicodeString : public Replaceable
				247	{
				248	public:
				249
				250	/**
				251	* Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
				252	* which constructs a Unicode string from an invariant-character char * string.
				253	* Use the macro US_INV instead of the full qualification for this value.
				254	*
				255	* @see US_INV
				256	* @stable ICU 3.2
				257	*/
				258	enum EInvariant {
				259	/**
				260	* @see EInvariant
				261	* @stable ICU 3.2
				262	*/
				263	kInvariant
				264	};
				265
				266	//========================================
				267	// Read-only operations
				268	//========================================
				269
				270	/* Comparison - bitwise only - for international comparison use collation */
				271
				272	/**
				273	* Equality operator. Performs only bitwise comparison.
				274	* @param text The UnicodeString to compare to this one.
				275	* @return TRUE if <TT>text</TT> contains the same characters as this one,
				276	* FALSE otherwise.
				277	* @stable ICU 2.0
				278	*/
				279	inline UBool operator== (const UnicodeString& text) const;
				280
				281	/**
				282	* Inequality operator. Performs only bitwise comparison.
				283	* @param text The UnicodeString to compare to this one.
				284	* @return FALSE if <TT>text</TT> contains the same characters as this one,
				285	* TRUE otherwise.
				286	* @stable ICU 2.0
				287	*/
				288	inline UBool operator!= (const UnicodeString& text) const;
				289
				290	/**
				291	* Greater than operator. Performs only bitwise comparison.
				292	* @param text The UnicodeString to compare to this one.
				293	* @return TRUE if the characters in this are bitwise
				294	* greater than the characters in <code>text</code>, FALSE otherwise
				295	* @stable ICU 2.0
				296	*/
				297	inline UBool operator> (const UnicodeString& text) const;
				298
				299	/**
				300	* Less than operator. Performs only bitwise comparison.
				301	* @param text The UnicodeString to compare to this one.
				302	* @return TRUE if the characters in this are bitwise
				303	* less than the characters in <code>text</code>, FALSE otherwise
				304	* @stable ICU 2.0
				305	*/
				306	inline UBool operator< (const UnicodeString& text) const;
				307
				308	/**
				309	* Greater than or equal operator. Performs only bitwise comparison.
				310	* @param text The UnicodeString to compare to this one.
				311	* @return TRUE if the characters in this are bitwise
				312	* greater than or equal to the characters in <code>text</code>, FALSE otherwise
				313	* @stable ICU 2.0
				314	*/
				315	inline UBool operator>= (const UnicodeString& text) const;
				316
				317	/**
				318	* Less than or equal operator. Performs only bitwise comparison.
				319	* @param text The UnicodeString to compare to this one.
				320	* @return TRUE if the characters in this are bitwise
				321	* less than or equal to the characters in <code>text</code>, FALSE otherwise
				322	* @stable ICU 2.0
				323	*/
				324	inline UBool operator<= (const UnicodeString& text) const;
				325
				326	/**
				327	* Compare the characters bitwise in this UnicodeString to
				328	* the characters in <code>text</code>.
				329	* @param text The UnicodeString to compare to this one.
				330	* @return The result of bitwise character comparison: 0 if this
				331	* contains the same characters as <code>text</code>, -1 if the characters in
				332	* this are bitwise less than the characters in <code>text</code>, +1 if the
				333	* characters in this are bitwise greater than the characters
				334	* in <code>text</code>.
				335	* @stable ICU 2.0
				336	*/
				337	inline int8_t compare(const UnicodeString& text) const;
				338
				339	/**
				340	* Compare the characters bitwise in the range
				341	* [<TT>start</TT>, <TT>start + length</TT>) with the characters
				342	* in <TT>text</TT>
				343	* @param start the offset at which the compare operation begins
				344	* @param length the number of characters of text to compare.
				345	* @param text the other text to be compared against this string.
				346	* @return The result of bitwise character comparison: 0 if this
				347	* contains the same characters as <code>text</code>, -1 if the characters in
				348	* this are bitwise less than the characters in <code>text</code>, +1 if the
				349	* characters in this are bitwise greater than the characters
				350	* in <code>text</code>.
				351	* @stable ICU 2.0
				352	*/
				353	inline int8_t compare(int32_t start,
				354	int32_t length,
				355	const UnicodeString& text) const;
				356
				357	/**
				358	* Compare the characters bitwise in the range
				359	* [<TT>start</TT>, <TT>start + length</TT>) with the characters
				360	* in <TT>srcText</TT> in the range
				361	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
				362	* @param start the offset at which the compare operation begins
				363	* @param length the number of characters in this to compare.
				364	* @param srcText the text to be compared
				365	* @param srcStart the offset into <TT>srcText</TT> to start comparison
				366	* @param srcLength the number of characters in <TT>srcText</TT> to compare
				367	* @return The result of bitwise character comparison: 0 if this
				368	* contains the same characters as <code>srcText</code>, -1 if the characters in
				369	* this are bitwise less than the characters in <code>srcText</code>, +1 if the
				370	* characters in this are bitwise greater than the characters
				371	* in <code>srcText</code>.
				372	* @stable ICU 2.0
				373	*/
				374	inline int8_t compare(int32_t start,
				375	int32_t length,
				376	const UnicodeString& srcText,
				377	int32_t srcStart,
				378	int32_t srcLength) const;
				379
				380	/**
				381	* Compare the characters bitwise in this UnicodeString with the first
				382	* <TT>srcLength</TT> characters in <TT>srcChars</TT>.
				383	* @param srcChars The characters to compare to this UnicodeString.
				384	* @param srcLength the number of characters in <TT>srcChars</TT> to compare
				385	* @return The result of bitwise character comparison: 0 if this
				386	* contains the same characters as <code>srcChars</code>, -1 if the characters in
				387	* this are bitwise less than the characters in <code>srcChars</code>, +1 if the
				388	* characters in this are bitwise greater than the characters
				389	* in <code>srcChars</code>.
				390	* @stable ICU 2.0
				391	*/
				392	inline int8_t compare(const UChar *srcChars,
				393	int32_t srcLength) const;
				394
				395	/**
				396	* Compare the characters bitwise in the range
				397	* [<TT>start</TT>, <TT>start + length</TT>) with the first
				398	* <TT>length</TT> characters in <TT>srcChars</TT>
				399	* @param start the offset at which the compare operation begins
				400	* @param length the number of characters to compare.
				401	* @param srcChars the characters to be compared
				402	* @return The result of bitwise character comparison: 0 if this
				403	* contains the same characters as <code>srcChars</code>, -1 if the characters in
				404	* this are bitwise less than the characters in <code>srcChars</code>, +1 if the
				405	* characters in this are bitwise greater than the characters
				406	* in <code>srcChars</code>.
				407	* @stable ICU 2.0
				408	*/
				409	inline int8_t compare(int32_t start,
				410	int32_t length,
				411	const UChar *srcChars) const;
				412
				413	/**
				414	* Compare the characters bitwise in the range
				415	* [<TT>start</TT>, <TT>start + length</TT>) with the characters
				416	* in <TT>srcChars</TT> in the range
				417	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
				418	* @param start the offset at which the compare operation begins
				419	* @param length the number of characters in this to compare
				420	* @param srcChars the characters to be compared
				421	* @param srcStart the offset into <TT>srcChars</TT> to start comparison
				422	* @param srcLength the number of characters in <TT>srcChars</TT> to compare
				423	* @return The result of bitwise character comparison: 0 if this
				424	* contains the same characters as <code>srcChars</code>, -1 if the characters in
				425	* this are bitwise less than the characters in <code>srcChars</code>, +1 if the
				426	* characters in this are bitwise greater than the characters
				427	* in <code>srcChars</code>.
				428	* @stable ICU 2.0
				429	*/
				430	inline int8_t compare(int32_t start,
				431	int32_t length,
				432	const UChar *srcChars,
				433	int32_t srcStart,
				434	int32_t srcLength) const;
				435
				436	/**
				437	* Compare the characters bitwise in the range
				438	* [<TT>start</TT>, <TT>limit</TT>) with the characters
				439	* in <TT>srcText</TT> in the range
				440	* [<TT>srcStart</TT>, <TT>srcLimit</TT>).
				441	* @param start the offset at which the compare operation begins
				442	* @param limit the offset immediately following the compare operation
				443	* @param srcText the text to be compared
				444	* @param srcStart the offset into <TT>srcText</TT> to start comparison
				445	* @param srcLimit the offset into <TT>srcText</TT> to limit comparison
				446	* @return The result of bitwise character comparison: 0 if this
				447	* contains the same characters as <code>srcText</code>, -1 if the characters in
				448	* this are bitwise less than the characters in <code>srcText</code>, +1 if the
				449	* characters in this are bitwise greater than the characters
				450	* in <code>srcText</code>.
				451	* @stable ICU 2.0
				452	*/
				453	inline int8_t compareBetween(int32_t start,
				454	int32_t limit,
				455	const UnicodeString& srcText,
				456	int32_t srcStart,
				457	int32_t srcLimit) const;
				458
				459	/**
				460	* Compare two Unicode strings in code point order.
				461	* The result may be different from the results of compare(), operator<, etc.
				462	* if supplementary characters are present:
				463	*
				464	* In UTF-16, supplementary characters (with code points U+10000 and above) are
				465	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
				466	* which means that they compare as less than some other BMP characters like U+feff.
				467	* This function compares Unicode strings in code point order.
				468	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
				469	*
				470	* @param text Another string to compare this one to.
				471	* @return a negative/zero/positive integer corresponding to whether
				472	* this string is less than/equal to/greater than the second one
				473	* in code point order
				474	* @stable ICU 2.0
				475	*/
				476	inline int8_t compareCodePointOrder(const UnicodeString& text) const;
				477
				478	/**
				479	* Compare two Unicode strings in code point order.
				480	* The result may be different from the results of compare(), operator<, etc.
				481	* if supplementary characters are present:
				482	*
				483	* In UTF-16, supplementary characters (with code points U+10000 and above) are
				484	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
				485	* which means that they compare as less than some other BMP characters like U+feff.
				486	* This function compares Unicode strings in code point order.
				487	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
				488	*
				489	* @param start The start offset in this string at which the compare operation begins.
				490	* @param length The number of code units from this string to compare.
				491	* @param srcText Another string to compare this one to.
				492	* @return a negative/zero/positive integer corresponding to whether
				493	* this string is less than/equal to/greater than the second one
				494	* in code point order
				495	* @stable ICU 2.0
				496	*/
				497	inline int8_t compareCodePointOrder(int32_t start,
				498	int32_t length,
				499	const UnicodeString& srcText) const;
				500
				501	/**
				502	* Compare two Unicode strings in code point order.
				503	* The result may be different from the results of compare(), operator<, etc.
				504	* if supplementary characters are present:
				505	*
				506	* In UTF-16, supplementary characters (with code points U+10000 and above) are
				507	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
				508	* which means that they compare as less than some other BMP characters like U+feff.
				509	* This function compares Unicode strings in code point order.
				510	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
				511	*
				512	* @param start The start offset in this string at which the compare operation begins.
				513	* @param length The number of code units from this string to compare.
				514	* @param srcText Another string to compare this one to.
				515	* @param srcStart The start offset in that string at which the compare operation begins.
				516	* @param srcLength The number of code units from that string to compare.
				517	* @return a negative/zero/positive integer corresponding to whether
				518	* this string is less than/equal to/greater than the second one
				519	* in code point order
				520	* @stable ICU 2.0
				521	*/
				522	inline int8_t compareCodePointOrder(int32_t start,
				523	int32_t length,
				524	const UnicodeString& srcText,
				525	int32_t srcStart,
				526	int32_t srcLength) const;
				527
				528	/**
				529	* Compare two Unicode strings in code point order.
				530	* The result may be different from the results of compare(), operator<, etc.
				531	* if supplementary characters are present:
				532	*
				533	* In UTF-16, supplementary characters (with code points U+10000 and above) are
				534	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
				535	* which means that they compare as less than some other BMP characters like U+feff.
				536	* This function compares Unicode strings in code point order.
				537	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
				538	*
				539	* @param srcChars A pointer to another string to compare this one to.
				540	* @param srcLength The number of code units from that string to compare.
				541	* @return a negative/zero/positive integer corresponding to whether
				542	* this string is less than/equal to/greater than the second one
				543	* in code point order
				544	* @stable ICU 2.0
				545	*/
				546	inline int8_t compareCodePointOrder(const UChar *srcChars,
				547	int32_t srcLength) const;
				548
				549	/**
				550	* Compare two Unicode strings in code point order.
				551	* The result may be different from the results of compare(), operator<, etc.
				552	* if supplementary characters are present:
				553	*
				554	* In UTF-16, supplementary characters (with code points U+10000 and above) are
				555	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
				556	* which means that they compare as less than some other BMP characters like U+feff.
				557	* This function compares Unicode strings in code point order.
				558	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
				559	*
				560	* @param start The start offset in this string at which the compare operation begins.
				561	* @param length The number of code units from this string to compare.
				562	* @param srcChars A pointer to another string to compare this one to.
				563	* @return a negative/zero/positive integer corresponding to whether
				564	* this string is less than/equal to/greater than the second one
				565	* in code point order
				566	* @stable ICU 2.0
				567	*/
				568	inline int8_t compareCodePointOrder(int32_t start,
				569	int32_t length,
				570	const UChar *srcChars) const;
				571
				572	/**
				573	* Compare two Unicode strings in code point order.
				574	* The result may be different from the results of compare(), operator<, etc.
				575	* if supplementary characters are present:
				576	*
				577	* In UTF-16, supplementary characters (with code points U+10000 and above) are
				578	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
				579	* which means that they compare as less than some other BMP characters like U+feff.
				580	* This function compares Unicode strings in code point order.
				581	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
				582	*
				583	* @param start The start offset in this string at which the compare operation begins.
				584	* @param length The number of code units from this string to compare.
				585	* @param srcChars A pointer to another string to compare this one to.
				586	* @param srcStart The start offset in that string at which the compare operation begins.
				587	* @param srcLength The number of code units from that string to compare.
				588	* @return a negative/zero/positive integer corresponding to whether
				589	* this string is less than/equal to/greater than the second one
				590	* in code point order
				591	* @stable ICU 2.0
				592	*/
				593	inline int8_t compareCodePointOrder(int32_t start,
				594	int32_t length,
				595	const UChar *srcChars,
				596	int32_t srcStart,
				597	int32_t srcLength) const;
				598
				599	/**
				600	* Compare two Unicode strings in code point order.
				601	* The result may be different from the results of compare(), operator<, etc.
				602	* if supplementary characters are present:
				603	*
				604	* In UTF-16, supplementary characters (with code points U+10000 and above) are
				605	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
				606	* which means that they compare as less than some other BMP characters like U+feff.
				607	* This function compares Unicode strings in code point order.
				608	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
				609	*
				610	* @param start The start offset in this string at which the compare operation begins.
				611	* @param limit The offset after the last code unit from this string to compare.
				612	* @param srcText Another string to compare this one to.
				613	* @param srcStart The start offset in that string at which the compare operation begins.
				614	* @param srcLimit The offset after the last code unit from that string to compare.
				615	* @return a negative/zero/positive integer corresponding to whether
				616	* this string is less than/equal to/greater than the second one
				617	* in code point order
				618	* @stable ICU 2.0
				619	*/
				620	inline int8_t compareCodePointOrderBetween(int32_t start,
				621	int32_t limit,
				622	const UnicodeString& srcText,
				623	int32_t srcStart,
				624	int32_t srcLimit) const;
				625
				626	/**
				627	* Compare two strings case-insensitively using full case folding.
				628	* This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
				629	*
				630	* @param text Another string to compare this one to.
				631	* @param options A bit set of options:
				632	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
				633	* Comparison in code unit order with default case folding.
				634	*
				635	* - U_COMPARE_CODE_POINT_ORDER
				636	* Set to choose code point order instead of code unit order
				637	* (see u_strCompare for details).
				638	*
				639	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
				640	*
				641	* @return A negative, zero, or positive integer indicating the comparison result.
				642	* @stable ICU 2.0
				643	*/
				644	inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
				645
				646	/**
				647	* Compare two strings case-insensitively using full case folding.
				648	* This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
				649	*
				650	* @param start The start offset in this string at which the compare operation begins.
				651	* @param length The number of code units from this string to compare.
				652	* @param srcText Another string to compare this one to.
				653	* @param options A bit set of options:
				654	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
				655	* Comparison in code unit order with default case folding.
				656	*
				657	* - U_COMPARE_CODE_POINT_ORDER
				658	* Set to choose code point order instead of code unit order
				659	* (see u_strCompare for details).
				660	*
				661	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
				662	*
				663	* @return A negative, zero, or positive integer indicating the comparison result.
				664	* @stable ICU 2.0
				665	*/
				666	inline int8_t caseCompare(int32_t start,
				667	int32_t length,
				668	const UnicodeString& srcText,
				669	uint32_t options) const;
				670
				671	/**
				672	* Compare two strings case-insensitively using full case folding.
				673	* This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
				674	*
				675	* @param start The start offset in this string at which the compare operation begins.
				676	* @param length The number of code units from this string to compare.
				677	* @param srcText Another string to compare this one to.
				678	* @param srcStart The start offset in that string at which the compare operation begins.
				679	* @param srcLength The number of code units from that string to compare.
				680	* @param options A bit set of options:
				681	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
				682	* Comparison in code unit order with default case folding.
				683	*
				684	* - U_COMPARE_CODE_POINT_ORDER
				685	* Set to choose code point order instead of code unit order
				686	* (see u_strCompare for details).
				687	*
				688	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
				689	*
				690	* @return A negative, zero, or positive integer indicating the comparison result.
				691	* @stable ICU 2.0
				692	*/
				693	inline int8_t caseCompare(int32_t start,
				694	int32_t length,
				695	const UnicodeString& srcText,
				696	int32_t srcStart,
				697	int32_t srcLength,
				698	uint32_t options) const;
				699
				700	/**
				701	* Compare two strings case-insensitively using full case folding.
				702	* This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
				703	*
				704	* @param srcChars A pointer to another string to compare this one to.
				705	* @param srcLength The number of code units from that string to compare.
				706	* @param options A bit set of options:
				707	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
				708	* Comparison in code unit order with default case folding.
				709	*
				710	* - U_COMPARE_CODE_POINT_ORDER
				711	* Set to choose code point order instead of code unit order
				712	* (see u_strCompare for details).
				713	*
				714	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
				715	*
				716	* @return A negative, zero, or positive integer indicating the comparison result.
				717	* @stable ICU 2.0
				718	*/
				719	inline int8_t caseCompare(const UChar *srcChars,
				720	int32_t srcLength,
				721	uint32_t options) const;
				722
				723	/**
				724	* Compare two strings case-insensitively using full case folding.
				725	* This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
				726	*
				727	* @param start The start offset in this string at which the compare operation begins.
				728	* @param length The number of code units from this string to compare.
				729	* @param srcChars A pointer to another string to compare this one to.
				730	* @param options A bit set of options:
				731	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
				732	* Comparison in code unit order with default case folding.
				733	*
				734	* - U_COMPARE_CODE_POINT_ORDER
				735	* Set to choose code point order instead of code unit order
				736	* (see u_strCompare for details).
				737	*
				738	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
				739	*
				740	* @return A negative, zero, or positive integer indicating the comparison result.
				741	* @stable ICU 2.0
				742	*/
				743	inline int8_t caseCompare(int32_t start,
				744	int32_t length,
				745	const UChar *srcChars,
				746	uint32_t options) const;
				747
				748	/**
				749	* Compare two strings case-insensitively using full case folding.
				750	* This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
				751	*
				752	* @param start The start offset in this string at which the compare operation begins.
				753	* @param length The number of code units from this string to compare.
				754	* @param srcChars A pointer to another string to compare this one to.
				755	* @param srcStart The start offset in that string at which the compare operation begins.
				756	* @param srcLength The number of code units from that string to compare.
				757	* @param options A bit set of options:
				758	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
				759	* Comparison in code unit order with default case folding.
				760	*
				761	* - U_COMPARE_CODE_POINT_ORDER
				762	* Set to choose code point order instead of code unit order
				763	* (see u_strCompare for details).
				764	*
				765	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
				766	*
				767	* @return A negative, zero, or positive integer indicating the comparison result.
				768	* @stable ICU 2.0
				769	*/
				770	inline int8_t caseCompare(int32_t start,
				771	int32_t length,
				772	const UChar *srcChars,
				773	int32_t srcStart,
				774	int32_t srcLength,
				775	uint32_t options) const;
				776
				777	/**
				778	* Compare two strings case-insensitively using full case folding.
				779	* This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
				780	*
				781	* @param start The start offset in this string at which the compare operation begins.
				782	* @param limit The offset after the last code unit from this string to compare.
				783	* @param srcText Another string to compare this one to.
				784	* @param srcStart The start offset in that string at which the compare operation begins.
				785	* @param srcLimit The offset after the last code unit from that string to compare.
				786	* @param options A bit set of options:
				787	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
				788	* Comparison in code unit order with default case folding.
				789	*
				790	* - U_COMPARE_CODE_POINT_ORDER
				791	* Set to choose code point order instead of code unit order
				792	* (see u_strCompare for details).
				793	*
				794	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
				795	*
				796	* @return A negative, zero, or positive integer indicating the comparison result.
				797	* @stable ICU 2.0
				798	*/
				799	inline int8_t caseCompareBetween(int32_t start,
				800	int32_t limit,
				801	const UnicodeString& srcText,
				802	int32_t srcStart,
				803	int32_t srcLimit,
				804	uint32_t options) const;
				805
				806	/**
				807	* Determine if this starts with the characters in <TT>text</TT>
				808	* @param text The text to match.
				809	* @return TRUE if this starts with the characters in <TT>text</TT>,
				810	* FALSE otherwise
				811	* @stable ICU 2.0
				812	*/
				813	inline UBool startsWith(const UnicodeString& text) const;
				814
				815	/**
				816	* Determine if this starts with the characters in <TT>srcText</TT>
				817	* in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
				818	* @param srcText The text to match.
				819	* @param srcStart the offset into <TT>srcText</TT> to start matching
				820	* @param srcLength the number of characters in <TT>srcText</TT> to match
				821	* @return TRUE if this starts with the characters in <TT>srcText</TT>,
				822	* FALSE otherwise
				823	* @stable ICU 2.0
				824	*/
				825	inline UBool startsWith(const UnicodeString& srcText,
				826	int32_t srcStart,
				827	int32_t srcLength) const;
				828
				829	/**
				830	* Determine if this starts with the characters in <TT>srcChars</TT>
				831	* @param srcChars The characters to match.
				832	* @param srcLength the number of characters in <TT>srcChars</TT>
				833	* @return TRUE if this starts with the characters in <TT>srcChars</TT>,
				834	* FALSE otherwise
				835	* @stable ICU 2.0
				836	*/
				837	inline UBool startsWith(const UChar *srcChars,
				838	int32_t srcLength) const;
				839
				840	/**
				841	* Determine if this starts with the characters in <TT>srcChars</TT>
				842	* in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
				843	* @param srcChars The characters to match.
				844	* @param srcStart the offset into <TT>srcChars</TT> to start matching
				845	* @param srcLength the number of characters in <TT>srcChars</TT> to match
				846	* @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
				847	* @stable ICU 2.0
				848	*/
				849	inline UBool startsWith(const UChar *srcChars,
				850	int32_t srcStart,
				851	int32_t srcLength) const;
				852
				853	/**
				854	* Determine if this ends with the characters in <TT>text</TT>
				855	* @param text The text to match.
				856	* @return TRUE if this ends with the characters in <TT>text</TT>,
				857	* FALSE otherwise
				858	* @stable ICU 2.0
				859	*/
				860	inline UBool endsWith(const UnicodeString& text) const;
				861
				862	/**
				863	* Determine if this ends with the characters in <TT>srcText</TT>
				864	* in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
				865	* @param srcText The text to match.
				866	* @param srcStart the offset into <TT>srcText</TT> to start matching
				867	* @param srcLength the number of characters in <TT>srcText</TT> to match
				868	* @return TRUE if this ends with the characters in <TT>srcText</TT>,
				869	* FALSE otherwise
				870	* @stable ICU 2.0
				871	*/
				872	inline UBool endsWith(const UnicodeString& srcText,
				873	int32_t srcStart,
				874	int32_t srcLength) const;
				875
				876	/**
				877	* Determine if this ends with the characters in <TT>srcChars</TT>
				878	* @param srcChars The characters to match.
				879	* @param srcLength the number of characters in <TT>srcChars</TT>
				880	* @return TRUE if this ends with the characters in <TT>srcChars</TT>,
				881	* FALSE otherwise
				882	* @stable ICU 2.0
				883	*/
				884	inline UBool endsWith(const UChar *srcChars,
				885	int32_t srcLength) const;
				886
				887	/**
				888	* Determine if this ends with the characters in <TT>srcChars</TT>
				889	* in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
				890	* @param srcChars The characters to match.
				891	* @param srcStart the offset into <TT>srcChars</TT> to start matching
				892	* @param srcLength the number of characters in <TT>srcChars</TT> to match
				893	* @return TRUE if this ends with the characters in <TT>srcChars</TT>,
				894	* FALSE otherwise
				895	* @stable ICU 2.0
				896	*/
				897	inline UBool endsWith(const UChar *srcChars,
				898	int32_t srcStart,
				899	int32_t srcLength) const;
				900
				901
				902	/* Searching - bitwise only */
				903
				904	/**
				905	* Locate in this the first occurrence of the characters in <TT>text</TT>,
				906	* using bitwise comparison.
				907	* @param text The text to search for.
				908	* @return The offset into this of the start of <TT>text</TT>,
				909	* or -1 if not found.
				910	* @stable ICU 2.0
				911	*/
				912	inline int32_t indexOf(const UnicodeString& text) const;
				913
				914	/**
				915	* Locate in this the first occurrence of the characters in <TT>text</TT>
				916	* starting at offset <TT>start</TT>, using bitwise comparison.
				917	* @param text The text to search for.
				918	* @param start The offset at which searching will start.
				919	* @return The offset into this of the start of <TT>text</TT>,
				920	* or -1 if not found.
				921	* @stable ICU 2.0
				922	*/
				923	inline int32_t indexOf(const UnicodeString& text,
				924	int32_t start) const;
				925
				926	/**
				927	* Locate in this the first occurrence in the range
				928	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
				929	* in <TT>text</TT>, using bitwise comparison.
				930	* @param text The text to search for.
				931	* @param start The offset at which searching will start.
				932	* @param length The number of characters to search
				933	* @return The offset into this of the start of <TT>text</TT>,
				934	* or -1 if not found.
				935	* @stable ICU 2.0
				936	*/
				937	inline int32_t indexOf(const UnicodeString& text,
				938	int32_t start,
				939	int32_t length) const;
				940
				941	/**
				942	* Locate in this the first occurrence in the range
				943	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
				944	* in <TT>srcText</TT> in the range
				945	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
				946	* using bitwise comparison.
				947	* @param srcText The text to search for.
				948	* @param srcStart the offset into <TT>srcText</TT> at which
				949	* to start matching
				950	* @param srcLength the number of characters in <TT>srcText</TT> to match
				951	* @param start the offset into this at which to start matching
				952	* @param length the number of characters in this to search
				953	* @return The offset into this of the start of <TT>srcText</TT>,
				954	* or -1 if not found.
				955	* @stable ICU 2.0
				956	*/
				957	inline int32_t indexOf(const UnicodeString& srcText,
				958	int32_t srcStart,
				959	int32_t srcLength,
				960	int32_t start,
				961	int32_t length) const;
				962
				963	/**
				964	* Locate in this the first occurrence of the characters in
				965	* <TT>srcChars</TT>
				966	* starting at offset <TT>start</TT>, using bitwise comparison.
				967	* @param srcChars The text to search for.
				968	* @param srcLength the number of characters in <TT>srcChars</TT> to match
				969	* @param start the offset into this at which to start matching
				970	* @return The offset into this of the start of <TT>srcChars</TT>,
				971	* or -1 if not found.
				972	* @stable ICU 2.0
				973	*/
				974	inline int32_t indexOf(const UChar *srcChars,
				975	int32_t srcLength,
				976	int32_t start) const;
				977
				978	/**
				979	* Locate in this the first occurrence in the range
				980	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
				981	* in <TT>srcChars</TT>, using bitwise comparison.
				982	* @param srcChars The text to search for.
				983	* @param srcLength the number of characters in <TT>srcChars</TT>
				984	* @param start The offset at which searching will start.
				985	* @param length The number of characters to search
				986	* @return The offset into this of the start of <TT>srcChars</TT>,
				987	* or -1 if not found.
				988	* @stable ICU 2.0
				989	*/
				990	inline int32_t indexOf(const UChar *srcChars,
				991	int32_t srcLength,
				992	int32_t start,
				993	int32_t length) const;
				994
				995	/**
				996	* Locate in this the first occurrence in the range
				997	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
				998	* in <TT>srcChars</TT> in the range
				999	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
				1000	* using bitwise comparison.
				1001	* @param srcChars The text to search for.
				1002	* @param srcStart the offset into <TT>srcChars</TT> at which
				1003	* to start matching
				1004	* @param srcLength the number of characters in <TT>srcChars</TT> to match
				1005	* @param start the offset into this at which to start matching
				1006	* @param length the number of characters in this to search
				1007	* @return The offset into this of the start of <TT>srcChars</TT>,
				1008	* or -1 if not found.
				1009	* @stable ICU 2.0
				1010	*/
				1011	int32_t indexOf(const UChar *srcChars,
				1012	int32_t srcStart,
				1013	int32_t srcLength,
				1014	int32_t start,
				1015	int32_t length) const;
				1016
				1017	/**
				1018	* Locate in this the first occurrence of the BMP code point <code>c</code>,
				1019	* using bitwise comparison.
				1020	* @param c The code unit to search for.
				1021	* @return The offset into this of <TT>c</TT>, or -1 if not found.
				1022	* @stable ICU 2.0
				1023	*/
				1024	inline int32_t indexOf(UChar c) const;
				1025
				1026	/**
				1027	* Locate in this the first occurrence of the code point <TT>c</TT>,
				1028	* using bitwise comparison.
				1029	*
				1030	* @param c The code point to search for.
				1031	* @return The offset into this of <TT>c</TT>, or -1 if not found.
				1032	* @stable ICU 2.0
				1033	*/
				1034	inline int32_t indexOf(UChar32 c) const;
				1035
				1036	/**
				1037	* Locate in this the first occurrence of the BMP code point <code>c</code>,
				1038	* starting at offset <TT>start</TT>, using bitwise comparison.
				1039	* @param c The code unit to search for.
				1040	* @param start The offset at which searching will start.
				1041	* @return The offset into this of <TT>c</TT>, or -1 if not found.
				1042	* @stable ICU 2.0
				1043	*/
				1044	inline int32_t indexOf(UChar c,
				1045	int32_t start) const;
				1046
				1047	/**
				1048	* Locate in this the first occurrence of the code point <TT>c</TT>
				1049	* starting at offset <TT>start</TT>, using bitwise comparison.
				1050	*
				1051	* @param c The code point to search for.
				1052	* @param start The offset at which searching will start.
				1053	* @return The offset into this of <TT>c</TT>, or -1 if not found.
				1054	* @stable ICU 2.0
				1055	*/
				1056	inline int32_t indexOf(UChar32 c,
				1057	int32_t start) const;
				1058
				1059	/**
				1060	* Locate in this the first occurrence of the BMP code point <code>c</code>
				1061	* in the range [<TT>start</TT>, <TT>start + length</TT>),
				1062	* using bitwise comparison.
				1063	* @param c The code unit to search for.
				1064	* @param start the offset into this at which to start matching
				1065	* @param length the number of characters in this to search
				1066	* @return The offset into this of <TT>c</TT>, or -1 if not found.
				1067	* @stable ICU 2.0
				1068	*/
				1069	inline int32_t indexOf(UChar c,
				1070	int32_t start,
				1071	int32_t length) const;
				1072
				1073	/**
				1074	* Locate in this the first occurrence of the code point <TT>c</TT>
				1075	* in the range [<TT>start</TT>, <TT>start + length</TT>),
				1076	* using bitwise comparison.
				1077	*
				1078	* @param c The code point to search for.
				1079	* @param start the offset into this at which to start matching
				1080	* @param length the number of characters in this to search
				1081	* @return The offset into this of <TT>c</TT>, or -1 if not found.
				1082	* @stable ICU 2.0
				1083	*/
				1084	inline int32_t indexOf(UChar32 c,
				1085	int32_t start,
				1086	int32_t length) const;
				1087
				1088	/**
				1089	* Locate in this the last occurrence of the characters in <TT>text</TT>,
				1090	* using bitwise comparison.
				1091	* @param text The text to search for.
				1092	* @return The offset into this of the start of <TT>text</TT>,
				1093	* or -1 if not found.
				1094	* @stable ICU 2.0
				1095	*/
				1096	inline int32_t lastIndexOf(const UnicodeString& text) const;
				1097
				1098	/**
				1099	* Locate in this the last occurrence of the characters in <TT>text</TT>
				1100	* starting at offset <TT>start</TT>, using bitwise comparison.
				1101	* @param text The text to search for.
				1102	* @param start The offset at which searching will start.
				1103	* @return The offset into this of the start of <TT>text</TT>,
				1104	* or -1 if not found.
				1105	* @stable ICU 2.0
				1106	*/
				1107	inline int32_t lastIndexOf(const UnicodeString& text,
				1108	int32_t start) const;
				1109
				1110	/**
				1111	* Locate in this the last occurrence in the range
				1112	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
				1113	* in <TT>text</TT>, using bitwise comparison.
				1114	* @param text The text to search for.
				1115	* @param start The offset at which searching will start.
				1116	* @param length The number of characters to search
				1117	* @return The offset into this of the start of <TT>text</TT>,
				1118	* or -1 if not found.
				1119	* @stable ICU 2.0
				1120	*/
				1121	inline int32_t lastIndexOf(const UnicodeString& text,
				1122	int32_t start,
				1123	int32_t length) const;
				1124
				1125	/**
				1126	* Locate in this the last occurrence in the range
				1127	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
				1128	* in <TT>srcText</TT> in the range
				1129	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
				1130	* using bitwise comparison.
				1131	* @param srcText The text to search for.
				1132	* @param srcStart the offset into <TT>srcText</TT> at which
				1133	* to start matching
				1134	* @param srcLength the number of characters in <TT>srcText</TT> to match
				1135	* @param start the offset into this at which to start matching
				1136	* @param length the number of characters in this to search
				1137	* @return The offset into this of the start of <TT>srcText</TT>,
				1138	* or -1 if not found.
				1139	* @stable ICU 2.0
				1140	*/
				1141	inline int32_t lastIndexOf(const UnicodeString& srcText,
				1142	int32_t srcStart,
				1143	int32_t srcLength,
				1144	int32_t start,
				1145	int32_t length) const;
				1146
				1147	/**
				1148	* Locate in this the last occurrence of the characters in <TT>srcChars</TT>
				1149	* starting at offset <TT>start</TT>, using bitwise comparison.
				1150	* @param srcChars The text to search for.
				1151	* @param srcLength the number of characters in <TT>srcChars</TT> to match
				1152	* @param start the offset into this at which to start matching
				1153	* @return The offset into this of the start of <TT>srcChars</TT>,
				1154	* or -1 if not found.
				1155	* @stable ICU 2.0
				1156	*/
				1157	inline int32_t lastIndexOf(const UChar *srcChars,
				1158	int32_t srcLength,
				1159	int32_t start) const;
				1160
				1161	/**
				1162	* Locate in this the last occurrence in the range
				1163	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
				1164	* in <TT>srcChars</TT>, using bitwise comparison.
				1165	* @param srcChars The text to search for.
				1166	* @param srcLength the number of characters in <TT>srcChars</TT>
				1167	* @param start The offset at which searching will start.
				1168	* @param length The number of characters to search
				1169	* @return The offset into this of the start of <TT>srcChars</TT>,
				1170	* or -1 if not found.
				1171	* @stable ICU 2.0
				1172	*/
				1173	inline int32_t lastIndexOf(const UChar *srcChars,
				1174	int32_t srcLength,
				1175	int32_t start,
				1176	int32_t length) const;
				1177
				1178	/**
				1179	* Locate in this the last occurrence in the range
				1180	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
				1181	* in <TT>srcChars</TT> in the range
				1182	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
				1183	* using bitwise comparison.
				1184	* @param srcChars The text to search for.
				1185	* @param srcStart the offset into <TT>srcChars</TT> at which
				1186	* to start matching
				1187	* @param srcLength the number of characters in <TT>srcChars</TT> to match
				1188	* @param start the offset into this at which to start matching
				1189	* @param length the number of characters in this to search
				1190	* @return The offset into this of the start of <TT>srcChars</TT>,
				1191	* or -1 if not found.
				1192	* @stable ICU 2.0
				1193	*/
				1194	int32_t lastIndexOf(const UChar *srcChars,
				1195	int32_t srcStart,
				1196	int32_t srcLength,
				1197	int32_t start,
				1198	int32_t length) const;
				1199
				1200	/**
				1201	* Locate in this the last occurrence of the BMP code point <code>c</code>,
				1202	* using bitwise comparison.
				1203	* @param c The code unit to search for.
				1204	* @return The offset into this of <TT>c</TT>, or -1 if not found.
				1205	* @stable ICU 2.0
				1206	*/
				1207	inline int32_t lastIndexOf(UChar c) const;
				1208
				1209	/**
				1210	* Locate in this the last occurrence of the code point <TT>c</TT>,
				1211	* using bitwise comparison.
				1212	*
				1213	* @param c The code point to search for.
				1214	* @return The offset into this of <TT>c</TT>, or -1 if not found.
				1215	* @stable ICU 2.0
				1216	*/
				1217	inline int32_t lastIndexOf(UChar32 c) const;
				1218
				1219	/**
				1220	* Locate in this the last occurrence of the BMP code point <code>c</code>
				1221	* starting at offset <TT>start</TT>, using bitwise comparison.
				1222	* @param c The code unit to search for.
				1223	* @param start The offset at which searching will start.
				1224	* @return The offset into this of <TT>c</TT>, or -1 if not found.
				1225	* @stable ICU 2.0
				1226	*/
				1227	inline int32_t lastIndexOf(UChar c,
				1228	int32_t start) const;
				1229
				1230	/**
				1231	* Locate in this the last occurrence of the code point <TT>c</TT>
				1232	* starting at offset <TT>start</TT>, using bitwise comparison.
				1233	*
				1234	* @param c The code point to search for.
				1235	* @param start The offset at which searching will start.
				1236	* @return The offset into this of <TT>c</TT>, or -1 if not found.
				1237	* @stable ICU 2.0
				1238	*/
				1239	inline int32_t lastIndexOf(UChar32 c,
				1240	int32_t start) const;
				1241
				1242	/**
				1243	* Locate in this the last occurrence of the BMP code point <code>c</code>
				1244	* in the range [<TT>start</TT>, <TT>start + length</TT>),
				1245	* using bitwise comparison.
				1246	* @param c The code unit to search for.
				1247	* @param start the offset into this at which to start matching
				1248	* @param length the number of characters in this to search
				1249	* @return The offset into this of <TT>c</TT>, or -1 if not found.
				1250	* @stable ICU 2.0
				1251	*/
				1252	inline int32_t lastIndexOf(UChar c,
				1253	int32_t start,
				1254	int32_t length) const;
				1255
				1256	/**
				1257	* Locate in this the last occurrence of the code point <TT>c</TT>
				1258	* in the range [<TT>start</TT>, <TT>start + length</TT>),
				1259	* using bitwise comparison.
				1260	*
				1261	* @param c The code point to search for.
				1262	* @param start the offset into this at which to start matching
				1263	* @param length the number of characters in this to search
				1264	* @return The offset into this of <TT>c</TT>, or -1 if not found.
				1265	* @stable ICU 2.0
				1266	*/
				1267	inline int32_t lastIndexOf(UChar32 c,
				1268	int32_t start,
				1269	int32_t length) const;
				1270
				1271
				1272	/* Character access */
				1273
				1274	/**
				1275	* Return the code unit at offset <tt>offset</tt>.
				1276	* If the offset is not valid (0..length()-1) then U+ffff is returned.
				1277	* @param offset a valid offset into the text
				1278	* @return the code unit at offset <tt>offset</tt>
				1279	* or 0xffff if the offset is not valid for this string
				1280	* @stable ICU 2.0
				1281	*/
				1282	inline UChar charAt(int32_t offset) const;
				1283
				1284	/**
				1285	* Return the code unit at offset <tt>offset</tt>.
				1286	* If the offset is not valid (0..length()-1) then U+ffff is returned.
				1287	* @param offset a valid offset into the text
				1288	* @return the code unit at offset <tt>offset</tt>
				1289	* @stable ICU 2.0
				1290	*/
				1291	inline UChar operator[] (int32_t offset) const;
				1292
				1293	/**
				1294	* Return the code point that contains the code unit
				1295	* at offset <tt>offset</tt>.
				1296	* If the offset is not valid (0..length()-1) then U+ffff is returned.
				1297	* @param offset a valid offset into the text
				1298	* that indicates the text offset of any of the code units
				1299	* that will be assembled into a code point (21-bit value) and returned
				1300	* @return the code point of text at <tt>offset</tt>
				1301	* or 0xffff if the offset is not valid for this string
				1302	* @stable ICU 2.0
				1303	*/
Craig Cornelius	103e9ff	2012-10-09 17:03:29 -0700	[diff] [blame]	1304	UChar32 char32At(int32_t offset) const;
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	1305
				1306	/**
				1307	* Adjust a random-access offset so that
				1308	* it points to the beginning of a Unicode character.
				1309	* The offset that is passed in points to
				1310	* any code unit of a code point,
				1311	* while the returned offset will point to the first code unit
				1312	* of the same code point.
				1313	* In UTF-16, if the input offset points to a second surrogate
				1314	* of a surrogate pair, then the returned offset will point
				1315	* to the first surrogate.
				1316	* @param offset a valid offset into one code point of the text
				1317	* @return offset of the first code unit of the same code point
				1318	* @see U16_SET_CP_START
				1319	* @stable ICU 2.0
				1320	*/
Craig Cornelius	103e9ff	2012-10-09 17:03:29 -0700	[diff] [blame]	1321	int32_t getChar32Start(int32_t offset) const;
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	1322
				1323	/**
				1324	* Adjust a random-access offset so that
				1325	* it points behind a Unicode character.
				1326	* The offset that is passed in points behind
				1327	* any code unit of a code point,
				1328	* while the returned offset will point behind the last code unit
				1329	* of the same code point.
				1330	* In UTF-16, if the input offset points behind the first surrogate
				1331	* (i.e., to the second surrogate)
				1332	* of a surrogate pair, then the returned offset will point
				1333	* behind the second surrogate (i.e., to the code unit that follows the pair).
				1334	* @param offset a valid offset after any code unit of a code point of the text
				1335	* @return offset of the first code unit after the same code point
				1336	* @see U16_SET_CP_LIMIT
				1337	* @stable ICU 2.0
				1338	*/
Craig Cornelius	103e9ff	2012-10-09 17:03:29 -0700	[diff] [blame]	1339	int32_t getChar32Limit(int32_t offset) const;
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	1340
				1341	/**
				1342	* Move the code unit index along the string by delta code points.
				1343	* Interpret the input index as a code unit-based offset into the string,
				1344	* move the index forward or backward by delta code points, and
				1345	* return the resulting index.
				1346	* The input index should point to the first code unit of a code point,
				1347	* if there is more than one.
				1348	*
				1349	* Both input and output indexes are code unit-based as for all
				1350	* string indexes/offsets in ICU (and other libraries, like MBCS char*).
				1351	* If delta<0 then the index is moved backward (toward the start of the string).
				1352	* If delta>0 then the index is moved forward (toward the end of the string).
				1353	*
				1354	* This behaves like CharacterIterator::move32(delta, kCurrent).
				1355	*
				1356	* Behavior for out-of-bounds indexes:
				1357	* <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
				1358	* if the input index<0 then it is pinned to 0;
				1359	* if it is index>length() then it is pinned to length().
				1360	* Afterwards, the index is moved by <code>delta</code> code points
				1361	* forward or backward,
				1362	* but no further backward than to 0 and no further forward than to length().
				1363	* The resulting index return value will be in between 0 and length(), inclusively.
				1364	*
				1365	* Examples:
				1366	* <pre>
				1367	* // s has code points 'a' U+10000 'b' U+10ffff U+2029
				1368	* UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 28).unescape();
				1369	*
				1370	* // initial index: position of U+10000
				1371	* int32_t index=1;
				1372	*
				1373	* // the following examples will all result in index==4, position of U+10ffff
				1374	*
				1375	* // skip 2 code points from some position in the string
				1376	* index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
				1377	*
				1378	* // go to the 3rd code point from the start of s (0-based)
				1379	* index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
				1380	*
				1381	* // go to the next-to-last code point of s
				1382	* index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
				1383	* </pre>
				1384	*
				1385	* @param index input code unit index
				1386	* @param delta (signed) code point count to move the index forward or backward
				1387	* in the string
				1388	* @return the resulting code unit index
				1389	* @stable ICU 2.0
				1390	*/
				1391	int32_t moveIndex32(int32_t index, int32_t delta) const;
				1392
				1393	/* Substring extraction */
				1394
				1395	/**
				1396	* Copy the characters in the range
				1397	* [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
				1398	* beginning at <tt>dstStart</tt>.
				1399	* If the string aliases to <code>dst</code> itself as an external buffer,
				1400	* then extract() will not copy the contents.
				1401	*
				1402	* @param start offset of first character which will be copied into the array
				1403	* @param length the number of characters to extract
				1404	* @param dst array in which to copy characters. The length of <tt>dst</tt>
				1405	* must be at least (<tt>dstStart + length</tt>).
				1406	* @param dstStart the offset in <TT>dst</TT> where the first character
				1407	* will be extracted
				1408	* @stable ICU 2.0
				1409	*/
				1410	inline void extract(int32_t start,
				1411	int32_t length,
				1412	UChar *dst,
				1413	int32_t dstStart = 0) const;
				1414
				1415	/**
				1416	* Copy the contents of the string into dest.
				1417	* This is a convenience function that
				1418	* checks if there is enough space in dest,
				1419	* extracts the entire string if possible,
				1420	* and NUL-terminates dest if possible.
				1421	*
				1422	* If the string fits into dest but cannot be NUL-terminated
				1423	* (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
				1424	* If the string itself does not fit into dest
				1425	* (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
				1426	*
				1427	* If the string aliases to <code>dest</code> itself as an external buffer,
				1428	* then extract() will not copy the contents.
				1429	*
				1430	* @param dest Destination string buffer.
				1431	* @param destCapacity Number of UChars available at dest.
				1432	* @param errorCode ICU error code.
				1433	* @return length()
				1434	* @stable ICU 2.0
				1435	*/
				1436	int32_t
				1437	extract(UChar *dest, int32_t destCapacity,
				1438	UErrorCode &errorCode) const;
				1439
				1440	/**
				1441	* Copy the characters in the range
				1442	* [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString
				1443	* <tt>target</tt>.
				1444	* @param start offset of first character which will be copied
				1445	* @param length the number of characters to extract
				1446	* @param target UnicodeString into which to copy characters.
				1447	* (No return value; the extracted characters are stored in <TT>target</TT>.)
				1448	* @stable ICU 2.0
				1449	*/
				1450	inline void extract(int32_t start,
				1451	int32_t length,
				1452	UnicodeString& target) const;
				1453
				1454	/**
				1455	* Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
				1456	* into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
				1457	* @param start offset of first character which will be copied into the array
				1458	* @param limit offset immediately following the last character to be copied
				1459	* @param dst array in which to copy characters. The length of <tt>dst</tt>
				1460	* must be at least (<tt>dstStart + (limit - start)</tt>).
				1461	* @param dstStart the offset in <TT>dst</TT> where the first character
				1462	* will be extracted
				1463	* @stable ICU 2.0
				1464	*/
				1465	inline void extractBetween(int32_t start,
				1466	int32_t limit,
				1467	UChar *dst,
				1468	int32_t dstStart = 0) const;
				1469
				1470	/**
				1471	* Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
				1472	* into the UnicodeString <tt>target</tt>. Replaceable API.
				1473	* @param start offset of first character which will be copied
				1474	* @param limit offset immediately following the last character to be copied
				1475	* @param target UnicodeString into which to copy characters.
				1476	* (No return value; the extracted characters are stored in <TT>target</TT>.)
				1477	* @stable ICU 2.0
				1478	*/
				1479	virtual void extractBetween(int32_t start,
				1480	int32_t limit,
				1481	UnicodeString& target) const;
				1482
				1483	/**
				1484	* Copy the characters in the range
				1485	* [<tt>start</TT>, <tt>start + length</TT>) into an array of characters.
				1486	* All characters must be invariant (see utypes.h).
				1487	* Use US_INV as the last, signature-distinguishing parameter.
				1488	*
				1489	* This function does not write any more than <code>targetCapacity</code>
				1490	* characters but returns the length of the entire output string
				1491	* so that one can allocate a larger buffer and call the function again
				1492	* if necessary.
				1493	* The output string is NUL-terminated if possible.
				1494	*
				1495	* @param start offset of first character which will be copied
				1496	* @param startLength the number of characters to extract
				1497	* @param target the target buffer for extraction, can be NULL
				1498	* if targetCapacity is 0
				1499	* @param targetCapacity the length of the target buffer
				1500	* @param inv Signature-distinguishing parameter, use US_INV.
				1501	* @return the output string length, not including the terminating NUL
				1502	* @stable ICU 3.2
				1503	*/
				1504	int32_t extract(int32_t start,
				1505	int32_t startLength,
				1506	char *target,
				1507	int32_t targetCapacity,
				1508	enum EInvariant inv) const;
				1509
Jean-Baptiste Queru	b0ac937	2009-07-20 15:09:32 -0700	[diff] [blame]	1510	#if U_CHARSET_IS_UTF8 \|\| !UCONFIG_NO_CONVERSION
				1511
				1512	/**
				1513	* Copy the characters in the range
				1514	* [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
				1515	* in the platform's default codepage.
				1516	* This function does not write any more than <code>targetLength</code>
				1517	* characters but returns the length of the entire output string
				1518	* so that one can allocate a larger buffer and call the function again
				1519	* if necessary.
				1520	* The output string is NUL-terminated if possible.
				1521	*
				1522	* @param start offset of first character which will be copied
				1523	* @param startLength the number of characters to extract
				1524	* @param target the target buffer for extraction
				1525	* @param targetLength the length of the target buffer
				1526	* If <TT>target</TT> is NULL, then the number of bytes required for
				1527	* <TT>target</TT> is returned.
				1528	* @return the output string length, not including the terminating NUL
				1529	* @stable ICU 2.0
				1530	*/
				1531	int32_t extract(int32_t start,
				1532	int32_t startLength,
				1533	char *target,
				1534	uint32_t targetLength) const;
				1535
				1536	#endif
				1537
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	1538	#if !UCONFIG_NO_CONVERSION
				1539
				1540	/**
				1541	* Copy the characters in the range
				1542	* [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
				1543	* in a specified codepage.
				1544	* The output string is NUL-terminated.
				1545	*
				1546	* Recommendation: For invariant-character strings use
				1547	* extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
				1548	* because it avoids object code dependencies of UnicodeString on
				1549	* the conversion code.
				1550	*
				1551	* @param start offset of first character which will be copied
				1552	* @param startLength the number of characters to extract
				1553	* @param target the target buffer for extraction
				1554	* @param codepage the desired codepage for the characters. 0 has
				1555	* the special meaning of the default codepage
				1556	* If <code>codepage</code> is an empty string (<code>""</code>),
				1557	* then a simple conversion is performed on the codepage-invariant
				1558	* subset ("invariant characters") of the platform encoding. See utypes.h.
				1559	* If <TT>target</TT> is NULL, then the number of bytes required for
				1560	* <TT>target</TT> is returned. It is assumed that the target is big enough
				1561	* to fit all of the characters.
				1562	* @return the output string length, not including the terminating NUL
				1563	* @stable ICU 2.0
				1564	*/
				1565	inline int32_t extract(int32_t start,
				1566	int32_t startLength,
				1567	char *target,
				1568	const char *codepage = 0) const;
				1569
				1570	/**
				1571	* Copy the characters in the range
				1572	* [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
				1573	* in a specified codepage.
				1574	* This function does not write any more than <code>targetLength</code>
				1575	* characters but returns the length of the entire output string
				1576	* so that one can allocate a larger buffer and call the function again
				1577	* if necessary.
				1578	* The output string is NUL-terminated if possible.
				1579	*
				1580	* Recommendation: For invariant-character strings use
				1581	* extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
				1582	* because it avoids object code dependencies of UnicodeString on
				1583	* the conversion code.
				1584	*
				1585	* @param start offset of first character which will be copied
				1586	* @param startLength the number of characters to extract
				1587	* @param target the target buffer for extraction
				1588	* @param targetLength the length of the target buffer
				1589	* @param codepage the desired codepage for the characters. 0 has
				1590	* the special meaning of the default codepage
				1591	* If <code>codepage</code> is an empty string (<code>""</code>),
				1592	* then a simple conversion is performed on the codepage-invariant
				1593	* subset ("invariant characters") of the platform encoding. See utypes.h.
				1594	* If <TT>target</TT> is NULL, then the number of bytes required for
				1595	* <TT>target</TT> is returned.
				1596	* @return the output string length, not including the terminating NUL
				1597	* @stable ICU 2.0
				1598	*/
				1599	int32_t extract(int32_t start,
				1600	int32_t startLength,
				1601	char *target,
				1602	uint32_t targetLength,
Jean-Baptiste Queru	b0ac937	2009-07-20 15:09:32 -0700	[diff] [blame]	1603	const char *codepage) const;
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	1604
				1605	/**
				1606	* Convert the UnicodeString into a codepage string using an existing UConverter.
				1607	* The output string is NUL-terminated if possible.
				1608	*
				1609	* This function avoids the overhead of opening and closing a converter if
				1610	* multiple strings are extracted.
				1611	*
				1612	* @param dest destination string buffer, can be NULL if destCapacity==0
				1613	* @param destCapacity the number of chars available at dest
				1614	* @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
				1615	* or NULL for the default converter
				1616	* @param errorCode normal ICU error code
				1617	* @return the length of the output string, not counting the terminating NUL;
				1618	* if the length is greater than destCapacity, then the string will not fit
				1619	* and a buffer of the indicated length would need to be passed in
				1620	* @stable ICU 2.0
				1621	*/
				1622	int32_t extract(char *dest, int32_t destCapacity,
				1623	UConverter *cnv,
				1624	UErrorCode &errorCode) const;
				1625
				1626	#endif
				1627
Jean-Baptiste Queru	b0ac937	2009-07-20 15:09:32 -0700	[diff] [blame]	1628	/**
claireho	50294ea	2010-05-03 15:44:48 -0700	[diff] [blame]	1629	* Create a temporary substring for the specified range.
				1630	* Unlike the substring constructor and setTo() functions,
				1631	* the object returned here will be a read-only alias (using getBuffer())
				1632	* rather than copying the text.
				1633	* As a result, this substring operation is much faster but requires
				1634	* that the original string not be modified or deleted during the lifetime
				1635	* of the returned substring object.
				1636	* @param start offset of the first character visible in the substring
				1637	* @param length length of the substring
				1638	* @return a read-only alias UnicodeString object for the substring
claireho	27f6547	2011-06-09 11:11:49 -0700	[diff] [blame]	1639	* @stable ICU 4.4
claireho	50294ea	2010-05-03 15:44:48 -0700	[diff] [blame]	1640	*/
				1641	UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
				1642
				1643	/**
				1644	* Create a temporary substring for the specified range.
				1645	* Same as tempSubString(start, length) except that the substring range
				1646	* is specified as a (start, limit) pair (with an exclusive limit index)
				1647	* rather than a (start, length) pair.
				1648	* @param start offset of the first character visible in the substring
				1649	* @param limit offset immediately following the last character visible in the substring
				1650	* @return a read-only alias UnicodeString object for the substring
claireho	27f6547	2011-06-09 11:11:49 -0700	[diff] [blame]	1651	* @stable ICU 4.4
claireho	50294ea	2010-05-03 15:44:48 -0700	[diff] [blame]	1652	*/
				1653	inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
				1654
				1655	/**
Jean-Baptiste Queru	b0ac937	2009-07-20 15:09:32 -0700	[diff] [blame]	1656	* Convert the UnicodeString to UTF-8 and write the result
				1657	* to a ByteSink. This is called by toUTF8String().
				1658	* Unpaired surrogates are replaced with U+FFFD.
				1659	* Calls u_strToUTF8WithSub().
				1660	*
				1661	* @param sink A ByteSink to which the UTF-8 version of the string is written.
claireho	27f6547	2011-06-09 11:11:49 -0700	[diff] [blame]	1662	* sink.Flush() is called at the end.
claireho	50294ea	2010-05-03 15:44:48 -0700	[diff] [blame]	1663	* @stable ICU 4.2
Jean-Baptiste Queru	b0ac937	2009-07-20 15:09:32 -0700	[diff] [blame]	1664	* @see toUTF8String
				1665	*/
				1666	void toUTF8(ByteSink &sink) const;
				1667
				1668	#if U_HAVE_STD_STRING
				1669
				1670	/**
				1671	* Convert the UnicodeString to UTF-8 and append the result
				1672	* to a standard string.
				1673	* Unpaired surrogates are replaced with U+FFFD.
				1674	* Calls toUTF8().
				1675	*
claireho	50294ea	2010-05-03 15:44:48 -0700	[diff] [blame]	1676	* @param result A standard string (or a compatible object)
Jean-Baptiste Queru	b0ac937	2009-07-20 15:09:32 -0700	[diff] [blame]	1677	* to which the UTF-8 version of the string is appended.
				1678	* @return The string object.
claireho	50294ea	2010-05-03 15:44:48 -0700	[diff] [blame]	1679	* @stable ICU 4.2
Jean-Baptiste Queru	b0ac937	2009-07-20 15:09:32 -0700	[diff] [blame]	1680	* @see toUTF8
				1681	*/
				1682	template<typename StringClass>
				1683	StringClass &toUTF8String(StringClass &result) const {
				1684	StringByteSink<StringClass> sbs(&result); // ByteSink adapter built over the caller's string object
				1685	toUTF8(sbs); // emit this string's UTF-8 form through the sink
				1686	return result; // hand back the same object that was passed in
				1687	}
				1688
				1689	#endif
				1690
				1691	/**
				1692	* Convert the UnicodeString to UTF-32.
				1693	* Unpaired surrogates are replaced with U+FFFD.
				1694	* Calls u_strToUTF32WithSub().
				1695	*
				1696	* @param utf32 destination string buffer, can be NULL if capacity==0
				1697	* @param capacity the number of UChar32s available at utf32
				1698	* @param errorCode Standard ICU error code. Its input value must
				1699	* pass the U_SUCCESS() test, or else the function returns
				1700	* immediately. Check for U_FAILURE() on output or use with
				1701	* function chaining. (See User Guide for details.)
				1702	* @return The length of the UTF-32 string.
				1703	* @see fromUTF32
claireho	50294ea	2010-05-03 15:44:48 -0700	[diff] [blame]	1704	* @stable ICU 4.2
Jean-Baptiste Queru	b0ac937	2009-07-20 15:09:32 -0700	[diff] [blame]	1705	*/
				1706	int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
				1707
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	1708	/* Length operations */
				1709
				1710	/**
				1711	* Return the length of the UnicodeString object.
				1712	* The length is the number of UChar code units in the UnicodeString.
				1713	* If you want the number of code points, please use countChar32().
				1714	* @return the length of the UnicodeString object
				1715	* @see countChar32
				1716	* @stable ICU 2.0
				1717	*/
				1718	inline int32_t length(void) const;
				1719
				1720	/**
				1721	* Count Unicode code points in the length UChar code units of the string.
				1722	* A code point may occupy either one or two UChar code units.
				1723	* Counting code points involves reading all code units.
				1724	*
				1725	* This function is basically the inverse of moveIndex32().
				1726	*
				1727	* @param start the index of the first code unit to check
				1728	* @param length the number of UChar code units to check
				1729	* @return the number of code points in the specified code units
				1730	* @see length
				1731	* @stable ICU 2.0
				1732	*/
				1733	int32_t
				1734	countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
				1735
				1736	/**
				1737	* Check if the length UChar code units of the string
				1738	* contain more Unicode code points than a certain number.
				1739	* This is more efficient than counting all code points in this part of the string
				1740	* and comparing that number with a threshold.
				1741	* This function may not need to scan the string at all if the length
				1742	* falls within a certain range, and
				1743	* never needs to count more than 'number+1' code points.
				1744	* Logically equivalent to (countChar32(start, length)>number).
				1745	* A Unicode code point may occupy either one or two UChar code units.
				1746	*
				1747	* @param start the index of the first code unit to check (0 for the entire string)
				1748	* @param length the number of UChar code units to check
				1749	* (use INT32_MAX for the entire string; remember that start/length
				1750	* values are pinned)
				1751	* @param number The number of code points in the (sub)string is compared against
				1752	* the 'number' parameter.
				1753	* @return Boolean value for whether the string contains more Unicode code points
				1754	* than 'number'. Same as (u_countChar32(s, length)>number).
				1755	* @see countChar32
				1756	* @see u_strHasMoreChar32Than
				1757	* @stable ICU 2.4
				1758	*/
				1759	UBool
				1760	hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
				1761
				1762	/**
				1763	* Determine if this string is empty.
				1764	* @return TRUE if this string contains 0 characters, FALSE otherwise.
				1765	* @stable ICU 2.0
				1766	*/
				1767	inline UBool isEmpty(void) const;
				1768
				1769	/**
				1770	* Return the capacity of the internal buffer of the UnicodeString object.
				1771	* This is useful together with the getBuffer functions.
				1772	* See there for details.
				1773	*
				1774	* @return the number of UChars available in the internal buffer
				1775	* @see getBuffer
				1776	* @stable ICU 2.0
				1777	*/
				1778	inline int32_t getCapacity(void) const;
				1779
				1780	/* Other operations */
				1781
				1782	/**
				1783	* Generate a hash code for this object.
				1784	* @return The hash code of this UnicodeString.
				1785	* @stable ICU 2.0
				1786	*/
				1787	inline int32_t hashCode(void) const;
				1788
				1789	/**
				1790	* Determine if this object contains a valid string.
claireho	50294ea	2010-05-03 15:44:48 -0700	[diff] [blame]	1791	* A bogus string has no value. It is different from an empty string,
				1792	* although in both cases isEmpty() returns TRUE and length() returns 0.
				1793	* setToBogus() and isBogus() can be used to indicate that no string value is available.
				1794	* For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	1795	* length() returns 0.
				1796	*
				1797	* @return TRUE if the string is bogus/invalid, FALSE otherwise
				1798	* @see setToBogus()
				1799	* @stable ICU 2.0
				1800	*/
				1801	inline UBool isBogus(void) const;
				1802
				1803
				1804	//========================================
				1805	// Write operations
				1806	//========================================
				1807
				1808	/* Assignment operations */
				1809
				1810	/**
				1811	* Assignment operator. Replace the characters in this UnicodeString
				1812	* with the characters from <TT>srcText</TT>.
				1813	* @param srcText The text containing the characters to replace
				1814	* @return a reference to this
				1815	* @stable ICU 2.0
				1816	*/
				1817	UnicodeString &operator=(const UnicodeString &srcText);
				1818
				1819	/**
				1820	* Almost the same as the assignment operator.
				1821	* Replace the characters in this UnicodeString
				1822	* with the characters from <code>srcText</code>.
				1823	*
Craig Cornelius	54dcd9b	2013-02-15 14:03:14 -0800	[diff] [blame^]	1824	* This function works the same as the assignment operator
				1825	* for all strings except for ones that are readonly aliases.
				1826	*
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	1827	* Starting with ICU 2.4, the assignment operator and the copy constructor
				1828	* allocate a new buffer and copy the buffer contents even for readonly aliases.
				1829	* This function implements the old, more efficient but less safe behavior
				1830	* of making this string also a readonly alias to the same buffer.
Craig Cornelius	54dcd9b	2013-02-15 14:03:14 -0800	[diff] [blame^]	1831	*
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	1832	* The fastCopyFrom function must be used only if it is known that the lifetime of
Craig Cornelius	54dcd9b	2013-02-15 14:03:14 -0800	[diff] [blame^]	1833	* this UnicodeString does not exceed the lifetime of the aliased buffer
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	1834	* including its contents, for example for strings from resource bundles
Craig Cornelius	54dcd9b	2013-02-15 14:03:14 -0800	[diff] [blame^]	1835	* or aliases to string constants.
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	1836	*
				1837	* @param src The text containing the characters to replace.
				1838	* @return a reference to this
				1839	* @stable ICU 2.4
				1840	*/
				1841	UnicodeString &fastCopyFrom(const UnicodeString &src);
				1842
				1843	/**
				1844	* Assignment operator. Replace the characters in this UnicodeString
				1845	* with the code unit <TT>ch</TT>.
				1846	* @param ch the code unit to replace
				1847	* @return a reference to this
				1848	* @stable ICU 2.0
				1849	*/
				1850	inline UnicodeString& operator= (UChar ch);
				1851
				1852	/**
				1853	* Assignment operator. Replace the characters in this UnicodeString
				1854	* with the code point <TT>ch</TT>.
				1855	* @param ch the code point to replace
				1856	* @return a reference to this
				1857	* @stable ICU 2.0
				1858	*/
				1859	inline UnicodeString& operator= (UChar32 ch);
				1860
				1861	/**
				1862	* Set the text in the UnicodeString object to the characters
				1863	* in <TT>srcText</TT> in the range
				1864	* [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
				1865	* <TT>srcText</TT> is not modified.
				1866	* @param srcText the source for the new characters
				1867	* @param srcStart the offset into <TT>srcText</TT> where new characters
				1868	* will be obtained
				1869	* @return a reference to this
				1870	* @stable ICU 2.2
				1871	*/
				1872	inline UnicodeString& setTo(const UnicodeString& srcText,
				1873	int32_t srcStart);
				1874
				1875	/**
				1876	* Set the text in the UnicodeString object to the characters
				1877	* in <TT>srcText</TT> in the range
				1878	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
				1879	* <TT>srcText</TT> is not modified.
				1880	* @param srcText the source for the new characters
				1881	* @param srcStart the offset into <TT>srcText</TT> where new characters
				1882	* will be obtained
				1883	* @param srcLength the number of characters in <TT>srcText</TT> in the
				1884	* replace string.
				1885	* @return a reference to this
				1886	* @stable ICU 2.0
				1887	*/
				1888	inline UnicodeString& setTo(const UnicodeString& srcText,
				1889	int32_t srcStart,
				1890	int32_t srcLength);
				1891
				1892	/**
				1893	* Set the text in the UnicodeString object to the characters in
				1894	* <TT>srcText</TT>.
				1895	* <TT>srcText</TT> is not modified.
				1896	* @param srcText the source for the new characters
				1897	* @return a reference to this
				1898	* @stable ICU 2.0
				1899	*/
				1900	inline UnicodeString& setTo(const UnicodeString& srcText);
				1901
				1902	/**
				1903	* Set the characters in the UnicodeString object to the characters
				1904	* in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
				1905	* @param srcChars the source for the new characters
				1906	* @param srcLength the number of Unicode characters in srcChars.
				1907	* @return a reference to this
				1908	* @stable ICU 2.0
				1909	*/
				1910	inline UnicodeString& setTo(const UChar *srcChars,
				1911	int32_t srcLength);
				1912
				1913	/**
				1914	* Set the characters in the UnicodeString object to the code unit
				1915	* <TT>srcChar</TT>.
				1916	* @param srcChar the code unit which becomes the UnicodeString's character
				1917	* content
				1918	* @return a reference to this
				1919	* @stable ICU 2.0
				1920	*/
				1921	UnicodeString& setTo(UChar srcChar);
				1922
				1923	/**
				1924	* Set the characters in the UnicodeString object to the code point
				1925	* <TT>srcChar</TT>.
				1926	* @param srcChar the code point which becomes the UnicodeString's character
				1927	* content
				1928	* @return a reference to this
				1929	* @stable ICU 2.0
				1930	*/
				1931	UnicodeString& setTo(UChar32 srcChar);
				1932
				1933	/**
				1934	* Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
				1935	* The text will be used for the UnicodeString object, but
				1936	* it will not be released when the UnicodeString is destroyed.
				1937	* This has copy-on-write semantics:
				1938	* When the string is modified, then the buffer is first copied into
				1939	* newly allocated memory.
				1940	* The aliased buffer is never modified.
Craig Cornelius	54dcd9b	2013-02-15 14:03:14 -0800	[diff] [blame^]	1941	*
				1942	* In an assignment to another UnicodeString, when using the copy constructor
				1943	* or the assignment operator, the text will be copied.
				1944	* When using fastCopyFrom(), the text will be aliased again,
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	1945	* so that both strings then alias the same readonly-text.
				1946	*
				1947	* @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
				1948	* This must be true if <code>textLength==-1</code>.
				1949	* @param text The characters to alias for the UnicodeString.
				1950	* @param textLength The number of Unicode characters in <code>text</code> to alias.
				1951	* If -1, then this function will determine the length
				1952	* by calling <code>u_strlen()</code>.
				1953	* @return a reference to this
				1954	* @stable ICU 2.0
				1955	*/
				1956	UnicodeString &setTo(UBool isTerminated,
				1957	const UChar *text,
				1958	int32_t textLength);
				1959
				1960	/**
				1961	* Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
				1962	* The text will be used for the UnicodeString object, but
				1963	* it will not be released when the UnicodeString is destroyed.
				1964	* This has write-through semantics:
				1965	* For as long as the capacity of the buffer is sufficient, write operations
				1966	* will directly affect the buffer. When more capacity is necessary, then
				1967	* a new buffer will be allocated and the contents copied as with regularly
				1968	* constructed strings.
				1969	* In an assignment to another UnicodeString, the buffer will be copied.
				1970	* The extract(UChar *dst) function detects whether the dst pointer is the same
				1971	* as the string buffer itself and will in this case not copy the contents.
				1972	*
				1973	* @param buffer The characters to alias for the UnicodeString.
				1974	* @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
				1975	* @param buffCapacity The size of <code>buffer</code> in UChars.
				1976	* @return a reference to this
				1977	* @stable ICU 2.0
				1978	*/
				1979	UnicodeString &setTo(UChar *buffer,
				1980	int32_t buffLength,
				1981	int32_t buffCapacity);
				1982
				1983	/**
				1984	* Make this UnicodeString object invalid.
				1985	* The string will test TRUE with isBogus().
				1986	*
				1987	* A bogus string has no value. It is different from an empty string.
				1988	* It can be used to indicate that no string value is available.
				1989	* getBuffer() and getTerminatedBuffer() return NULL, and
				1990	* length() returns 0.
				1991	*
				1992	* This utility function is used throughout the UnicodeString
				1993	* implementation to indicate that a UnicodeString operation failed,
				1994	* and may be used in other functions,
				1995	* especially but not exclusively when such functions do not
				1996	* take a UErrorCode for simplicity.
				1997	*
				1998	* The following methods, and no others, will clear a string object's bogus flag:
				1999	* - remove()
				2000	* - remove(0, INT32_MAX)
				2001	* - truncate(0)
				2002	* - operator=() (assignment operator)
				2003	* - setTo(...)
				2004	*
				2005	* The simplest way to turn a bogus string into an empty one
				2006	* is to use the remove() function.
				2007	* Examples for other functions that are equivalent to "set to empty string":
				2008	* \code
				2009	* if(s.isBogus()) {
				2010	* s.remove(); // set to an empty string (remove all), or
				2011	* s.remove(0, INT32_MAX); // set to an empty string (remove all), or
				2012	* s.truncate(0); // set to an empty string (complete truncation), or
				2013	* s=UnicodeString(); // assign an empty string, or
				2014	* s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
				2015	* static const UChar nul=0;
				2016	* s.setTo(&nul, 0); // set to an empty C Unicode string
				2017	* }
				2018	* \endcode
				2019	*
				2020	* @see isBogus()
				2021	* @stable ICU 2.0
				2022	*/
				2023	void setToBogus();
				2024
				2025	/**
				2026	* Set the character at the specified offset to the specified character.
				2027	* @param offset A valid offset into the text of the character to set
				2028	* @param ch The new character
				2029	* @return A reference to this
				2030	* @stable ICU 2.0
				2031	*/
				2032	UnicodeString& setCharAt(int32_t offset,
				2033	UChar ch);
				2034
				2035
				2036	/* Append operations */
				2037
				2038	/**
				2039	* Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
				2040	* object.
				2041	* @param ch the code unit to be appended
				2042	* @return a reference to this
				2043	* @stable ICU 2.0
				2044	*/
				2045	inline UnicodeString& operator+= (UChar ch);
				2046
				2047	/**
				2048	* Append operator. Append the code point <TT>ch</TT> to the UnicodeString
				2049	* object.
				2050	* @param ch the code point to be appended
				2051	* @return a reference to this
				2052	* @stable ICU 2.0
				2053	*/
				2054	inline UnicodeString& operator+= (UChar32 ch);
				2055
				2056	/**
				2057	* Append operator. Append the characters in <TT>srcText</TT> to the
claireho	b26ce3a	2012-01-10 17:54:41 -0800	[diff] [blame]	2058	* UnicodeString object. <TT>srcText</TT> is not modified.
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	2059	* @param srcText the source for the new characters
				2060	* @return a reference to this
				2061	* @stable ICU 2.0
				2062	*/
				2063	inline UnicodeString& operator+= (const UnicodeString& srcText);
				2064
				2065	/**
				2066	* Append the characters
				2067	* in <TT>srcText</TT> in the range
				2068	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
				2069	* UnicodeString object. <TT>srcText</TT>
				2070	* is not modified.
				2071	* @param srcText the source for the new characters
				2072	* @param srcStart the offset into <TT>srcText</TT> where new characters
				2073	* will be obtained
				2074	* @param srcLength the number of characters in <TT>srcText</TT> in
				2075	* the append string
				2076	* @return a reference to this
				2077	* @stable ICU 2.0
				2078	*/
				2079	inline UnicodeString& append(const UnicodeString& srcText,
				2080	int32_t srcStart,
				2081	int32_t srcLength);
				2082
				2083	/**
claireho	b26ce3a	2012-01-10 17:54:41 -0800	[diff] [blame]	2084	* Append the characters in <TT>srcText</TT> to the UnicodeString object.
				2085	* <TT>srcText</TT> is not modified.
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	2086	* @param srcText the source for the new characters
				2087	* @return a reference to this
				2088	* @stable ICU 2.0
				2089	*/
				2090	inline UnicodeString& append(const UnicodeString& srcText);
				2091
				2092	/**
				2093	* Append the characters in <TT>srcChars</TT> in the range
				2094	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString
				2095	* object.
				2096	* <TT>srcChars</TT> is not modified.
				2097	* @param srcChars the source for the new characters
				2098	* @param srcStart the offset into <TT>srcChars</TT> where new characters
				2099	* will be obtained
				2100	* @param srcLength the number of characters in <TT>srcChars</TT> in
claireho	b26ce3a	2012-01-10 17:54:41 -0800	[diff] [blame]	2101	* the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	2102	* @return a reference to this
				2103	* @stable ICU 2.0
				2104	*/
				2105	inline UnicodeString& append(const UChar *srcChars,
				2106	int32_t srcStart,
				2107	int32_t srcLength);
				2108
				2109	/**
				2110	* Append the characters in <TT>srcChars</TT> to the UnicodeString object.
				2111	* <TT>srcChars</TT> is not modified.
				2112	* @param srcChars the source for the new characters
claireho	b26ce3a	2012-01-10 17:54:41 -0800	[diff] [blame]	2113	* @param srcLength the number of Unicode characters in <TT>srcChars</TT>;
				2114	* can be -1 if <TT>srcChars</TT> is NUL-terminated
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	2115	* @return a reference to this
				2116	* @stable ICU 2.0
				2117	*/
				2118	inline UnicodeString& append(const UChar *srcChars,
				2119	int32_t srcLength);
				2120
				2121	/**
				2122	* Append the code unit <TT>srcChar</TT> to the UnicodeString object.
				2123	* @param srcChar the code unit to append
				2124	* @return a reference to this
				2125	* @stable ICU 2.0
				2126	*/
				2127	inline UnicodeString& append(UChar srcChar);
				2128
				2129	/**
				2130	* Append the code point <TT>srcChar</TT> to the UnicodeString object.
				2131	* @param srcChar the code point to append
				2132	* @return a reference to this
				2133	* @stable ICU 2.0
				2134	*/
Craig Cornelius	103e9ff	2012-10-09 17:03:29 -0700	[diff] [blame]	2135	UnicodeString& append(UChar32 srcChar);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	2136
				2137
				2138	/* Insert operations */
				2139
				2140	/**
				2141	* Insert the characters in <TT>srcText</TT> in the range
				2142	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
				2143	* object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
				2144	* @param start the offset where the insertion begins
				2145	* @param srcText the source for the new characters
				2146	* @param srcStart the offset into <TT>srcText</TT> where new characters
				2147	* will be obtained
				2148	* @param srcLength the number of characters in <TT>srcText</TT> in
				2149	* the insert string
				2150	* @return a reference to this
				2151	* @stable ICU 2.0
				2152	*/
				2153	inline UnicodeString& insert(int32_t start,
				2154	const UnicodeString& srcText,
				2155	int32_t srcStart,
				2156	int32_t srcLength);
				2157
				2158	/**
				2159	* Insert the characters in <TT>srcText</TT> into the UnicodeString object
				2160	* at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
				2161	* @param start the offset where the insertion begins
				2162	* @param srcText the source for the new characters
				2163	* @return a reference to this
				2164	* @stable ICU 2.0
				2165	*/
				2166	inline UnicodeString& insert(int32_t start,
				2167	const UnicodeString& srcText);
				2168
				2169	/**
				2170	* Insert the characters in <TT>srcChars</TT> in the range
				2171	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
				2172	* object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
				2173	* @param start the offset at which the insertion begins
				2174	* @param srcChars the source for the new characters
				2175	* @param srcStart the offset into <TT>srcChars</TT> where new characters
				2176	* will be obtained
				2177	* @param srcLength the number of characters in <TT>srcChars</TT>
				2178	* in the insert string
				2179	* @return a reference to this
				2180	* @stable ICU 2.0
				2181	*/
				2182	inline UnicodeString& insert(int32_t start,
				2183	const UChar *srcChars,
				2184	int32_t srcStart,
				2185	int32_t srcLength);
				2186
				2187	/**
				2188	* Insert the characters in <TT>srcChars</TT> into the UnicodeString object
				2189	* at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
				2190	* @param start the offset where the insertion begins
				2191	* @param srcChars the source for the new characters
				2192	* @param srcLength the number of Unicode characters in srcChars.
				2193	* @return a reference to this
				2194	* @stable ICU 2.0
				2195	*/
				2196	inline UnicodeString& insert(int32_t start,
				2197	const UChar *srcChars,
				2198	int32_t srcLength);
				2199
				2200	/**
				2201	* Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
				2202	* offset <TT>start</TT>.
				2203	* @param start the offset at which the insertion occurs
				2204	* @param srcChar the code unit to insert
				2205	* @return a reference to this
				2206	* @stable ICU 2.0
				2207	*/
				2208	inline UnicodeString& insert(int32_t start,
				2209	UChar srcChar);
				2210
				2211	/**
				2212	* Insert the code point <TT>srcChar</TT> into the UnicodeString object at
				2213	* offset <TT>start</TT>.
				2214	* @param start the offset at which the insertion occurs
				2215	* @param srcChar the code point to insert
				2216	* @return a reference to this
				2217	* @stable ICU 2.0
				2218	*/
				2219	inline UnicodeString& insert(int32_t start,
				2220	UChar32 srcChar);
				2221
				2222
				2223	/* Replace operations */
				2224
				2225	/**
				2226	* Replace the characters in the range
				2227	* [<TT>start</TT>, <TT>start + length</TT>) with the characters in
				2228	* <TT>srcText</TT> in the range
				2229	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
				2230	* <TT>srcText</TT> is not modified.
				2231	* @param start the offset at which the replace operation begins
				2232	* @param length the number of characters to replace. The character at
				2233	* <TT>start + length</TT> is not modified.
				2234	* @param srcText the source for the new characters
				2235	* @param srcStart the offset into <TT>srcText</TT> where new characters
				2236	* will be obtained
				2237	* @param srcLength the number of characters in <TT>srcText</TT> in
				2238	* the replace string
				2239	* @return a reference to this
				2240	* @stable ICU 2.0
				2241	*/
				2242	UnicodeString& replace(int32_t start,
				2243	int32_t length,
				2244	const UnicodeString& srcText,
				2245	int32_t srcStart,
				2246	int32_t srcLength);
				2247
				2248	/**
				2249	* Replace the characters in the range
				2250	* [<TT>start</TT>, <TT>start + length</TT>)
				2251	* with the characters in <TT>srcText</TT>. <TT>srcText</TT> is
				2252	* not modified.
				2253	* @param start the offset at which the replace operation begins
				2254	* @param length the number of characters to replace. The character at
				2255	* <TT>start + length</TT> is not modified.
				2256	* @param srcText the source for the new characters
				2257	* @return a reference to this
				2258	* @stable ICU 2.0
				2259	*/
				2260	UnicodeString& replace(int32_t start,
				2261	int32_t length,
				2262	const UnicodeString& srcText);
				2263
				2264	/**
				2265	* Replace the characters in the range
				2266	* [<TT>start</TT>, <TT>start + length</TT>) with the characters in
				2267	* <TT>srcChars</TT> in the range
				2268	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
				2269	* is not modified.
				2270	* @param start the offset at which the replace operation begins
				2271	* @param length the number of characters to replace. The character at
				2272	* <TT>start + length</TT> is not modified.
				2273	* @param srcChars the source for the new characters
				2274	* @param srcStart the offset into <TT>srcChars</TT> where new characters
				2275	* will be obtained
				2276	* @param srcLength the number of characters in <TT>srcChars</TT>
				2277	* in the replace string
				2278	* @return a reference to this
				2279	* @stable ICU 2.0
				2280	*/
				2281	UnicodeString& replace(int32_t start,
				2282	int32_t length,
				2283	const UChar *srcChars,
				2284	int32_t srcStart,
				2285	int32_t srcLength);
				2286
				2287	/**
				2288	* Replace the characters in the range
				2289	* [<TT>start</TT>, <TT>start + length</TT>) with the characters in
				2290	* <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
				2291	* @param start the offset at which the replace operation begins
				2292	* @param length number of characters to replace. The character at
				2293	* <TT>start + length</TT> is not modified.
				2294	* @param srcChars the source for the new characters
				2295	* @param srcLength the number of Unicode characters in srcChars
				2296	* @return a reference to this
				2297	* @stable ICU 2.0
				2298	*/
				2299	inline UnicodeString& replace(int32_t start,
				2300	int32_t length,
				2301	const UChar *srcChars,
				2302	int32_t srcLength);
				2303
				2304	/**
				2305	* Replace the characters in the range
				2306	* [<TT>start</TT>, <TT>start + length</TT>) with the code unit
				2307	* <TT>srcChar</TT>.
				2308	* @param start the offset at which the replace operation begins
				2309	* @param length the number of characters to replace. The character at
				2310	* <TT>start + length</TT> is not modified.
				2311	* @param srcChar the new code unit
				2312	* @return a reference to this
				2313	* @stable ICU 2.0
				2314	*/
				2315	inline UnicodeString& replace(int32_t start,
				2316	int32_t length,
				2317	UChar srcChar);
				2318
				2319	/**
				2320	* Replace the characters in the range
				2321	* [<TT>start</TT>, <TT>start + length</TT>) with the code point
				2322	* <TT>srcChar</TT>.
				2323	* @param start the offset at which the replace operation begins
				2324	* @param length the number of characters to replace. The character at
				2325	* <TT>start + length</TT> is not modified.
				2326	* @param srcChar the new code point
				2327	* @return a reference to this
				2328	* @stable ICU 2.0
				2329	*/
Craig Cornelius	103e9ff	2012-10-09 17:03:29 -0700	[diff] [blame]	2330	UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	2331
				2332	/**
				2333	* Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
				2334	* with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
				2335	* @param start the offset at which the replace operation begins
				2336	* @param limit the offset immediately following the replace range
				2337	* @param srcText the source for the new characters
				2338	* @return a reference to this
				2339	* @stable ICU 2.0
				2340	*/
				2341	inline UnicodeString& replaceBetween(int32_t start,
				2342	int32_t limit,
				2343	const UnicodeString& srcText);
				2344
				2345	/**
				2346	* Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
				2347	* with the characters in <TT>srcText</TT> in the range
				2348	* [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
				2349	* @param start the offset at which the replace operation begins
				2350	* @param limit the offset immediately following the replace range
				2351	* @param srcText the source for the new characters
				2352	* @param srcStart the offset into <TT>srcText</TT> where new characters
				2353	* will be obtained
				2354	* @param srcLimit the offset immediately following the range to copy
				2355	* in <TT>srcText</TT>
				2356	* @return a reference to this
				2357	* @stable ICU 2.0
				2358	*/
				2359	inline UnicodeString& replaceBetween(int32_t start,
				2360	int32_t limit,
				2361	const UnicodeString& srcText,
				2362	int32_t srcStart,
				2363	int32_t srcLimit);
				2364
				2365	/**
				2366	* Replace a substring of this object with the given text.
				2367	* @param start the beginning index, inclusive; <code>0 <= start
				2368	* <= limit</code>.
				2369	* @param limit the ending index, exclusive; <code>start <= limit
				2370	* <= length()</code>.
				2371	* @param text the text to replace characters <code>start</code>
				2372	* to <code>limit - 1</code>
				2373	* @stable ICU 2.0
				2374	*/
				2375	virtual void handleReplaceBetween(int32_t start,
				2376	int32_t limit,
				2377	const UnicodeString& text);
				2378
				2379	/**
				2380	* Replaceable API
				2381	* @return TRUE if it has MetaData
				2382	* @stable ICU 2.4
				2383	*/
				2384	virtual UBool hasMetaData() const;
				2385
				2386	/**
				2387	* Copy a substring of this object, retaining attribute (out-of-band)
				2388	* information. This method is used to duplicate or reorder substrings.
				2389	* The destination index must not overlap the source range.
				2390	*
				2391	* @param start the beginning index, inclusive; <code>0 <= start <=
				2392	* limit</code>.
				2393	* @param limit the ending index, exclusive; <code>start <= limit <=
				2394	* length()</code>.
				2395	* @param dest the destination index. The characters from
				2396	* <code>start..limit-1</code> will be copied to <code>dest</code>.
				2397	* Implementations of this method may assume that <code>dest <= start ||
				2398	* dest >= limit</code>.
				2399	* @stable ICU 2.0
				2400	*/
				2401	virtual void copy(int32_t start, int32_t limit, int32_t dest);
				2402
				2403	/* Search and replace operations */
				2404
				2405	/**
				2406	* Replace all occurrences of characters in oldText with the characters
				2407	* in newText
				2408	* @param oldText the text containing the search text
				2409	* @param newText the text containing the replacement text
				2410	* @return a reference to this
				2411	* @stable ICU 2.0
				2412	*/
				2413	inline UnicodeString& findAndReplace(const UnicodeString& oldText,
				2414	const UnicodeString& newText);
				2415
				2416	/**
				2417	* Replace all occurrences of characters in oldText with characters
				2418	* in newText
				2419	* in the range [<TT>start</TT>, <TT>start + length</TT>).
				2420	* @param start the start of the range in which replace will be performed
				2421	* @param length the length of the range in which replace will be performed
				2422	* @param oldText the text containing the search text
				2423	* @param newText the text containing the replacement text
				2424	* @return a reference to this
				2425	* @stable ICU 2.0
				2426	*/
				2427	inline UnicodeString& findAndReplace(int32_t start,
				2428	int32_t length,
				2429	const UnicodeString& oldText,
				2430	const UnicodeString& newText);
				2431
				2432	/**
				2433	* Replace all occurrences of characters in oldText in the range
				2434	* [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
				2435	* in newText in the range
				2436	* [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
				2437	* in the range [<TT>start</TT>, <TT>start + length</TT>).
				2438	* @param start the start of the range in which replace will be performed
				2439	* @param length the length of the range in which replace will be performed
				2440	* @param oldText the text containing the search text
				2441	* @param oldStart the start of the search range in <TT>oldText</TT>
				2442	* @param oldLength the length of the search range in <TT>oldText</TT>
				2443	* @param newText the text containing the replacement text
				2444	* @param newStart the start of the replacement range in <TT>newText</TT>
				2445	* @param newLength the length of the replacement range in <TT>newText</TT>
				2446	* @return a reference to this
				2447	* @stable ICU 2.0
				2448	*/
				2449	UnicodeString& findAndReplace(int32_t start,
				2450	int32_t length,
				2451	const UnicodeString& oldText,
				2452	int32_t oldStart,
				2453	int32_t oldLength,
				2454	const UnicodeString& newText,
				2455	int32_t newStart,
				2456	int32_t newLength);
				2457
				2458
				2459	/* Remove operations */
				2460
				2461	/**
				2462	* Remove all characters from the UnicodeString object.
				2463	* @return a reference to this
				2464	* @stable ICU 2.0
				2465	*/
				2466	inline UnicodeString& remove(void);
				2467
				2468	/**
				2469	* Remove the characters in the range
				2470	* [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
				2471	* @param start the offset of the first character to remove
				2472	* @param length the number of characters to remove
				2473	* @return a reference to this
				2474	* @stable ICU 2.0
				2475	*/
				2476	inline UnicodeString& remove(int32_t start,
				2477	int32_t length = (int32_t)INT32_MAX);
				2478
				2479	/**
				2480	* Remove the characters in the range
				2481	* [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
				2482	* @param start the offset of the first character to remove
				2483	* @param limit the offset immediately following the range to remove
				2484	* @return a reference to this
				2485	* @stable ICU 2.0
				2486	*/
				2487	inline UnicodeString& removeBetween(int32_t start,
				2488	int32_t limit = (int32_t)INT32_MAX);
				2489
claireho	50294ea	2010-05-03 15:44:48 -0700	[diff] [blame]	2490	/**
				2491	* Retain only the characters in the range
				2492	* [<code>start</code>, <code>limit</code>) from the UnicodeString object.
				2493	* Removes characters before <code>start</code> and at and after <code>limit</code>.
				2494	* @param start the offset of the first character to retain
				2495	* @param limit the offset immediately following the range to retain
				2496	* @return a reference to this
claireho	27f6547	2011-06-09 11:11:49 -0700	[diff] [blame]	2497	* @stable ICU 4.4
claireho	50294ea	2010-05-03 15:44:48 -0700	[diff] [blame]	2498	*/
				2499	inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	2500
				2501	/* Length operations */
				2502
				2503	/**
				2504	* Pad the start of this UnicodeString with the character <TT>padChar</TT>.
				2505	* If the length of this UnicodeString is less than targetLength,
				2506	* targetLength - length() copies of padChar will be added to the
				2507	* beginning of this UnicodeString.
				2508	* @param targetLength the desired length of the string
				2509	* @param padChar the character to use for padding. Defaults to
				2510	* space (U+0020)
				2511	* @return TRUE if the text was padded, FALSE otherwise.
				2512	* @stable ICU 2.0
				2513	*/
				2514	UBool padLeading(int32_t targetLength,
				2515	UChar padChar = 0x0020);
				2516
				2517	/**
				2518	* Pad the end of this UnicodeString with the character <TT>padChar</TT>.
				2519	* If the length of this UnicodeString is less than targetLength,
				2520	* targetLength - length() copies of padChar will be added to the
				2521	* end of this UnicodeString.
				2522	* @param targetLength the desired length of the string
				2523	* @param padChar the character to use for padding. Defaults to
				2524	* space (U+0020)
				2525	* @return TRUE if the text was padded, FALSE otherwise.
				2526	* @stable ICU 2.0
				2527	*/
				2528	UBool padTrailing(int32_t targetLength,
				2529	UChar padChar = 0x0020);
				2530
				2531	/**
				2532	* Truncate this UnicodeString to the <TT>targetLength</TT>.
				2533	* @param targetLength the desired length of this UnicodeString.
				2534	* @return TRUE if the text was truncated, FALSE otherwise
				2535	* @stable ICU 2.0
				2536	*/
				2537	inline UBool truncate(int32_t targetLength);
				2538
				2539	/**
				2540	* Trims leading and trailing whitespace from this UnicodeString.
				2541	* @return a reference to this
				2542	* @stable ICU 2.0
				2543	*/
				2544	UnicodeString& trim(void);
				2545
				2546
				2547	/* Miscellaneous operations */
				2548
				2549	/**
				2550	* Reverse this UnicodeString in place.
				2551	* @return a reference to this
				2552	* @stable ICU 2.0
				2553	*/
				2554	inline UnicodeString& reverse(void);
				2555
				2556	/**
				2557	* Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
				2558	* this UnicodeString.
				2559	* @param start the start of the range to reverse
				2560	* @param length the number of characters to reverse
				2561	* @return a reference to this
				2562	* @stable ICU 2.0
				2563	*/
				2564	inline UnicodeString& reverse(int32_t start,
				2565	int32_t length);
				2566
				2567	/**
				2568	* Convert the characters in this to UPPER CASE following the conventions of
				2569	* the default locale.
				2570	* @return A reference to this.
				2571	* @stable ICU 2.0
				2572	*/
				2573	UnicodeString& toUpper(void);
				2574
				2575	/**
				2576	* Convert the characters in this to UPPER CASE following the conventions of
				2577	* a specific locale.
				2578	* @param locale The locale containing the conventions to use.
				2579	* @return A reference to this.
				2580	* @stable ICU 2.0
				2581	*/
				2582	UnicodeString& toUpper(const Locale& locale);
				2583
				2584	/**
				2585	* Convert the characters in this to lower case following the conventions of
				2586	* the default locale.
				2587	* @return A reference to this.
				2588	* @stable ICU 2.0
				2589	*/
				2590	UnicodeString& toLower(void);
				2591
				2592	/**
				2593	* Convert the characters in this to lower case following the conventions of
				2594	* a specific locale.
				2595	* @param locale The locale containing the conventions to use.
				2596	* @return A reference to this.
				2597	* @stable ICU 2.0
				2598	*/
				2599	UnicodeString& toLower(const Locale& locale);
				2600
				2601	#if !UCONFIG_NO_BREAK_ITERATION
				2602
				2603	/**
				2604	* Titlecase this string, convenience function using the default locale.
				2605	*
				2606	* Casing is locale-dependent and context-sensitive.
				2607	* Titlecasing uses a break iterator to find the first characters of words
				2608	* that are to be titlecased. It titlecases those characters and lowercases
				2609	* all others.
				2610	*
				2611	* The titlecase break iterator can be provided to customize for arbitrary
				2612	* styles, using rules and dictionaries beyond the standard iterators.
				2613	* It may be more efficient to always provide an iterator to avoid
				2614	* opening and closing one for each string.
				2615	* The standard titlecase iterator for the root locale implements the
				2616	* algorithm of Unicode TR 21.
				2617	*
				2618	* This function uses only the setText(), first() and next() methods of the
				2619	* provided break iterator.
				2620	*
				2621	* @param titleIter A break iterator to find the first characters of words
				2622	* that are to be titlecased.
				2623	* If none is provided (0), then a standard titlecase
				2624	* break iterator is opened.
				2625	* Otherwise the provided iterator is set to the string's text.
				2626	* @return A reference to this.
				2627	* @stable ICU 2.1
				2628	*/
				2629	UnicodeString &toTitle(BreakIterator *titleIter);
				2630
				2631	/**
				2632	* Titlecase this string.
				2633	*
				2634	* Casing is locale-dependent and context-sensitive.
				2635	* Titlecasing uses a break iterator to find the first characters of words
				2636	* that are to be titlecased. It titlecases those characters and lowercases
				2637	* all others.
				2638	*
				2639	* The titlecase break iterator can be provided to customize for arbitrary
				2640	* styles, using rules and dictionaries beyond the standard iterators.
				2641	* It may be more efficient to always provide an iterator to avoid
				2642	* opening and closing one for each string.
				2643	* The standard titlecase iterator for the root locale implements the
				2644	* algorithm of Unicode TR 21.
				2645	*
				2646	* This function uses only the setText(), first() and next() methods of the
				2647	* provided break iterator.
				2648	*
				2649	* @param titleIter A break iterator to find the first characters of words
				2650	* that are to be titlecased.
				2651	* If none is provided (0), then a standard titlecase
				2652	* break iterator is opened.
				2653	* Otherwise the provided iterator is set to the string's text.
				2654	* @param locale The locale to consider.
				2655	* @return A reference to this.
				2656	* @stable ICU 2.1
				2657	*/
				2658	UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
				2659
				2660	/**
				2661	* Titlecase this string, with options.
				2662	*
				2663	* Casing is locale-dependent and context-sensitive.
				2664	* Titlecasing uses a break iterator to find the first characters of words
				2665	* that are to be titlecased. It titlecases those characters and lowercases
				2666	* all others. (This can be modified with options.)
				2667	*
				2668	* The titlecase break iterator can be provided to customize for arbitrary
				2669	* styles, using rules and dictionaries beyond the standard iterators.
				2670	* It may be more efficient to always provide an iterator to avoid
				2671	* opening and closing one for each string.
				2672	* The standard titlecase iterator for the root locale implements the
				2673	* algorithm of Unicode TR 21.
				2674	*
				2675	* This function uses only the setText(), first() and next() methods of the
				2676	* provided break iterator.
				2677	*
				2678	* @param titleIter A break iterator to find the first characters of words
				2679	* that are to be titlecased.
				2680	* If none is provided (0), then a standard titlecase
				2681	* break iterator is opened.
				2682	* Otherwise the provided iterator is set to the string's text.
				2683	* @param locale The locale to consider.
				2684	* @param options Options bit set, see ucasemap_open().
				2685	* @return A reference to this.
				2686	* @see U_TITLECASE_NO_LOWERCASE
				2687	* @see U_TITLECASE_NO_BREAK_ADJUSTMENT
				2688	* @see ucasemap_open
Jean-Baptiste Queru	b0ac937	2009-07-20 15:09:32 -0700	[diff] [blame]	2689	* @stable ICU 3.8
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	2690	*/
				2691	UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
				2692
				2693	#endif
				2694
				2695	/**
Craig Cornelius	54dcd9b	2013-02-15 14:03:14 -0800	[diff] [blame^]	2696	* Case-folds the characters in this string.
				2697	*
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	2698	* Case-folding is locale-independent and not context-sensitive,
				2699	* but there is an option for whether to include or exclude mappings for dotted I
Craig Cornelius	54dcd9b	2013-02-15 14:03:14 -0800	[diff] [blame^]	2700	* and dotless i that are marked with 'T' in CaseFolding.txt.
				2701	*
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	2702	* The result may be longer or shorter than the original.
				2703	*
				2704	* @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
				2705	* @return A reference to this.
				2706	* @stable ICU 2.0
				2707	*/
				2708	UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
				2709
				2710	//========================================
				2711	// Access to the internal buffer
				2712	//========================================
				2713
				2714	/**
				2715	* Get a read/write pointer to the internal buffer.
				2716	* The buffer is guaranteed to be large enough for at least minCapacity UChars,
				2717	* writable, and is still owned by the UnicodeString object.
				2718	* Calls to getBuffer(minCapacity) must not be nested, and
				2719	* must be matched with calls to releaseBuffer(newLength).
				2720	* If the string buffer was read-only or shared,
				2721	* then it will be reallocated and copied.
				2722	*
				2723	* An attempted nested call will return 0, and will not further modify the
				2724	* state of the UnicodeString object.
				2725	* It also returns 0 if the string is bogus.
				2726	*
				2727	* The actual capacity of the string buffer may be larger than minCapacity.
				2728	* getCapacity() returns the actual capacity.
				2729	* For many operations, the full capacity should be used to avoid reallocations.
				2730	*
				2731	* While the buffer is "open" between getBuffer(minCapacity)
				2732	* and releaseBuffer(newLength), the following applies:
				2733	* - The string length is set to 0.
				2734	* - Any read API call on the UnicodeString object will behave like on a 0-length string.
				2735	* - Any write API call on the UnicodeString object is disallowed and will have no effect.
				2736	* - You can read from and write to the returned buffer.
				2737	* - The previous string contents will still be in the buffer;
				2738	* if you want to use it, then you need to call length() before getBuffer(minCapacity).
				2739	* If the length() was greater than minCapacity, then any contents after minCapacity
				2740	* may be lost.
				2741	* The buffer contents is not NUL-terminated by getBuffer().
				2742	* If length()<getCapacity() then you can terminate it by writing a NUL
				2743	* at index length().
				2744	* - You must call releaseBuffer(newLength) in order to
				2745	* return to normal UnicodeString operation.
				2746	*
				2747	* @param minCapacity the minimum number of UChars that are to be available
				2748	* in the buffer, starting at the returned pointer;
				2749	* default to the current string capacity if minCapacity==-1
				2750	* @return a writable pointer to the internal string buffer,
				2751	* or 0 if an error occurs (nested calls, out of memory)
				2752	*
				2753	* @see releaseBuffer
				2754	* @see getTerminatedBuffer()
				2755	* @stable ICU 2.0
				2756	*/
				2757	UChar *getBuffer(int32_t minCapacity);
				2758
				2759	/**
				2760	* Release a read/write buffer on a UnicodeString object with an
				2761	* "open" getBuffer(minCapacity).
				2762	* This function must be called in a matched pair with getBuffer(minCapacity).
				2763	* releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
				2764	*
				2765	* It will set the string length to newLength, at most to the current capacity.
				2766	* If newLength==-1 then it will set the length according to the
				2767	* first NUL in the buffer, or to the capacity if there is no NUL.
				2768	*
				2769	* After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
				2770	*
				2771	* @param newLength the new length of the UnicodeString object;
				2772	* defaults to the current capacity if newLength is greater than that;
				2773	* if newLength==-1, it defaults to u_strlen(buffer) but not more than
				2774	* the current capacity of the string
				2775	*
				2776	* @see getBuffer(int32_t minCapacity)
				2777	* @stable ICU 2.0
				2778	*/
				2779	void releaseBuffer(int32_t newLength=-1);
				2780
				2781	/**
				2782	* Get a read-only pointer to the internal buffer.
				2783	* This can be called at any time on a valid UnicodeString.
				2784	*
				2785	* It returns 0 if the string is bogus, or
				2786	* during an "open" getBuffer(minCapacity).
				2787	*
				2788	* It can be called as many times as desired.
				2789	* The pointer that it returns will remain valid until the UnicodeString object is modified,
				2790	* at which time the pointer is semantically invalidated and must not be used any more.
				2791	*
				2792	* The capacity of the buffer can be determined with getCapacity().
				2793	* The part after length() may or may not be initialized and valid,
				2794	* depending on the history of the UnicodeString object.
				2795	*
				2796	* The buffer contents is (probably) not NUL-terminated.
				2797	* You can check if it is with
				2798	* <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
				2799	* (See getTerminatedBuffer().)
				2800	*
				2801	* The buffer may reside in read-only memory. Its contents must not
				2802	* be modified.
				2803	*
				2804	* @return a read-only pointer to the internal string buffer,
				2805	* or 0 if the string is bogus or has an "open" getBuffer(minCapacity)
				2806	*
				2807	* @see getBuffer(int32_t minCapacity)
				2808	* @see getTerminatedBuffer()
				2809	* @stable ICU 2.0
				2810	*/
				2811	inline const UChar *getBuffer() const;
				2812
				2813	/**
				2814	* Get a read-only pointer to the internal buffer,
				2815	* making sure that it is NUL-terminated.
				2816	* This can be called at any time on a valid UnicodeString.
				2817	*
				2818	* It returns 0 if the string is bogus, or
				2819	* during an "open" getBuffer(minCapacity), or if the buffer cannot
				2820	* be NUL-terminated (because memory allocation failed).
				2821	*
				2822	* It can be called as many times as desired.
				2823	* The pointer that it returns will remain valid until the UnicodeString object is modified,
				2824	* at which time the pointer is semantically invalidated and must not be used any more.
				2825	*
				2826	* The capacity of the buffer can be determined with getCapacity().
				2827	* The part after length()+1 may or may not be initialized and valid,
				2828	* depending on the history of the UnicodeString object.
				2829	*
				2830	* The buffer contents is guaranteed to be NUL-terminated.
				2831	* getTerminatedBuffer() may reallocate the buffer if a terminating NUL
				2832	* is written.
				2833	* For this reason, this function is not const, unlike getBuffer().
				2834	* Note that a UnicodeString may also contain NUL characters as part of its contents.
				2835	*
				2836	* The buffer may reside in read-only memory. Its contents must not
				2837	* be modified.
				2838	*
				2839	* @return a read-only pointer to the internal string buffer,
				2840	* or 0 if the string is bogus, has an "open" getBuffer(minCapacity), or cannot be NUL-terminated
				2841	*
				2842	* @see getBuffer(int32_t minCapacity)
				2843	* @see getBuffer()
				2844	* @stable ICU 2.2
				2845	*/
				2846	inline const UChar *getTerminatedBuffer();
				2847
				2848	//========================================
				2849	// Constructors
				2850	//========================================
				2851
				2852	/** Construct an empty UnicodeString.
				2853	* @stable ICU 2.0
				2854	*/
				2855	UnicodeString();
				2856
				2857	/**
				2858	* Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
				2859	* @param capacity the number of UChars this UnicodeString should hold
				2860	* before a resize is necessary; if count is greater than 0 and count
				2861	* code points c take up more space than capacity, then capacity is adjusted
				2862	* accordingly.
				2863	* @param c is used to initially fill the string
				2864	* @param count specifies how many code points c are to be written in the
				2865	* string
				2866	* @stable ICU 2.0
				2867	*/
				2868	UnicodeString(int32_t capacity, UChar32 c, int32_t count);
				2869
				2870	/**
				2871	* Single UChar (code unit) constructor.
Craig Cornelius	103e9ff	2012-10-09 17:03:29 -0700	[diff] [blame]	2872	*
				2873	* It is recommended to mark this constructor "explicit" by
				2874	* <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
				2875	* on the compiler command line or similar.
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	2876	* @param ch the character to place in the UnicodeString
				2877	* @stable ICU 2.0
				2878	*/
Craig Cornelius	103e9ff	2012-10-09 17:03:29 -0700	[diff] [blame]	2879	UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	2880
				2881	/**
				2882	* Single UChar32 (code point) constructor.
Craig Cornelius	103e9ff	2012-10-09 17:03:29 -0700	[diff] [blame]	2883	*
				2884	* It is recommended to mark this constructor "explicit" by
				2885	* <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
				2886	* on the compiler command line or similar.
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	2887	* @param ch the character to place in the UnicodeString
				2888	* @stable ICU 2.0
				2889	*/
Craig Cornelius	103e9ff	2012-10-09 17:03:29 -0700	[diff] [blame]	2890	UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	2891
				2892	/**
				2893	* UChar* constructor.
Craig Cornelius	103e9ff	2012-10-09 17:03:29 -0700	[diff] [blame]	2894	*
				2895	* It is recommended to mark this constructor "explicit" by
				2896	* <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
				2897	* on the compiler command line or similar.
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	2898	* @param text The characters to place in the UnicodeString. <TT>text</TT>
				2899	* must be NUL (U+0000) terminated.
				2900	* @stable ICU 2.0
				2901	*/
Craig Cornelius	103e9ff	2012-10-09 17:03:29 -0700	[diff] [blame]	2902	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	2903
				2904	/**
				2905	* UChar* constructor.
				2906	* @param text The characters to place in the UnicodeString.
				2907	* @param textLength The number of Unicode characters in <TT>text</TT>
				2908	* to copy.
				2909	* @stable ICU 2.0
				2910	*/
				2911	UnicodeString(const UChar *text,
				2912	int32_t textLength);
				2913
				2914	/**
				2915	* Readonly-aliasing UChar* constructor.
				2916	* The text will be used for the UnicodeString object, but
				2917	* it will not be released when the UnicodeString is destroyed.
				2918	* This has copy-on-write semantics:
				2919	* When the string is modified, then the buffer is first copied into
				2920	* newly allocated memory.
				2921	* The aliased buffer is never modified.
Craig Cornelius	54dcd9b	2013-02-15 14:03:14 -0800	[diff] [blame^]	2922	*
				2923	* In an assignment to another UnicodeString, when using the copy constructor
				2924	* or the assignment operator, the text will be copied.
				2925	* When using fastCopyFrom(), the text will be aliased again,
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	2926	* so that both strings then alias the same readonly-text.
				2927	*
				2928	* @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
				2929	* This must be true if <code>textLength==-1</code>.
				2930	* @param text The characters to alias for the UnicodeString.
				2931	* @param textLength The number of Unicode characters in <code>text</code> to alias.
				2932	* If -1, then this constructor will determine the length
				2933	* by calling <code>u_strlen()</code>.
				2934	* @stable ICU 2.0
				2935	*/
				2936	UnicodeString(UBool isTerminated,
				2937	const UChar *text,
				2938	int32_t textLength);
				2939
				2940	/**
				2941	* Writable-aliasing UChar* constructor.
				2942	* The text will be used for the UnicodeString object, but
				2943	* it will not be released when the UnicodeString is destroyed.
				2944	* This has write-through semantics:
				2945	* For as long as the capacity of the buffer is sufficient, write operations
				2946	* will directly affect the buffer. When more capacity is necessary, then
				2947	* a new buffer will be allocated and the contents copied as with regularly
				2948	* constructed strings.
				2949	* In an assignment to another UnicodeString, the buffer will be copied.
				2950	* The extract(UChar *dst) function detects whether the dst pointer is the same
				2951	* as the string buffer itself and will in this case not copy the contents.
				2952	*
				2953	* @param buffer The characters to alias for the UnicodeString.
				2954	* @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
				2955	* @param buffCapacity The size of <code>buffer</code> in UChars.
				2956	* @stable ICU 2.0
				2957	*/
				2958	UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
				2959
Jean-Baptiste Queru	b0ac937	2009-07-20 15:09:32 -0700	[diff] [blame]	2960	#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
				2961
				2962	/**
				2963	* char* constructor.
Craig Cornelius	103e9ff	2012-10-09 17:03:29 -0700	[diff] [blame]	2964	* Uses the default converter (and thus depends on the ICU conversion code)
				2965	* unless U_CHARSET_IS_UTF8 is set to 1.
				2966	*
				2967	* For ASCII (really "invariant character") strings it is more efficient to use
				2968	* the constructor that takes a US_INV (for its enum EInvariant).
				2969	* For ASCII (invariant-character) string literals, see UNICODE_STRING and
				2970	* UNICODE_STRING_SIMPLE.
				2971	*
				2972	* It is recommended to mark this constructor "explicit" by
				2973	* <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
				2974	* on the compiler command line or similar.
Jean-Baptiste Queru	b0ac937	2009-07-20 15:09:32 -0700	[diff] [blame]	2975	* @param codepageData an array of bytes, null-terminated,
				2976	* in the platform's default codepage.
				2977	* @stable ICU 2.0
Craig Cornelius	103e9ff	2012-10-09 17:03:29 -0700	[diff] [blame]	2978	* @see UNICODE_STRING
				2979	* @see UNICODE_STRING_SIMPLE
Jean-Baptiste Queru	b0ac937	2009-07-20 15:09:32 -0700	[diff] [blame]	2980	*/
Craig Cornelius	103e9ff	2012-10-09 17:03:29 -0700	[diff] [blame]	2981	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
Jean-Baptiste Queru	b0ac937	2009-07-20 15:09:32 -0700	[diff] [blame]	2982
				2983	/**
				2984	* char* constructor.
Craig Cornelius	103e9ff	2012-10-09 17:03:29 -0700	[diff] [blame]	2985	* Uses the default converter (and thus depends on the ICU conversion code)
				2986	* unless U_CHARSET_IS_UTF8 is set to 1.
Jean-Baptiste Queru	b0ac937	2009-07-20 15:09:32 -0700	[diff] [blame]	2987	* @param codepageData an array of bytes in the platform's default codepage.
				2988	* @param dataLength The number of bytes in <TT>codepageData</TT>.
				2989	* @stable ICU 2.0
				2990	*/
				2991	UnicodeString(const char *codepageData, int32_t dataLength);
				2992
				2993	#endif
				2994
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	2995	#if !UCONFIG_NO_CONVERSION
				2996
				2997	/**
				2998	* char* constructor.
				2999	* @param codepageData an array of bytes, null-terminated
				3000	* @param codepage the encoding of <TT>codepageData</TT>. The special
				3001	* value 0 for <TT>codepage</TT> indicates that the text is in the
				3002	* platform's default codepage.
				3003	*
				3004	* If <code>codepage</code> is an empty string (<code>""</code>),
				3005	* then a simple conversion is performed on the codepage-invariant
				3006	* subset ("invariant characters") of the platform encoding. See utypes.h.
				3007	* Recommendation: For invariant-character strings use the constructor
				3008	* UnicodeString(const char *src, int32_t length, enum EInvariant inv)
				3009	* because it avoids object code dependencies of UnicodeString on
				3010	* the conversion code.
				3011	*
				3012	* @stable ICU 2.0
				3013	*/
Jean-Baptiste Queru	b0ac937	2009-07-20 15:09:32 -0700	[diff] [blame]	3014	UnicodeString(const char *codepageData, const char *codepage);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3015
				3016	/**
				3017	* char* constructor.
				3018	* @param codepageData an array of bytes.
				3019	* @param dataLength The number of bytes in <TT>codepageData</TT>.
				3020	* @param codepage the encoding of <TT>codepageData</TT>. The special
				3021	* value 0 for <TT>codepage</TT> indicates that the text is in the
				3022	* platform's default codepage.
				3023	* If <code>codepage</code> is an empty string (<code>""</code>),
				3024	* then a simple conversion is performed on the codepage-invariant
				3025	* subset ("invariant characters") of the platform encoding. See utypes.h.
				3026	* Recommendation: For invariant-character strings use the constructor
				3027	* UnicodeString(const char *src, int32_t length, enum EInvariant inv)
				3028	* because it avoids object code dependencies of UnicodeString on
				3029	* the conversion code.
				3030	*
				3031	* @stable ICU 2.0
				3032	*/
Jean-Baptiste Queru	b0ac937	2009-07-20 15:09:32 -0700	[diff] [blame]	3033	UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3034
				3035	/**
				3036	* char * / UConverter constructor.
				3037	* This constructor uses an existing UConverter object to
				3038	* convert the codepage string to Unicode and construct a UnicodeString
				3039	* from that.
				3040	*
				3041	* The converter is reset at first.
				3042	* If the error code indicates a failure before this constructor is called,
				3043	* or if an error occurs during conversion or construction,
				3044	* then the string will be bogus.
				3045	*
				3046	* This function avoids the overhead of opening and closing a converter if
				3047	* multiple strings are constructed.
				3048	*
				3049	* @param src input codepage string
				3050	* @param srcLength length of the input string, can be -1 for NUL-terminated strings
				3051	* @param cnv converter object (ucnv_resetToUnicode() will be called),
				3052	* can be NULL for the default converter
				3053	* @param errorCode normal ICU error code
				3054	* @stable ICU 2.0
				3055	*/
				3056	UnicodeString(
				3057	const char *src, int32_t srcLength,
				3058	UConverter *cnv,
				3059	UErrorCode &errorCode);
				3060
				3061	#endif
				3062
				3063	/**
				3064	* Constructs a Unicode string from an invariant-character char * string.
				3065	* About invariant characters see utypes.h.
				3066	* This constructor has no runtime dependency on conversion code and is
				3067	* therefore recommended over ones taking a charset name string
				3068	* (where the empty string "" indicates invariant-character conversion).
				3069	*
				3070	* Use the macro US_INV as the third, signature-distinguishing parameter.
				3071	*
				3072	* For example:
				3073	* \code
				3074	* void fn(const char *s) {
				3075	* UnicodeString ustr(s, -1, US_INV);
				3076	* // use ustr ...
				3077	* }
				3078	* \endcode
				3079	*
				3080	* @param src String using only invariant characters.
				3081	* @param length Length of src, or -1 if NUL-terminated.
				3082	* @param inv Signature-distinguishing parameter, use US_INV.
				3083	*
				3084	* @see US_INV
				3085	* @stable ICU 3.2
				3086	*/
				3087	UnicodeString(const char *src, int32_t length, enum EInvariant inv);
				3088
				3089
				3090	/**
				3091	* Copy constructor.
				3092	* @param that The UnicodeString object to copy.
				3093	* @stable ICU 2.0
				3094	*/
				3095	UnicodeString(const UnicodeString& that);
				3096
				3097	/**
				3098	* 'Substring' constructor from tail of source string.
				3099	* @param src The UnicodeString object to copy.
				3100	* @param srcStart The offset into <tt>src</tt> at which to start copying.
				3101	* @stable ICU 2.2
				3102	*/
				3103	UnicodeString(const UnicodeString& src, int32_t srcStart);
				3104
				3105	/**
				3106	* 'Substring' constructor from subrange of source string.
				3107	* @param src The UnicodeString object to copy.
				3108	* @param srcStart The offset into <tt>src</tt> at which to start copying.
				3109	* @param srcLength The number of characters from <tt>src</tt> to copy.
				3110	* @stable ICU 2.2
				3111	*/
				3112	UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
				3113
				3114	/**
				3115	* Clone this object, an instance of a subclass of Replaceable.
				3116	* Clones can be used concurrently in multiple threads.
				3117	* If a subclass does not implement clone(), or if an error occurs,
				3118	* then NULL is returned.
				3119	* The clone functions in all subclasses return a pointer to a Replaceable
				3120	* because some compilers do not support covariant (same-as-this)
				3121	* return types; cast to the appropriate subclass if necessary.
				3122	* The caller must delete the clone.
				3123	*
				3124	* @return a clone of this object
				3125	*
				3126	* @see Replaceable::clone
				3127	* @see getDynamicClassID
				3128	* @stable ICU 2.6
				3129	*/
				3130	virtual Replaceable *clone() const;
				3131
				3132	/** Destructor.
				3133	* @stable ICU 2.0
				3134	*/
				3135	virtual ~UnicodeString();
				3136
Jean-Baptiste Queru	b0ac937	2009-07-20 15:09:32 -0700	[diff] [blame]	3137	/**
				3138	* Create a UnicodeString from a UTF-8 string.
				3139	* Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
				3140	* Calls u_strFromUTF8WithSub().
				3141	*
				3142	* @param utf8 UTF-8 input string.
				3143	* Note that a StringPiece can be implicitly constructed
				3144	* from a std::string or a NUL-terminated const char * string.
				3145	* @return A UnicodeString with equivalent UTF-16 contents.
				3146	* @see toUTF8
				3147	* @see toUTF8String
claireho	50294ea	2010-05-03 15:44:48 -0700	[diff] [blame]	3148	* @stable ICU 4.2
Jean-Baptiste Queru	b0ac937	2009-07-20 15:09:32 -0700	[diff] [blame]	3149	*/
				3150	static UnicodeString fromUTF8(const StringPiece &utf8);
				3151
				3152	/**
				3153	* Create a UnicodeString from a UTF-32 string.
				3154	* Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
				3155	* Calls u_strFromUTF32WithSub().
				3156	*
				3157	* @param utf32 UTF-32 input string. Must not be NULL.
				3158	* @param length Length of the input string, or -1 if NUL-terminated.
				3159	* @return A UnicodeString with equivalent UTF-16 contents.
				3160	* @see toUTF32
claireho	50294ea	2010-05-03 15:44:48 -0700	[diff] [blame]	3161	* @stable ICU 4.2
Jean-Baptiste Queru	b0ac937	2009-07-20 15:09:32 -0700	[diff] [blame]	3162	*/
				3163	static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3164
				3165	/* Miscellaneous operations */
				3166
				3167	/**
				3168	* Unescape a string of characters and return a string containing
				3169	* the result. The following escape sequences are recognized:
				3170	*
				3171	* \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
				3172	* \\Uhhhhhhhh 8 hex digits
				3173	* \\xhh 1-2 hex digits
				3174	* \\ooo 1-3 octal digits; o in [0-7]
				3175	* \\cX control-X; X is masked with 0x1F
				3176	*
				3177	* as well as the standard ANSI C escapes:
				3178	*
				3179	* \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
				3180	* \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
claireho	50294ea	2010-05-03 15:44:48 -0700	[diff] [blame]	3181	* \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3182	*
				3183	* Anything else following a backslash is generically escaped. For
				3184	* example, "[a\\-z]" returns "[a-z]".
				3185	*
				3186	* If an escape sequence is ill-formed, this method returns an empty
				3187	* string. An example of an ill-formed sequence is "\\u" followed by
				3188	* fewer than 4 hex digits.
				3189	*
				3190	* This function is similar to u_unescape() but not identical to it.
				3191	* The latter takes a source char*, so it does escape recognition
				3192	* and also invariant conversion.
				3193	*
				3194	* @return a string with backslash escapes interpreted, or an
				3195	* empty string on error.
				3196	* @see UnicodeString#unescapeAt()
				3197	* @see u_unescape()
				3198	* @see u_unescapeAt()
				3199	* @stable ICU 2.0
				3200	*/
				3201	UnicodeString unescape() const;
				3202
				3203	/**
				3204	* Unescape a single escape sequence and return the represented
				3205	* character. See unescape() for a listing of the recognized escape
				3206	* sequences. The character at offset-1 is assumed (without
				3207	* checking) to be a backslash. If the escape sequence is
				3208	* ill-formed, or the offset is out of range, (UChar32)0xFFFFFFFF is
				3209	* returned.
				3210	*
				3211	* @param offset an input output parameter. On input, it is the
				3212	* offset into this string where the escape sequence is located,
				3213	* after the initial backslash. On output, it is advanced after the
				3214	* last character parsed. On error, it is not advanced at all.
				3215	* @return the character represented by the escape sequence at
				3216	* offset, or (UChar32)0xFFFFFFFF on error.
				3217	* @see UnicodeString#unescape()
				3218	* @see u_unescape()
				3219	* @see u_unescapeAt()
				3220	* @stable ICU 2.0
				3221	*/
				3222	UChar32 unescapeAt(int32_t &offset) const;
				3223
				3224	/**
				3225	* ICU "poor man's RTTI", returns a UClassID for this class.
				3226	*
				3227	* @stable ICU 2.2
				3228	*/
				3229	static UClassID U_EXPORT2 getStaticClassID();
				3230
				3231	/**
				3232	* ICU "poor man's RTTI", returns a UClassID for the actual class.
				3233	*
				3234	* @stable ICU 2.2
				3235	*/
				3236	virtual UClassID getDynamicClassID() const;
				3237
				3238	//========================================
				3239	// Implementation methods
				3240	//========================================
				3241
				3242	protected:
				3243	/**
				3244	* Implement Replaceable::getLength() (see jitterbug 1027).
				3245	* @stable ICU 2.4
				3246	*/
				3247	virtual int32_t getLength() const;
				3248
				3249	/**
				3250	* The change in Replaceable to use virtual getCharAt() allows
				3251	* UnicodeString::charAt() to be inline again (see jitterbug 709).
				3252	* @stable ICU 2.4
				3253	*/
				3254	virtual UChar getCharAt(int32_t offset) const;
				3255
				3256	/**
				3257	* The change in Replaceable to use virtual getChar32At() allows
				3258	* UnicodeString::char32At() to be inline again (see jitterbug 709).
				3259	* @stable ICU 2.4
				3260	*/
				3261	virtual UChar32 getChar32At(int32_t offset) const;
				3262
				3263	private:
Jean-Baptiste Queru	b0ac937	2009-07-20 15:09:32 -0700	[diff] [blame]	3264	// For char* constructors. Could be made public.
				3265	UnicodeString &setToUTF8(const StringPiece &utf8);
				3266	// For extract(char*).
				3267	// We could make a toUTF8(target, capacity, errorCode) public but not
				3268	// this version: New API will be cleaner if we make callers create substrings
				3269	// rather than having start+length on every method,
				3270	// and it should take a UErrorCode&.
				3271	int32_t
				3272	toUTF8(int32_t start, int32_t len,
				3273	char *target, int32_t capacity) const;
				3274
Craig Cornelius	54dcd9b	2013-02-15 14:03:14 -0800	[diff] [blame^]	3275	/**
				3276	* Internal string contents comparison, called by operator==.
				3277	* Requires: this & text not bogus and have same lengths.
				3278	*/
				3279	UBool doEquals(const UnicodeString &text, int32_t len) const;
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3280
				3281	inline int8_t
				3282	doCompare(int32_t start,
				3283	int32_t length,
				3284	const UnicodeString& srcText,
				3285	int32_t srcStart,
				3286	int32_t srcLength) const;
				3287
				3288	int8_t doCompare(int32_t start,
				3289	int32_t length,
				3290	const UChar *srcChars,
				3291	int32_t srcStart,
				3292	int32_t srcLength) const;
				3293
				3294	inline int8_t
				3295	doCompareCodePointOrder(int32_t start,
				3296	int32_t length,
				3297	const UnicodeString& srcText,
				3298	int32_t srcStart,
				3299	int32_t srcLength) const;
				3300
				3301	int8_t doCompareCodePointOrder(int32_t start,
				3302	int32_t length,
				3303	const UChar *srcChars,
				3304	int32_t srcStart,
				3305	int32_t srcLength) const;
				3306
				3307	inline int8_t
				3308	doCaseCompare(int32_t start,
				3309	int32_t length,
				3310	const UnicodeString &srcText,
				3311	int32_t srcStart,
				3312	int32_t srcLength,
				3313	uint32_t options) const;
				3314
				3315	int8_t
				3316	doCaseCompare(int32_t start,
				3317	int32_t length,
				3318	const UChar *srcChars,
				3319	int32_t srcStart,
				3320	int32_t srcLength,
				3321	uint32_t options) const;
				3322
				3323	int32_t doIndexOf(UChar c,
				3324	int32_t start,
				3325	int32_t length) const;
				3326
				3327	int32_t doIndexOf(UChar32 c,
				3328	int32_t start,
				3329	int32_t length) const;
				3330
				3331	int32_t doLastIndexOf(UChar c,
				3332	int32_t start,
				3333	int32_t length) const;
				3334
				3335	int32_t doLastIndexOf(UChar32 c,
				3336	int32_t start,
				3337	int32_t length) const;
				3338
				3339	void doExtract(int32_t start,
				3340	int32_t length,
				3341	UChar *dst,
				3342	int32_t dstStart) const;
				3343
				3344	inline void doExtract(int32_t start,
				3345	int32_t length,
				3346	UnicodeString& target) const;
				3347
				3348	inline UChar doCharAt(int32_t offset) const;
				3349
				3350	UnicodeString& doReplace(int32_t start,
				3351	int32_t length,
				3352	const UnicodeString& srcText,
				3353	int32_t srcStart,
				3354	int32_t srcLength);
				3355
				3356	UnicodeString& doReplace(int32_t start,
				3357	int32_t length,
				3358	const UChar *srcChars,
				3359	int32_t srcStart,
				3360	int32_t srcLength);
				3361
				3362	UnicodeString& doReverse(int32_t start,
				3363	int32_t length);
				3364
				3365	// calculate hash code
				3366	int32_t doHashCode(void) const;
				3367
				3368	// get pointer to start of array
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3369	// these do not check for kOpenGetBuffer, unlike the public getBuffer() function
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3370	inline UChar* getArrayStart(void);
				3371	inline const UChar* getArrayStart(void) const;
				3372
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3373	// A UnicodeString object (not necessarily its current buffer)
				3374	// is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
				3375	inline UBool isWritable() const;
				3376
				3377	// Is the current buffer writable?
				3378	inline UBool isBufferWritable() const;
				3379
				3380	// None of the following does releaseArray().
				3381	inline void setLength(int32_t len); // sets only fShortLength and fLength
				3382	inline void setToEmpty(); // sets fFlags=kShortString
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3383	inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
				3384
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3385	// allocate the array; result may be fStackBuffer
				3386	// sets refCount to 1 if appropriate
				3387	// sets fArray, fCapacity, and fFlags
				3388	// returns boolean for success or failure
				3389	UBool allocate(int32_t capacity);
				3390
				3391	// release the array if owned
				3392	void releaseArray(void);
				3393
				3394	// turn a bogus string into an empty one
				3395	void unBogus();
				3396
				3397	// implements assignment operator, copy constructor, and fastCopyFrom()
				3398	UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
				3399
				3400	// Pin start and limit to acceptable values.
				3401	inline void pinIndex(int32_t& start) const;
				3402	inline void pinIndices(int32_t& start,
				3403	int32_t& length) const;
				3404
				3405	#if !UCONFIG_NO_CONVERSION
				3406
				3407	/* Internal extract() using UConverter. */
				3408	int32_t doExtract(int32_t start, int32_t length,
				3409	char *dest, int32_t destCapacity,
				3410	UConverter *cnv,
				3411	UErrorCode &errorCode) const;
				3412
				3413	/*
				3414	* Real constructor for converting from codepage data.
				3415	* It assumes that it is called with !fRefCounted.
				3416	*
				3417	* If <code>codepage==0</code>, then the default converter
				3418	* is used for the platform encoding.
				3419	* If <code>codepage</code> is an empty string (<code>""</code>),
				3420	* then a simple conversion is performed on the codepage-invariant
				3421	* subset ("invariant characters") of the platform encoding. See utypes.h.
				3422	*/
				3423	void doCodepageCreate(const char *codepageData,
				3424	int32_t dataLength,
				3425	const char *codepage);
				3426
				3427	/*
				3428	* Worker function for creating a UnicodeString from
				3429	* a codepage string using a UConverter.
				3430	*/
				3431	void
				3432	doCodepageCreate(const char *codepageData,
				3433	int32_t dataLength,
				3434	UConverter *converter,
				3435	UErrorCode &status);
				3436
				3437	#endif
				3438
  /*
   * This function is called when write access to the array
   * is necessary.
   *
   * We need to make a copy of the array if
   * the buffer is read-only, or
   * the buffer is refCounted (shared), and refCount>1, or
   * the buffer is too small.
   *
   * Return FALSE if memory could not be allocated.
   *
   * All parameters have defaults, so a plain cloneArrayIfNeeded() call is valid.
   * NOTE(review): the meaning of the -1 defaults for newCapacity/growCapacity
   * and the ownership rules for *pBufferToDelete are defined in unistr.cpp,
   * not visible here -- confirm before relying on them.
   */
  UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
                           int32_t growCapacity = -1,
                           UBool doCopyArray = TRUE,
                           int32_t **pBufferToDelete = 0,
                           UBool forceClone = FALSE);
				3455
Craig Cornelius	103e9ff	2012-10-09 17:03:29 -0700	[diff] [blame]	3456	/**
				3457	* Common function for UnicodeString case mappings.
				3458	* The stringCaseMapper has the same type UStringCaseMapper
				3459	* as in ustr_imp.h for ustrcase_map().
				3460	*/
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3461	UnicodeString &
Craig Cornelius	103e9ff	2012-10-09 17:03:29 -0700	[diff] [blame]	3462	caseMap(const UCaseMap csm, UStringCaseMapper stringCaseMapper);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3463
  // ref counting
  // The counter belongs to buffers flagged kRefCounted ("there is a refCount
  // field before the characters in fArray"); isBufferWritable() treats such a
  // buffer as writable only when refCount()==1.
  // NOTE(review): removeRef() presumably returns the decremented count -- confirm
  void addRef(void);
  int32_t removeRef(void);
  int32_t refCount(void) const;
				3468
				3469	// constants
  // Private constants: buffer sizing, sentinel values, and the bit flags
  // stored in fFlags.
  enum {
    // Set the stack buffer size so that sizeof(UnicodeString) is,
    // naturally (without padding), a multiple of sizeof(pointer).
    // (13 UChars with 4-byte pointers, 15 otherwise; the buffer is split into
    // fUnion.fStackBuffer[8] and fRestOfStackBuffer[US_STACKBUF_SIZE-8].)
    US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings
    kInvalidUChar=0xffff, // invalid UChar index
    kGrowSize=128, // grow size for this buffer
    kInvalidHashCode=0, // invalid hash code
    kEmptyHashCode=1, // hash code for empty string

    // bit flag values for fFlags
    // (each is a distinct power of two so they can be combined with |)
    kIsBogus=1, // this string is bogus, i.e., not valid or NULL
    kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields
    kRefCounted=4, // there is a refCount field before the characters in fArray
    kBufferIsReadonly=8,// do not write to this buffer
    kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
    // and releaseBuffer(newLength) must be called

    // combined values for convenience
    // (names for the storage kinds, expressed through the flags above)
    kShortString=kUsingStackBuffer,
    kLongString=kRefCounted,
    kReadonlyAlias=kBufferIsReadonly,
    kWritableAlias=0
  };
				3493
  // Classes granted access to the private members of UnicodeString.
  friend class StringThreadTest;
  friend class UnicodeStringAppendable;

  union StackBufferOrFields; // forward declaration necessary before friend declaration
  friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
				3499
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3500	/*
				3501	* The following are all the class fields that are stored
				3502	* in each UnicodeString object.
				3503	* Note that UnicodeString has virtual functions,
				3504	* therefore there is an implicit vtable pointer
				3505	* as the first real field.
claireho	b26ce3a	2012-01-10 17:54:41 -0800	[diff] [blame]	3506	* The fields should be aligned such that no padding is necessary.
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3507	* On 32-bit machines, the size should be 32 bytes,
				3508	* on 64-bit machines (8-byte pointers), it should be 40 bytes.
claireho	b26ce3a	2012-01-10 17:54:41 -0800	[diff] [blame]	3509	*
				3510	* We use a hack to achieve this.
				3511	*
				3512	* With at least some compilers, each of the following is forced to
				3513	* a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
				3514	* rounded up with additional padding if the fields do not already fit that requirement:
				3515	* - sizeof(class UnicodeString)
				3516	* - offsetof(UnicodeString, fUnion)
				3517	* - sizeof(fUnion)
				3518	* - sizeof(fFields)
				3519	*
				3520	* In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars)
				3521	* which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines.
				3522	* (Padding at the end of fFields is ok:
				3523	* As long as there is no padding after fStackBuffer, it is not wasted space.)
				3524	*
				3525	* We further assume that the compiler does not reorder the fields,
				3526	* so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion,
				3527	* with at most some padding (but no other field) in between.
				3528	* (Padding there would be wasted space, but functionally harmless.)
				3529	*
				3530	* We use a few more sizeof(pointer)'s chunks of space with
				3531	* fRestOfStackBuffer, fShortLength and fFlags,
				3532	* to get up exactly to the intended sizeof(UnicodeString).
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3533	*/
				3534	// (implicit) *vtable;
  // Storage for the string contents. Which union member is live is decided by
  // the kUsingStackBuffer bit in fFlags (see getArrayStart() and getBuffer()).
  union StackBufferOrFields {
    // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
    // else fFields is used
    UChar fStackBuffer[8]; // buffer for short strings, together with fRestOfStackBuffer
    struct {
      UChar *fArray; // the Unicode data
      int32_t fCapacity; // capacity of fArray (in UChars)
      int32_t fLength; // number of characters in fArray if >127; else undefined
    } fFields;
  } fUnion;
  // Continuation of the short-string buffer; must directly follow fUnion so
  // that the two arrays together provide US_STACKBUF_SIZE UChars.
  UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8];
  // length() returns this value when it is >= 0 and fUnion.fFields.fLength otherwise.
  int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength
  uint8_t fFlags; // bit flags: see constants above
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3548	};
				3549
/**
 * Create a new UnicodeString with the concatenation of two others.
 *
 * This is a non-member function declared after the class definition;
 * the result is returned by value and neither argument is modified
 * (both are taken by const reference).
 *
 * @param s1 The first string to be copied to the new one.
 * @param s2 The second string to be copied to the new one, after s1.
 * @return UnicodeString(s1).append(s2)
 * @stable ICU 2.8
 */
U_COMMON_API UnicodeString U_EXPORT2
operator+ (const UnicodeString &s1, const UnicodeString &s2);
				3560
				3561	//========================================
				3562	// Inline members
				3563	//========================================
				3564
				3565	//========================================
				3566	// Privates
				3567	//========================================
				3568
				3569	inline void
				3570	UnicodeString::pinIndex(int32_t& start) const
				3571	{
				3572	// pin index
				3573	if(start < 0) {
				3574	start = 0;
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3575	} else if(start > length()) {
				3576	start = length();
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3577	}
				3578	}
				3579
				3580	inline void
				3581	UnicodeString::pinIndices(int32_t& start,
				3582	int32_t& _length) const
				3583	{
				3584	// pin indices
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3585	int32_t len = length();
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3586	if(start < 0) {
				3587	start = 0;
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3588	} else if(start > len) {
				3589	start = len;
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3590	}
				3591	if(_length < 0) {
				3592	_length = 0;
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3593	} else if(_length > (len - start)) {
				3594	_length = (len - start);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3595	}
				3596	}
				3597
				3598	inline UChar*
				3599	UnicodeString::getArrayStart()
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3600	{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3601
				3602	inline const UChar*
				3603	UnicodeString::getArrayStart() const
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3604	{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3605
				3606	//========================================
				3607	// Read-only implementation methods
				3608	//========================================
				3609	inline int32_t
				3610	UnicodeString::length() const
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3611	{ return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3612
				3613	inline int32_t
				3614	UnicodeString::getCapacity() const
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3615	{ return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3616
				3617	inline int32_t
				3618	UnicodeString::hashCode() const
				3619	{ return doHashCode(); }
				3620
				3621	inline UBool
				3622	UnicodeString::isBogus() const
				3623	{ return (UBool)(fFlags & kIsBogus); }
				3624
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3625	inline UBool
				3626	UnicodeString::isWritable() const
				3627	{ return (UBool)!(fFlags&(kOpenGetBuffer\|kIsBogus)); }
				3628
				3629	inline UBool
				3630	UnicodeString::isBufferWritable() const
				3631	{
				3632	return (UBool)(
				3633	!(fFlags&(kOpenGetBuffer\|kIsBogus\|kBufferIsReadonly)) &&
				3634	(!(fFlags&kRefCounted) \|\| refCount()==1));
				3635	}
				3636
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3637	inline const UChar *
				3638	UnicodeString::getBuffer() const {
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3639	if(fFlags&(kIsBogus\|kOpenGetBuffer)) {
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3640	return 0;
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3641	} else if(fFlags&kUsingStackBuffer) {
				3642	return fUnion.fStackBuffer;
				3643	} else {
				3644	return fUnion.fFields.fArray;
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3645	}
				3646	}
				3647
				3648	//========================================
				3649	// Read-only alias methods
				3650	//========================================
				3651	inline int8_t
				3652	UnicodeString::doCompare(int32_t start,
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3653	int32_t thisLength,
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3654	const UnicodeString& srcText,
				3655	int32_t srcStart,
				3656	int32_t srcLength) const
				3657	{
				3658	if(srcText.isBogus()) {
				3659	return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
				3660	} else {
				3661	srcText.pinIndices(srcStart, srcLength);
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3662	return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3663	}
				3664	}
				3665
				3666	inline UBool
				3667	UnicodeString::operator== (const UnicodeString& text) const
				3668	{
				3669	if(isBogus()) {
				3670	return text.isBogus();
				3671	} else {
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3672	int32_t len = length(), textLength = text.length();
Craig Cornelius	54dcd9b	2013-02-15 14:03:14 -0800	[diff] [blame^]	3673	return !text.isBogus() && len == textLength && doEquals(text, len);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3674	}
				3675	}
				3676
				3677	inline UBool
				3678	UnicodeString::operator!= (const UnicodeString& text) const
				3679	{ return (! operator==(text)); }
				3680
				3681	inline UBool
				3682	UnicodeString::operator> (const UnicodeString& text) const
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3683	{ return doCompare(0, length(), text, 0, text.length()) == 1; }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3684
				3685	inline UBool
				3686	UnicodeString::operator< (const UnicodeString& text) const
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3687	{ return doCompare(0, length(), text, 0, text.length()) == -1; }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3688
				3689	inline UBool
				3690	UnicodeString::operator>= (const UnicodeString& text) const
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3691	{ return doCompare(0, length(), text, 0, text.length()) != -1; }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3692
				3693	inline UBool
				3694	UnicodeString::operator<= (const UnicodeString& text) const
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3695	{ return doCompare(0, length(), text, 0, text.length()) != 1; }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3696
				3697	inline int8_t
				3698	UnicodeString::compare(const UnicodeString& text) const
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3699	{ return doCompare(0, length(), text, 0, text.length()); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3700
				3701	inline int8_t
				3702	UnicodeString::compare(int32_t start,
				3703	int32_t _length,
				3704	const UnicodeString& srcText) const
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3705	{ return doCompare(start, _length, srcText, 0, srcText.length()); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3706
				3707	inline int8_t
				3708	UnicodeString::compare(const UChar *srcChars,
				3709	int32_t srcLength) const
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3710	{ return doCompare(0, length(), srcChars, 0, srcLength); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3711
				3712	inline int8_t
				3713	UnicodeString::compare(int32_t start,
				3714	int32_t _length,
				3715	const UnicodeString& srcText,
				3716	int32_t srcStart,
				3717	int32_t srcLength) const
				3718	{ return doCompare(start, _length, srcText, srcStart, srcLength); }
				3719
				3720	inline int8_t
				3721	UnicodeString::compare(int32_t start,
				3722	int32_t _length,
				3723	const UChar *srcChars) const
				3724	{ return doCompare(start, _length, srcChars, 0, _length); }
				3725
				3726	inline int8_t
				3727	UnicodeString::compare(int32_t start,
				3728	int32_t _length,
				3729	const UChar *srcChars,
				3730	int32_t srcStart,
				3731	int32_t srcLength) const
				3732	{ return doCompare(start, _length, srcChars, srcStart, srcLength); }
				3733
				3734	inline int8_t
				3735	UnicodeString::compareBetween(int32_t start,
				3736	int32_t limit,
				3737	const UnicodeString& srcText,
				3738	int32_t srcStart,
				3739	int32_t srcLimit) const
				3740	{ return doCompare(start, limit - start,
				3741	srcText, srcStart, srcLimit - srcStart); }
				3742
				3743	inline int8_t
				3744	UnicodeString::doCompareCodePointOrder(int32_t start,
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3745	int32_t thisLength,
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3746	const UnicodeString& srcText,
				3747	int32_t srcStart,
				3748	int32_t srcLength) const
				3749	{
				3750	if(srcText.isBogus()) {
				3751	return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
				3752	} else {
				3753	srcText.pinIndices(srcStart, srcLength);
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3754	return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3755	}
				3756	}
				3757
				3758	inline int8_t
				3759	UnicodeString::compareCodePointOrder(const UnicodeString& text) const
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3760	{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3761
				3762	inline int8_t
				3763	UnicodeString::compareCodePointOrder(int32_t start,
				3764	int32_t _length,
				3765	const UnicodeString& srcText) const
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3766	{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3767
				3768	inline int8_t
				3769	UnicodeString::compareCodePointOrder(const UChar *srcChars,
				3770	int32_t srcLength) const
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3771	{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3772
				3773	inline int8_t
				3774	UnicodeString::compareCodePointOrder(int32_t start,
				3775	int32_t _length,
				3776	const UnicodeString& srcText,
				3777	int32_t srcStart,
				3778	int32_t srcLength) const
				3779	{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
				3780
				3781	inline int8_t
				3782	UnicodeString::compareCodePointOrder(int32_t start,
				3783	int32_t _length,
				3784	const UChar *srcChars) const
				3785	{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
				3786
				3787	inline int8_t
				3788	UnicodeString::compareCodePointOrder(int32_t start,
				3789	int32_t _length,
				3790	const UChar *srcChars,
				3791	int32_t srcStart,
				3792	int32_t srcLength) const
				3793	{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
				3794
				3795	inline int8_t
				3796	UnicodeString::compareCodePointOrderBetween(int32_t start,
				3797	int32_t limit,
				3798	const UnicodeString& srcText,
				3799	int32_t srcStart,
				3800	int32_t srcLimit) const
				3801	{ return doCompareCodePointOrder(start, limit - start,
				3802	srcText, srcStart, srcLimit - srcStart); }
				3803
				3804	inline int8_t
				3805	UnicodeString::doCaseCompare(int32_t start,
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3806	int32_t thisLength,
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3807	const UnicodeString &srcText,
				3808	int32_t srcStart,
				3809	int32_t srcLength,
				3810	uint32_t options) const
				3811	{
				3812	if(srcText.isBogus()) {
				3813	return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
				3814	} else {
				3815	srcText.pinIndices(srcStart, srcLength);
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3816	return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3817	}
				3818	}
				3819
				3820	inline int8_t
				3821	UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3822	return doCaseCompare(0, length(), text, 0, text.length(), options);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3823	}
				3824
				3825	inline int8_t
				3826	UnicodeString::caseCompare(int32_t start,
				3827	int32_t _length,
				3828	const UnicodeString &srcText,
				3829	uint32_t options) const {
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3830	return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3831	}
				3832
				3833	inline int8_t
				3834	UnicodeString::caseCompare(const UChar *srcChars,
				3835	int32_t srcLength,
				3836	uint32_t options) const {
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3837	return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3838	}
				3839
				3840	inline int8_t
				3841	UnicodeString::caseCompare(int32_t start,
				3842	int32_t _length,
				3843	const UnicodeString &srcText,
				3844	int32_t srcStart,
				3845	int32_t srcLength,
				3846	uint32_t options) const {
				3847	return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
				3848	}
				3849
				3850	inline int8_t
				3851	UnicodeString::caseCompare(int32_t start,
				3852	int32_t _length,
				3853	const UChar *srcChars,
				3854	uint32_t options) const {
				3855	return doCaseCompare(start, _length, srcChars, 0, _length, options);
				3856	}
				3857
				3858	inline int8_t
				3859	UnicodeString::caseCompare(int32_t start,
				3860	int32_t _length,
				3861	const UChar *srcChars,
				3862	int32_t srcStart,
				3863	int32_t srcLength,
				3864	uint32_t options) const {
				3865	return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
				3866	}
				3867
				3868	inline int8_t
				3869	UnicodeString::caseCompareBetween(int32_t start,
				3870	int32_t limit,
				3871	const UnicodeString &srcText,
				3872	int32_t srcStart,
				3873	int32_t srcLimit,
				3874	uint32_t options) const {
				3875	return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
				3876	}
				3877
				3878	inline int32_t
				3879	UnicodeString::indexOf(const UnicodeString& srcText,
				3880	int32_t srcStart,
				3881	int32_t srcLength,
				3882	int32_t start,
				3883	int32_t _length) const
				3884	{
				3885	if(!srcText.isBogus()) {
				3886	srcText.pinIndices(srcStart, srcLength);
				3887	if(srcLength > 0) {
				3888	return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
				3889	}
				3890	}
				3891	return -1;
				3892	}
				3893
				3894	inline int32_t
				3895	UnicodeString::indexOf(const UnicodeString& text) const
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3896	{ return indexOf(text, 0, text.length(), 0, length()); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3897
				3898	inline int32_t
				3899	UnicodeString::indexOf(const UnicodeString& text,
				3900	int32_t start) const {
				3901	pinIndex(start);
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3902	return indexOf(text, 0, text.length(), start, length() - start);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3903	}
				3904
				3905	inline int32_t
				3906	UnicodeString::indexOf(const UnicodeString& text,
				3907	int32_t start,
				3908	int32_t _length) const
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3909	{ return indexOf(text, 0, text.length(), start, _length); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3910
				3911	inline int32_t
				3912	UnicodeString::indexOf(const UChar *srcChars,
				3913	int32_t srcLength,
				3914	int32_t start) const {
				3915	pinIndex(start);
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3916	return indexOf(srcChars, 0, srcLength, start, length() - start);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3917	}
				3918
				3919	inline int32_t
				3920	UnicodeString::indexOf(const UChar *srcChars,
				3921	int32_t srcLength,
				3922	int32_t start,
				3923	int32_t _length) const
				3924	{ return indexOf(srcChars, 0, srcLength, start, _length); }
				3925
				3926	inline int32_t
				3927	UnicodeString::indexOf(UChar c,
				3928	int32_t start,
				3929	int32_t _length) const
				3930	{ return doIndexOf(c, start, _length); }
				3931
				3932	inline int32_t
				3933	UnicodeString::indexOf(UChar32 c,
				3934	int32_t start,
				3935	int32_t _length) const
				3936	{ return doIndexOf(c, start, _length); }
				3937
				3938	inline int32_t
				3939	UnicodeString::indexOf(UChar c) const
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3940	{ return doIndexOf(c, 0, length()); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3941
				3942	inline int32_t
				3943	UnicodeString::indexOf(UChar32 c) const
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3944	{ return indexOf(c, 0, length()); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3945
				3946	inline int32_t
				3947	UnicodeString::indexOf(UChar c,
				3948	int32_t start) const {
				3949	pinIndex(start);
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3950	return doIndexOf(c, start, length() - start);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3951	}
				3952
				3953	inline int32_t
				3954	UnicodeString::indexOf(UChar32 c,
				3955	int32_t start) const {
				3956	pinIndex(start);
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3957	return indexOf(c, start, length() - start);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3958	}
				3959
				3960	inline int32_t
				3961	UnicodeString::lastIndexOf(const UChar *srcChars,
				3962	int32_t srcLength,
				3963	int32_t start,
				3964	int32_t _length) const
				3965	{ return lastIndexOf(srcChars, 0, srcLength, start, _length); }
				3966
				3967	inline int32_t
				3968	UnicodeString::lastIndexOf(const UChar *srcChars,
				3969	int32_t srcLength,
				3970	int32_t start) const {
				3971	pinIndex(start);
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3972	return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3973	}
				3974
				3975	inline int32_t
				3976	UnicodeString::lastIndexOf(const UnicodeString& srcText,
				3977	int32_t srcStart,
				3978	int32_t srcLength,
				3979	int32_t start,
				3980	int32_t _length) const
				3981	{
				3982	if(!srcText.isBogus()) {
				3983	srcText.pinIndices(srcStart, srcLength);
				3984	if(srcLength > 0) {
				3985	return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
				3986	}
				3987	}
				3988	return -1;
				3989	}
				3990
				3991	inline int32_t
				3992	UnicodeString::lastIndexOf(const UnicodeString& text,
				3993	int32_t start,
				3994	int32_t _length) const
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	3995	{ return lastIndexOf(text, 0, text.length(), start, _length); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	3996
				3997	inline int32_t
				3998	UnicodeString::lastIndexOf(const UnicodeString& text,
				3999	int32_t start) const {
				4000	pinIndex(start);
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4001	return lastIndexOf(text, 0, text.length(), start, length() - start);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4002	}
				4003
				4004	inline int32_t
				4005	UnicodeString::lastIndexOf(const UnicodeString& text) const
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4006	{ return lastIndexOf(text, 0, text.length(), 0, length()); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4007
				4008	inline int32_t
				4009	UnicodeString::lastIndexOf(UChar c,
				4010	int32_t start,
				4011	int32_t _length) const
				4012	{ return doLastIndexOf(c, start, _length); }
				4013
				4014	inline int32_t
				4015	UnicodeString::lastIndexOf(UChar32 c,
				4016	int32_t start,
				4017	int32_t _length) const {
				4018	return doLastIndexOf(c, start, _length);
				4019	}
				4020
				4021	inline int32_t
				4022	UnicodeString::lastIndexOf(UChar c) const
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4023	{ return doLastIndexOf(c, 0, length()); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4024
				4025	inline int32_t
				4026	UnicodeString::lastIndexOf(UChar32 c) const {
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4027	return lastIndexOf(c, 0, length());
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4028	}
				4029
				4030	inline int32_t
				4031	UnicodeString::lastIndexOf(UChar c,
				4032	int32_t start) const {
				4033	pinIndex(start);
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4034	return doLastIndexOf(c, start, length() - start);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4035	}
				4036
				4037	inline int32_t
				4038	UnicodeString::lastIndexOf(UChar32 c,
				4039	int32_t start) const {
				4040	pinIndex(start);
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4041	return lastIndexOf(c, start, length() - start);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4042	}
				4043
				4044	inline UBool
				4045	UnicodeString::startsWith(const UnicodeString& text) const
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4046	{ return compare(0, text.length(), text, 0, text.length()) == 0; }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4047
				4048	inline UBool
				4049	UnicodeString::startsWith(const UnicodeString& srcText,
				4050	int32_t srcStart,
				4051	int32_t srcLength) const
				4052	{ return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
				4053
				4054	inline UBool
claireho	b26ce3a	2012-01-10 17:54:41 -0800	[diff] [blame]	4055	UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
				4056	if(srcLength < 0) {
				4057	srcLength = u_strlen(srcChars);
				4058	}
				4059	return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
				4060	}
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4061
				4062	inline UBool
claireho	b26ce3a	2012-01-10 17:54:41 -0800	[diff] [blame]	4063	UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
				4064	if(srcLength < 0) {
				4065	srcLength = u_strlen(srcChars);
				4066	}
				4067	return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
				4068	}
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4069
				4070	inline UBool
				4071	UnicodeString::endsWith(const UnicodeString& text) const
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4072	{ return doCompare(length() - text.length(), text.length(),
				4073	text, 0, text.length()) == 0; }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4074
				4075	inline UBool
				4076	UnicodeString::endsWith(const UnicodeString& srcText,
				4077	int32_t srcStart,
				4078	int32_t srcLength) const {
				4079	srcText.pinIndices(srcStart, srcLength);
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4080	return doCompare(length() - srcLength, srcLength,
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4081	srcText, srcStart, srcLength) == 0;
				4082	}
				4083
				4084	inline UBool
				4085	UnicodeString::endsWith(const UChar *srcChars,
				4086	int32_t srcLength) const {
				4087	if(srcLength < 0) {
				4088	srcLength = u_strlen(srcChars);
				4089	}
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4090	return doCompare(length() - srcLength, srcLength,
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4091	srcChars, 0, srcLength) == 0;
				4092	}
				4093
				4094	inline UBool
				4095	UnicodeString::endsWith(const UChar *srcChars,
				4096	int32_t srcStart,
				4097	int32_t srcLength) const {
				4098	if(srcLength < 0) {
				4099	srcLength = u_strlen(srcChars + srcStart);
				4100	}
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4101	return doCompare(length() - srcLength, srcLength,
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4102	srcChars, srcStart, srcLength) == 0;
				4103	}
				4104
				4105	//========================================
				4106	// replace
				4107	//========================================
				4108	inline UnicodeString&
				4109	UnicodeString::replace(int32_t start,
				4110	int32_t _length,
				4111	const UnicodeString& srcText)
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4112	{ return doReplace(start, _length, srcText, 0, srcText.length()); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4113
				4114	inline UnicodeString&
				4115	UnicodeString::replace(int32_t start,
				4116	int32_t _length,
				4117	const UnicodeString& srcText,
				4118	int32_t srcStart,
				4119	int32_t srcLength)
				4120	{ return doReplace(start, _length, srcText, srcStart, srcLength); }
				4121
				4122	inline UnicodeString&
				4123	UnicodeString::replace(int32_t start,
				4124	int32_t _length,
				4125	const UChar *srcChars,
				4126	int32_t srcLength)
				4127	{ return doReplace(start, _length, srcChars, 0, srcLength); }
				4128
				4129	inline UnicodeString&
				4130	UnicodeString::replace(int32_t start,
				4131	int32_t _length,
				4132	const UChar *srcChars,
				4133	int32_t srcStart,
				4134	int32_t srcLength)
				4135	{ return doReplace(start, _length, srcChars, srcStart, srcLength); }
				4136
				4137	inline UnicodeString&
				4138	UnicodeString::replace(int32_t start,
				4139	int32_t _length,
				4140	UChar srcChar)
				4141	{ return doReplace(start, _length, &srcChar, 0, 1); }
				4142
				4143	inline UnicodeString&
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4144	UnicodeString::replaceBetween(int32_t start,
				4145	int32_t limit,
				4146	const UnicodeString& srcText)
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4147	{ return doReplace(start, limit - start, srcText, 0, srcText.length()); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4148
				4149	inline UnicodeString&
				4150	UnicodeString::replaceBetween(int32_t start,
				4151	int32_t limit,
				4152	const UnicodeString& srcText,
				4153	int32_t srcStart,
				4154	int32_t srcLimit)
				4155	{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
				4156
				4157	inline UnicodeString&
				4158	UnicodeString::findAndReplace(const UnicodeString& oldText,
				4159	const UnicodeString& newText)
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4160	{ return findAndReplace(0, length(), oldText, 0, oldText.length(),
				4161	newText, 0, newText.length()); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4162
				4163	inline UnicodeString&
				4164	UnicodeString::findAndReplace(int32_t start,
				4165	int32_t _length,
				4166	const UnicodeString& oldText,
				4167	const UnicodeString& newText)
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4168	{ return findAndReplace(start, _length, oldText, 0, oldText.length(),
				4169	newText, 0, newText.length()); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4170
				4171	// ============================
				4172	// extract
				4173	// ============================
				4174	inline void
				4175	UnicodeString::doExtract(int32_t start,
				4176	int32_t _length,
				4177	UnicodeString& target) const
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4178	{ target.replace(0, target.length(), *this, start, _length); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4179
				4180	inline void
				4181	UnicodeString::extract(int32_t start,
				4182	int32_t _length,
				4183	UChar *target,
				4184	int32_t targetStart) const
				4185	{ doExtract(start, _length, target, targetStart); }
				4186
				4187	inline void
				4188	UnicodeString::extract(int32_t start,
				4189	int32_t _length,
				4190	UnicodeString& target) const
				4191	{ doExtract(start, _length, target); }
				4192
				4193	#if !UCONFIG_NO_CONVERSION
				4194
				4195	inline int32_t
				4196	UnicodeString::extract(int32_t start,
				4197	int32_t _length,
				4198	char *dst,
				4199	const char *codepage) const
				4200
				4201	{
				4202	// This dstSize value will be checked explicitly
				4203	return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
				4204	}
				4205
				4206	#endif
				4207
				4208	inline void
				4209	UnicodeString::extractBetween(int32_t start,
				4210	int32_t limit,
				4211	UChar *dst,
				4212	int32_t dstStart) const {
				4213	pinIndex(start);
				4214	pinIndex(limit);
				4215	doExtract(start, limit - start, dst, dstStart);
				4216	}
				4217
claireho	50294ea	2010-05-03 15:44:48 -0700	[diff] [blame]	4218	inline UnicodeString
				4219	UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
				4220	return tempSubString(start, limit - start);
				4221	}
				4222
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4223	inline UChar
				4224	UnicodeString::doCharAt(int32_t offset) const
				4225	{
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4226	if((uint32_t)offset < (uint32_t)length()) {
				4227	return getArrayStart()[offset];
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4228	} else {
				4229	return kInvalidUChar;
				4230	}
				4231	}
				4232
				4233	inline UChar
				4234	UnicodeString::charAt(int32_t offset) const
				4235	{ return doCharAt(offset); }
				4236
				4237	inline UChar
				4238	UnicodeString::operator[] (int32_t offset) const
				4239	{ return doCharAt(offset); }
				4240
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4241	inline UBool
				4242	UnicodeString::isEmpty() const {
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4243	return fShortLength == 0;
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4244	}
				4245
				4246	//========================================
				4247	// Write implementation methods
				4248	//========================================
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4249	inline void
				4250	UnicodeString::setLength(int32_t len) {
				4251	if(len <= 127) {
				4252	fShortLength = (int8_t)len;
				4253	} else {
				4254	fShortLength = (int8_t)-1;
				4255	fUnion.fFields.fLength = len;
				4256	}
				4257	}
				4258
				4259	inline void
				4260	UnicodeString::setToEmpty() {
				4261	fShortLength = 0;
				4262	fFlags = kShortString;
				4263	}
				4264
				4265	inline void
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4266	UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
				4267	setLength(len);
				4268	fUnion.fFields.fArray = array;
				4269	fUnion.fFields.fCapacity = capacity;
				4270	}
				4271
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4272	inline const UChar *
				4273	UnicodeString::getTerminatedBuffer() {
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4274	if(!isWritable()) {
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4275	return 0;
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4276	} else {
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4277	UChar *array = getArrayStart();
				4278	int32_t len = length();
claireho	50294ea	2010-05-03 15:44:48 -0700	[diff] [blame]	4279	if(len < getCapacity() && ((fFlags&kRefCounted) == 0 \|\| refCount() == 1)) {
				4280	/*
				4281	* kRefCounted: Do not write the NUL if the buffer is shared.
				4282	* That is mostly safe, except when the length of one copy was modified
				4283	* without copy-on-write, e.g., via truncate(newLength) or remove(void).
				4284	* Then the NUL would be written into the middle of another copy's string.
				4285	*/
				4286	if(!(fFlags&kBufferIsReadonly)) {
				4287	/*
				4288	* We must not write to a readonly buffer, but it is known to be
				4289	* NUL-terminated if len<capacity.
				4290	* A shared, allocated buffer (refCount()>1) must not have its contents
				4291	* modified, but the NUL at [len] is beyond the string contents,
				4292	* and multiple string objects and threads writing the same NUL into the
				4293	* same location is harmless.
				4294	* In all other cases, the buffer is fully writable and it is anyway safe
				4295	* to write the NUL.
				4296	*
				4297	* Note: An earlier version of this code tested whether there is a NUL
				4298	* at [len] already, but, while safe, it generated lots of warnings from
				4299	* tools like valgrind and Purify.
				4300	*/
				4301	array[len] = 0;
				4302	}
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4303	return array;
claireho	50294ea	2010-05-03 15:44:48 -0700	[diff] [blame]	4304	} else if(cloneArrayIfNeeded(len+1)) {
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4305	array = getArrayStart();
				4306	array[len] = 0;
				4307	return array;
				4308	} else {
				4309	return 0;
				4310	}
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4311	}
				4312	}
				4313
				4314	inline UnicodeString&
				4315	UnicodeString::operator= (UChar ch)
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4316	{ return doReplace(0, length(), &ch, 0, 1); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4317
				4318	inline UnicodeString&
				4319	UnicodeString::operator= (UChar32 ch)
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4320	{ return replace(0, length(), ch); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4321
				4322	inline UnicodeString&
				4323	UnicodeString::setTo(const UnicodeString& srcText,
				4324	int32_t srcStart,
				4325	int32_t srcLength)
				4326	{
				4327	unBogus();
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4328	return doReplace(0, length(), srcText, srcStart, srcLength);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4329	}
				4330
				4331	inline UnicodeString&
				4332	UnicodeString::setTo(const UnicodeString& srcText,
				4333	int32_t srcStart)
				4334	{
				4335	unBogus();
				4336	srcText.pinIndex(srcStart);
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4337	return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4338	}
				4339
				4340	inline UnicodeString&
				4341	UnicodeString::setTo(const UnicodeString& srcText)
				4342	{
claireho	b26ce3a	2012-01-10 17:54:41 -0800	[diff] [blame]	4343	return copyFrom(srcText);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4344	}
				4345
				4346	inline UnicodeString&
				4347	UnicodeString::setTo(const UChar *srcChars,
				4348	int32_t srcLength)
				4349	{
				4350	unBogus();
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4351	return doReplace(0, length(), srcChars, 0, srcLength);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4352	}
				4353
				4354	inline UnicodeString&
				4355	UnicodeString::setTo(UChar srcChar)
				4356	{
				4357	unBogus();
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4358	return doReplace(0, length(), &srcChar, 0, 1);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4359	}
				4360
				4361	inline UnicodeString&
				4362	UnicodeString::setTo(UChar32 srcChar)
				4363	{
				4364	unBogus();
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4365	return replace(0, length(), srcChar);
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4366	}
				4367
				4368	inline UnicodeString&
				4369	UnicodeString::append(const UnicodeString& srcText,
				4370	int32_t srcStart,
				4371	int32_t srcLength)
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4372	{ return doReplace(length(), 0, srcText, srcStart, srcLength); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4373
				4374	inline UnicodeString&
				4375	UnicodeString::append(const UnicodeString& srcText)
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4376	{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4377
				4378	inline UnicodeString&
				4379	UnicodeString::append(const UChar *srcChars,
				4380	int32_t srcStart,
				4381	int32_t srcLength)
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4382	{ return doReplace(length(), 0, srcChars, srcStart, srcLength); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4383
				4384	inline UnicodeString&
				4385	UnicodeString::append(const UChar *srcChars,
				4386	int32_t srcLength)
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4387	{ return doReplace(length(), 0, srcChars, 0, srcLength); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4388
				4389	inline UnicodeString&
				4390	UnicodeString::append(UChar srcChar)
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4391	{ return doReplace(length(), 0, &srcChar, 0, 1); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4392
				4393	inline UnicodeString&
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4394	UnicodeString::operator+= (UChar ch)
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4395	{ return doReplace(length(), 0, &ch, 0, 1); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4396
				4397	inline UnicodeString&
				4398	UnicodeString::operator+= (UChar32 ch) {
				4399	return append(ch);
				4400	}
				4401
				4402	inline UnicodeString&
				4403	UnicodeString::operator+= (const UnicodeString& srcText)
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4404	{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4405
				4406	inline UnicodeString&
				4407	UnicodeString::insert(int32_t start,
				4408	const UnicodeString& srcText,
				4409	int32_t srcStart,
				4410	int32_t srcLength)
				4411	{ return doReplace(start, 0, srcText, srcStart, srcLength); }
				4412
				4413	inline UnicodeString&
				4414	UnicodeString::insert(int32_t start,
				4415	const UnicodeString& srcText)
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4416	{ return doReplace(start, 0, srcText, 0, srcText.length()); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4417
				4418	inline UnicodeString&
				4419	UnicodeString::insert(int32_t start,
				4420	const UChar *srcChars,
				4421	int32_t srcStart,
				4422	int32_t srcLength)
				4423	{ return doReplace(start, 0, srcChars, srcStart, srcLength); }
				4424
				4425	inline UnicodeString&
				4426	UnicodeString::insert(int32_t start,
				4427	const UChar *srcChars,
				4428	int32_t srcLength)
				4429	{ return doReplace(start, 0, srcChars, 0, srcLength); }
				4430
				4431	inline UnicodeString&
				4432	UnicodeString::insert(int32_t start,
				4433	UChar srcChar)
				4434	{ return doReplace(start, 0, &srcChar, 0, 1); }
				4435
				4436	inline UnicodeString&
				4437	UnicodeString::insert(int32_t start,
				4438	UChar32 srcChar)
				4439	{ return replace(start, 0, srcChar); }
				4440
				4441
				4442	inline UnicodeString&
				4443	UnicodeString::remove()
				4444	{
				4445	// remove() of a bogus string makes the string empty and non-bogus
claireho	50294ea	2010-05-03 15:44:48 -0700	[diff] [blame]	4446	// we also un-alias a read-only alias to deal with NUL-termination
				4447	// issues with getTerminatedBuffer()
				4448	if(fFlags & (kIsBogus\|kBufferIsReadonly)) {
				4449	setToEmpty();
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4450	} else {
claireho	50294ea	2010-05-03 15:44:48 -0700	[diff] [blame]	4451	fShortLength = 0;
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4452	}
				4453	return *this;
				4454	}
				4455
				4456	inline UnicodeString&
				4457	UnicodeString::remove(int32_t start,
				4458	int32_t _length)
				4459	{
				4460	if(start <= 0 && _length == INT32_MAX) {
				4461	// remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
				4462	return remove();
				4463	}
				4464	return doReplace(start, _length, NULL, 0, 0);
				4465	}
				4466
				4467	inline UnicodeString&
				4468	UnicodeString::removeBetween(int32_t start,
				4469	int32_t limit)
				4470	{ return doReplace(start, limit - start, NULL, 0, 0); }
				4471
claireho	50294ea	2010-05-03 15:44:48 -0700	[diff] [blame]	4472	inline UnicodeString &
				4473	UnicodeString::retainBetween(int32_t start, int32_t limit) {
				4474	truncate(limit);
				4475	return doReplace(0, start, NULL, 0, 0);
				4476	}
				4477
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4478	inline UBool
				4479	UnicodeString::truncate(int32_t targetLength)
				4480	{
				4481	if(isBogus() && targetLength == 0) {
				4482	// truncate(0) of a bogus string makes the string empty and non-bogus
				4483	unBogus();
				4484	return FALSE;
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4485	} else if((uint32_t)targetLength < (uint32_t)length()) {
				4486	setLength(targetLength);
claireho	50294ea	2010-05-03 15:44:48 -0700	[diff] [blame]	4487	if(fFlags&kBufferIsReadonly) {
				4488	fUnion.fFields.fCapacity = targetLength; // not NUL-terminated any more
				4489	}
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4490	return TRUE;
				4491	} else {
				4492	return FALSE;
				4493	}
				4494	}
				4495
				4496	inline UnicodeString&
				4497	UnicodeString::reverse()
Jean-Baptiste Queru	c69afce	2009-07-20 15:02:39 -0700	[diff] [blame]	4498	{ return doReverse(0, length()); }
Jean-Baptiste Queru	b13da9d	2009-07-17 17:53:22 -0700	[diff] [blame]	4499
				4500	inline UnicodeString&
				4501	UnicodeString::reverse(int32_t start,
				4502	int32_t _length)
				4503	{ return doReverse(start, _length); }
				4504
				4505	U_NAMESPACE_END
				4506
				4507	#endif