Blame - libicu/cts_headers/util.h - platform/external/icu

blob: 9c3b76d9ed5f0de486e50b69b39c7b2bc95f0150 [file] [log] [blame]

Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	1	// © 2016 and later: Unicode, Inc. and others.
				2	// License & terms of use: http://www.unicode.org/copyright.html
				3	/*
				4	**********************************************************************
				5	* Copyright (c) 2001-2011, International Business Machines
				6	* Corporation and others. All Rights Reserved.
				7	**********************************************************************
				8	* Date Name Description
				9	* 11/19/2001 aliu Creation.
				10	**********************************************************************
				11	*/
				12
				13	#ifndef ICU_UTIL_H
				14	#define ICU_UTIL_H
				15
				16	#include "unicode/utypes.h"
				17	#include "unicode/uobject.h"
				18	#include "unicode/unistr.h"
				19
				20	//--------------------------------------------------------------------
				21	// class ICU_Utility
				22	// i18n utility functions, scoped into the class ICU_Utility.
				23	//--------------------------------------------------------------------
				24
				25	U_NAMESPACE_BEGIN
				26
				27	class UnicodeMatcher;
				28
				29	class U_COMMON_API ICU_Utility /* not : public UObject because all methods are static */ {
				30	public:
				31
				32	/**
				33	* Append a number to the given UnicodeString in the given radix.
				34	* Standard digits '0'-'9' are used and letters 'A'-'Z' for
				35	* radices 11 through 36.
				36	* @param result the digits of the number are appended here
				37	* @param n the number to be converted to digits; may be negative.
				38	* If negative, a '-' is prepended to the digits.
				39	* @param radix a radix from 2 to 36 inclusive.
				40	* @param minDigits the minimum number of digits, not including
				41	* any '-', to produce. Values less than 2 have no effect. One
				42	* digit is always emitted regardless of this parameter.
				43	* @return a reference to result
				44	*/
				45	static UnicodeString& appendNumber(UnicodeString& result, int32_t n,
				46	int32_t radix = 10,
				47	int32_t minDigits = 1);
				48
				49	/** Returns a bogus UnicodeString by value. */
				50	static inline UnicodeString makeBogusString() {
				51	UnicodeString result;
				52	result.setToBogus();
				53	return result;
				54	}
				55
				56	/**
				57	* Return true if the character is NOT printable ASCII.
				58	*
				59	* This method should really be in UnicodeString (or similar). For
				60	* now, we implement it here and share it with friend classes.
				61	*/
				62	static UBool isUnprintable(UChar32 c);
				63
				64	/**
				65	* Escape unprintable characters using \uxxxx notation for U+0000 to
				66	* U+FFFF and \Uxxxxxxxx for U+10000 and above. If the character is
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	67	* printable ASCII, then do nothing and return false. Otherwise,
				68	* append the escaped notation and return true.
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	69	*/
				70	static UBool escapeUnprintable(UnicodeString& result, UChar32 c);
				71
				72	/**
				73	* Returns the index of a character, ignoring quoted text.
				74	* For example, in the string "abc'hide'h", the 'h' in "hide" will not be
				75	* found by a search for 'h'.
				76	* @param text text to be searched
				77	* @param start the beginning index, inclusive; <code>0 <= start
				78	* <= limit</code>.
				79	* @param limit the ending index, exclusive; <code>start <= limit
				80	* <= text.length()</code>.
				81	* @param c character to search for
				82	* @return Offset of the first instance of c, or -1 if not found.
				83	*/
				84	//?FOR FUTURE USE. DISABLED FOR NOW for coverage reasons.
				85	// static int32_t quotedIndexOf(const UnicodeString& text,
				86	// int32_t start, int32_t limit,
				87	// UChar c);
				88
				89	/**
				90	* Skip over a sequence of zero or more white space characters at pos.
				91	* @param advance if true, advance pos to the first non-white-space
				92	* character at or after pos, or str.length(), if there is none.
				93	* Otherwise leave pos unchanged.
				94	* @return the index of the first non-white-space character at or
				95	* after pos, or str.length(), if there is none.
				96	*/
				97	static int32_t skipWhitespace(const UnicodeString& str, int32_t& pos,
Victor Chang	ce4bf3c	2021-01-19 16:34:24 +0000	[diff] [blame]	98	UBool advance = false);
Victor Chang	7322950	2020-09-17 13:39:19 +0100	[diff] [blame]	99
				100	/**
				101	* Skip over Pattern_White_Space in a Replaceable.
				102	* Skipping may be done in the forward or
				103	* reverse direction. In either case, the leftmost index will be
				104	* inclusive, and the rightmost index will be exclusive. That is,
				105	* given a range defined as [start, limit), the call
				106	* skipWhitespace(text, start, limit) will advance start past leading
				107	* whitespace, whereas the call skipWhitespace(text, limit, start),
				108	* will back up limit past trailing whitespace.
				109	* @param text the text to be analyzed
				110	* @param pos either the start or limit of a range of 'text', to skip
				111	* leading or trailing whitespace, respectively
				112	* @param stop either the limit or start of a range of 'text', to skip
				113	* leading or trailing whitespace, respectively
				114	* @return the new start or limit, depending on what was passed in to
				115	* 'pos'
				116	*/
				117	//?FOR FUTURE USE. DISABLED FOR NOW for coverage reasons.
				118	//? static int32_t skipWhitespace(const Replaceable& text,
				119	//? int32_t pos, int32_t stop);
				120
				121	/**
				122	* Parse a single non-whitespace character 'ch', optionally
				123	* preceded by whitespace.
				124	* @param id the string to be parsed
				125	* @param pos INPUT-OUTPUT parameter. On input, pos is the
				126	* offset of the first character to be parsed. On output, pos
				127	* is the index after the last parsed character. If the parse
				128	* fails, pos will be unchanged.
				129	* @param ch the non-whitespace character to be parsed.
				130	* @return true if 'ch' is seen preceded by zero or more
				131	* whitespace characters.
				132	*/
				133	static UBool parseChar(const UnicodeString& id, int32_t& pos, UChar ch);
				134
				135	/**
				136	* Parse a pattern string starting at offset pos. Keywords are
				137	* matched case-insensitively. Spaces may be skipped and may be
				138	* optional or required. Integer values may be parsed, and if
				139	* they are, they will be returned in the given array. If
				140	* successful, the offset of the next non-space character is
				141	* returned. On failure, -1 is returned.
				142	* @param pattern must only contain lowercase characters, which
				143	* will match their uppercase equivalents as well. A space
				144	* character matches one or more required spaces. A '~' character
				145	* matches zero or more optional spaces. A '#' character matches
				146	* an integer and stores it in parsedInts, which the caller must
				147	* ensure has enough capacity.
				148	* @param parsedInts array to receive parsed integers. Caller
				149	* must ensure that the array has room for at least as many
				150	* elements as there are '#' signs in 'pattern'.
				151	* @return the position after the last character parsed, or -1 if
				152	* the parse failed
				153	*/
				154	static int32_t parsePattern(const UnicodeString& rule, int32_t pos, int32_t limit,
				155	const UnicodeString& pattern, int32_t* parsedInts);
				156
				157	/**
				158	* Parse a pattern string within the given Replaceable and a parsing
				159	* pattern. Characters are matched literally and case-sensitively
				160	* except for the following special characters:
				161	*
				162	* ~ zero or more Pattern_White_Space chars
				163	*
				164	* If end of pattern is reached with all matches along the way,
				165	* pos is advanced to the first unparsed index and returned.
				166	* Otherwise -1 is returned.
				167	* @param pat pattern that controls parsing
				168	* @param text text to be parsed, starting at index
				169	* @param index offset to first character to parse
				170	* @param limit offset after last character to parse
				171	* @return index after last parsed character, or -1 on parse failure.
				172	*/
				173	static int32_t parsePattern(const UnicodeString& pat,
				174	const Replaceable& text,
				175	int32_t index,
				176	int32_t limit);
				177
				178	/**
				179	* Parse an integer at pos, either of the form \d+ or of the form
				180	* 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,
				181	* or octal format.
				182	* @param pos INPUT-OUTPUT parameter. On input, the index of the first
				183	* character to parse. On output, the index of the character after the
				184	* last parsed character.
				185	*/
				186	static int32_t parseInteger(const UnicodeString& rule, int32_t& pos, int32_t limit);
				187
				188	/**
				189	* Parse an integer at pos using only ASCII digits.
				190	* Base 10 only.
				191	* @param pos INPUT-OUTPUT parameter. On input, the index of the first
				192	* character to parse. On output, the index of the character after the
				193	* last parsed character.
				194	*/
				195	static int32_t parseAsciiInteger(const UnicodeString& str, int32_t& pos);
				196
				197	/**
				198	* Parse a Unicode identifier from the given string at the given
				199	* position. Return the identifier, or an empty string if there
				200	* is no identifier.
				201	* @param str the string to parse
				202	* @param pos INPUT-OUTPUT parameter. On INPUT, pos is the
				203	* first character to examine. It must be less than str.length(),
				204	* and it must not point to a whitespace character. That is, must
				205	* have pos < str.length() and
				206	* !UCharacter::isWhitespace(str.char32At(pos)). On
				207	* OUTPUT, the position after the last parsed character.
				208	* @return the Unicode identifier, or an empty string if there is
				209	* no valid identifier at pos.
				210	*/
				211	static UnicodeString parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos);
				212
				213	/**
				214	* Parse an unsigned 31-bit integer at the given offset. Use
				215	* UCharacter.digit() to parse individual characters into digits.
				216	* @param text the text to be parsed
				217	* @param pos INPUT-OUTPUT parameter. On entry, pos is the
				218	* offset within text at which to start parsing; it should point
				219	* to a valid digit. On exit, pos is the offset after the last
				220	* parsed character. If the parse failed, it will be unchanged on
				221	* exit. Must be >= 0 on entry.
				222	* @param radix the radix in which to parse; must be >= 2 and <=
				223	* 36.
				224	* @return a non-negative parsed number, or -1 upon parse failure.
				225	* Parse fails if there are no digits, that is, if pos does not
				226	* point to a valid digit on entry, or if the number to be parsed
				227	* does not fit into a 31-bit unsigned integer.
				228	*/
				229	static int32_t parseNumber(const UnicodeString& text,
				230	int32_t& pos, int8_t radix);
				231
				232	static void appendToRule(UnicodeString& rule, // NOTE(review): the appendToRule overloads carry no doc comments in this header.
				233	UChar32 c,
				234	UBool isLiteral,
				235	UBool escapeUnprintable,
				236	UnicodeString& quoteBuf);
				237
				238	static void appendToRule(UnicodeString& rule,
				239	const UnicodeString& text,
				240	UBool isLiteral,
				241	UBool escapeUnprintable,
				242	UnicodeString& quoteBuf);
				243
				244	static void appendToRule(UnicodeString& rule,
				245	const UnicodeMatcher* matcher,
				246	UBool escapeUnprintable,
				247	UnicodeString& quoteBuf);
				248
				249	private:
				250	// Private constructor: all methods are static, so do not instantiate.
				251	ICU_Utility();
				252	};
				253
				254	U_NAMESPACE_END
				255
				256	#endif
				257	//eof