Blame - libandroidicu/include/unicode/ubrk.h - platform/external/icu

blob: 3077c50865bef287f160b1b699b7c6b92caa594e [file] [log] [blame]

Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	1	// © 2016 and later: Unicode, Inc. and others.
				2	// License & terms of use: http://www.unicode.org/copyright.html
				3	/*
				4	******************************************************************************
				5	* Copyright (C) 1996-2015, International Business Machines Corporation and others.
				6	* All Rights Reserved.
				7	******************************************************************************
				8	*/
				9
				10	#ifndef UBRK_H
				11	#define UBRK_H
				12
				13	#include "unicode/utypes.h"
				14	#include "unicode/uloc.h"
				15	#include "unicode/utext.h"
Victor Chang	eaf32ab	2020-08-20 15:50:49 +0100	[diff] [blame]	16
				17	#if U_SHOW_CPLUSPLUS_API
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	18	#include "unicode/localpointer.h"
Victor Chang	eaf32ab	2020-08-20 15:50:49 +0100	[diff] [blame]	19	#endif // U_SHOW_CPLUSPLUS_API
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	20
				21	/**
				22	* A text-break iterator.
				23	* For usage in C programs.
				24	*/
				25	#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
				26	# define UBRK_TYPEDEF_UBREAK_ITERATOR
				27	/**
				28	* Opaque type representing an ICU Break iterator object.
				29	* @stable ICU 2.0
				30	*/
				31	typedef struct UBreakIterator UBreakIterator;
				32	#endif
				33
				34	#if !UCONFIG_NO_BREAK_ITERATION
				35
				36	#include "unicode/parseerr.h"
				37
				38	/**
				39	* \file
				40	* \brief C API: BreakIterator
				41	*
				42	* <h2> BreakIterator C API </h2>
				43	*
				44	* The BreakIterator C API defines methods for finding the location
				45	* of boundaries in text. Pointers to a UBreakIterator maintain a
				46	* current position and scan over text returning the index of characters
				47	* where boundaries occur.
				48	* <p>
				49	* Line boundary analysis determines where a text string can be broken
				50	* when line-wrapping. The mechanism correctly handles punctuation and
				51	* hyphenated words.
				52	* <p>
				53	* Note: The locale keyword "lb" can be used to modify line break
				54	* behavior according to the CSS level 3 line-break options, see
				55	* <http://dev.w3.org/csswg/css-text/#line-breaking>. For example:
				56	* "ja@lb=strict", "zh@lb=loose".
				57	* <p>
				58	* Sentence boundary analysis allows selection with correct
				59	* interpretation of periods within numbers and abbreviations, and
				60	* trailing punctuation marks such as quotation marks and parentheses.
				61	* <p>
				62	* Note: The locale keyword "ss" can be used to enable use of
				63	* segmentation suppression data (preventing breaks in English after
				64	* abbreviations such as "Mr." or "Est.", for example), as follows:
				65	* "en@ss=standard".
				66	* <p>
				67	* Word boundary analysis is used by search and replace functions, as
				68	* well as within text editing applications that allow the user to
				69	* select words with a double click. Word selection provides correct
				70	* interpretation of punctuation marks within and following
				71	* words. Characters that are not part of a word, such as symbols or
				72	* punctuation marks, have word-breaks on both sides.
				73	* <p>
				74	* Character boundary analysis identifies the boundaries of
				75	* "Extended Grapheme Clusters", which are groupings of codepoints
				76	* that should be treated as character-like units for many text operations.
				77	* Please see Unicode Standard Annex #29, Unicode Text Segmentation,
				78	* http://www.unicode.org/reports/tr29/ for additional information
				79	* on grapheme clusters and guidelines on their use.
				80	* <p>
				81	* Title boundary analysis locates all positions,
				82	* typically starts of words, that should be set to Title Case
				83	* when title casing the text.
				84	* <p>
				85	* The text boundary positions are found according to the rules
				86	* described in Unicode Standard Annex #29, Text Boundaries, and
				87	* Unicode Standard Annex #14, Line Breaking Properties. These
				88	* are available at http://www.unicode.org/reports/tr14/ and
				89	* http://www.unicode.org/reports/tr29/.
				90	* <p>
				91	* In addition to the plain C API defined in this header file, an
				92	* object oriented C++ API with equivalent functionality is defined in the
				93	* file brkiter.h.
				94	* <p>
				95	* Code snippets illustrating the use of the Break Iterator APIs
				96	* are available in the ICU User Guide,
				97	* http://icu-project.org/userguide/boundaryAnalysis.html
				98	* and in the sample program icu/source/samples/break/break.cpp
				99	*/
				100
				101	/** The possible types of text boundaries. @stable ICU 2.0 */
				102	typedef enum UBreakIteratorType {
				103	/** Character breaks @stable ICU 2.0 */
				104	UBRK_CHARACTER = 0,
				105	/** Word breaks @stable ICU 2.0 */
				106	UBRK_WORD = 1,
				107	/** Line breaks @stable ICU 2.0 */
				108	UBRK_LINE = 2,
				109	/** Sentence breaks @stable ICU 2.0 */
				110	UBRK_SENTENCE = 3,
				111
				112	#ifndef U_HIDE_DEPRECATED_API
				113	/**
				114	* Title Case breaks
				115	* The iterator created using this type locates title boundaries as described for
				116	* Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
				117	* please use Word Boundary iterator.
				118	*
				119	* @deprecated ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later.
				120	*/
				121	UBRK_TITLE = 4,
				122	/**
				123	* One more than the highest normal UBreakIteratorType value.
				124	* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
				125	*/
				126	UBRK_COUNT = 5
				127	#endif // U_HIDE_DEPRECATED_API
				128	} UBreakIteratorType;
				129
				130	/** Value indicating all text boundaries have been returned.
				131	* @stable ICU 2.0
				132	*/
				133	#define UBRK_DONE ((int32_t) -1)
				134
				135
				136	/**
				137	* Enum constants for the word break tags returned by
				138	* getRuleStatus(). A range of values is defined for each category of
				139	* word, to allow for further subdivisions of a category in future releases.
				140	* Applications should check for tag values falling within the range, rather
				141	* than for single individual values.
				142	*
				143	* The numeric values of all of these constants are stable (will not change).
				144	*
				145	* @stable ICU 2.2
				146	*/
				147	typedef enum UWordBreak {
				148	/** Tag value for "words" that do not fit into any of other categories.
				149	* Includes spaces and most punctuation. */
				150	UBRK_WORD_NONE = 0,
				151	/** Upper bound for tags for uncategorized words. */
				152	UBRK_WORD_NONE_LIMIT = 100,
				153	/** Tag value for words that appear to be numbers, lower limit. */
				154	UBRK_WORD_NUMBER = 100,
				155	/** Tag value for words that appear to be numbers, upper limit. */
				156	UBRK_WORD_NUMBER_LIMIT = 200,
				157	/** Tag value for words that contain letters, excluding
				158	* hiragana, katakana or ideographic characters, lower limit. */
				159	UBRK_WORD_LETTER = 200,
				160	/** Tag value for words containing letters, upper limit */
				161	UBRK_WORD_LETTER_LIMIT = 300,
				162	/** Tag value for words containing kana characters, lower limit */
				163	UBRK_WORD_KANA = 300,
				164	/** Tag value for words containing kana characters, upper limit */
				165	UBRK_WORD_KANA_LIMIT = 400,
				166	/** Tag value for words containing ideographic characters, lower limit */
				167	UBRK_WORD_IDEO = 400,
				168	/** Tag value for words containing ideographic characters, upper limit */
				169	UBRK_WORD_IDEO_LIMIT = 500
				170	} UWordBreak;
				171
				172	/**
				173	* Enum constants for the line break tags returned by getRuleStatus().
				174	* A range of values is defined for each category of
				175	* line break, to allow for further subdivisions of a category in future releases.
				176	* Applications should check for tag values falling within the range, rather
				177	* than for single individual values.
				178	*
				179	* The numeric values of all of these constants are stable (will not change).
				180	*
				181	* @stable ICU 2.8
				182	*/
				183	typedef enum ULineBreakTag {
				184	/** Tag value for soft line breaks, positions at which a line break
				185	* is acceptable but not required */
				186	UBRK_LINE_SOFT = 0,
				187	/** Upper bound for soft line breaks. */
				188	UBRK_LINE_SOFT_LIMIT = 100,
				189	/** Tag value for a hard, or mandatory line break */
				190	UBRK_LINE_HARD = 100,
				191	/** Upper bound for hard line breaks. */
				192	UBRK_LINE_HARD_LIMIT = 200
				193	} ULineBreakTag;
				194
				195
				196
				197	/**
				198	* Enum constants for the sentence break tags returned by getRuleStatus().
				199	* A range of values is defined for each category of
				200	* sentence, to allow for further subdivisions of a category in future releases.
				201	* Applications should check for tag values falling within the range, rather
				202	* than for single individual values.
				203	*
				204	* The numeric values of all of these constants are stable (will not change).
				205	*
				206	* @stable ICU 2.8
				207	*/
				208	typedef enum USentenceBreakTag {
				209	/** Tag value for sentences ending with a sentence terminator
				210	* ('.', '?', '!', etc.) character, possibly followed by a
				211	* hard separator (CR, LF, PS, etc.)
				212	*/
				213	UBRK_SENTENCE_TERM = 0,
				214	/** Upper bound for tags for sentences ended by sentence terminators. */
				215	UBRK_SENTENCE_TERM_LIMIT = 100,
				216	/** Tag value for sentences that do not contain an ending
				217	* sentence terminator ('.', '?', '!', etc.) character, but
				218	* are ended only by a hard separator (CR, LF, PS, etc.) or end of input.
				219	*/
				220	UBRK_SENTENCE_SEP = 100,
				221	/** Upper bound for tags for sentences ended by a separator. */
				222	UBRK_SENTENCE_SEP_LIMIT = 200
				223	/** Tag value for a hard, or mandatory line break */
				224	} USentenceBreakTag;
				225
				226
				227	/**
				228	* Open a new UBreakIterator for locating text boundaries for a specified locale.
				229	* A UBreakIterator may be used for detecting character, line, word,
				230	* and sentence breaks in text.
				231	* @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD,
				232	* UBRK_LINE, UBRK_SENTENCE
				233	* @param locale The locale specifying the text-breaking conventions. Note that
				234	* locale keys such as "lb" and "ss" may be used to modify text break behavior,
				235	* see general discussion of BreakIterator C API.
				236	* @param text The text to be iterated over. May be null, in which case ubrk_setText() is
				237	* used to specify the text to be iterated.
				238	* @param textLength The number of characters in text, or -1 if null-terminated.
				239	* @param status A UErrorCode to receive any errors.
				240	* @return A UBreakIterator for the specified locale.
				241	* @see ubrk_openRules
				242	* @stable ICU 2.0
				243	*/
Victor Chang	0813948	2021-01-19 16:28:40 +0000	[diff] [blame]	244	U_CAPI UBreakIterator* U_EXPORT2
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	245	ubrk_open(UBreakIteratorType type,
				246	const char *locale,
				247	const UChar *text,
				248	int32_t textLength,
				249	UErrorCode *status);
				250
				251	/**
				252	* Open a new UBreakIterator for locating text boundaries using specified breaking rules.
				253	* The rule syntax is ... (TBD)
				254	* @param rules A set of rules specifying the text breaking conventions.
				255	* @param rulesLength The number of characters in rules, or -1 if null-terminated.
				256	* @param text The text to be iterated over. May be null, in which case ubrk_setText() is
				257	* used to specify the text to be iterated.
				258	* @param textLength The number of characters in text, or -1 if null-terminated.
				259	* @param parseErr Receives position and context information for any syntax errors
				260	* detected while parsing the rules.
				261	* @param status A UErrorCode to receive any errors.
				262	* @return A UBreakIterator for the specified rules.
				263	* @see ubrk_open
				264	* @stable ICU 2.2
				265	*/
Victor Chang	0813948	2021-01-19 16:28:40 +0000	[diff] [blame]	266	U_CAPI UBreakIterator* U_EXPORT2
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	267	ubrk_openRules(const UChar *rules,
				268	int32_t rulesLength,
				269	const UChar *text,
				270	int32_t textLength,
				271	UParseError *parseErr,
				272	UErrorCode *status);
				273
				274	/**
				275	* Open a new UBreakIterator for locating text boundaries using precompiled binary rules.
				276	* Opening a UBreakIterator this way is substantially faster than using ubrk_openRules.
				277	* Binary rules may be obtained using ubrk_getBinaryRules. The compiled rules are not
				278	* compatible across different major versions of ICU, nor across platforms of different
				279	* endianness or different base character set family (ASCII vs EBCDIC).
				280	* @param binaryRules A set of compiled binary rules specifying the text breaking
				281	* conventions. Ownership of the storage containing the compiled
				282	* rules remains with the caller of this function. The compiled
				283	* rules must not be modified or deleted during the life of the
				284	* break iterator.
				285	* @param rulesLength The length of binaryRules in bytes; must be >= 0.
				286	* @param text The text to be iterated over. May be null, in which case
				287	* ubrk_setText() is used to specify the text to be iterated.
				288	* @param textLength The number of characters in text, or -1 if null-terminated.
				289	* @param status Pointer to UErrorCode to receive any errors.
				290	* @return UBreakIterator for the specified rules.
				291	* @see ubrk_getBinaryRules
				292	* @stable ICU 59
				293	*/
Victor Chang	0813948	2021-01-19 16:28:40 +0000	[diff] [blame]	294	U_CAPI UBreakIterator* U_EXPORT2
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	295	ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
				296	const UChar * text, int32_t textLength,
				297	UErrorCode * status);
				298
Victor Chang	16f37a6	2021-02-01 22:20:48 +0000	[diff] [blame]	299	#ifndef U_HIDE_DEPRECATED_API
				300
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	301	/**
				302	* Thread safe cloning operation
				303	* @param bi iterator to be cloned
				304	* @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br>
				305	* user allocated space for the new clone. If NULL new memory will be allocated.
				306	* If buffer is not large enough, new memory will be allocated.
				307	* Clients can use the U_BRK_SAFECLONE_BUFFERSIZE.
				308	* @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br>
				309	* pointer to size of allocated space.
				310	* If *pBufferSize == 0, a sufficient size for use in cloning will
				311	* be returned ('pre-flighting')
				312	* If *pBufferSize is not enough for a stack-based safe clone,
				313	* new memory will be allocated.
				314	* @param status to indicate whether the operation went on smoothly or there were errors
				315	* An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary.
				316	* @return pointer to the new clone
Victor Chang	16f37a6	2021-02-01 22:20:48 +0000	[diff] [blame]	317	* @deprecated ICU 69 Use ubrk_clone() instead.
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	318	*/
Victor Chang	0813948	2021-01-19 16:28:40 +0000	[diff] [blame]	319	U_CAPI UBreakIterator * U_EXPORT2
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	320	ubrk_safeClone(
				321	const UBreakIterator *bi,
				322	void *stackBuffer,
				323	int32_t *pBufferSize,
				324	UErrorCode *status);
				325
Victor Chang	16f37a6	2021-02-01 22:20:48 +0000	[diff] [blame]	326	#endif /* U_HIDE_DEPRECATED_API */
				327
Victor Chang	16f37a6	2021-02-01 22:20:48 +0000	[diff] [blame]	328
				329	/**
				330	* Thread safe cloning operation.
				331	* @param bi iterator to be cloned
				332	* @param status to indicate whether the operation went on smoothly or there were errors
				333	* @return pointer to the new clone
Victor Chang	e24e73a	2021-02-09 20:53:32 +0000	[diff] [blame^]	334	* @stable ICU 69
Victor Chang	16f37a6	2021-02-01 22:20:48 +0000	[diff] [blame]	335	*/
				336	U_CAPI UBreakIterator * U_EXPORT2
				337	ubrk_clone(const UBreakIterator *bi,
				338	UErrorCode *status);
				339
Victor Chang	16f37a6	2021-02-01 22:20:48 +0000	[diff] [blame]	340
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	341	#ifndef U_HIDE_DEPRECATED_API
				342
				343	/**
				344	* A recommended size (in bytes) for the memory buffer to be passed to ubrk_safeClone().
				345	* @deprecated ICU 52. Do not rely on ubrk_safeClone() cloning into any provided buffer.
				346	*/
				347	#define U_BRK_SAFECLONE_BUFFERSIZE 1
				348
				349	#endif /* U_HIDE_DEPRECATED_API */
				350
				351	/**
				352	* Close a UBreakIterator.
				353	* Once closed, a UBreakIterator may no longer be used.
				354	* @param bi The break iterator to close.
				355	* @stable ICU 2.0
				356	*/
Victor Chang	0813948	2021-01-19 16:28:40 +0000	[diff] [blame]	357	U_CAPI void U_EXPORT2
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	358	ubrk_close(UBreakIterator *bi);
				359
				360	#if U_SHOW_CPLUSPLUS_API
				361
				362	U_NAMESPACE_BEGIN
				363
				364	/**
				365	* \class LocalUBreakIteratorPointer
				366	* "Smart pointer" class, closes a UBreakIterator via ubrk_close().
				367	* For most methods see the LocalPointerBase base class.
				368	*
				369	* @see LocalPointerBase
				370	* @see LocalPointer
				371	* @stable ICU 4.4
				372	*/
				373	U_DEFINE_LOCAL_OPEN_POINTER(LocalUBreakIteratorPointer, UBreakIterator, ubrk_close);
				374
				375	U_NAMESPACE_END
				376
				377	#endif
				378
				379	/**
				380	* Sets an existing iterator to point to a new piece of text.
				381	* The break iterator retains a pointer to the supplied text.
				382	* The caller must not modify or delete the text while the BreakIterator
				383	* retains the reference.
				384	*
				385	* @param bi The iterator to use
				386	* @param text The text to be set
				387	* @param textLength The length of the text
				388	* @param status The error code
				389	* @stable ICU 2.0
				390	*/
Victor Chang	0813948	2021-01-19 16:28:40 +0000	[diff] [blame]	391	U_CAPI void U_EXPORT2
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	392	ubrk_setText(UBreakIterator* bi,
				393	const UChar* text,
				394	int32_t textLength,
				395	UErrorCode* status);
				396
				397
				398	/**
				399	* Sets an existing iterator to point to a new piece of text.
				400	*
				401	* All index positions returned by break iterator functions are
				402	* native indices from the UText. For example, when breaking UTF-8
				403	* encoded text, the break positions returned by \ref ubrk_next, \ref ubrk_previous, etc.
				404	* will be UTF-8 string indices, not UTF-16 positions.
				405	*
				406	* @param bi The iterator to use
				407	* @param text The text to be set.
				408	* This function makes a shallow clone of the supplied UText. This means
				409	* that the caller is free to immediately close or otherwise reuse the
				410	* UText that was passed as a parameter, but that the underlying text itself
				411	* must not be altered while being referenced by the break iterator.
				412	* @param status The error code
				413	* @stable ICU 3.4
				414	*/
Victor Chang	0813948	2021-01-19 16:28:40 +0000	[diff] [blame]	415	U_CAPI void U_EXPORT2
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	416	ubrk_setUText(UBreakIterator* bi,
				417	UText* text,
				418	UErrorCode* status);
				419
				420
				421
				422	/**
				423	* Determine the most recently-returned text boundary.
				424	*
				425	* @param bi The break iterator to use.
				426	* @return The character index most recently returned by \ref ubrk_next, \ref ubrk_previous,
				427	* \ref ubrk_first, or \ref ubrk_last.
				428	* @stable ICU 2.0
				429	*/
Victor Chang	0813948	2021-01-19 16:28:40 +0000	[diff] [blame]	430	U_CAPI int32_t U_EXPORT2
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	431	ubrk_current(const UBreakIterator *bi);
				432
				433	/**
				434	* Advance the iterator to the boundary following the current boundary.
				435	*
				436	* @param bi The break iterator to use.
				437	* @return The character index of the next text boundary, or UBRK_DONE
				438	* if all text boundaries have been returned.
				439	* @see ubrk_previous
				440	* @stable ICU 2.0
				441	*/
Victor Chang	0813948	2021-01-19 16:28:40 +0000	[diff] [blame]	442	U_CAPI int32_t U_EXPORT2
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	443	ubrk_next(UBreakIterator *bi);
				444
				445	/**
				446	* Set the iterator position to the boundary preceding the current boundary.
				447	*
				448	* @param bi The break iterator to use.
				449	* @return The character index of the preceding text boundary, or UBRK_DONE
				450	* if all text boundaries have been returned.
				451	* @see ubrk_next
				452	* @stable ICU 2.0
				453	*/
Victor Chang	0813948	2021-01-19 16:28:40 +0000	[diff] [blame]	454	U_CAPI int32_t U_EXPORT2
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	455	ubrk_previous(UBreakIterator *bi);
				456
				457	/**
				458	* Set the iterator position to zero, the start of the text being scanned.
				459	* @param bi The break iterator to use.
				460	* @return The new iterator position (zero).
				461	* @see ubrk_last
				462	* @stable ICU 2.0
				463	*/
Victor Chang	0813948	2021-01-19 16:28:40 +0000	[diff] [blame]	464	U_CAPI int32_t U_EXPORT2
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	465	ubrk_first(UBreakIterator *bi);
				466
				467	/**
				468	* Set the iterator position to the index immediately <EM>beyond</EM> the last character in the text being scanned.
				469	* This is not the same as the last character.
				470	* @param bi The break iterator to use.
				471	* @return The character offset immediately <EM>beyond</EM> the last character in the
				472	* text being scanned.
				473	* @see ubrk_first
				474	* @stable ICU 2.0
				475	*/
Victor Chang	0813948	2021-01-19 16:28:40 +0000	[diff] [blame]	476	U_CAPI int32_t U_EXPORT2
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	477	ubrk_last(UBreakIterator *bi);
				478
				479	/**
				480	* Set the iterator position to the first boundary preceding the specified offset.
				481	* The new position is always smaller than offset, or UBRK_DONE.
				482	* @param bi The break iterator to use.
				483	* @param offset The offset to begin scanning.
				484	* @return The text boundary preceding offset, or UBRK_DONE.
				485	* @see ubrk_following
				486	* @stable ICU 2.0
				487	*/
Victor Chang	0813948	2021-01-19 16:28:40 +0000	[diff] [blame]	488	U_CAPI int32_t U_EXPORT2
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	489	ubrk_preceding(UBreakIterator *bi,
				490	int32_t offset);
				491
				492	/**
				493	* Advance the iterator to the first boundary following the specified offset.
				494	* The value returned is always greater than offset, or UBRK_DONE.
				495	* @param bi The break iterator to use.
				496	* @param offset The offset to begin scanning.
				497	* @return The text boundary following offset, or UBRK_DONE.
				498	* @see ubrk_preceding
				499	* @stable ICU 2.0
				500	*/
Victor Chang	0813948	2021-01-19 16:28:40 +0000	[diff] [blame]	501	U_CAPI int32_t U_EXPORT2
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	502	ubrk_following(UBreakIterator *bi,
				503	int32_t offset);
				504
				505	/**
				506	* Get a locale for which text breaking information is available.
				507	* A UBreakIterator in a locale returned by this function will perform the correct
				508	* text breaking for the locale.
				509	* @param index The index of the desired locale.
				510	* @return A locale for which text breaking information is available, or 0 if none.
				511	* @see ubrk_countAvailable
				512	* @stable ICU 2.0
				513	*/
Victor Chang	0813948	2021-01-19 16:28:40 +0000	[diff] [blame]	514	U_CAPI const char* U_EXPORT2
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	515	ubrk_getAvailable(int32_t index);
				516
				517	/**
				518	* Determine how many locales have text breaking information available.
				519	* This function is most useful as determining the loop ending condition for
				520	* calls to \ref ubrk_getAvailable.
				521	* @return The number of locales for which text breaking information is available.
				522	* @see ubrk_getAvailable
				523	* @stable ICU 2.0
				524	*/
Victor Chang	0813948	2021-01-19 16:28:40 +0000	[diff] [blame]	525	U_CAPI int32_t U_EXPORT2
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	526	ubrk_countAvailable(void);
				527
				528
				529	/**
				530	* Returns true if the specified position is a boundary position. As a side
				531	* effect, leaves the iterator pointing to the first boundary position at
				532	* or after "offset".
				533	* @param bi The break iterator to use.
				534	* @param offset the offset to check.
				535	* @return True if "offset" is a boundary position.
				536	* @stable ICU 2.0
				537	*/
Victor Chang	0813948	2021-01-19 16:28:40 +0000	[diff] [blame]	538	U_CAPI UBool U_EXPORT2
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	539	ubrk_isBoundary(UBreakIterator *bi, int32_t offset);
				540
				541	/**
				542	* Return the status from the break rule that determined the most recently
				543	* returned break position. The values appear in the rule source
				544	* within brackets, {123}, for example. For rules that do not specify a
				545	* status, a default value of 0 is returned.
				546	* <p>
				547	* For word break iterators, the possible values are defined in enum UWordBreak.
				548	* @stable ICU 2.2
				549	*/
Victor Chang	0813948	2021-01-19 16:28:40 +0000	[diff] [blame]	550	U_CAPI int32_t U_EXPORT2
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	551	ubrk_getRuleStatus(UBreakIterator *bi);
				552
				553	/**
				554	* Get the statuses from the break rules that determined the most recently
				555	* returned break position. The values appear in the rule source
				556	* within brackets, {123}, for example. The default status value for rules
				557	* that do not explicitly provide one is zero.
				558	* <p>
				559	* For word break iterators, the possible values are defined in enum UWordBreak.
				560	* @param bi The break iterator to use
				561	* @param fillInVec an array to be filled in with the status values.
				562	* @param capacity the length of the supplied vector. A length of zero causes
				563	* the function to return the number of status values, in the
				564	* normal way, without attempting to store any values.
				565	* @param status receives error codes.
				566	* @return The number of rule status values from rules that determined
				567	* the most recent boundary returned by the break iterator.
				568	* @stable ICU 3.0
				569	*/
Victor Chang	0813948	2021-01-19 16:28:40 +0000	[diff] [blame]	570	U_CAPI int32_t U_EXPORT2
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	571	ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status);
				572
				573	/**
				574	* Return the locale of the break iterator. You can choose between the valid and
				575	* the actual locale.
				576	* @param bi break iterator
				577	* @param type locale type (valid or actual)
				578	* @param status error code
				579	* @return locale string
				580	* @stable ICU 2.8
				581	*/
Victor Chang	0813948	2021-01-19 16:28:40 +0000	[diff] [blame]	582	U_CAPI const char* U_EXPORT2
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	583	ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode *status);
				584
				585	/**
				586	* Set the subject text string upon which the break iterator is operating
				587	* without changing any other aspect of the state.
				588	* The new and previous text strings must have the same content.
				589	*
				590	* This function is intended for use in environments where ICU is operating on
				591	* strings that may move around in memory. It provides a mechanism for notifying
				592	* ICU that the string has been relocated, and providing a new UText to access the
				593	* string in its new position.
				594	*
				595	* Note that the break iterator never copies the underlying text
				596	* of a string being processed, but always operates directly on the original text
				597	* provided by the user. Refreshing simply drops the references to the old text
				598	* and replaces them with references to the new.
				599	*
				600	* Caution: this function is normally used only by very specialized
				601	* system-level code. One example use case is with garbage collection
				602	* that moves the text in memory.
				603	*
				604	* @param bi The break iterator.
				605	* @param text The new (moved) text string.
				606	* @param status Receives errors detected by this function.
				607	*
				608	* @stable ICU 49
				609	*/
Victor Chang	0813948	2021-01-19 16:28:40 +0000	[diff] [blame]	610	U_CAPI void U_EXPORT2
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	611	ubrk_refreshUText(UBreakIterator *bi,
				612	UText *text,
				613	UErrorCode *status);
				614
				615
				616	/**
				617	* Get a compiled binary version of the rules specifying the behavior of a UBreakIterator.
				618	* The binary rules may be used with ubrk_openBinaryRules to open a new UBreakIterator
				619	* more quickly than using ubrk_openRules. The compiled rules are not compatible across
				620	* different major versions of ICU, nor across platforms of different endianness or
				621	* different base character set family (ASCII vs EBCDIC). Supports preflighting (with
				622	* binaryRules=NULL and rulesCapacity=0) to get the rules length without copying them to
				623	* the binaryRules buffer. However, whether preflighting or not, if the actual length
				624	* is greater than INT32_MAX, then the function returns 0 and sets *status to
				625	* U_INDEX_OUTOFBOUNDS_ERROR.
				626
				627	* @param bi The break iterator to use.
				628	* @param binaryRules Buffer to receive the compiled binary rules; set to NULL for
				629	* preflighting.
				630	* @param rulesCapacity Capacity (in bytes) of the binaryRules buffer; set to 0 for
				631	* preflighting. Must be >= 0.
				632	* @param status Pointer to UErrorCode to receive any errors, such as
				633	* U_BUFFER_OVERFLOW_ERROR, U_INDEX_OUTOFBOUNDS_ERROR, or
				634	* U_ILLEGAL_ARGUMENT_ERROR.
				635	* @return The actual byte length of the binary rules, if <= INT32_MAX;
				636	* otherwise 0. If not preflighting and this is larger than
				637	* rulesCapacity, *status will be set to an error.
				638	* @see ubrk_openBinaryRules
				639	* @stable ICU 59
				640	*/
Victor Chang	0813948	2021-01-19 16:28:40 +0000	[diff] [blame]	641	U_CAPI int32_t U_EXPORT2
Victor Chang	cacdd87	2018-10-24 03:31:13 +0100	[diff] [blame]	642	ubrk_getBinaryRules(UBreakIterator *bi,
				643	uint8_t * binaryRules, int32_t rulesCapacity,
				644	UErrorCode * status);
				645
				646	#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
				647
				648	#endif