blob: b8b6c3b84666a83150f329cda0197c1bb3de2b69 [file] [log] [blame]
Victor Chang2b8ba1e2020-08-19 18:14:25 +01001// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6* Copyright (C) 2002-2011 International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: uiter.h
11* encoding: UTF-8
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2002jan18
16* created by: Markus W. Scherer
17*/
18
19#ifndef __UITER_H__
20#define __UITER_H__
21
22/**
23 * \file
24 * \brief C API: Unicode Character Iteration
25 *
26 * @see UCharIterator
27 */
28
29#include "unicode/utypes.h"
30
31#if U_SHOW_CPLUSPLUS_API
32 U_NAMESPACE_BEGIN
33
34 class CharacterIterator;
35 class Replaceable;
36
37 U_NAMESPACE_END
38#endif
39
40U_CDECL_BEGIN
41
42struct UCharIterator;
43typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
44
45/**
46 * Origin constants for UCharIterator.getIndex() and UCharIterator.move().
 * They select the reference point of a request: the start, current, or limit
 * index of the iteration range, index 0 (UITER_ZERO), or the length.
 * @see UCharIteratorGetIndex
47 * @see UCharIteratorMove
48 * @see UCharIterator
49 * @stable ICU 2.1
50 */
51typedef enum UCharIteratorOrigin {
52 UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH
53} UCharIteratorOrigin;
54
55/** Constants for UCharIterator. @stable ICU 2.6 */
56enum {
57 /**
58 * Constant value that may be returned by UCharIteratorMove
59 * indicating that the final UTF-16 index is not known, but that the move succeeded.
60 * This can occur when moving relative to limit or length, or
61 * when moving relative to the current index after a setState()
62 * when the current UTF-16 index is not known.
63 *
64 * It would be very inefficient to have to count from the beginning of the text
65 * just to get the current/limit/length index after moving relative to it.
66 * The actual index can be determined with getIndex(UITER_CURRENT)
67 * which will count the UChars if necessary.
68 *
 * @see UCharIteratorMove
69 * @stable ICU 2.6
70 */
71 UITER_UNKNOWN_INDEX=-2
72};
73
74
75/**
76 * Constant for UCharIterator.getState() indicating an error or
77 * an unknown state.
78 * Returned by uiter_getState()/UCharIteratorGetState
79 * when an error occurs.
80 * Also, some UCharIterator implementations may not be able to return
81 * a valid state for each position. This will be clearly documented
82 * for each such iterator (none of the public ones here).
83 *
 * @see UCharIteratorGetState
84 * @stable ICU 2.6
85 */
86#define UITER_NO_STATE ((uint32_t)0xffffffff)
87
88/**
89 * Function type declaration for UCharIterator.getIndex().
90 *
91 * Gets the current position, or the start or limit of the
92 * iteration range.
93 *
94 * This function may perform slowly for UITER_CURRENT after setState() was called,
95 * or for UITER_LENGTH, because an iterator implementation may have to count
96 * UChars if the underlying storage is not UTF-16.
97 *
98 * @param iter the UCharIterator structure ("this pointer")
99 * @param origin selects which index to get: 0, start, limit, length, or current
100 * @return the requested index, or U_SENTINEL on an error condition
101 *
102 * @see UCharIteratorOrigin
103 * @see UCharIterator
104 * @stable ICU 2.1
105 */
106typedef int32_t U_CALLCONV
107UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin);
108
109/**
110 * Function type declaration for UCharIterator.move().
111 *
112 * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index).
113 *
114 * Moves the current position relative to the start or limit of the
115 * iteration range, or relative to the current position itself.
116 * The movement is expressed in numbers of code units forward
117 * or backward by specifying a positive or negative delta.
118 * Out of bounds movement will be pinned to the start or limit.
119 *
120 * This function may perform slowly for moving relative to UITER_LENGTH
121 * because an iterator implementation may have to count the rest of the
122 * UChars if the native storage is not UTF-16.
123 *
124 * When moving relative to the limit or length, or
125 * relative to the current position after setState() was called,
126 * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient
127 * determination of the actual UTF-16 index.
128 * The actual index can be determined with getIndex(UITER_CURRENT)
129 * which will count the UChars if necessary.
130 * See UITER_UNKNOWN_INDEX for details.
131 *
132 * @param iter the UCharIterator structure ("this pointer")
133 * @param delta number of code units to move; can be positive, zero, or negative
134 * @param origin move relative to the 0, start, limit, length, or current index
135 * @return the new index, or U_SENTINEL on an error condition,
136 * or UITER_UNKNOWN_INDEX when the index is not known.
137 *
138 * @see UCharIteratorOrigin
139 * @see UCharIterator
140 * @see UITER_UNKNOWN_INDEX
141 * @stable ICU 2.1
142 */
143typedef int32_t U_CALLCONV
144UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin);
145
146/**
147 * Function type declaration for UCharIterator.hasNext().
148 *
149 * Check if current() and next() can still
150 * return another code unit.
151 *
152 * @param iter the UCharIterator structure ("this pointer")
153 * @return boolean value for whether current() and next() can still return another code unit
154 *
155 * @see UCharIterator
 * @see UCharIteratorCurrent
 * @see UCharIteratorNext
156 * @stable ICU 2.1
157 */
158typedef UBool U_CALLCONV
159UCharIteratorHasNext(UCharIterator *iter);
160
161/**
162 * Function type declaration for UCharIterator.hasPrevious().
163 *
164 * Check if previous() can still return another code unit.
165 *
166 * @param iter the UCharIterator structure ("this pointer")
167 * @return boolean value for whether previous() can still return another code unit
168 *
169 * @see UCharIterator
 * @see UCharIteratorPrevious
170 * @stable ICU 2.1
171 */
172typedef UBool U_CALLCONV
173UCharIteratorHasPrevious(UCharIterator *iter);
174
175/**
176 * Function type declaration for UCharIterator.current().
177 *
178 * Return the code unit at the current position,
179 * or U_SENTINEL if there is none (index is at the limit).
180 *
181 * @param iter the UCharIterator structure ("this pointer")
182 * @return the current code unit, or U_SENTINEL if the index is at the limit
183 *
184 * @see UCharIterator
185 * @stable ICU 2.1
186 */
187typedef UChar32 U_CALLCONV
188UCharIteratorCurrent(UCharIterator *iter);
189
190/**
191 * Function type declaration for UCharIterator.next().
192 *
193 * Return the code unit at the current index and increment
194 * the index (post-increment, like s[i++]),
195 * or return U_SENTINEL if there is none (index is at the limit).
196 *
197 * @param iter the UCharIterator structure ("this pointer")
198 * @return the current code unit (and post-increment the current index),
 * or U_SENTINEL if the index is at the limit
199 *
200 * @see UCharIterator
201 * @stable ICU 2.1
202 */
203typedef UChar32 U_CALLCONV
204UCharIteratorNext(UCharIterator *iter);
205
206/**
207 * Function type declaration for UCharIterator.previous().
208 *
209 * Decrement the index and return the code unit from there
210 * (pre-decrement, like s[--i]),
211 * or return U_SENTINEL if there is none (index is at the start).
212 *
213 * @param iter the UCharIterator structure ("this pointer")
214 * @return the previous code unit (after pre-decrementing the current index),
 * or U_SENTINEL if the index is at the start
215 *
216 * @see UCharIterator
217 * @stable ICU 2.1
218 */
219typedef UChar32 U_CALLCONV
220UCharIteratorPrevious(UCharIterator *iter);
221
222/**
223 * Function type declaration for UCharIterator.reservedFn().
224 * Reserved for future use; UCharIterator.reservedFn is documented as currently NULL.
225 *
226 * @param iter the UCharIterator structure ("this pointer")
227 * @param something some integer argument
228 * @return some integer
229 *
230 * @see UCharIterator
231 * @stable ICU 2.1
232 */
233typedef int32_t U_CALLCONV
234UCharIteratorReserved(UCharIterator *iter, int32_t something);
235
236/**
237 * Function type declaration for UCharIterator.getState().
238 *
239 * Get the "state" of the iterator in the form of a single 32-bit word.
240 * It is recommended that the state value be calculated to be as small as
241 * is feasible. For strings with limited lengths, fewer than 32 bits may
242 * be sufficient.
243 *
244 * This is used together with setState()/UCharIteratorSetState
245 * to save and restore the iterator position more efficiently than with
246 * getIndex()/move().
247 *
248 * The iterator state is defined as a uint32_t value because it is designed
249 * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state
250 * of the character iterator.
251 *
252 * With some UCharIterator implementations (e.g., UTF-8),
253 * getting and setting the UTF-16 index with existing functions
254 * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but
255 * relatively slow because the iterator has to "walk" from a known index
256 * to the requested one.
257 * This takes more time the farther it needs to go.
258 *
259 * An opaque state value allows an iterator implementation to provide
260 * an internal index (UTF-8: the source byte array index) for
261 * fast, constant-time restoration.
262 *
263 * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
264 * the UTF-16 index may not be restored as well, but the iterator can deliver
265 * the correct text contents and move relative to the current position
266 * without performance degradation.
267 *
268 * Some UCharIterator implementations may not be able to return
269 * a valid state for each position, in which case they return UITER_NO_STATE instead.
270 * This will be clearly documented for each such iterator (none of the public ones here).
271 *
272 * @param iter the UCharIterator structure ("this pointer")
273 * @return the state word, or UITER_NO_STATE on error or if no valid state is available
274 *
275 * @see UCharIterator
276 * @see UCharIteratorSetState
277 * @see UITER_NO_STATE
278 * @stable ICU 2.6
279 */
280typedef uint32_t U_CALLCONV
281UCharIteratorGetState(const UCharIterator *iter);
282
283/**
284 * Function type declaration for UCharIterator.setState().
285 *
286 * Restore the "state" of the iterator using a state word from a getState() call.
287 * The iterator object need not be the same one on which getState() was called,
288 * but it must be of the same type (set up using the same uiter_setXYZ function)
289 * and it must iterate over the same string
290 * (binary identical regardless of memory address).
291 * For more about the state word see UCharIteratorGetState.
292 *
293 * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
294 * the UTF-16 index may not be restored as well, but the iterator can deliver
295 * the correct text contents and move relative to the current position
296 * without performance degradation.
297 *
298 * @param iter the UCharIterator structure ("this pointer")
299 * @param state the state word from a getState() call
300 * on a same-type, same-string iterator
301 * @param pErrorCode Must be a valid pointer to an error code value,
302 * which must not indicate a failure before the function call.
303 *
304 * @see UCharIterator
305 * @see UCharIteratorGetState
306 * @stable ICU 2.6
307 */
308typedef void U_CALLCONV
309UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
310
311
312/**
313 * C API for code unit iteration.
314 * This can be used as a C wrapper around
315 * CharacterIterator, Replaceable, or implemented using simple strings, etc.
316 *
317 * There are two roles for using UCharIterator:
318 *
319 * A "provider" sets the necessary function pointers and controls the "protected"
320 * fields of the UCharIterator structure. A "provider" passes a UCharIterator
321 * into C APIs that need a UCharIterator as an abstract, flexible string interface.
322 *
323 * Implementations of such C APIs are "callers" of UCharIterator functions;
324 * they only use the "public" function pointers and never access the "protected"
325 * fields directly.
326 *
327 * The current() and next() functions only check the current index against the
328 * limit, and previous() only checks the current index against the start,
329 * to see if the iterator already reached the end of the iteration range.
330 *
331 * The assumption - in all iterators - is that the index is moved via the API,
332 * which means it won't go out of bounds, or the index is modified by
333 * user code that knows enough about the iterator implementation to set valid
334 * index values.
335 *
336 * UCharIterator functions return code unit values 0..0xffff,
337 * or U_SENTINEL if the iteration bounds are reached.
338 *
339 * @stable ICU 2.1
340 */
341struct UCharIterator {
342 /**
343 * (protected) Pointer to string or wrapped object or similar.
344 * Not used by caller.
345 * @stable ICU 2.1
346 */
347 const void *context;
348
349 /**
350 * (protected) Length of string or similar.
351 * Not used by caller.
352 * @stable ICU 2.1
353 */
354 int32_t length;
355
356 /**
357 * (protected) Start index or similar.
358 * Not used by caller.
359 * @stable ICU 2.1
360 */
361 int32_t start;
362
363 /**
364 * (protected) Current index or similar.
365 * Not used by caller.
366 * @stable ICU 2.1
367 */
368 int32_t index;
369
370 /**
371 * (protected) Limit index or similar.
372 * Not used by caller.
373 * @stable ICU 2.1
374 */
375 int32_t limit;
376
377 /**
378 * (protected) Used by UTF-8 iterators and possibly others.
379 * @stable ICU 2.1
380 */
381 int32_t reservedField;
382
383 /**
384 * (public) Returns the current position or the
385 * start or limit index of the iteration range.
386 *
387 * @see UCharIteratorGetIndex
388 * @stable ICU 2.1
389 */
390 UCharIteratorGetIndex *getIndex;
391
392 /**
393 * (public) Moves the current position relative to the start or limit of the
394 * iteration range, or relative to the current position itself.
395 * The movement is expressed in numbers of code units forward
396 * or backward by specifying a positive or negative delta.
397 *
398 * @see UCharIteratorMove
399 * @stable ICU 2.1
400 */
401 UCharIteratorMove *move;
402
403 /**
404 * (public) Check if current() and next() can still
405 * return another code unit.
406 *
407 * @see UCharIteratorHasNext
408 * @stable ICU 2.1
409 */
410 UCharIteratorHasNext *hasNext;
411
412 /**
413 * (public) Check if previous() can still return another code unit.
414 *
415 * @see UCharIteratorHasPrevious
416 * @stable ICU 2.1
417 */
418 UCharIteratorHasPrevious *hasPrevious;
419
420 /**
421 * (public) Return the code unit at the current position,
422 * or U_SENTINEL if there is none (index is at the limit).
423 *
424 * @see UCharIteratorCurrent
425 * @stable ICU 2.1
426 */
427 UCharIteratorCurrent *current;
428
429 /**
430 * (public) Return the code unit at the current index and increment
431 * the index (post-increment, like s[i++]),
432 * or return U_SENTINEL if there is none (index is at the limit).
433 *
434 * @see UCharIteratorNext
435 * @stable ICU 2.1
436 */
437 UCharIteratorNext *next;
438
439 /**
440 * (public) Decrement the index and return the code unit from there
441 * (pre-decrement, like s[--i]),
442 * or return U_SENTINEL if there is none (index is at the start).
443 *
444 * @see UCharIteratorPrevious
445 * @stable ICU 2.1
446 */
447 UCharIteratorPrevious *previous;
448
449 /**
450 * (public) Reserved for future use. Currently NULL.
451 *
452 * @see UCharIteratorReserved
453 * @stable ICU 2.1
454 */
455 UCharIteratorReserved *reservedFn;
456
457 /**
458 * (public) Return the state of the iterator, to be restored later with setState().
459 * This function pointer is NULL if the iterator does not implement it.
460 *
461 * @see UCharIteratorGetState
462 * @stable ICU 2.6
463 */
464 UCharIteratorGetState *getState;
465
466 /**
467 * (public) Restore the iterator state from the state word from a call
468 * to getState().
469 * This function pointer is NULL if the iterator does not implement it.
470 *
471 * @see UCharIteratorSetState
472 * @stable ICU 2.6
473 */
474 UCharIteratorSetState *setState;
475};
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493#if U_SHOW_CPLUSPLUS_API
494
495
496
497
498
499#endif
500
501U_CDECL_END
502
503#endif