// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2002-2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  propsvec.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2002feb22
*   created by: Markus W. Scherer
*
*   Store bits (Unicode character properties) in bit set vectors.
*/
20
21#ifndef __UPROPSVEC_H__
22#define __UPROPSVEC_H__
23
24#include "unicode/utypes.h"
25#include "utrie.h"
26#include "utrie2.h"
27
28U_CDECL_BEGIN
29
/**
 * Unicode Properties Vectors associated with code point ranges.
 *
 * Rows of uint32_t integers in a contiguous array store
 * the range limits and the properties vectors.
 *
 * Logically, each row has a certain number of uint32_t values,
 * which is set via the upvec_open() "columns" parameter.
 *
 * Internally, two additional columns are stored.
 * In each internal row,
 * row[0] contains the start code point and
 * row[1] contains the limit code point,
 * which is the start of the next range.
 *
 * Initially, there is only one "normal" row for
 * range [0..0x110000[ with values 0.
 * There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP.
 *
 * It would be possible to store only one range boundary per row,
 * but self-contained rows make it possible to sort them later by contents.
 *
 * Only the forward declaration is visible in this header, so the type is
 * opaque to code that includes it.
 */
struct UPropsVectors;
typedef struct UPropsVectors UPropsVectors;
54
/*
 * Special pseudo code points for storing the initialValue and the errorValue,
 * which are used to initialize a UTrie2 or similar.
 *
 * They start at 0x110000, the limit of the one initial "normal" range
 * [0..0x110000[.
 */
#define UPVEC_FIRST_SPECIAL_CP 0x110000  /* first special pseudo code point */
#define UPVEC_INITIAL_VALUE_CP 0x110000  /* pseudo code point storing the initialValue */
#define UPVEC_ERROR_VALUE_CP 0x110001    /* pseudo code point storing the errorValue */
#define UPVEC_MAX_CP 0x110001            /* highest (pseudo) code point in use */
63
/*
 * Special pseudo code point used in upvec_compact() to signal the end of
 * delivering special values and the beginning of delivering real ones.
 * Stable value, unlike UPVEC_MAX_CP which might grow over time.
 */
#define UPVEC_START_REAL_VALUES_CP 0x200000
70
71/*
72 * Open a UPropsVectors object.
73 * @param columns Number of value integers (uint32_t) per row.
74 */
75U_CAPI UPropsVectors * U_EXPORT2
76upvec_open(int32_t columns, UErrorCode *pErrorCode);
77
78U_CAPI void U_EXPORT2
79upvec_close(UPropsVectors *pv);
80
81/*
82 * In rows for code points [start..end], select the column,
83 * reset the mask bits and set the value bits (ANDed with the mask).
84 *
85 * Will set U_NO_WRITE_PERMISSION if called after upvec_compact().
86 */
87U_CAPI void U_EXPORT2
88upvec_setValue(UPropsVectors *pv,
89 UChar32 start, UChar32 end,
90 int32_t column,
91 uint32_t value, uint32_t mask,
92 UErrorCode *pErrorCode);
93
94/*
95 * Logically const but must not be used on the same pv concurrently!
96 * Always returns 0 if called after upvec_compact().
97 */
98U_CAPI uint32_t U_EXPORT2
99upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column);
100
101/*
102 * pRangeStart and pRangeEnd can be NULL.
103 * @return NULL if rowIndex out of range and for illegal arguments,
104 * or if called after upvec_compact()
105 */
106U_CAPI uint32_t * U_EXPORT2
107upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
108 UChar32 *pRangeStart, UChar32 *pRangeEnd);
109
110/*
111 * Compact the vectors:
112 * - modify the memory
113 * - keep only unique vectors
114 * - store them contiguously from the beginning of the memory
115 * - for each (non-unique) row, call the handler function
116 *
117 * The handler's rowIndex is the index of the row in the compacted
118 * memory block.
119 * (Therefore, it starts at 0 increases in increments of the columns value.)
120 *
121 * In a first phase, only special values are delivered (each exactly once),
122 * with start==end both equalling a special pseudo code point.
123 * Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP
124 * where rowIndex is the length of the compacted array,
125 * and the row is arbitrary (but not NULL).
126 * Then, in the second phase, the handler is called for each row of real values.
127 */
128typedef void U_CALLCONV
129UPVecCompactHandler(void *context,
130 UChar32 start, UChar32 end,
131 int32_t rowIndex, uint32_t *row, int32_t columns,
132 UErrorCode *pErrorCode);
133
134U_CAPI void U_EXPORT2
135upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode);
136
137/*
138 * Get the vectors array after calling upvec_compact().
139 * The caller must not modify nor release the returned array.
140 * Returns NULL if called before upvec_compact().
141 */
142U_CAPI const uint32_t * U_EXPORT2
143upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns);
144
145/*
146 * Get a clone of the vectors array after calling upvec_compact().
147 * The caller owns the returned array and must uprv_free() it.
148 * Returns NULL if called before upvec_compact().
149 */
150U_CAPI uint32_t * U_EXPORT2
151upvec_cloneArray(const UPropsVectors *pv,
152 int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode);
153
154/*
155 * Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted
156 * vectors array, and freeze the trie.
157 */
158U_CAPI UTrie2 * U_EXPORT2
159upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode);
160
161struct UPVecToUTrie2Context {
162 UTrie2 *trie;
163 int32_t initialValue;
164 int32_t errorValue;
165 int32_t maxValue;
166};
167typedef struct UPVecToUTrie2Context UPVecToUTrie2Context;
168
169/* context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values */
170U_CAPI void U_CALLCONV
171upvec_compactToUTrie2Handler(void *context,
172 UChar32 start, UChar32 end,
173 int32_t rowIndex, uint32_t *row, int32_t columns,
174 UErrorCode *pErrorCode);
175
176U_CDECL_END
177
178#endif