Victor Chang | 7322950 | 2020-09-17 13:39:19 +0100 | [diff] [blame] | 1 | // © 2016 and later: Unicode, Inc. and others. |
| 2 | // License & terms of use: http://www.unicode.org/copyright.html |
| 3 | /* |
| 4 | ******************************************************************************* |
| 5 | * |
| 6 | * Copyright (C) 2002-2010, International Business Machines |
| 7 | * Corporation and others. All Rights Reserved. |
| 8 | * |
| 9 | ******************************************************************************* |
| 10 | * file name: propsvec.h |
| 11 | * encoding: UTF-8 |
| 12 | * tab size: 8 (not used) |
| 13 | * indentation:4 |
| 14 | * |
| 15 | * created on: 2002feb22 |
| 16 | * created by: Markus W. Scherer |
| 17 | * |
| 18 | * Store bits (Unicode character properties) in bit set vectors. |
| 19 | */ |
| 20 | |
| 21 | #ifndef __UPROPSVEC_H__ |
| 22 | #define __UPROPSVEC_H__ |
| 23 | |
| 24 | #include "unicode/utypes.h" |
| 25 | #include "utrie.h" |
| 26 | #include "utrie2.h" |
| 27 | |
| 28 | U_CDECL_BEGIN |
| 29 | |
/**
 * Unicode properties vectors associated with code point ranges.
 *
 * The data is kept as rows of uint32_t integers in one contiguous array;
 * the rows store the range limits and the properties vectors.
 *
 * Logically, each row has a fixed number of uint32_t values,
 * which is set via the upvec_open() "columns" parameter.
 *
 * Internally, two additional columns are stored per row:
 *   row[0] contains the start code point of the range, and
 *   row[1] contains the limit code point, that is,
 *          the start of the next range.
 *
 * Initially, there is only one "normal" row, for the half-open
 * range [0..0x110000[ with all values 0.
 * There are additional rows for special purposes,
 * see UPVEC_FIRST_SPECIAL_CP.
 *
 * It would be possible to store only one range boundary per row,
 * but self-contained rows make it possible to sort them by contents later.
 *
 * Only the forward declaration is visible in this header;
 * callers handle the object through a UPropsVectors pointer.
 */
struct UPropsVectors;
typedef struct UPropsVectors UPropsVectors;
| 54 | |
/*
 * Special pseudo code points for storing the initialValue and the errorValue,
 * which are used to initialize a UTrie2 or similar.
 *
 * All of them lie above the last Unicode code point (0x10ffff).
 * UPVEC_FIRST_SPECIAL_CP has the same value as UPVEC_INITIAL_VALUE_CP
 * (the lowest special pseudo code point), and UPVEC_MAX_CP has the same
 * value as UPVEC_ERROR_VALUE_CP (the highest one).
 */
#define UPVEC_FIRST_SPECIAL_CP 0x110000
#define UPVEC_INITIAL_VALUE_CP 0x110000
#define UPVEC_ERROR_VALUE_CP 0x110001
#define UPVEC_MAX_CP 0x110001
| 63 | |
/*
 * Special pseudo code point used in upvec_compact(), signalling the end of
 * delivering special values and the beginning of delivering real ones.
 * This is a stable value, unlike UPVEC_MAX_CP which might grow over time.
 */
#define UPVEC_START_REAL_VALUES_CP 0x200000
| 70 | |
| 71 | /* |
| 72 | * Open a UPropsVectors object. |
| 73 | * @param columns Number of value integers (uint32_t) per row. |
| 74 | */ |
| 75 | U_CAPI UPropsVectors * U_EXPORT2 |
| 76 | upvec_open(int32_t columns, UErrorCode *pErrorCode); |
| 77 | |
| 78 | U_CAPI void U_EXPORT2 |
| 79 | upvec_close(UPropsVectors *pv); |
| 80 | |
| 81 | /* |
| 82 | * In rows for code points [start..end], select the column, |
| 83 | * reset the mask bits and set the value bits (ANDed with the mask). |
| 84 | * |
| 85 | * Will set U_NO_WRITE_PERMISSION if called after upvec_compact(). |
| 86 | */ |
| 87 | U_CAPI void U_EXPORT2 |
| 88 | upvec_setValue(UPropsVectors *pv, |
| 89 | UChar32 start, UChar32 end, |
| 90 | int32_t column, |
| 91 | uint32_t value, uint32_t mask, |
| 92 | UErrorCode *pErrorCode); |
| 93 | |
| 94 | /* |
| 95 | * Logically const but must not be used on the same pv concurrently! |
| 96 | * Always returns 0 if called after upvec_compact(). |
| 97 | */ |
| 98 | U_CAPI uint32_t U_EXPORT2 |
| 99 | upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column); |
| 100 | |
| 101 | /* |
| 102 | * pRangeStart and pRangeEnd can be NULL. |
| 103 | * @return NULL if rowIndex out of range and for illegal arguments, |
| 104 | * or if called after upvec_compact() |
| 105 | */ |
| 106 | U_CAPI uint32_t * U_EXPORT2 |
| 107 | upvec_getRow(const UPropsVectors *pv, int32_t rowIndex, |
| 108 | UChar32 *pRangeStart, UChar32 *pRangeEnd); |
| 109 | |
| 110 | /* |
| 111 | * Compact the vectors: |
| 112 | * - modify the memory |
| 113 | * - keep only unique vectors |
| 114 | * - store them contiguously from the beginning of the memory |
| 115 | * - for each (non-unique) row, call the handler function |
| 116 | * |
| 117 | * The handler's rowIndex is the index of the row in the compacted |
| 118 | * memory block. |
| 119 | * (Therefore, it starts at 0 increases in increments of the columns value.) |
| 120 | * |
| 121 | * In a first phase, only special values are delivered (each exactly once), |
| 122 | * with start==end both equalling a special pseudo code point. |
| 123 | * Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP |
| 124 | * where rowIndex is the length of the compacted array, |
| 125 | * and the row is arbitrary (but not NULL). |
| 126 | * Then, in the second phase, the handler is called for each row of real values. |
| 127 | */ |
| 128 | typedef void U_CALLCONV |
| 129 | UPVecCompactHandler(void *context, |
| 130 | UChar32 start, UChar32 end, |
| 131 | int32_t rowIndex, uint32_t *row, int32_t columns, |
| 132 | UErrorCode *pErrorCode); |
| 133 | |
| 134 | U_CAPI void U_EXPORT2 |
| 135 | upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode); |
| 136 | |
| 137 | /* |
| 138 | * Get the vectors array after calling upvec_compact(). |
| 139 | * The caller must not modify nor release the returned array. |
| 140 | * Returns NULL if called before upvec_compact(). |
| 141 | */ |
| 142 | U_CAPI const uint32_t * U_EXPORT2 |
| 143 | upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns); |
| 144 | |
| 145 | /* |
| 146 | * Get a clone of the vectors array after calling upvec_compact(). |
| 147 | * The caller owns the returned array and must uprv_free() it. |
| 148 | * Returns NULL if called before upvec_compact(). |
| 149 | */ |
| 150 | U_CAPI uint32_t * U_EXPORT2 |
| 151 | upvec_cloneArray(const UPropsVectors *pv, |
| 152 | int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode); |
| 153 | |
| 154 | /* |
| 155 | * Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted |
| 156 | * vectors array, and freeze the trie. |
| 157 | */ |
| 158 | U_CAPI UTrie2 * U_EXPORT2 |
| 159 | upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode); |
| 160 | |
| 161 | struct UPVecToUTrie2Context { |
| 162 | UTrie2 *trie; |
| 163 | int32_t initialValue; |
| 164 | int32_t errorValue; |
| 165 | int32_t maxValue; |
| 166 | }; |
| 167 | typedef struct UPVecToUTrie2Context UPVecToUTrie2Context; |
| 168 | |
| 169 | /* context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values */ |
| 170 | U_CAPI void U_CALLCONV |
| 171 | upvec_compactToUTrie2Handler(void *context, |
| 172 | UChar32 start, UChar32 end, |
| 173 | int32_t rowIndex, uint32_t *row, int32_t columns, |
| 174 | UErrorCode *pErrorCode); |
| 175 | |
| 176 | U_CDECL_END |
| 177 | |
| 178 | #endif |