Victor Chang | 7322950 | 2020-09-17 13:39:19 +0100 | [diff] [blame] | 1 | // © 2016 and later: Unicode, Inc. and others. |
| 2 | // License & terms of use: http://www.unicode.org/copyright.html |
| 3 | /* |
| 4 | ******************************************************************************* |
| 5 | * Copyright (C) 2007-2016, International Business Machines Corporation and |
| 6 | * others. All Rights Reserved. |
| 7 | ******************************************************************************* |
| 8 | * |
| 9 | * File DTPTNGEN_IMPL.H |
| 10 | * |
| 11 | ******************************************************************************* |
| 12 | */ |
| 13 | |
| 14 | #ifndef __DTPTNGEN_IMPL_H__ |
| 15 | #define __DTPTNGEN_IMPL_H__ |
| 16 | |
| 17 | #include "unicode/udatpg.h" |
| 18 | |
| 19 | #include "unicode/strenum.h" |
| 20 | #include "unicode/unistr.h" |
| 21 | #include "uvector.h" |
| 22 | |
| 23 | // TODO(claireho): Split off Builder class. |
| 24 | // TODO(claireho): If splitting off Builder class: As subclass or independent? |
| 25 | |
// ---- Size limits ----------------------------------------------------------
// MAX_PATTERN_ENTRIES sizes PatternMap::boot and MAX_DT_TOKEN sizes
// FormatParser::items further down in this header. The remaining limits are
// not referenced in this header (presumably used by the implementation file).
#define MAX_PATTERN_ENTRIES 52
#define MAX_CLDR_FIELD_LEN 60
#define MAX_DT_TOKEN 50
#define MAX_RESOURCE_FIELD 12
#define MAX_AVAILABLE_FORMATS 12
#define NONE 0
// NOTE(review): EXTRA_FIELD / MISSING_FIELD look like distance penalties used
// together with DistanceInfo -- confirm against the implementation file.
#define EXTRA_FIELD 0x10000
#define MISSING_FIELD 0x1000
#define MAX_STRING_ENUMERATION 200

// ---- Punctuation, as UChar code units -------------------------------------
#define SINGLE_QUOTE ((UChar)0x0027)
#define FORWARDSLASH ((UChar)0x002F)
#define BACKSLASH ((UChar)0x005C)
#define SPACE ((UChar)0x0020)
#define QUOTATION_MARK ((UChar)0x0022)
#define ASTERISK ((UChar)0x002A)
// NOTE(review): "PLUSSITN" looks like a misspelling of PLUSSIGN (0x2B is '+').
// The name is kept as-is because code outside this header may reference it.
#define PLUSSITN ((UChar)0x002B)
#define COMMA ((UChar)0x002C)
#define HYPHEN ((UChar)0x002D)
#define DOT ((UChar)0x002E)
#define COLON ((UChar)0x003A)

// ---- Upper-case ASCII letters, as UChar code units ------------------------
// Not every letter is defined (for example there is no CAP_I or CAP_N).
#define CAP_A ((UChar)0x0041)
#define CAP_B ((UChar)0x0042)
#define CAP_C ((UChar)0x0043)
#define CAP_D ((UChar)0x0044)
#define CAP_E ((UChar)0x0045)
#define CAP_F ((UChar)0x0046)
#define CAP_G ((UChar)0x0047)
#define CAP_H ((UChar)0x0048)
#define CAP_J ((UChar)0x004A)
#define CAP_K ((UChar)0x004B)
#define CAP_L ((UChar)0x004C)
#define CAP_M ((UChar)0x004D)
#define CAP_O ((UChar)0x004F)
#define CAP_Q ((UChar)0x0051)
#define CAP_S ((UChar)0x0053)
#define CAP_T ((UChar)0x0054)
#define CAP_U ((UChar)0x0055)
#define CAP_V ((UChar)0x0056)
#define CAP_W ((UChar)0x0057)
#define CAP_X ((UChar)0x0058)
#define CAP_Y ((UChar)0x0059)
#define CAP_Z ((UChar)0x005A)

#define LOWLINE ((UChar)0x005F)

// ---- Lower-case ASCII letters a-z, as UChar code units --------------------
#define LOW_A ((UChar)0x0061)
#define LOW_B ((UChar)0x0062)
#define LOW_C ((UChar)0x0063)
#define LOW_D ((UChar)0x0064)
#define LOW_E ((UChar)0x0065)
#define LOW_F ((UChar)0x0066)
#define LOW_G ((UChar)0x0067)
#define LOW_H ((UChar)0x0068)
#define LOW_I ((UChar)0x0069)
#define LOW_J ((UChar)0x006A)
#define LOW_K ((UChar)0x006B)
#define LOW_L ((UChar)0x006C)
#define LOW_M ((UChar)0x006D)
#define LOW_N ((UChar)0x006E)
#define LOW_O ((UChar)0x006F)
#define LOW_P ((UChar)0x0070)
#define LOW_Q ((UChar)0x0071)
#define LOW_R ((UChar)0x0072)
#define LOW_S ((UChar)0x0073)
#define LOW_T ((UChar)0x0074)
#define LOW_U ((UChar)0x0075)
#define LOW_V ((UChar)0x0076)
#define LOW_W ((UChar)0x0077)
#define LOW_X ((UChar)0x0078)
#define LOW_Y ((UChar)0x0079)
#define LOW_Z ((UChar)0x007A)

// ---- DT_* codes -----------------------------------------------------------
// NOTE(review): presumably the values stored in dtTypeElem::type -- confirm
// against the implementation file. The negative values are parenthesized so
// that each macro expands to a single self-contained expression.
#define DT_NARROW (-0x101)
#define DT_SHORTER (-0x102)
#define DT_SHORT (-0x103)
#define DT_LONG (-0x104)
#define DT_NUMERIC 0x100
#define DT_DELTA 0x10
| 101 | |
| 102 | U_NAMESPACE_BEGIN |
| 103 | |
| 104 | const int32_t UDATPG_FRACTIONAL_MASK = 1<<UDATPG_FRACTIONAL_SECOND_FIELD; |
| 105 | const int32_t UDATPG_SECOND_AND_FRACTIONAL_MASK = (1<<UDATPG_SECOND_FIELD) | (1<<UDATPG_FRACTIONAL_SECOND_FIELD); |
| 106 | |
// Selector passed to the DTSkeletonEnumeration constructor.
// NOTE(review): presumably chooses which kind of string the enumeration
// yields (base skeleton, skeleton, or pattern) -- confirm in the
// implementation file.
typedef enum dtStrEnum {
    DT_BASESKELETON = 0,
    DT_SKELETON = 1,
    DT_PATTERN = 2
} dtStrEnum;
| 112 | |
| 113 | typedef struct dtTypeElem { |
| 114 | UChar patternChar; |
| 115 | UDateTimePatternField field; |
| 116 | int16_t type; |
| 117 | int16_t minLen; |
| 118 | int16_t weight; |
| 119 | } dtTypeElem; |
| 120 | |
| 121 | // A compact storage mechanism for skeleton field strings. Several dozen of these will be created |
| 122 | // for a typical DateTimePatternGenerator instance. |
| 123 | class SkeletonFields : public UMemory { |
| 124 | public: |
| 125 | SkeletonFields(); |
| 126 | void clear(); |
| 127 | void copyFrom(const SkeletonFields& other); |
| 128 | void clearField(int32_t field); |
| 129 | UChar getFieldChar(int32_t field) const; |
| 130 | int32_t getFieldLength(int32_t field) const; |
| 131 | void populate(int32_t field, const UnicodeString& value); |
| 132 | void populate(int32_t field, UChar repeatChar, int32_t repeatCount); |
| 133 | UBool isFieldEmpty(int32_t field) const; |
| 134 | UnicodeString& appendTo(UnicodeString& string) const; |
| 135 | UnicodeString& appendFieldTo(int32_t field, UnicodeString& string) const; |
| 136 | UChar getFirstChar() const; |
| 137 | inline UBool operator==(const SkeletonFields& other) const; |
| 138 | inline UBool operator!=(const SkeletonFields& other) const; |
| 139 | |
| 140 | private: |
| 141 | int8_t chars[UDATPG_FIELD_COUNT]; |
| 142 | int8_t lengths[UDATPG_FIELD_COUNT]; |
| 143 | }; |
| 144 | |
| 145 | inline UBool SkeletonFields::operator==(const SkeletonFields& other) const { |
| 146 | return (uprv_memcmp(chars, other.chars, sizeof(chars)) == 0 |
| 147 | && uprv_memcmp(lengths, other.lengths, sizeof(lengths)) == 0); |
| 148 | } |
| 149 | |
| 150 | inline UBool SkeletonFields::operator!=(const SkeletonFields& other) const { |
| 151 | return (! operator==(other)); |
| 152 | } |
| 153 | |
| 154 | class PtnSkeleton : public UMemory { |
| 155 | public: |
| 156 | int32_t type[UDATPG_FIELD_COUNT]; |
| 157 | SkeletonFields original; |
| 158 | SkeletonFields baseOriginal; |
| 159 | UBool addedDefaultDayPeriod; |
| 160 | |
| 161 | PtnSkeleton(); |
| 162 | PtnSkeleton(const PtnSkeleton& other); |
| 163 | void copyFrom(const PtnSkeleton& other); |
| 164 | void clear(); |
| 165 | UBool equals(const PtnSkeleton& other) const; |
| 166 | UnicodeString getSkeleton() const; |
| 167 | UnicodeString getBaseSkeleton() const; |
| 168 | UChar getFirstChar() const; |
| 169 | |
| 170 | // TODO: Why is this virtual, as well as the other destructors in this file? We don't want |
| 171 | // vtables when we don't use class objects polymorphically. |
| 172 | virtual ~PtnSkeleton(); |
| 173 | }; |
| 174 | |
| 175 | class PtnElem : public UMemory { |
| 176 | public: |
| 177 | UnicodeString basePattern; |
| 178 | LocalPointer<PtnSkeleton> skeleton; |
| 179 | UnicodeString pattern; |
| 180 | UBool skeletonWasSpecified; // if specified in availableFormats, not derived |
| 181 | LocalPointer<PtnElem> next; |
| 182 | |
| 183 | PtnElem(const UnicodeString &basePattern, const UnicodeString &pattern); |
| 184 | virtual ~PtnElem(); |
| 185 | }; |
| 186 | |
| 187 | class FormatParser : public UMemory { |
| 188 | public: |
| 189 | UnicodeString items[MAX_DT_TOKEN]; |
| 190 | int32_t itemNumber; |
| 191 | |
| 192 | FormatParser(); |
| 193 | virtual ~FormatParser(); |
| 194 | void set(const UnicodeString& patternString); |
| 195 | void getQuoteLiteral(UnicodeString& quote, int32_t *itemIndex); |
| 196 | UBool isPatternSeparator(const UnicodeString& field) const; |
| 197 | static UBool isQuoteLiteral(const UnicodeString& s); |
Victor Chang | ce4bf3c | 2021-01-19 16:34:24 +0000 | [diff] [blame] | 198 | static int32_t getCanonicalIndex(const UnicodeString& s) { return getCanonicalIndex(s, true); } |
Victor Chang | 7322950 | 2020-09-17 13:39:19 +0100 | [diff] [blame] | 199 | static int32_t getCanonicalIndex(const UnicodeString& s, UBool strict); |
| 200 | |
| 201 | private: |
| 202 | typedef enum TokenStatus { |
| 203 | START, |
| 204 | ADD_TOKEN, |
| 205 | SYNTAX_ERROR, |
| 206 | DONE |
| 207 | } TokenStatus; |
| 208 | |
| 209 | TokenStatus status; |
| 210 | virtual TokenStatus setTokens(const UnicodeString& pattern, int32_t startPos, int32_t *len); |
| 211 | }; |
| 212 | |
| 213 | class DistanceInfo : public UMemory { |
| 214 | public: |
| 215 | int32_t missingFieldMask; |
| 216 | int32_t extraFieldMask; |
| 217 | |
| 218 | DistanceInfo() {} |
| 219 | virtual ~DistanceInfo(); |
| 220 | void clear() { missingFieldMask = extraFieldMask = 0; } |
| 221 | void setTo(const DistanceInfo& other); |
| 222 | void addMissing(int32_t field) { missingFieldMask |= (1<<field); } |
| 223 | void addExtra(int32_t field) { extraFieldMask |= (1<<field); } |
| 224 | }; |
| 225 | |
| 226 | class DateTimeMatcher: public UMemory { |
| 227 | public: |
| 228 | PtnSkeleton skeleton; |
| 229 | |
| 230 | void getBasePattern(UnicodeString& basePattern); |
| 231 | UnicodeString getPattern(); |
| 232 | void set(const UnicodeString& pattern, FormatParser* fp); |
| 233 | void set(const UnicodeString& pattern, FormatParser* fp, PtnSkeleton& skeleton); |
| 234 | void copyFrom(const PtnSkeleton& skeleton); |
| 235 | void copyFrom(); |
| 236 | PtnSkeleton* getSkeletonPtr(); |
| 237 | UBool equals(const DateTimeMatcher* other) const; |
| 238 | int32_t getDistance(const DateTimeMatcher& other, int32_t includeMask, DistanceInfo& distanceInfo) const; |
| 239 | DateTimeMatcher(); |
| 240 | DateTimeMatcher(const DateTimeMatcher& other); |
Victor Chang | d8aa9d5 | 2021-01-05 23:49:57 +0000 | [diff] [blame] | 241 | DateTimeMatcher& operator=(const DateTimeMatcher& other); |
Victor Chang | 7322950 | 2020-09-17 13:39:19 +0100 | [diff] [blame] | 242 | virtual ~DateTimeMatcher(); |
| 243 | int32_t getFieldMask() const; |
| 244 | }; |
| 245 | |
| 246 | class PatternMap : public UMemory { |
| 247 | public: |
| 248 | PtnElem *boot[MAX_PATTERN_ENTRIES]; |
| 249 | PatternMap(); |
| 250 | virtual ~PatternMap(); |
| 251 | void add(const UnicodeString& basePattern, const PtnSkeleton& skeleton, const UnicodeString& value, UBool skeletonWasSpecified, UErrorCode& status); |
| 252 | const UnicodeString* getPatternFromBasePattern(const UnicodeString& basePattern, UBool& skeletonWasSpecified) const; |
| 253 | const UnicodeString* getPatternFromSkeleton(const PtnSkeleton& skeleton, const PtnSkeleton** specifiedSkeletonPtr = 0) const; |
| 254 | void copyFrom(const PatternMap& other, UErrorCode& status); |
| 255 | PtnElem* getHeader(UChar baseChar) const; |
| 256 | UBool equals(const PatternMap& other) const; |
| 257 | private: |
| 258 | UBool isDupAllowed; |
| 259 | PtnElem* getDuplicateElem(const UnicodeString& basePattern, const PtnSkeleton& skeleton, PtnElem *baseElem); |
| 260 | }; // end PatternMap |
| 261 | |
| 262 | class PatternMapIterator : public UMemory { |
| 263 | public: |
| 264 | PatternMapIterator(UErrorCode &status); |
| 265 | virtual ~PatternMapIterator(); |
| 266 | void set(PatternMap& patternMap); |
| 267 | PtnSkeleton* getSkeleton() const; |
| 268 | UBool hasNext() const; |
| 269 | DateTimeMatcher& next(); |
| 270 | private: |
| 271 | int32_t bootIndex; |
| 272 | PtnElem *nodePtr; |
| 273 | LocalPointer<DateTimeMatcher> matcher; |
| 274 | PatternMap *patternMap; |
| 275 | }; |
| 276 | |
| 277 | class DTSkeletonEnumeration : public StringEnumeration { |
| 278 | public: |
| 279 | DTSkeletonEnumeration(PatternMap& patternMap, dtStrEnum type, UErrorCode& status); |
| 280 | virtual ~DTSkeletonEnumeration(); |
| 281 | static UClassID U_EXPORT2 getStaticClassID(void); |
| 282 | virtual UClassID getDynamicClassID(void) const; |
| 283 | virtual const UnicodeString* snext(UErrorCode& status); |
| 284 | virtual void reset(UErrorCode& status); |
| 285 | virtual int32_t count(UErrorCode& status) const; |
| 286 | private: |
| 287 | int32_t pos; |
| 288 | UBool isCanonicalItem(const UnicodeString& item); |
| 289 | LocalPointer<UVector> fSkeletons; |
| 290 | }; |
| 291 | |
| 292 | class DTRedundantEnumeration : public StringEnumeration { |
| 293 | public: |
| 294 | DTRedundantEnumeration(); |
| 295 | virtual ~DTRedundantEnumeration(); |
| 296 | static UClassID U_EXPORT2 getStaticClassID(void); |
| 297 | virtual UClassID getDynamicClassID(void) const; |
| 298 | virtual const UnicodeString* snext(UErrorCode& status); |
| 299 | virtual void reset(UErrorCode& status); |
| 300 | virtual int32_t count(UErrorCode& status) const; |
| 301 | void add(const UnicodeString &pattern, UErrorCode& status); |
| 302 | private: |
| 303 | int32_t pos; |
| 304 | UBool isCanonicalItem(const UnicodeString& item) const; |
| 305 | LocalPointer<UVector> fPatterns; |
| 306 | }; |
| 307 | |
| 308 | U_NAMESPACE_END |
| 309 | |
| 310 | #endif |