| // © 2018 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| |
| #include "unicode/utypes.h" |
| |
| #if !UCONFIG_NO_FORMATTING |
| #ifndef __NUMPARSE_AFFIXES_H__ |
| #define __NUMPARSE_AFFIXES_H__ |
| |
| #include "cmemory.h" |
| |
| #include "numparse_types.h" |
| #include "numparse_symbols.h" |
| #include "numparse_currency.h" |
| #include "number_affixutils.h" |
| #include "number_currencysymbols.h" |
| |
| U_NAMESPACE_BEGIN |
| namespace numparse { |
| namespace impl { |
| |
| // Forward-declaration of implementation classes for friending |
| class AffixPatternMatcherBuilder; |
| class AffixPatternMatcher; |
| |
| using ::icu::number::impl::AffixPatternProvider; |
| using ::icu::number::impl::TokenConsumer; |
| using ::icu::number::impl::CurrencySymbols; |
| |
| |
| class U_I18N_API CodePointMatcher : public NumberParseMatcher, public UMemory { |
| public: |
| CodePointMatcher() = default; // WARNING: Leaves the object in an unusable state |
| |
| CodePointMatcher(UChar32 cp); |
| |
| bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override; |
| |
| bool smokeTest(const StringSegment& segment) const override; |
| |
| UnicodeString toString() const override; |
| |
| private: |
| UChar32 fCp; |
| }; |
| |
| } // namespace impl |
| } // namespace numparse |
| |
| // Export a explicit template instantiations of MaybeStackArray, MemoryPool and CompactUnicodeString. |
| // When building DLLs for Windows this is required even though no direct access leaks out of the i18n library. |
| // (See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples.) |
| // Note: These need to be outside of the numparse::impl namespace, or Clang will generate a compile error. |
| #if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN |
| template class U_I18N_API MaybeStackArray<numparse::impl::CodePointMatcher*, 8>; |
| template class U_I18N_API MaybeStackArray<UChar, 4>; |
| template class U_I18N_API MemoryPool<numparse::impl::CodePointMatcher, 8>; |
| template class U_I18N_API numparse::impl::CompactUnicodeString<4>; |
| #endif |
| |
| namespace numparse { |
| namespace impl { |
| |
| struct AffixTokenMatcherSetupData { |
| const CurrencySymbols& currencySymbols; |
| const DecimalFormatSymbols& dfs; |
| IgnorablesMatcher& ignorables; |
| const Locale& locale; |
| parse_flags_t parseFlags; |
| }; |
| |
| |
| /** |
| * Small helper class that generates matchers for individual tokens for AffixPatternMatcher. |
| * |
| * In Java, this is called AffixTokenMatcherFactory (a "factory"). However, in C++, it is called a |
| * "warehouse", because in addition to generating the matchers, it also retains ownership of them. The |
| * warehouse must stay in scope for the whole lifespan of the AffixPatternMatcher that uses matchers from |
| * the warehouse. |
| * |
| * @author sffc |
| */ |
| // Exported as U_I18N_API for tests |
| class U_I18N_API AffixTokenMatcherWarehouse : public UMemory { |
| public: |
| AffixTokenMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state |
| |
| AffixTokenMatcherWarehouse(const AffixTokenMatcherSetupData* setupData); |
| |
| NumberParseMatcher& minusSign(); |
| |
| NumberParseMatcher& plusSign(); |
| |
| NumberParseMatcher& percent(); |
| |
| NumberParseMatcher& permille(); |
| |
| NumberParseMatcher& currency(UErrorCode& status); |
| |
| IgnorablesMatcher& ignorables(); |
| |
| NumberParseMatcher* nextCodePointMatcher(UChar32 cp, UErrorCode& status); |
| |
| private: |
| // NOTE: The following field may be unsafe to access after construction is done! |
| const AffixTokenMatcherSetupData* fSetupData; |
| |
| // NOTE: These are default-constructed and should not be used until initialized. |
| MinusSignMatcher fMinusSign; |
| PlusSignMatcher fPlusSign; |
| PercentMatcher fPercent; |
| PermilleMatcher fPermille; |
| CombinedCurrencyMatcher fCurrency; |
| |
| // Use a child class for code point matchers, since it requires non-default operators. |
| MemoryPool<CodePointMatcher> fCodePoints; |
| |
| friend class AffixPatternMatcherBuilder; |
| friend class AffixPatternMatcher; |
| }; |
| |
| |
| class AffixPatternMatcherBuilder : public TokenConsumer, public MutableMatcherCollection { |
| public: |
| AffixPatternMatcherBuilder(const UnicodeString& pattern, AffixTokenMatcherWarehouse& warehouse, |
| IgnorablesMatcher* ignorables); |
| |
| void consumeToken(::icu::number::impl::AffixPatternType type, UChar32 cp, UErrorCode& status) override; |
| |
| /** NOTE: You can build only once! */ |
| AffixPatternMatcher build(); |
| |
| private: |
| ArraySeriesMatcher::MatcherArray fMatchers; |
| int32_t fMatchersLen; |
| int32_t fLastTypeOrCp; |
| |
| const UnicodeString& fPattern; |
| AffixTokenMatcherWarehouse& fWarehouse; |
| IgnorablesMatcher* fIgnorables; |
| |
| void addMatcher(NumberParseMatcher& matcher) override; |
| }; |
| |
| |
| // Exported as U_I18N_API for tests |
| class U_I18N_API AffixPatternMatcher : public ArraySeriesMatcher { |
| public: |
| AffixPatternMatcher() = default; // WARNING: Leaves the object in an unusable state |
| |
| static AffixPatternMatcher fromAffixPattern(const UnicodeString& affixPattern, |
| AffixTokenMatcherWarehouse& warehouse, |
| parse_flags_t parseFlags, bool* success, |
| UErrorCode& status); |
| |
| UnicodeString getPattern() const; |
| |
| bool operator==(const AffixPatternMatcher& other) const; |
| |
| private: |
| CompactUnicodeString<4> fPattern; |
| |
| AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, const UnicodeString& pattern); |
| |
| friend class AffixPatternMatcherBuilder; |
| }; |
| |
| |
| class AffixMatcher : public NumberParseMatcher, public UMemory { |
| public: |
| AffixMatcher() = default; // WARNING: Leaves the object in an unusable state |
| |
| AffixMatcher(AffixPatternMatcher* prefix, AffixPatternMatcher* suffix, result_flags_t flags); |
| |
| bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override; |
| |
| void postProcess(ParsedNumber& result) const override; |
| |
| bool smokeTest(const StringSegment& segment) const override; |
| |
| int8_t compareTo(const AffixMatcher& rhs) const; |
| |
| UnicodeString toString() const override; |
| |
| private: |
| AffixPatternMatcher* fPrefix; |
| AffixPatternMatcher* fSuffix; |
| result_flags_t fFlags; |
| }; |
| |
| |
| /** |
| * A C++-only class to retain ownership of the AffixMatchers needed for parsing. |
| */ |
| class AffixMatcherWarehouse { |
| public: |
| AffixMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state |
| |
| AffixMatcherWarehouse(AffixTokenMatcherWarehouse* tokenWarehouse); |
| |
| void createAffixMatchers(const AffixPatternProvider& patternInfo, MutableMatcherCollection& output, |
| const IgnorablesMatcher& ignorables, parse_flags_t parseFlags, |
| UErrorCode& status); |
| |
| private: |
| // 9 is the limit: positive, zero, and negative, each with prefix, suffix, and prefix+suffix |
| AffixMatcher fAffixMatchers[9]; |
| // 6 is the limit: positive, zero, and negative, a prefix and a suffix for each |
| AffixPatternMatcher fAffixPatternMatchers[6]; |
| // Reference to the warehouse for tokens used by the AffixPatternMatchers |
| AffixTokenMatcherWarehouse* fTokenWarehouse; |
| |
| friend class AffixMatcher; |
| |
| static bool isInteresting(const AffixPatternProvider& patternInfo, const IgnorablesMatcher& ignorables, |
| parse_flags_t parseFlags, UErrorCode& status); |
| }; |
| |
| |
| } // namespace impl |
| } // namespace numparse |
| U_NAMESPACE_END |
| |
| #endif //__NUMPARSE_AFFIXES_H__ |
| #endif /* #if !UCONFIG_NO_FORMATTING */ |