Fredrik Roubert | 0596fae | 2017-04-18 21:34:02 +0200 | [diff] [blame^] | 1 | // © 2016 and later: Unicode, Inc. and others. |
Fredrik Roubert | 64339d3 | 2016-10-21 19:43:16 +0200 | [diff] [blame] | 2 | // License & terms of use: http://www.unicode.org/copyright.html |
Jean-Baptiste Queru | ac04d0b | 2009-07-17 17:11:19 -0700 | [diff] [blame] | 3 | /* |
| 4 | ********************************************************************** |
| 5 | * Copyright (c) 2000-2005, International Business Machines |
| 6 | * Corporation and others. All Rights Reserved. |
| 7 | ********************************************************************** |
| 8 | * Date Name Description |
| 9 | * 02/04/00 aliu Creation. |
| 10 | ********************************************************************** |
| 11 | */ |
| 12 | #ifndef SYMTABLE_H |
| 13 | #define SYMTABLE_H |
| 14 | |
| 15 | #include "unicode/utypes.h" |
| 16 | #include "unicode/uobject.h" |
| 17 | |
| 18 | /** |
| 19 | * \file |
| 20 | * \brief C++ API: An interface that defines both lookup protocol and parsing of |
| 21 | * symbolic names. |
| 22 | */ |
| 23 | |
| 24 | U_NAMESPACE_BEGIN |
| 25 | |
| 26 | class ParsePosition; |
| 27 | class UnicodeFunctor; |
| 28 | class UnicodeSet; |
| 29 | class UnicodeString; |
| 30 | |
| 31 | /** |
| 32 | * An interface that defines both lookup protocol and parsing of |
| 33 | * symbolic names. |
| 34 | * |
| 35 | * <p>A symbol table maintains two kinds of mappings. The first is |
| 36 | * between symbolic names and their values. For example, if the |
| 37 | * variable with the name "start" is set to the value "alpha" |
| 38 | * (perhaps, though not necessarily, through an expression such as |
| 39 | * "$start=alpha"), then the call lookup("start") will return the |
| 40 | * char[] array ['a', 'l', 'p', 'h', 'a']. |
| 41 | * |
| 42 | * <p>The second kind of mapping is between character values and |
| 43 | * UnicodeMatcher objects. This is used by RuleBasedTransliterator, |
| 44 | * which uses characters in the private use area to represent objects |
| 45 | * such as UnicodeSets. If U+E015 is mapped to the UnicodeSet [a-z], |
| 46 | * then lookupMatcher(0xE015) will return the UnicodeSet [a-z]. |
| 47 | * |
| 48 | * <p>Finally, a symbol table defines parsing behavior for symbolic |
| 49 | * names. All symbolic names start with the SYMBOL_REF character. |
| 50 | * When a parser encounters this character, it calls parseReference() |
| 51 | * with the position immediately following the SYMBOL_REF. The symbol |
| 52 | * table parses the name, if there is one, and returns it. |
| 53 | * |
| 54 | * @stable ICU 2.8 |
| 55 | */ |
| 56 | class U_COMMON_API SymbolTable /* not : public UObject because this is an interface/mixin class */ { |
| 57 | public: |
| 58 | |
| 59 | /** |
| 60 | * The character preceding a symbol reference name. |
| 61 | * @stable ICU 2.8 |
| 62 | */ |
| 63 | enum { SYMBOL_REF = 0x0024 /*$*/ }; |
| 64 | |
| 65 | /** |
| 66 | * Destructor. |
| 67 | * @stable ICU 2.8 |
| 68 | */ |
| 69 | virtual ~SymbolTable(); |
| 70 | |
| 71 | /** |
| 72 | * Lookup the characters associated with this string and return it. |
| 73 | * Return <tt>NULL</tt> if no such name exists. The resultant |
| 74 | * string may have length zero. |
| 75 | * @param s the symbolic name to lookup |
| 76 | * @return a string containing the name's value, or <tt>NULL</tt> if |
| 77 | * there is no mapping for s. |
| 78 | * @stable ICU 2.8 |
| 79 | */ |
| 80 | virtual const UnicodeString* lookup(const UnicodeString& s) const = 0; |
| 81 | |
| 82 | /** |
| 83 | * Lookup the UnicodeMatcher associated with the given character, and |
| 84 | * return it. Return <tt>NULL</tt> if not found. |
| 85 | * @param ch a 32-bit code point from 0 to 0x10FFFF inclusive. |
| 86 | * @return the UnicodeMatcher object represented by the given |
| 87 | * character, or NULL if there is no mapping for ch. |
| 88 | * @stable ICU 2.8 |
| 89 | */ |
| 90 | virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const = 0; |
| 91 | |
| 92 | /** |
| 93 | * Parse a symbol reference name from the given string, starting |
| 94 | * at the given position. If no valid symbol reference name is |
| 95 | * found, return the empty string and leave pos unchanged. That is, if the |
| 96 | * character at pos cannot start a name, or if pos is at or after |
| 97 | * text.length(), then return an empty string. This indicates an |
| 98 | * isolated SYMBOL_REF character. |
| 99 | * @param text the text to parse for the name |
| 100 | * @param pos on entry, the index of the first character to parse. |
| 101 | * This is the character following the SYMBOL_REF character. On |
| 102 | * exit, the index after the last parsed character. If the parse |
| 103 | * failed, pos is unchanged on exit. |
| 104 | * @param limit the index after the last character to be parsed. |
| 105 | * @return the parsed name, or an empty string if there is no |
| 106 | * valid symbolic name at the given position. |
| 107 | * @stable ICU 2.8 |
| 108 | */ |
| 109 | virtual UnicodeString parseReference(const UnicodeString& text, |
| 110 | ParsePosition& pos, int32_t limit) const = 0; |
| 111 | }; |
| 112 | U_NAMESPACE_END |
| 113 | |
| 114 | #endif |