blob: 54f37fd7e2f55bcdb038464f028915d0bb348e21 [file] [log] [blame]
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
3
4#include "unicode/utypes.h"
5
6#if !UCONFIG_NO_FORMATTING
7#ifndef __NUMBER_PATTERNSTRING_H__
8#define __NUMBER_PATTERNSTRING_H__
9
10
11#include <cstdint>
12#include "unicode/unum.h"
13#include "unicode/unistr.h"
14#include "number_types.h"
15#include "number_decimalquantity.h"
16#include "number_decimfmtprops.h"
17#include "number_affixutils.h"
18
19U_NAMESPACE_BEGIN namespace number {
20namespace impl {
21
// Forward declaration: PatternParser is named as a friend of ParsedPatternInfo
// before its full class definition appears later in this header.
class PatternParser;
24
/**
 * Identifies which set of sign-related subpattern rules is used for rendering.
 *
 * Note: the order of fields in this enum matters for parsing.
 * The enumerators are implicitly numbered from 0, so PATTERN_SIGN_TYPE_COUNT
 * equals the number of real sign types preceding it.
 */
enum PatternSignType {
    /** Render using normal positive subpattern rules */
    PATTERN_SIGN_TYPE_POS,
    /** Render using rules to force the display of a plus sign */
    PATTERN_SIGN_TYPE_POS_SIGN,
    /** Render using negative subpattern rules */
    PATTERN_SIGN_TYPE_NEG,
    /** Count for looping over the possibilities */
    PATTERN_SIGN_TYPE_COUNT
};
36
Victor Chang73229502020-09-17 13:39:19 +010037// Exported as U_I18N_API because it is a public member field of exported ParsedSubpatternInfo
38struct U_I18N_API Endpoints {
39 int32_t start = 0;
40 int32_t end = 0;
41};
42
43// Exported as U_I18N_API because it is a public member field of exported ParsedPatternInfo
44struct U_I18N_API ParsedSubpatternInfo {
45 uint64_t groupingSizes = 0x0000ffffffff0000L;
46 int32_t integerLeadingHashSigns = 0;
47 int32_t integerTrailingHashSigns = 0;
48 int32_t integerNumerals = 0;
49 int32_t integerAtSigns = 0;
50 int32_t integerTotal = 0; // for convenience
51 int32_t fractionNumerals = 0;
52 int32_t fractionHashSigns = 0;
53 int32_t fractionTotal = 0; // for convenience
54 bool hasDecimal = false;
55 int32_t widthExceptAffixes = 0;
56 // Note: NullableValue causes issues here with std::move.
57 bool hasPadding = false;
58 UNumberFormatPadPosition paddingLocation = UNUM_PAD_BEFORE_PREFIX;
59 DecimalQuantity rounding;
60 bool exponentHasPlusSign = false;
61 int32_t exponentZeros = 0;
62 bool hasPercentSign = false;
63 bool hasPerMilleSign = false;
64 bool hasCurrencySign = false;
65 bool hasMinusSign = false;
66 bool hasPlusSign = false;
67
68 Endpoints prefixEndpoints;
69 Endpoints suffixEndpoints;
70 Endpoints paddingEndpoints;
71};
72
73// Exported as U_I18N_API because it is needed for the unit test PatternStringTest
74struct U_I18N_API ParsedPatternInfo : public AffixPatternProvider, public UMemory {
75 UnicodeString pattern;
76 ParsedSubpatternInfo positive;
77 ParsedSubpatternInfo negative;
78
79 ParsedPatternInfo()
80 : state(this->pattern), currentSubpattern(nullptr) {}
81
82 ~ParsedPatternInfo() U_OVERRIDE = default;
83
84 // Need to declare this explicitly because of the destructor
85 ParsedPatternInfo& operator=(ParsedPatternInfo&& src) U_NOEXCEPT = default;
86
87 static int32_t getLengthFromEndpoints(const Endpoints& endpoints);
88
89 char16_t charAt(int32_t flags, int32_t index) const U_OVERRIDE;
90
91 int32_t length(int32_t flags) const U_OVERRIDE;
92
93 UnicodeString getString(int32_t flags) const U_OVERRIDE;
94
95 bool positiveHasPlusSign() const U_OVERRIDE;
96
97 bool hasNegativeSubpattern() const U_OVERRIDE;
98
99 bool negativeHasMinusSign() const U_OVERRIDE;
100
101 bool hasCurrencySign() const U_OVERRIDE;
102
103 bool containsSymbolType(AffixPatternType type, UErrorCode& status) const U_OVERRIDE;
104
105 bool hasBody() const U_OVERRIDE;
106
107 private:
108 struct U_I18N_API ParserState {
109 const UnicodeString& pattern; // reference to the parent
110 int32_t offset = 0;
111
112 explicit ParserState(const UnicodeString& _pattern)
113 : pattern(_pattern) {}
114
115 ParserState& operator=(ParserState&& src) U_NOEXCEPT {
116 // Leave pattern reference alone; it will continue to point to the same place in memory,
117 // which gets overwritten by ParsedPatternInfo's implicit move assignment.
118 offset = src.offset;
119 return *this;
120 }
121
122 UChar32 peek();
123
124 UChar32 next();
125
126 // TODO: We don't currently do anything with the message string.
127 // This method is here as a shell for Java compatibility.
128 inline void toParseException(const char16_t* message) { (void) message; }
129 } state;
130
131 // NOTE: In Java, these are written as pure functions.
132 // In C++, they're written as methods.
133 // The behavior is the same.
134
135 // Mutable transient pointer:
136 ParsedSubpatternInfo* currentSubpattern;
137
138 // In Java, "negative == null" tells us whether or not we had a negative subpattern.
139 // In C++, we need to remember in another boolean.
140 bool fHasNegativeSubpattern = false;
141
142 const Endpoints& getEndpoints(int32_t flags) const;
143
144 /** Run the recursive descent parser. */
145 void consumePattern(const UnicodeString& patternString, UErrorCode& status);
146
147 void consumeSubpattern(UErrorCode& status);
148
149 void consumePadding(PadPosition paddingLocation, UErrorCode& status);
150
151 void consumeAffix(Endpoints& endpoints, UErrorCode& status);
152
153 void consumeLiteral(UErrorCode& status);
154
155 void consumeFormat(UErrorCode& status);
156
157 void consumeIntegerFormat(UErrorCode& status);
158
159 void consumeFractionFormat(UErrorCode& status);
160
161 void consumeExponent(UErrorCode& status);
162
163 friend class PatternParser;
164};
165
/**
 * Option passed to the PatternParser::parseTo*Properties functions to select whether
 * rounding information from the pattern is left out of the resulting properties.
 * The numeric values are assigned explicitly.
 */
enum IgnoreRounding {
    IGNORE_ROUNDING_NEVER = 0,
    IGNORE_ROUNDING_IF_CURRENCY = 1,
    IGNORE_ROUNDING_ALWAYS = 2
};
169
170class U_I18N_API PatternParser {
171 public:
172 /**
173 * Runs the recursive descent parser on the given pattern string, returning a data structure with raw information
174 * about the pattern string.
175 *
176 * <p>
177 * To obtain a more useful form of the data, consider using {@link #parseToProperties} instead.
178 *
179 * TODO: Change argument type to const char16_t* instead of UnicodeString?
180 *
181 * @param patternString
182 * The LDML decimal format pattern (Excel-style pattern) to parse.
183 * @return The results of the parse.
184 */
185 static void parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo,
186 UErrorCode& status);
187
188 /**
189 * Parses a pattern string into a new property bag.
190 *
191 * @param pattern
192 * The pattern string, like "#,##0.00"
193 * @param ignoreRounding
194 * Whether to leave out rounding information (minFrac, maxFrac, and rounding increment) when parsing the
195 * pattern. This may be desirable if a custom rounding mode, such as CurrencyUsage, is to be used
196 * instead.
197 * @return A property bag object.
198 * @throws IllegalArgumentException
199 * If there is a syntax error in the pattern string.
200 */
201 static DecimalFormatProperties parseToProperties(const UnicodeString& pattern,
202 IgnoreRounding ignoreRounding, UErrorCode& status);
203
204 static DecimalFormatProperties parseToProperties(const UnicodeString& pattern, UErrorCode& status);
205
206 /**
207 * Parses a pattern string into an existing property bag. All properties that can be encoded into a pattern string
208 * will be overwritten with either their default value or with the value coming from the pattern string. Properties
209 * that cannot be encoded into a pattern string, such as rounding mode, are not modified.
210 *
211 * @param pattern
212 * The pattern string, like "#,##0.00"
213 * @param properties
214 * The property bag object to overwrite.
215 * @param ignoreRounding
216 * See {@link #parseToProperties(String pattern, int ignoreRounding)}.
217 * @throws IllegalArgumentException
218 * If there was a syntax error in the pattern string.
219 */
220 static void parseToExistingProperties(const UnicodeString& pattern,
221 DecimalFormatProperties& properties,
222 IgnoreRounding ignoreRounding, UErrorCode& status);
223
224 private:
225 static void parseToExistingPropertiesImpl(const UnicodeString& pattern,
226 DecimalFormatProperties& properties,
227 IgnoreRounding ignoreRounding, UErrorCode& status);
228
229 /** Finalizes the temporary data stored in the ParsedPatternInfo to the Properties. */
230 static void patternInfoToProperties(DecimalFormatProperties& properties,
231 ParsedPatternInfo& patternInfo, IgnoreRounding _ignoreRounding,
232 UErrorCode& status);
233};
234
235class U_I18N_API PatternStringUtils {
236 public:
237 /**
238 * Determine whether a given roundingIncrement should be ignored for formatting
239 * based on the current maxFrac value (maximum fraction digits). For example a
240 * roundingIncrement of 0.01 should be ignored if maxFrac is 1, but not if maxFrac
241 * is 2 or more. Note that roundingIncrements are rounded up in significance, so
242 * a roundingIncrement of 0.006 is treated like 0.01 for this determination, i.e.
243 * it should not be ignored if maxFrac is 2 or more (but a roundingIncrement of
244 * 0.005 is treated like 0.001 for significance).
245 *
246 * This test is needed for both NumberPropertyMapper::oldToNew and
247 * PatternStringUtils::propertiesToPatternString. In Java it cannot be
248 * exported by NumberPropertyMapper (package provate) so it is in
249 * PatternStringUtils, do the same in C.
250 *
251 * @param roundIncr
252 * The roundingIncrement to be checked. Must be non-zero.
253 * @param maxFrac
254 * The current maximum fraction digits value.
255 * @return true if roundIncr should be ignored for formatting.
256 */
257 static bool ignoreRoundingIncrement(double roundIncr, int32_t maxFrac);
258
259 /**
260 * Creates a pattern string from a property bag.
261 *
262 * <p>
263 * Since pattern strings support only a subset of the functionality available in a property bag, a new property bag
264 * created from the string returned by this function may not be the same as the original property bag.
265 *
266 * @param properties
267 * The property bag to serialize.
268 * @return A pattern string approximately serializing the property bag.
269 */
270 static UnicodeString propertiesToPatternString(const DecimalFormatProperties& properties,
271 UErrorCode& status);
272
273
274 /**
275 * Converts a pattern between standard notation and localized notation. Localized notation means that instead of
276 * using generic placeholders in the pattern, you use the corresponding locale-specific characters instead. For
277 * example, in locale <em>fr-FR</em>, the period in the pattern "0.000" means "decimal" in standard notation (as it
278 * does in every other locale), but it means "grouping" in localized notation.
279 *
280 * <p>
281 * A greedy string-substitution strategy is used to substitute locale symbols. If two symbols are ambiguous or have
282 * the same prefix, the result is not well-defined.
283 *
284 * <p>
285 * Locale symbols are not allowed to contain the ASCII quote character.
286 *
287 * <p>
288 * This method is provided for backwards compatibility and should not be used in any new code.
289 *
290 * TODO(C++): This method is not yet implemented.
291 *
292 * @param input
293 * The pattern to convert.
294 * @param symbols
295 * The symbols corresponding to the localized pattern.
296 * @param toLocalized
297 * true to convert from standard to localized notation; false to convert from localized to standard
298 * notation.
299 * @return The pattern expressed in the other notation.
300 */
301 static UnicodeString convertLocalized(const UnicodeString& input, const DecimalFormatSymbols& symbols,
302 bool toLocalized, UErrorCode& status);
303
304 /**
305 * This method contains the heart of the logic for rendering LDML affix strings. It handles
306 * sign-always-shown resolution, whether to use the positive or negative subpattern, permille
307 * substitution, and plural forms for CurrencyPluralInfo.
308 */
309 static void patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix,
Victor Changd8aa9d52021-01-05 23:49:57 +0000310 PatternSignType patternSignType,
Victor Chang73229502020-09-17 13:39:19 +0100311 StandardPlural::Form plural, bool perMilleReplacesPercent,
312 UnicodeString& output);
313
Victor Changd8aa9d52021-01-05 23:49:57 +0000314 static PatternSignType resolveSignDisplay(UNumberSignDisplay signDisplay, Signum signum);
315
Victor Chang73229502020-09-17 13:39:19 +0100316 private:
317 /** @return The number of chars inserted. */
318 static int escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex,
319 UErrorCode& status);
320};
321
322} // namespace impl
323} // namespace number
324U_NAMESPACE_END
325
326
327#endif //__NUMBER_PATTERNSTRING_H__
328
329#endif /* #if !UCONFIG_NO_FORMATTING */