blob: f284590264175bc4a7a79bb87873d89ad3c2d502 [file] [log] [blame]
// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
Steve Blocka7e24c12009-10-30 11:49:00 +00004
5#ifndef V8_DATEPARSER_H_
6#define V8_DATEPARSER_H_
7
Ben Murdochb8a8cc12014-11-26 15:28:44 +00008#include "src/allocation.h"
9#include "src/char-predicates-inl.h"
Steve Blocka7e24c12009-10-30 11:49:00 +000010
11namespace v8 {
12namespace internal {
13
14class DateParser : public AllStatic {
15 public:
Steve Blocka7e24c12009-10-30 11:49:00 +000016 // Parse the string as a date. If parsing succeeds, return true after
17 // filling out the output array as follows (all integers are Smis):
18 // [0]: year
19 // [1]: month (0 = Jan, 1 = Feb, ...)
20 // [2]: day
21 // [3]: hour
22 // [4]: minute
23 // [5]: second
Steve Block6ded16b2010-05-10 14:33:55 +010024 // [6]: millisecond
25 // [7]: UTC offset in seconds, or null value if no timezone specified
Steve Blocka7e24c12009-10-30 11:49:00 +000026 // If parsing fails, return false (content of output array is not defined).
27 template <typename Char>
Ben Murdoch8b112d22011-06-08 16:22:53 +010028 static bool Parse(Vector<Char> str, FixedArray* output, UnicodeCache* cache);
Steve Blocka7e24c12009-10-30 11:49:00 +000029
// Slot indices of the output array, in the order documented for Parse().
// OUTPUT_SIZE comes last and therefore equals the number of slots.
enum {
  YEAR,
  MONTH,
  DAY,
  HOUR,
  MINUTE,
  SECOND,
  MILLISECOND,
  UTC_OFFSET,
  OUTPUT_SIZE
};
33
34 private:
// Range testing: returns true iff lo <= x <= hi (requires lo <= hi).
//
// Uses the single-comparison trick: once x is shifted down by lo, any value
// below lo wraps around to a huge unsigned number and fails the <= test.
// The subtractions are done on unsigned operands so that the wrap-around is
// well defined; subtracting the signed ints first would overflow (undefined
// behaviour) for extreme arguments such as x == INT_MIN with lo > 0.
static inline bool Between(int x, int lo, int hi) {
  const unsigned ulo = static_cast<unsigned>(lo);
  const unsigned offset = static_cast<unsigned>(x) - ulo;
  const unsigned span = static_cast<unsigned>(hi) - ulo;
  return offset <= span;
}
Ben Murdoch3fb3ca82011-12-02 17:19:32 +000039
Steve Blocka7e24c12009-10-30 11:49:00 +000040 // Indicates a missing value.
41 static const int kNone = kMaxInt;
42
// Upper bound on how many leading digits contribute to the value of a
// numeral; any digits beyond this count are ignored.
static const int kMaxSignificantDigits = 9;
46
Steve Blocka7e24c12009-10-30 11:49:00 +000047 // InputReader provides basic string parsing and character classification.
48 template <typename Char>
49 class InputReader BASE_EMBEDDED {
50 public:
Ben Murdoch8b112d22011-06-08 16:22:53 +010051 InputReader(UnicodeCache* unicode_cache, Vector<Char> s)
Steve Blocka7e24c12009-10-30 11:49:00 +000052 : index_(0),
53 buffer_(s),
Ben Murdoch8b112d22011-06-08 16:22:53 +010054 unicode_cache_(unicode_cache) {
Steve Blocka7e24c12009-10-30 11:49:00 +000055 Next();
56 }
57
Ben Murdoch3fb3ca82011-12-02 17:19:32 +000058 int position() { return index_; }
Steve Blocka7e24c12009-10-30 11:49:00 +000059
Ben Murdoch3fb3ca82011-12-02 17:19:32 +000060 // Advance to the next character of the string.
61 void Next() {
62 ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0;
63 index_++;
Steve Blocka7e24c12009-10-30 11:49:00 +000064 }
65
Ben Murdoch3fb3ca82011-12-02 17:19:32 +000066 // Read a string of digits as an unsigned number. Cap value at
67 // kMaxSignificantDigits, but skip remaining digits if the numeral
68 // is longer.
69 int ReadUnsignedNumeral() {
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -080070 int n = 0;
Ben Murdoch3fb3ca82011-12-02 17:19:32 +000071 int i = 0;
72 while (IsAsciiDigit()) {
73 if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0';
74 i++;
75 Next();
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -080076 }
77 return n;
78 }
79
Steve Blocka7e24c12009-10-30 11:49:00 +000080 // Read a word (sequence of chars. >= 'A'), fill the given buffer with a
81 // lower-case prefix, and pad any remainder of the buffer with zeroes.
82 // Return word length.
83 int ReadWord(uint32_t* prefix, int prefix_size) {
84 int len;
85 for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) {
Iain Merrick9ac36c92010-09-13 15:29:50 +010086 if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_);
Steve Blocka7e24c12009-10-30 11:49:00 +000087 }
88 for (int i = len; i < prefix_size; i++) prefix[i] = 0;
89 return len;
90 }
91
92 // The skip methods return whether they actually skipped something.
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -080093 bool Skip(uint32_t c) {
94 if (ch_ == c) {
95 Next();
96 return true;
97 }
98 return false;
99 }
Steve Blocka7e24c12009-10-30 11:49:00 +0000100
101 bool SkipWhiteSpace() {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000102 if (unicode_cache_->IsWhiteSpaceOrLineTerminator(ch_)) {
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -0800103 Next();
104 return true;
105 }
106 return false;
Steve Blocka7e24c12009-10-30 11:49:00 +0000107 }
108
109 bool SkipParentheses() {
110 if (ch_ != '(') return false;
111 int balance = 0;
112 do {
113 if (ch_ == ')') --balance;
114 else if (ch_ == '(') ++balance;
115 Next();
116 } while (balance > 0 && ch_);
117 return true;
118 }
119
120 // Character testing/classification. Non-ASCII digits are not supported.
121 bool Is(uint32_t c) const { return ch_ == c; }
122 bool IsEnd() const { return ch_ == 0; }
123 bool IsAsciiDigit() const { return IsDecimalDigit(ch_); }
124 bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; }
125 bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; }
126
127 // Return 1 for '+' and -1 for '-'.
128 int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); }
129
Steve Blocka7e24c12009-10-30 11:49:00 +0000130 private:
Steve Blocka7e24c12009-10-30 11:49:00 +0000131 int index_;
132 Vector<Char> buffer_;
Steve Blocka7e24c12009-10-30 11:49:00 +0000133 uint32_t ch_;
Ben Murdoch8b112d22011-06-08 16:22:53 +0100134 UnicodeCache* unicode_cache_;
Steve Blocka7e24c12009-10-30 11:49:00 +0000135 };
136
// Categories a keyword can belong to. The numeric values start at 0 and are
// also used as (non-negative) token tags by DateToken.
enum KeywordType {
  INVALID,
  MONTH_NAME,
  TIME_ZONE_NAME,
  TIME_SEPARATOR,
  AM_PM
};
140
141 struct DateToken {
142 public:
143 bool IsInvalid() { return tag_ == kInvalidTokenTag; }
144 bool IsUnknown() { return tag_ == kUnknownTokenTag; }
145 bool IsNumber() { return tag_ == kNumberTag; }
146 bool IsSymbol() { return tag_ == kSymbolTag; }
147 bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; }
148 bool IsEndOfInput() { return tag_ == kEndOfInputTag; }
149 bool IsKeyword() { return tag_ >= kKeywordTagStart; }
150
151 int length() { return length_; }
152
153 int number() {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000154 DCHECK(IsNumber());
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000155 return value_;
156 }
157 KeywordType keyword_type() {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000158 DCHECK(IsKeyword());
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000159 return static_cast<KeywordType>(tag_);
160 }
161 int keyword_value() {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000162 DCHECK(IsKeyword());
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000163 return value_;
164 }
165 char symbol() {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000166 DCHECK(IsSymbol());
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000167 return static_cast<char>(value_);
168 }
169 bool IsSymbol(char symbol) {
170 return IsSymbol() && this->symbol() == symbol;
171 }
172 bool IsKeywordType(KeywordType tag) {
173 return tag_ == tag;
174 }
175 bool IsFixedLengthNumber(int length) {
176 return IsNumber() && length_ == length;
177 }
178 bool IsAsciiSign() {
179 return tag_ == kSymbolTag && (value_ == '-' || value_ == '+');
180 }
181 int ascii_sign() {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000182 DCHECK(IsAsciiSign());
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000183 return 44 - value_;
184 }
185 bool IsKeywordZ() {
186 return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0;
187 }
188 bool IsUnknown(int character) {
189 return IsUnknown() && value_ == character;
190 }
191 // Factory functions.
192 static DateToken Keyword(KeywordType tag, int value, int length) {
193 return DateToken(tag, length, value);
194 }
195 static DateToken Number(int value, int length) {
196 return DateToken(kNumberTag, length, value);
197 }
198 static DateToken Symbol(char symbol) {
199 return DateToken(kSymbolTag, 1, symbol);
200 }
201 static DateToken EndOfInput() {
202 return DateToken(kEndOfInputTag, 0, -1);
203 }
204 static DateToken WhiteSpace(int length) {
205 return DateToken(kWhiteSpaceTag, length, -1);
206 }
207 static DateToken Unknown() {
208 return DateToken(kUnknownTokenTag, 1, -1);
209 }
210 static DateToken Invalid() {
211 return DateToken(kInvalidTokenTag, 0, -1);
212 }
Ben Murdoch589d6972011-11-30 16:04:58 +0000213
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000214 private:
215 enum TagType {
216 kInvalidTokenTag = -6,
217 kUnknownTokenTag = -5,
218 kWhiteSpaceTag = -4,
219 kNumberTag = -3,
220 kSymbolTag = -2,
221 kEndOfInputTag = -1,
222 kKeywordTagStart = 0
223 };
224 DateToken(int tag, int length, int value)
225 : tag_(tag),
226 length_(length),
227 value_(value) { }
228
229 int tag_;
230 int length_; // Number of characters.
231 int value_;
232 };
233
234 template <typename Char>
235 class DateStringTokenizer {
236 public:
237 explicit DateStringTokenizer(InputReader<Char>* in)
238 : in_(in), next_(Scan()) { }
239 DateToken Next() {
240 DateToken result = next_;
241 next_ = Scan();
242 return result;
243 }
244
245 DateToken Peek() {
246 return next_;
247 }
248 bool SkipSymbol(char symbol) {
249 if (next_.IsSymbol(symbol)) {
250 next_ = Scan();
251 return true;
252 }
253 return false;
254 }
Ben Murdoch589d6972011-11-30 16:04:58 +0000255
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000256 private:
257 DateToken Scan();
258
259 InputReader<Char>* in_;
260 DateToken next_;
261 };
262
263 static int ReadMilliseconds(DateToken number);
Steve Blocka7e24c12009-10-30 11:49:00 +0000264
265 // KeywordTable maps names of months, time zones, am/pm to numbers.
266 class KeywordTable : public AllStatic {
267 public:
268 // Look up a word in the keyword table and return an index.
269 // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength
270 // and 'len' is the word length.
271 static int Lookup(const uint32_t* pre, int len);
272 // Get the type of the keyword at index i.
273 static KeywordType GetType(int i) {
274 return static_cast<KeywordType>(array[i][kTypeOffset]);
275 }
276 // Get the value of the keyword at index i.
277 static int GetValue(int i) { return array[i][kValueOffset]; }
278
279 static const int kPrefixLength = 3;
280 static const int kTypeOffset = kPrefixLength;
281 static const int kValueOffset = kTypeOffset + 1;
282 static const int kEntrySize = kValueOffset + 1;
283 static const int8_t array[][kEntrySize];
284 };
285
286 class TimeZoneComposer BASE_EMBEDDED {
287 public:
288 TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {}
289 void Set(int offset_in_hours) {
290 sign_ = offset_in_hours < 0 ? -1 : 1;
291 hour_ = offset_in_hours * sign_;
292 minute_ = 0;
293 }
294 void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; }
295 void SetAbsoluteHour(int hour) { hour_ = hour; }
296 void SetAbsoluteMinute(int minute) { minute_ = minute; }
297 bool IsExpecting(int n) const {
298 return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n);
299 }
300 bool IsUTC() const { return hour_ == 0 && minute_ == 0; }
301 bool Write(FixedArray* output);
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000302 bool IsEmpty() { return hour_ == kNone; }
Steve Blocka7e24c12009-10-30 11:49:00 +0000303 private:
304 int sign_;
305 int hour_;
306 int minute_;
307 };
308
309 class TimeComposer BASE_EMBEDDED {
310 public:
311 TimeComposer() : index_(0), hour_offset_(kNone) {}
312 bool IsEmpty() const { return index_ == 0; }
313 bool IsExpecting(int n) const {
Steve Block6ded16b2010-05-10 14:33:55 +0100314 return (index_ == 1 && IsMinute(n)) ||
315 (index_ == 2 && IsSecond(n)) ||
316 (index_ == 3 && IsMillisecond(n));
Steve Blocka7e24c12009-10-30 11:49:00 +0000317 }
318 bool Add(int n) {
319 return index_ < kSize ? (comp_[index_++] = n, true) : false;
320 }
321 bool AddFinal(int n) {
322 if (!Add(n)) return false;
323 while (index_ < kSize) comp_[index_++] = 0;
324 return true;
325 }
326 void SetHourOffset(int n) { hour_offset_ = n; }
327 bool Write(FixedArray* output);
328
329 static bool IsMinute(int x) { return Between(x, 0, 59); }
Steve Blocka7e24c12009-10-30 11:49:00 +0000330 static bool IsHour(int x) { return Between(x, 0, 23); }
Steve Blocka7e24c12009-10-30 11:49:00 +0000331 static bool IsSecond(int x) { return Between(x, 0, 59); }
Ben Murdoch589d6972011-11-30 16:04:58 +0000332
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000333 private:
334 static bool IsHour12(int x) { return Between(x, 0, 12); }
Steve Block6ded16b2010-05-10 14:33:55 +0100335 static bool IsMillisecond(int x) { return Between(x, 0, 999); }
Steve Blocka7e24c12009-10-30 11:49:00 +0000336
Steve Block6ded16b2010-05-10 14:33:55 +0100337 static const int kSize = 4;
Steve Blocka7e24c12009-10-30 11:49:00 +0000338 int comp_[kSize];
339 int index_;
340 int hour_offset_;
341 };
342
343 class DayComposer BASE_EMBEDDED {
344 public:
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000345 DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {}
Steve Blocka7e24c12009-10-30 11:49:00 +0000346 bool IsEmpty() const { return index_ == 0; }
347 bool Add(int n) {
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000348 if (index_ < kSize) {
349 comp_[index_] = n;
350 index_++;
351 return true;
352 }
353 return false;
Steve Blocka7e24c12009-10-30 11:49:00 +0000354 }
355 void SetNamedMonth(int n) { named_month_ = n; }
356 bool Write(FixedArray* output);
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000357 void set_iso_date() { is_iso_date_ = true; }
Steve Blocka7e24c12009-10-30 11:49:00 +0000358 static bool IsMonth(int x) { return Between(x, 1, 12); }
359 static bool IsDay(int x) { return Between(x, 1, 31); }
360
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000361 private:
Steve Blocka7e24c12009-10-30 11:49:00 +0000362 static const int kSize = 3;
363 int comp_[kSize];
364 int index_;
365 int named_month_;
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000366 // If set, ensures that data is always parsed in year-month-date order.
367 bool is_iso_date_;
Steve Blocka7e24c12009-10-30 11:49:00 +0000368 };
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000369
370 // Tries to parse an ES5 Date Time String. Returns the next token
371 // to continue with in the legacy date string parser. If parsing is
372 // complete, returns DateToken::EndOfInput(). If terminally unsuccessful,
373 // returns DateToken::Invalid(). Otherwise parsing continues in the
374 // legacy parser.
375 template <typename Char>
376 static DateParser::DateToken ParseES5DateTime(
377 DateStringTokenizer<Char>* scanner,
378 DayComposer* day,
379 TimeComposer* time,
380 TimeZoneComposer* tz);
Steve Blocka7e24c12009-10-30 11:49:00 +0000381};
382
383
384} } // namespace v8::internal
385
386#endif // V8_DATEPARSER_H_