// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

5#ifndef V8_DATEPARSER_H_
6#define V8_DATEPARSER_H_
7
Ben Murdochb8a8cc12014-11-26 15:28:44 +00008#include "src/allocation.h"
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00009#include "src/char-predicates.h"
10#include "src/parsing/scanner.h"
Steve Blocka7e24c12009-10-30 11:49:00 +000011
12namespace v8 {
13namespace internal {
14
15class DateParser : public AllStatic {
16 public:
Steve Blocka7e24c12009-10-30 11:49:00 +000017 // Parse the string as a date. If parsing succeeds, return true after
18 // filling out the output array as follows (all integers are Smis):
19 // [0]: year
20 // [1]: month (0 = Jan, 1 = Feb, ...)
21 // [2]: day
22 // [3]: hour
23 // [4]: minute
24 // [5]: second
Steve Block6ded16b2010-05-10 14:33:55 +010025 // [6]: millisecond
26 // [7]: UTC offset in seconds, or null value if no timezone specified
Steve Blocka7e24c12009-10-30 11:49:00 +000027 // If parsing fails, return false (content of output array is not defined).
28 template <typename Char>
Ben Murdoch61f157c2016-09-16 13:49:30 +010029 static bool Parse(Isolate* isolate, Vector<Char> str, FixedArray* output);
Steve Blocka7e24c12009-10-30 11:49:00 +000030
// Indices into the output array filled by Parse(). OUTPUT_SIZE is the
// number of slots, i.e. one past the last index.
enum {
  YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND, UTC_OFFSET, OUTPUT_SIZE
};
34
35 private:
// Range testing: returns true iff lo <= x <= hi (for lo <= hi).
// Both bounds are checked with a single unsigned comparison: if x < lo the
// offset wraps around to a value larger than any valid span. The
// subtractions are done on unsigned operands so that extreme inputs wrap
// instead of triggering signed-overflow undefined behaviour.
static inline bool Between(int x, int lo, int hi) {
  const unsigned offset = static_cast<unsigned>(x) - static_cast<unsigned>(lo);
  const unsigned span = static_cast<unsigned>(hi) - static_cast<unsigned>(lo);
  return offset <= span;
}
Ben Murdoch3fb3ca82011-12-02 17:19:32 +000040
Steve Blocka7e24c12009-10-30 11:49:00 +000041 // Indicates a missing value.
42 static const int kNone = kMaxInt;
43
// Maximal number of digits used to build the value of a numeral.
// Remaining digits are ignored.
static const int kMaxSignificantDigits = 9;
47
Steve Blocka7e24c12009-10-30 11:49:00 +000048 // InputReader provides basic string parsing and character classification.
49 template <typename Char>
50 class InputReader BASE_EMBEDDED {
51 public:
Ben Murdoch8b112d22011-06-08 16:22:53 +010052 InputReader(UnicodeCache* unicode_cache, Vector<Char> s)
Steve Blocka7e24c12009-10-30 11:49:00 +000053 : index_(0),
54 buffer_(s),
Ben Murdoch8b112d22011-06-08 16:22:53 +010055 unicode_cache_(unicode_cache) {
Steve Blocka7e24c12009-10-30 11:49:00 +000056 Next();
57 }
58
Ben Murdoch3fb3ca82011-12-02 17:19:32 +000059 int position() { return index_; }
Steve Blocka7e24c12009-10-30 11:49:00 +000060
Ben Murdoch3fb3ca82011-12-02 17:19:32 +000061 // Advance to the next character of the string.
62 void Next() {
63 ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0;
64 index_++;
Steve Blocka7e24c12009-10-30 11:49:00 +000065 }
66
Ben Murdoch3fb3ca82011-12-02 17:19:32 +000067 // Read a string of digits as an unsigned number. Cap value at
68 // kMaxSignificantDigits, but skip remaining digits if the numeral
69 // is longer.
70 int ReadUnsignedNumeral() {
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -080071 int n = 0;
Ben Murdoch3fb3ca82011-12-02 17:19:32 +000072 int i = 0;
73 while (IsAsciiDigit()) {
74 if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0';
75 i++;
76 Next();
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -080077 }
78 return n;
79 }
80
Steve Blocka7e24c12009-10-30 11:49:00 +000081 // Read a word (sequence of chars. >= 'A'), fill the given buffer with a
82 // lower-case prefix, and pad any remainder of the buffer with zeroes.
83 // Return word length.
84 int ReadWord(uint32_t* prefix, int prefix_size) {
85 int len;
86 for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) {
Iain Merrick9ac36c92010-09-13 15:29:50 +010087 if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_);
Steve Blocka7e24c12009-10-30 11:49:00 +000088 }
89 for (int i = len; i < prefix_size; i++) prefix[i] = 0;
90 return len;
91 }
92
93 // The skip methods return whether they actually skipped something.
Shimeng (Simon) Wang8a31eba2010-12-06 19:01:33 -080094 bool Skip(uint32_t c) {
95 if (ch_ == c) {
96 Next();
97 return true;
98 }
99 return false;
100 }
Steve Blocka7e24c12009-10-30 11:49:00 +0000101
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000102 inline bool SkipWhiteSpace();
103 inline bool SkipParentheses();
Steve Blocka7e24c12009-10-30 11:49:00 +0000104
105 // Character testing/classification. Non-ASCII digits are not supported.
106 bool Is(uint32_t c) const { return ch_ == c; }
107 bool IsEnd() const { return ch_ == 0; }
108 bool IsAsciiDigit() const { return IsDecimalDigit(ch_); }
109 bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; }
110 bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; }
111
112 // Return 1 for '+' and -1 for '-'.
113 int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); }
114
Steve Blocka7e24c12009-10-30 11:49:00 +0000115 private:
Steve Blocka7e24c12009-10-30 11:49:00 +0000116 int index_;
117 Vector<Char> buffer_;
Steve Blocka7e24c12009-10-30 11:49:00 +0000118 uint32_t ch_;
Ben Murdoch8b112d22011-06-08 16:22:53 +0100119 UnicodeCache* unicode_cache_;
Steve Blocka7e24c12009-10-30 11:49:00 +0000120 };
121
// Categories of keywords. The enumerators are non-negative: DateToken
// stores a keyword's type directly in its tag and treats any tag >= 0 as a
// keyword.
enum KeywordType {
  INVALID, MONTH_NAME, TIME_ZONE_NAME, TIME_SEPARATOR, AM_PM
};
125
126 struct DateToken {
127 public:
128 bool IsInvalid() { return tag_ == kInvalidTokenTag; }
129 bool IsUnknown() { return tag_ == kUnknownTokenTag; }
130 bool IsNumber() { return tag_ == kNumberTag; }
131 bool IsSymbol() { return tag_ == kSymbolTag; }
132 bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; }
133 bool IsEndOfInput() { return tag_ == kEndOfInputTag; }
134 bool IsKeyword() { return tag_ >= kKeywordTagStart; }
135
136 int length() { return length_; }
137
138 int number() {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000139 DCHECK(IsNumber());
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000140 return value_;
141 }
142 KeywordType keyword_type() {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000143 DCHECK(IsKeyword());
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000144 return static_cast<KeywordType>(tag_);
145 }
146 int keyword_value() {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000147 DCHECK(IsKeyword());
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000148 return value_;
149 }
150 char symbol() {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000151 DCHECK(IsSymbol());
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000152 return static_cast<char>(value_);
153 }
154 bool IsSymbol(char symbol) {
155 return IsSymbol() && this->symbol() == symbol;
156 }
157 bool IsKeywordType(KeywordType tag) {
158 return tag_ == tag;
159 }
160 bool IsFixedLengthNumber(int length) {
161 return IsNumber() && length_ == length;
162 }
163 bool IsAsciiSign() {
164 return tag_ == kSymbolTag && (value_ == '-' || value_ == '+');
165 }
166 int ascii_sign() {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000167 DCHECK(IsAsciiSign());
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000168 return 44 - value_;
169 }
170 bool IsKeywordZ() {
171 return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0;
172 }
173 bool IsUnknown(int character) {
174 return IsUnknown() && value_ == character;
175 }
176 // Factory functions.
177 static DateToken Keyword(KeywordType tag, int value, int length) {
178 return DateToken(tag, length, value);
179 }
180 static DateToken Number(int value, int length) {
181 return DateToken(kNumberTag, length, value);
182 }
183 static DateToken Symbol(char symbol) {
184 return DateToken(kSymbolTag, 1, symbol);
185 }
186 static DateToken EndOfInput() {
187 return DateToken(kEndOfInputTag, 0, -1);
188 }
189 static DateToken WhiteSpace(int length) {
190 return DateToken(kWhiteSpaceTag, length, -1);
191 }
192 static DateToken Unknown() {
193 return DateToken(kUnknownTokenTag, 1, -1);
194 }
195 static DateToken Invalid() {
196 return DateToken(kInvalidTokenTag, 0, -1);
197 }
Ben Murdoch589d6972011-11-30 16:04:58 +0000198
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000199 private:
200 enum TagType {
201 kInvalidTokenTag = -6,
202 kUnknownTokenTag = -5,
203 kWhiteSpaceTag = -4,
204 kNumberTag = -3,
205 kSymbolTag = -2,
206 kEndOfInputTag = -1,
207 kKeywordTagStart = 0
208 };
209 DateToken(int tag, int length, int value)
210 : tag_(tag),
211 length_(length),
212 value_(value) { }
213
214 int tag_;
215 int length_; // Number of characters.
216 int value_;
217 };
218
219 template <typename Char>
220 class DateStringTokenizer {
221 public:
222 explicit DateStringTokenizer(InputReader<Char>* in)
223 : in_(in), next_(Scan()) { }
224 DateToken Next() {
225 DateToken result = next_;
226 next_ = Scan();
227 return result;
228 }
229
230 DateToken Peek() {
231 return next_;
232 }
233 bool SkipSymbol(char symbol) {
234 if (next_.IsSymbol(symbol)) {
235 next_ = Scan();
236 return true;
237 }
238 return false;
239 }
Ben Murdoch589d6972011-11-30 16:04:58 +0000240
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000241 private:
242 DateToken Scan();
243
244 InputReader<Char>* in_;
245 DateToken next_;
246 };
247
248 static int ReadMilliseconds(DateToken number);
Steve Blocka7e24c12009-10-30 11:49:00 +0000249
250 // KeywordTable maps names of months, time zones, am/pm to numbers.
251 class KeywordTable : public AllStatic {
252 public:
253 // Look up a word in the keyword table and return an index.
254 // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength
255 // and 'len' is the word length.
256 static int Lookup(const uint32_t* pre, int len);
257 // Get the type of the keyword at index i.
258 static KeywordType GetType(int i) {
259 return static_cast<KeywordType>(array[i][kTypeOffset]);
260 }
261 // Get the value of the keyword at index i.
262 static int GetValue(int i) { return array[i][kValueOffset]; }
263
264 static const int kPrefixLength = 3;
265 static const int kTypeOffset = kPrefixLength;
266 static const int kValueOffset = kTypeOffset + 1;
267 static const int kEntrySize = kValueOffset + 1;
268 static const int8_t array[][kEntrySize];
269 };
270
271 class TimeZoneComposer BASE_EMBEDDED {
272 public:
273 TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {}
274 void Set(int offset_in_hours) {
275 sign_ = offset_in_hours < 0 ? -1 : 1;
276 hour_ = offset_in_hours * sign_;
277 minute_ = 0;
278 }
279 void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; }
280 void SetAbsoluteHour(int hour) { hour_ = hour; }
281 void SetAbsoluteMinute(int minute) { minute_ = minute; }
282 bool IsExpecting(int n) const {
283 return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n);
284 }
285 bool IsUTC() const { return hour_ == 0 && minute_ == 0; }
286 bool Write(FixedArray* output);
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000287 bool IsEmpty() { return hour_ == kNone; }
Steve Blocka7e24c12009-10-30 11:49:00 +0000288 private:
289 int sign_;
290 int hour_;
291 int minute_;
292 };
293
294 class TimeComposer BASE_EMBEDDED {
295 public:
296 TimeComposer() : index_(0), hour_offset_(kNone) {}
297 bool IsEmpty() const { return index_ == 0; }
298 bool IsExpecting(int n) const {
Steve Block6ded16b2010-05-10 14:33:55 +0100299 return (index_ == 1 && IsMinute(n)) ||
300 (index_ == 2 && IsSecond(n)) ||
301 (index_ == 3 && IsMillisecond(n));
Steve Blocka7e24c12009-10-30 11:49:00 +0000302 }
303 bool Add(int n) {
304 return index_ < kSize ? (comp_[index_++] = n, true) : false;
305 }
306 bool AddFinal(int n) {
307 if (!Add(n)) return false;
308 while (index_ < kSize) comp_[index_++] = 0;
309 return true;
310 }
311 void SetHourOffset(int n) { hour_offset_ = n; }
312 bool Write(FixedArray* output);
313
314 static bool IsMinute(int x) { return Between(x, 0, 59); }
Steve Blocka7e24c12009-10-30 11:49:00 +0000315 static bool IsHour(int x) { return Between(x, 0, 23); }
Steve Blocka7e24c12009-10-30 11:49:00 +0000316 static bool IsSecond(int x) { return Between(x, 0, 59); }
Ben Murdoch589d6972011-11-30 16:04:58 +0000317
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000318 private:
319 static bool IsHour12(int x) { return Between(x, 0, 12); }
Steve Block6ded16b2010-05-10 14:33:55 +0100320 static bool IsMillisecond(int x) { return Between(x, 0, 999); }
Steve Blocka7e24c12009-10-30 11:49:00 +0000321
Steve Block6ded16b2010-05-10 14:33:55 +0100322 static const int kSize = 4;
Steve Blocka7e24c12009-10-30 11:49:00 +0000323 int comp_[kSize];
324 int index_;
325 int hour_offset_;
326 };
327
328 class DayComposer BASE_EMBEDDED {
329 public:
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000330 DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {}
Steve Blocka7e24c12009-10-30 11:49:00 +0000331 bool IsEmpty() const { return index_ == 0; }
332 bool Add(int n) {
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000333 if (index_ < kSize) {
334 comp_[index_] = n;
335 index_++;
336 return true;
337 }
338 return false;
Steve Blocka7e24c12009-10-30 11:49:00 +0000339 }
340 void SetNamedMonth(int n) { named_month_ = n; }
341 bool Write(FixedArray* output);
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000342 void set_iso_date() { is_iso_date_ = true; }
Steve Blocka7e24c12009-10-30 11:49:00 +0000343 static bool IsMonth(int x) { return Between(x, 1, 12); }
344 static bool IsDay(int x) { return Between(x, 1, 31); }
345
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000346 private:
Steve Blocka7e24c12009-10-30 11:49:00 +0000347 static const int kSize = 3;
348 int comp_[kSize];
349 int index_;
350 int named_month_;
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000351 // If set, ensures that data is always parsed in year-month-date order.
352 bool is_iso_date_;
Steve Blocka7e24c12009-10-30 11:49:00 +0000353 };
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000354
355 // Tries to parse an ES5 Date Time String. Returns the next token
356 // to continue with in the legacy date string parser. If parsing is
357 // complete, returns DateToken::EndOfInput(). If terminally unsuccessful,
358 // returns DateToken::Invalid(). Otherwise parsing continues in the
359 // legacy parser.
360 template <typename Char>
361 static DateParser::DateToken ParseES5DateTime(
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000362 DateStringTokenizer<Char>* scanner, DayComposer* day, TimeComposer* time,
Ben Murdoch3fb3ca82011-12-02 17:19:32 +0000363 TimeZoneComposer* tz);
Steve Blocka7e24c12009-10-30 11:49:00 +0000364};
365
366
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000367} // namespace internal
368} // namespace v8
Steve Blocka7e24c12009-10-30 11:49:00 +0000369
370#endif // V8_DATEPARSER_H_