blob: 4bd320e901d585c4df3c7763f600dc01505d38cb [file] [log] [blame]
// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#ifndef V8_DATEPARSER_H_
29#define V8_DATEPARSER_H_
30
lrn@chromium.org1c092762011-05-09 09:42:16 +000031#include "allocation.h"
erik.corry@gmail.com4a6c3272010-11-18 12:04:40 +000032#include "char-predicates-inl.h"
33#include "scanner-base.h"
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000034
kasperl@chromium.org71affb52009-05-26 05:44:31 +000035namespace v8 {
36namespace internal {
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000037
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000038class DateParser : public AllStatic {
39 public:
40
41 // Parse the string as a date. If parsing succeeds, return true after
42 // filling out the output array as follows (all integers are Smis):
43 // [0]: year
44 // [1]: month (0 = Jan, 1 = Feb, ...)
45 // [2]: day
46 // [3]: hour
47 // [4]: minute
48 // [5]: second
kmillikin@chromium.org4111b802010-05-03 10:34:42 +000049 // [6]: millisecond
50 // [7]: UTC offset in seconds, or null value if no timezone specified
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000051 // If parsing fails, return false (content of output array is not defined).
ager@chromium.orgbb29dc92009-03-24 13:25:23 +000052 template <typename Char>
ager@chromium.orga9aa5fa2011-04-13 08:46:07 +000053 static bool Parse(Vector<Char> str, FixedArray* output, UnicodeCache* cache);
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000054
// Positions of the individual date components in the output array, in the
// same order as the components are listed for Parse().
enum {
  YEAR,
  MONTH,
  DAY,
  HOUR,
  MINUTE,
  SECOND,
  MILLISECOND,
  UTC_OFFSET,
  OUTPUT_SIZE  // Number of components above; not a component itself.
};
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000058
59 private:
// Range testing: returns true iff lo <= x <= hi. Requires lo <= hi.
//
// The check is a single unsigned comparison: values of x below lo wrap
// around to a large unsigned offset and are therefore rejected. Both
// subtractions are carried out on unsigned operands so that extreme
// arguments wrap (well defined) instead of overflowing a signed int
// (undefined behavior).
static inline bool Between(int x, int lo, int hi) {
  const unsigned offset = static_cast<unsigned>(x) - static_cast<unsigned>(lo);
  const unsigned width = static_cast<unsigned>(hi) - static_cast<unsigned>(lo);
  return offset <= width;
}
ricow@chromium.org4f693d62011-07-04 14:01:31 +000064
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000065 // Indicates a missing value.
66 static const int kNone = kMaxInt;
67
ricow@chromium.org4f693d62011-07-04 14:01:31 +000068 // Maximal number of digits used to build the value of a numeral.
69 // Remaining digits are ignored.
70 static const int kMaxSignificantDigits = 9;
71
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000072 // InputReader provides basic string parsing and character classification.
ager@chromium.orgbb29dc92009-03-24 13:25:23 +000073 template <typename Char>
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000074 class InputReader BASE_EMBEDDED {
75 public:
ager@chromium.orga9aa5fa2011-04-13 08:46:07 +000076 InputReader(UnicodeCache* unicode_cache, Vector<Char> s)
ager@chromium.orgbb29dc92009-03-24 13:25:23 +000077 : index_(0),
78 buffer_(s),
ager@chromium.orga9aa5fa2011-04-13 08:46:07 +000079 unicode_cache_(unicode_cache) {
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000080 Next();
81 }
82
ricow@chromium.org4f693d62011-07-04 14:01:31 +000083 int position() { return index_; }
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000084
ricow@chromium.org4f693d62011-07-04 14:01:31 +000085 // Advance to the next character of the string.
86 void Next() {
87 ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0;
88 index_++;
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000089 }
90
ricow@chromium.org4f693d62011-07-04 14:01:31 +000091 // Read a string of digits as an unsigned number. Cap value at
92 // kMaxSignificantDigits, but skip remaining digits if the numeral
93 // is longer.
94 int ReadUnsignedNumeral() {
ager@chromium.orgbeb25712010-11-29 08:02:25 +000095 int n = 0;
ricow@chromium.org4f693d62011-07-04 14:01:31 +000096 int i = 0;
97 while (IsAsciiDigit()) {
98 if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0';
99 i++;
100 Next();
ager@chromium.orgbeb25712010-11-29 08:02:25 +0000101 }
102 return n;
103 }
104
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000105 // Read a word (sequence of chars. >= 'A'), fill the given buffer with a
106 // lower-case prefix, and pad any remainder of the buffer with zeroes.
107 // Return word length.
108 int ReadWord(uint32_t* prefix, int prefix_size) {
109 int len;
110 for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) {
kmillikin@chromium.org3cdd9e12010-09-06 11:39:48 +0000111 if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_);
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000112 }
113 for (int i = len; i < prefix_size; i++) prefix[i] = 0;
114 return len;
115 }
116
117 // The skip methods return whether they actually skipped something.
erik.corry@gmail.com4a6c3272010-11-18 12:04:40 +0000118 bool Skip(uint32_t c) {
119 if (ch_ == c) {
120 Next();
121 return true;
122 }
123 return false;
124 }
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000125
126 bool SkipWhiteSpace() {
ager@chromium.orga9aa5fa2011-04-13 08:46:07 +0000127 if (unicode_cache_->IsWhiteSpace(ch_)) {
erik.corry@gmail.com4a6c3272010-11-18 12:04:40 +0000128 Next();
129 return true;
130 }
131 return false;
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000132 }
133
134 bool SkipParentheses() {
135 if (ch_ != '(') return false;
136 int balance = 0;
137 do {
138 if (ch_ == ')') --balance;
139 else if (ch_ == '(') ++balance;
140 Next();
141 } while (balance > 0 && ch_);
142 return true;
143 }
144
145 // Character testing/classification. Non-ASCII digits are not supported.
146 bool Is(uint32_t c) const { return ch_ == c; }
147 bool IsEnd() const { return ch_ == 0; }
148 bool IsAsciiDigit() const { return IsDecimalDigit(ch_); }
149 bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; }
150 bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; }
151
152 // Return 1 for '+' and -1 for '-'.
153 int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); }
154
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000155 private:
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000156 int index_;
157 Vector<Char> buffer_;
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000158 uint32_t ch_;
ager@chromium.orga9aa5fa2011-04-13 08:46:07 +0000159 UnicodeCache* unicode_cache_;
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000160 };
161
// Kinds of keywords. The enumerator values start at 0 (INVALID) and are
// also used directly as the tag of keyword tokens.
enum KeywordType {
  INVALID,
  MONTH_NAME,
  TIME_ZONE_NAME,
  TIME_SEPARATOR,
  AM_PM
};
165
166 struct DateToken {
167 public:
168 bool IsInvalid() { return tag_ == kInvalidTokenTag; }
169 bool IsUnknown() { return tag_ == kUnknownTokenTag; }
170 bool IsNumber() { return tag_ == kNumberTag; }
171 bool IsSymbol() { return tag_ == kSymbolTag; }
172 bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; }
173 bool IsEndOfInput() { return tag_ == kEndOfInputTag; }
174 bool IsKeyword() { return tag_ >= kKeywordTagStart; }
175
176 int length() { return length_; }
177
178 int number() {
179 ASSERT(IsNumber());
180 return value_;
181 }
182 KeywordType keyword_type() {
183 ASSERT(IsKeyword());
184 return static_cast<KeywordType>(tag_);
185 }
186 int keyword_value() {
187 ASSERT(IsKeyword());
188 return value_;
189 }
190 char symbol() {
191 ASSERT(IsSymbol());
192 return static_cast<char>(value_);
193 }
194 bool IsSymbol(char symbol) {
195 return IsSymbol() && this->symbol() == symbol;
196 }
197 bool IsKeywordType(KeywordType tag) {
198 return tag_ == tag;
199 }
200 bool IsFixedLengthNumber(int length) {
201 return IsNumber() && length_ == length;
202 }
203 bool IsAsciiSign() {
204 return tag_ == kSymbolTag && (value_ == '-' || value_ == '+');
205 }
206 int ascii_sign() {
207 ASSERT(IsAsciiSign());
208 return 44 - value_;
209 }
210 bool IsKeywordZ() {
211 return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0;
212 }
213 bool IsUnknown(int character) {
214 return IsUnknown() && value_ == character;
215 }
216 // Factory functions.
217 static DateToken Keyword(KeywordType tag, int value, int length) {
218 return DateToken(tag, length, value);
219 }
220 static DateToken Number(int value, int length) {
221 return DateToken(kNumberTag, length, value);
222 }
223 static DateToken Symbol(char symbol) {
224 return DateToken(kSymbolTag, 1, symbol);
225 }
226 static DateToken EndOfInput() {
227 return DateToken(kEndOfInputTag, 0, -1);
228 }
229 static DateToken WhiteSpace(int length) {
230 return DateToken(kWhiteSpaceTag, length, -1);
231 }
232 static DateToken Unknown() {
233 return DateToken(kUnknownTokenTag, 1, -1);
234 }
235 static DateToken Invalid() {
236 return DateToken(kInvalidTokenTag, 0, -1);
237 }
238 private:
239 enum TagType {
240 kInvalidTokenTag = -6,
241 kUnknownTokenTag = -5,
242 kWhiteSpaceTag = -4,
243 kNumberTag = -3,
244 kSymbolTag = -2,
245 kEndOfInputTag = -1,
246 kKeywordTagStart = 0
247 };
248 DateToken(int tag, int length, int value)
249 : tag_(tag),
250 length_(length),
251 value_(value) { }
252
253 int tag_;
254 int length_; // Number of characters.
255 int value_;
256 };
257
258 template <typename Char>
259 class DateStringTokenizer {
260 public:
261 explicit DateStringTokenizer(InputReader<Char>* in)
262 : in_(in), next_(Scan()) { }
263 DateToken Next() {
264 DateToken result = next_;
265 next_ = Scan();
266 return result;
267 }
268
269 DateToken Peek() {
270 return next_;
271 }
272 bool SkipSymbol(char symbol) {
273 if (next_.IsSymbol(symbol)) {
274 next_ = Scan();
275 return true;
276 }
277 return false;
278 }
279 private:
280 DateToken Scan();
281
282 InputReader<Char>* in_;
283 DateToken next_;
284 };
285
286 static int ReadMilliseconds(DateToken number);
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000287
288 // KeywordTable maps names of months, time zones, am/pm to numbers.
289 class KeywordTable : public AllStatic {
290 public:
291 // Look up a word in the keyword table and return an index.
292 // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength
293 // and 'len' is the word length.
294 static int Lookup(const uint32_t* pre, int len);
295 // Get the type of the keyword at index i.
296 static KeywordType GetType(int i) {
297 return static_cast<KeywordType>(array[i][kTypeOffset]);
298 }
299 // Get the value of the keyword at index i.
300 static int GetValue(int i) { return array[i][kValueOffset]; }
301
302 static const int kPrefixLength = 3;
303 static const int kTypeOffset = kPrefixLength;
304 static const int kValueOffset = kTypeOffset + 1;
305 static const int kEntrySize = kValueOffset + 1;
306 static const int8_t array[][kEntrySize];
307 };
308
309 class TimeZoneComposer BASE_EMBEDDED {
310 public:
311 TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {}
312 void Set(int offset_in_hours) {
313 sign_ = offset_in_hours < 0 ? -1 : 1;
314 hour_ = offset_in_hours * sign_;
315 minute_ = 0;
316 }
317 void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; }
318 void SetAbsoluteHour(int hour) { hour_ = hour; }
319 void SetAbsoluteMinute(int minute) { minute_ = minute; }
320 bool IsExpecting(int n) const {
321 return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n);
322 }
323 bool IsUTC() const { return hour_ == 0 && minute_ == 0; }
324 bool Write(FixedArray* output);
ricow@chromium.org4f693d62011-07-04 14:01:31 +0000325 bool IsEmpty() { return hour_ == kNone; }
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000326 private:
327 int sign_;
328 int hour_;
329 int minute_;
330 };
331
332 class TimeComposer BASE_EMBEDDED {
333 public:
334 TimeComposer() : index_(0), hour_offset_(kNone) {}
335 bool IsEmpty() const { return index_ == 0; }
336 bool IsExpecting(int n) const {
kmillikin@chromium.org4111b802010-05-03 10:34:42 +0000337 return (index_ == 1 && IsMinute(n)) ||
338 (index_ == 2 && IsSecond(n)) ||
339 (index_ == 3 && IsMillisecond(n));
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000340 }
341 bool Add(int n) {
342 return index_ < kSize ? (comp_[index_++] = n, true) : false;
343 }
344 bool AddFinal(int n) {
345 if (!Add(n)) return false;
346 while (index_ < kSize) comp_[index_++] = 0;
347 return true;
348 }
349 void SetHourOffset(int n) { hour_offset_ = n; }
350 bool Write(FixedArray* output);
351
352 static bool IsMinute(int x) { return Between(x, 0, 59); }
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000353 static bool IsHour(int x) { return Between(x, 0, 23); }
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000354 static bool IsSecond(int x) { return Between(x, 0, 59); }
ricow@chromium.org4f693d62011-07-04 14:01:31 +0000355 private:
356 static bool IsHour12(int x) { return Between(x, 0, 12); }
kmillikin@chromium.org4111b802010-05-03 10:34:42 +0000357 static bool IsMillisecond(int x) { return Between(x, 0, 999); }
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000358
kmillikin@chromium.org4111b802010-05-03 10:34:42 +0000359 static const int kSize = 4;
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000360 int comp_[kSize];
361 int index_;
362 int hour_offset_;
363 };
364
365 class DayComposer BASE_EMBEDDED {
366 public:
ricow@chromium.org4f693d62011-07-04 14:01:31 +0000367 DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {}
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000368 bool IsEmpty() const { return index_ == 0; }
369 bool Add(int n) {
ricow@chromium.org4f693d62011-07-04 14:01:31 +0000370 if (index_ < kSize) {
371 comp_[index_] = n;
372 index_++;
373 return true;
374 }
375 return false;
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000376 }
377 void SetNamedMonth(int n) { named_month_ = n; }
378 bool Write(FixedArray* output);
ricow@chromium.org4f693d62011-07-04 14:01:31 +0000379 void set_iso_date() { is_iso_date_ = true; }
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000380 static bool IsMonth(int x) { return Between(x, 1, 12); }
381 static bool IsDay(int x) { return Between(x, 1, 31); }
382
ricow@chromium.org4f693d62011-07-04 14:01:31 +0000383 private:
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000384 static const int kSize = 3;
385 int comp_[kSize];
386 int index_;
387 int named_month_;
ricow@chromium.org4f693d62011-07-04 14:01:31 +0000388 // If set, ensures that data is always parsed in year-month-date order.
389 bool is_iso_date_;
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000390 };
ricow@chromium.org4f693d62011-07-04 14:01:31 +0000391
392 // Tries to parse an ES5 Date Time String. Returns the next token
393 // to continue with in the legacy date string parser. If parsing is
394 // complete, returns DateToken::EndOfInput(). If terminally unsuccessful,
395 // returns DateToken::Invalid(). Otherwise parsing continues in the
396 // legacy parser.
397 template <typename Char>
398 static DateParser::DateToken ParseES5DateTime(
399 DateStringTokenizer<Char>* scanner,
400 DayComposer* day,
401 TimeComposer* time,
402 TimeZoneComposer* tz);
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000403};
404
405
406} } // namespace v8::internal
407
408#endif // V8_DATEPARSER_H_