blob: 27584ce39efd4f50a6f11b055983dd51c907486c [file] [log] [blame]
// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#ifndef V8_DATEPARSER_H_
29#define V8_DATEPARSER_H_
30
lrn@chromium.org1c092762011-05-09 09:42:16 +000031#include "allocation.h"
erik.corry@gmail.com4a6c3272010-11-18 12:04:40 +000032#include "char-predicates-inl.h"
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000033
kasperl@chromium.org71affb52009-05-26 05:44:31 +000034namespace v8 {
35namespace internal {
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000036
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000037class DateParser : public AllStatic {
38 public:
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000039 // Parse the string as a date. If parsing succeeds, return true after
40 // filling out the output array as follows (all integers are Smis):
41 // [0]: year
42 // [1]: month (0 = Jan, 1 = Feb, ...)
43 // [2]: day
44 // [3]: hour
45 // [4]: minute
46 // [5]: second
kmillikin@chromium.org4111b802010-05-03 10:34:42 +000047 // [6]: millisecond
48 // [7]: UTC offset in seconds, or null value if no timezone specified
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000049 // If parsing fails, return false (content of output array is not defined).
ager@chromium.orgbb29dc92009-03-24 13:25:23 +000050 template <typename Char>
ager@chromium.orga9aa5fa2011-04-13 08:46:07 +000051 static bool Parse(Vector<Char> str, FixedArray* output, UnicodeCache* cache);
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000052
ager@chromium.orgbb29dc92009-03-24 13:25:23 +000053 enum {
kmillikin@chromium.org4111b802010-05-03 10:34:42 +000054 YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND, UTC_OFFSET, OUTPUT_SIZE
ager@chromium.orgbb29dc92009-03-24 13:25:23 +000055 };
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000056
57 private:
58 // Range testing
ager@chromium.orgbb29dc92009-03-24 13:25:23 +000059 static inline bool Between(int x, int lo, int hi) {
60 return static_cast<unsigned>(x - lo) <= static_cast<unsigned>(hi - lo);
61 }
ricow@chromium.org4f693d62011-07-04 14:01:31 +000062
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000063 // Indicates a missing value.
64 static const int kNone = kMaxInt;
65
ricow@chromium.org4f693d62011-07-04 14:01:31 +000066 // Maximal number of digits used to build the value of a numeral.
67 // Remaining digits are ignored.
68 static const int kMaxSignificantDigits = 9;
69
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000070 // InputReader provides basic string parsing and character classification.
ager@chromium.orgbb29dc92009-03-24 13:25:23 +000071 template <typename Char>
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000072 class InputReader BASE_EMBEDDED {
73 public:
ager@chromium.orga9aa5fa2011-04-13 08:46:07 +000074 InputReader(UnicodeCache* unicode_cache, Vector<Char> s)
ager@chromium.orgbb29dc92009-03-24 13:25:23 +000075 : index_(0),
76 buffer_(s),
ager@chromium.orga9aa5fa2011-04-13 08:46:07 +000077 unicode_cache_(unicode_cache) {
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000078 Next();
79 }
80
ricow@chromium.org4f693d62011-07-04 14:01:31 +000081 int position() { return index_; }
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000082
ricow@chromium.org4f693d62011-07-04 14:01:31 +000083 // Advance to the next character of the string.
84 void Next() {
85 ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0;
86 index_++;
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000087 }
88
ricow@chromium.org4f693d62011-07-04 14:01:31 +000089 // Read a string of digits as an unsigned number. Cap value at
90 // kMaxSignificantDigits, but skip remaining digits if the numeral
91 // is longer.
92 int ReadUnsignedNumeral() {
ager@chromium.orgbeb25712010-11-29 08:02:25 +000093 int n = 0;
ricow@chromium.org4f693d62011-07-04 14:01:31 +000094 int i = 0;
95 while (IsAsciiDigit()) {
96 if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0';
97 i++;
98 Next();
ager@chromium.orgbeb25712010-11-29 08:02:25 +000099 }
100 return n;
101 }
102
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000103 // Read a word (sequence of chars. >= 'A'), fill the given buffer with a
104 // lower-case prefix, and pad any remainder of the buffer with zeroes.
105 // Return word length.
106 int ReadWord(uint32_t* prefix, int prefix_size) {
107 int len;
108 for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) {
kmillikin@chromium.org3cdd9e12010-09-06 11:39:48 +0000109 if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_);
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000110 }
111 for (int i = len; i < prefix_size; i++) prefix[i] = 0;
112 return len;
113 }
114
115 // The skip methods return whether they actually skipped something.
erik.corry@gmail.com4a6c3272010-11-18 12:04:40 +0000116 bool Skip(uint32_t c) {
117 if (ch_ == c) {
118 Next();
119 return true;
120 }
121 return false;
122 }
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000123
124 bool SkipWhiteSpace() {
ager@chromium.orga9aa5fa2011-04-13 08:46:07 +0000125 if (unicode_cache_->IsWhiteSpace(ch_)) {
erik.corry@gmail.com4a6c3272010-11-18 12:04:40 +0000126 Next();
127 return true;
128 }
129 return false;
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000130 }
131
132 bool SkipParentheses() {
133 if (ch_ != '(') return false;
134 int balance = 0;
135 do {
136 if (ch_ == ')') --balance;
137 else if (ch_ == '(') ++balance;
138 Next();
139 } while (balance > 0 && ch_);
140 return true;
141 }
142
143 // Character testing/classification. Non-ASCII digits are not supported.
144 bool Is(uint32_t c) const { return ch_ == c; }
145 bool IsEnd() const { return ch_ == 0; }
146 bool IsAsciiDigit() const { return IsDecimalDigit(ch_); }
147 bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; }
148 bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; }
149
150 // Return 1 for '+' and -1 for '-'.
151 int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); }
152
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000153 private:
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000154 int index_;
155 Vector<Char> buffer_;
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000156 uint32_t ch_;
ager@chromium.orga9aa5fa2011-04-13 08:46:07 +0000157 UnicodeCache* unicode_cache_;
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000158 };
159
ricow@chromium.org4f693d62011-07-04 14:01:31 +0000160 enum KeywordType {
161 INVALID, MONTH_NAME, TIME_ZONE_NAME, TIME_SEPARATOR, AM_PM
162 };
163
164 struct DateToken {
165 public:
166 bool IsInvalid() { return tag_ == kInvalidTokenTag; }
167 bool IsUnknown() { return tag_ == kUnknownTokenTag; }
168 bool IsNumber() { return tag_ == kNumberTag; }
169 bool IsSymbol() { return tag_ == kSymbolTag; }
170 bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; }
171 bool IsEndOfInput() { return tag_ == kEndOfInputTag; }
172 bool IsKeyword() { return tag_ >= kKeywordTagStart; }
173
174 int length() { return length_; }
175
176 int number() {
177 ASSERT(IsNumber());
178 return value_;
179 }
180 KeywordType keyword_type() {
181 ASSERT(IsKeyword());
182 return static_cast<KeywordType>(tag_);
183 }
184 int keyword_value() {
185 ASSERT(IsKeyword());
186 return value_;
187 }
188 char symbol() {
189 ASSERT(IsSymbol());
190 return static_cast<char>(value_);
191 }
192 bool IsSymbol(char symbol) {
193 return IsSymbol() && this->symbol() == symbol;
194 }
195 bool IsKeywordType(KeywordType tag) {
196 return tag_ == tag;
197 }
198 bool IsFixedLengthNumber(int length) {
199 return IsNumber() && length_ == length;
200 }
201 bool IsAsciiSign() {
202 return tag_ == kSymbolTag && (value_ == '-' || value_ == '+');
203 }
204 int ascii_sign() {
205 ASSERT(IsAsciiSign());
206 return 44 - value_;
207 }
208 bool IsKeywordZ() {
209 return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0;
210 }
211 bool IsUnknown(int character) {
212 return IsUnknown() && value_ == character;
213 }
214 // Factory functions.
215 static DateToken Keyword(KeywordType tag, int value, int length) {
216 return DateToken(tag, length, value);
217 }
218 static DateToken Number(int value, int length) {
219 return DateToken(kNumberTag, length, value);
220 }
221 static DateToken Symbol(char symbol) {
222 return DateToken(kSymbolTag, 1, symbol);
223 }
224 static DateToken EndOfInput() {
225 return DateToken(kEndOfInputTag, 0, -1);
226 }
227 static DateToken WhiteSpace(int length) {
228 return DateToken(kWhiteSpaceTag, length, -1);
229 }
230 static DateToken Unknown() {
231 return DateToken(kUnknownTokenTag, 1, -1);
232 }
233 static DateToken Invalid() {
234 return DateToken(kInvalidTokenTag, 0, -1);
235 }
kmillikin@chromium.org83e16822011-09-13 08:21:47 +0000236
ricow@chromium.org4f693d62011-07-04 14:01:31 +0000237 private:
238 enum TagType {
239 kInvalidTokenTag = -6,
240 kUnknownTokenTag = -5,
241 kWhiteSpaceTag = -4,
242 kNumberTag = -3,
243 kSymbolTag = -2,
244 kEndOfInputTag = -1,
245 kKeywordTagStart = 0
246 };
247 DateToken(int tag, int length, int value)
248 : tag_(tag),
249 length_(length),
250 value_(value) { }
251
252 int tag_;
253 int length_; // Number of characters.
254 int value_;
255 };
256
257 template <typename Char>
258 class DateStringTokenizer {
259 public:
260 explicit DateStringTokenizer(InputReader<Char>* in)
261 : in_(in), next_(Scan()) { }
262 DateToken Next() {
263 DateToken result = next_;
264 next_ = Scan();
265 return result;
266 }
267
268 DateToken Peek() {
269 return next_;
270 }
271 bool SkipSymbol(char symbol) {
272 if (next_.IsSymbol(symbol)) {
273 next_ = Scan();
274 return true;
275 }
276 return false;
277 }
kmillikin@chromium.org83e16822011-09-13 08:21:47 +0000278
ricow@chromium.org4f693d62011-07-04 14:01:31 +0000279 private:
280 DateToken Scan();
281
282 InputReader<Char>* in_;
283 DateToken next_;
284 };
285
286 static int ReadMilliseconds(DateToken number);
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000287
288 // KeywordTable maps names of months, time zones, am/pm to numbers.
289 class KeywordTable : public AllStatic {
290 public:
291 // Look up a word in the keyword table and return an index.
292 // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength
293 // and 'len' is the word length.
294 static int Lookup(const uint32_t* pre, int len);
295 // Get the type of the keyword at index i.
296 static KeywordType GetType(int i) {
297 return static_cast<KeywordType>(array[i][kTypeOffset]);
298 }
299 // Get the value of the keyword at index i.
300 static int GetValue(int i) { return array[i][kValueOffset]; }
301
302 static const int kPrefixLength = 3;
303 static const int kTypeOffset = kPrefixLength;
304 static const int kValueOffset = kTypeOffset + 1;
305 static const int kEntrySize = kValueOffset + 1;
306 static const int8_t array[][kEntrySize];
307 };
308
309 class TimeZoneComposer BASE_EMBEDDED {
310 public:
311 TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {}
312 void Set(int offset_in_hours) {
313 sign_ = offset_in_hours < 0 ? -1 : 1;
314 hour_ = offset_in_hours * sign_;
315 minute_ = 0;
316 }
317 void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; }
318 void SetAbsoluteHour(int hour) { hour_ = hour; }
319 void SetAbsoluteMinute(int minute) { minute_ = minute; }
320 bool IsExpecting(int n) const {
321 return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n);
322 }
323 bool IsUTC() const { return hour_ == 0 && minute_ == 0; }
324 bool Write(FixedArray* output);
ricow@chromium.org4f693d62011-07-04 14:01:31 +0000325 bool IsEmpty() { return hour_ == kNone; }
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000326 private:
327 int sign_;
328 int hour_;
329 int minute_;
330 };
331
332 class TimeComposer BASE_EMBEDDED {
333 public:
334 TimeComposer() : index_(0), hour_offset_(kNone) {}
335 bool IsEmpty() const { return index_ == 0; }
336 bool IsExpecting(int n) const {
kmillikin@chromium.org4111b802010-05-03 10:34:42 +0000337 return (index_ == 1 && IsMinute(n)) ||
338 (index_ == 2 && IsSecond(n)) ||
339 (index_ == 3 && IsMillisecond(n));
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000340 }
341 bool Add(int n) {
342 return index_ < kSize ? (comp_[index_++] = n, true) : false;
343 }
344 bool AddFinal(int n) {
345 if (!Add(n)) return false;
346 while (index_ < kSize) comp_[index_++] = 0;
347 return true;
348 }
349 void SetHourOffset(int n) { hour_offset_ = n; }
350 bool Write(FixedArray* output);
351
352 static bool IsMinute(int x) { return Between(x, 0, 59); }
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000353 static bool IsHour(int x) { return Between(x, 0, 23); }
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000354 static bool IsSecond(int x) { return Between(x, 0, 59); }
kmillikin@chromium.org83e16822011-09-13 08:21:47 +0000355
ricow@chromium.org4f693d62011-07-04 14:01:31 +0000356 private:
357 static bool IsHour12(int x) { return Between(x, 0, 12); }
kmillikin@chromium.org4111b802010-05-03 10:34:42 +0000358 static bool IsMillisecond(int x) { return Between(x, 0, 999); }
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000359
kmillikin@chromium.org4111b802010-05-03 10:34:42 +0000360 static const int kSize = 4;
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000361 int comp_[kSize];
362 int index_;
363 int hour_offset_;
364 };
365
366 class DayComposer BASE_EMBEDDED {
367 public:
ricow@chromium.org4f693d62011-07-04 14:01:31 +0000368 DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {}
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000369 bool IsEmpty() const { return index_ == 0; }
370 bool Add(int n) {
ricow@chromium.org4f693d62011-07-04 14:01:31 +0000371 if (index_ < kSize) {
372 comp_[index_] = n;
373 index_++;
374 return true;
375 }
376 return false;
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000377 }
378 void SetNamedMonth(int n) { named_month_ = n; }
379 bool Write(FixedArray* output);
ricow@chromium.org4f693d62011-07-04 14:01:31 +0000380 void set_iso_date() { is_iso_date_ = true; }
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000381 static bool IsMonth(int x) { return Between(x, 1, 12); }
382 static bool IsDay(int x) { return Between(x, 1, 31); }
383
ricow@chromium.org4f693d62011-07-04 14:01:31 +0000384 private:
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000385 static const int kSize = 3;
386 int comp_[kSize];
387 int index_;
388 int named_month_;
ricow@chromium.org4f693d62011-07-04 14:01:31 +0000389 // If set, ensures that data is always parsed in year-month-date order.
390 bool is_iso_date_;
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000391 };
ricow@chromium.org4f693d62011-07-04 14:01:31 +0000392
393 // Tries to parse an ES5 Date Time String. Returns the next token
394 // to continue with in the legacy date string parser. If parsing is
395 // complete, returns DateToken::EndOfInput(). If terminally unsuccessful,
396 // returns DateToken::Invalid(). Otherwise parsing continues in the
397 // legacy parser.
398 template <typename Char>
399 static DateParser::DateToken ParseES5DateTime(
400 DateStringTokenizer<Char>* scanner,
401 DayComposer* day,
402 TimeComposer* time,
403 TimeZoneComposer* tz);
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000404};
405
406
407} } // namespace v8::internal
408
409#endif // V8_DATEPARSER_H_