Ben Murdoch | 257744e | 2011-11-30 15:57:28 +0000 | [diff] [blame] | 1 | // Copyright 2011 the V8 project authors. All rights reserved. |
Ben Murdoch | b8a8cc1 | 2014-11-26 15:28:44 +0000 | [diff] [blame] | 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
Steve Block | a7e24c1 | 2009-10-30 11:49:00 +0000 | [diff] [blame] | 4 | |
| 5 | #ifndef V8_CHAR_PREDICATES_H_ |
| 6 | #define V8_CHAR_PREDICATES_H_ |
| 7 | |
Ben Murdoch | b8a8cc1 | 2014-11-26 15:28:44 +0000 | [diff] [blame] | 8 | #include "src/unicode.h" |
Ben Murdoch | 257744e | 2011-11-30 15:57:28 +0000 | [diff] [blame] | 9 | |
Steve Block | a7e24c1 | 2009-10-30 11:49:00 +0000 | [diff] [blame] | 10 | namespace v8 { |
| 11 | namespace internal { |
| 12 | |
| 13 | // Unicode character predicates as defined by ECMA-262, 3rd, |
| 14 | // used for lexical analysis. |
| 15 | |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 16 | inline int AsciiAlphaToLower(uc32 c); |
Steve Block | a7e24c1 | 2009-10-30 11:49:00 +0000 | [diff] [blame] | 17 | inline bool IsCarriageReturn(uc32 c); |
| 18 | inline bool IsLineFeed(uc32 c); |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 19 | inline bool IsAsciiIdentifier(uc32 c); |
| 20 | inline bool IsAlphaNumeric(uc32 c); |
Steve Block | a7e24c1 | 2009-10-30 11:49:00 +0000 | [diff] [blame] | 21 | inline bool IsDecimalDigit(uc32 c); |
| 22 | inline bool IsHexDigit(uc32 c); |
Ben Murdoch | b8a8cc1 | 2014-11-26 15:28:44 +0000 | [diff] [blame] | 23 | inline bool IsOctalDigit(uc32 c); |
| 24 | inline bool IsBinaryDigit(uc32 c); |
Steve Block | a7e24c1 | 2009-10-30 11:49:00 +0000 | [diff] [blame] | 25 | inline bool IsRegExpWord(uc32 c); |
| 26 | inline bool IsRegExpNewline(uc32 c); |
| 27 | |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 28 | |
| 29 | struct SupplementaryPlanes { |
| 30 | static bool IsIDStart(uc32 c); |
| 31 | static bool IsIDPart(uc32 c); |
| 32 | }; |
| 33 | |
| 34 | |
| 35 | // ES6 draft section 11.6 |
| 36 | // This includes '_', '$' and '\', and ID_Start according to |
| 37 | // http://www.unicode.org/reports/tr31/, which consists of categories |
| 38 | // 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', but excluding properties |
| 39 | // 'Pattern_Syntax' or 'Pattern_White_Space'. |
| 40 | // For code points in the SMPs, we can resort to ICU (if available). |
Steve Block | a7e24c1 | 2009-10-30 11:49:00 +0000 | [diff] [blame] | 41 | struct IdentifierStart { |
| 42 | static inline bool Is(uc32 c) { |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 43 | if (c > 0xFFFF) return SupplementaryPlanes::IsIDStart(c); |
| 44 | return unibrow::ID_Start::Is(c); |
Steve Block | a7e24c1 | 2009-10-30 11:49:00 +0000 | [diff] [blame] | 45 | } |
| 46 | }; |
| 47 | |
| 48 | |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 49 | // ES6 draft section 11.6 |
| 50 | // This includes \u200c and \u200d, and ID_Continue according to |
| 51 | // http://www.unicode.org/reports/tr31/, which consists of ID_Start, |
| 52 | // the categories 'Mn', 'Mc', 'Nd', 'Pc', but excluding properties |
| 53 | // 'Pattern_Syntax' or 'Pattern_White_Space'. |
| 54 | // For code points in the SMPs, we can resort to ICU (if available). |
Steve Block | a7e24c1 | 2009-10-30 11:49:00 +0000 | [diff] [blame] | 55 | struct IdentifierPart { |
| 56 | static inline bool Is(uc32 c) { |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 57 | if (c > 0xFFFF) return SupplementaryPlanes::IsIDPart(c); |
| 58 | return unibrow::ID_Start::Is(c) || unibrow::ID_Continue::Is(c); |
Steve Block | a7e24c1 | 2009-10-30 11:49:00 +0000 | [diff] [blame] | 59 | } |
| 60 | }; |
| 61 | |
Ben Murdoch | b8a8cc1 | 2014-11-26 15:28:44 +0000 | [diff] [blame] | 62 | |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 63 | // ES6 draft section 11.2 |
| 64 | // This includes all code points of Unicode category 'Zs'. |
| 65 | // \u180e stops being one as of Unicode 6.3.0, but ES6 adheres to Unicode 5.1, |
| 66 | // so it is also included. |
| 67 | // Further included are \u0009, \u000b, \u0020, \u00a0, \u000c, and \ufeff. |
| 68 | // There are no category 'Zs' code points in the SMPs. |
Ben Murdoch | b8a8cc1 | 2014-11-26 15:28:44 +0000 | [diff] [blame] | 69 | struct WhiteSpace { |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 70 | static inline bool Is(uc32 c) { return unibrow::WhiteSpace::Is(c); } |
Ben Murdoch | b8a8cc1 | 2014-11-26 15:28:44 +0000 | [diff] [blame] | 71 | }; |
| 72 | |
| 73 | |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 74 | // WhiteSpace and LineTerminator according to ES6 draft section 11.2 and 11.3 |
| 75 | // This consists of \000a, \000d, \u2028, and \u2029. |
Ben Murdoch | b8a8cc1 | 2014-11-26 15:28:44 +0000 | [diff] [blame] | 76 | struct WhiteSpaceOrLineTerminator { |
| 77 | static inline bool Is(uc32 c) { |
| 78 | return WhiteSpace::Is(c) || unibrow::LineTerminator::Is(c); |
| 79 | } |
| 80 | }; |
| 81 | |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 82 | } // namespace internal |
| 83 | } // namespace v8 |
Steve Block | a7e24c1 | 2009-10-30 11:49:00 +0000 | [diff] [blame] | 84 | |
| 85 | #endif // V8_CHAR_PREDICATES_H_ |