blob: 3161ae4ae930c408edf0b57729156f727c529053 [file] [log] [blame]
Ben Murdoch257744e2011-11-30 15:57:28 +00001// Copyright 2011 the V8 project authors. All rights reserved.
Ben Murdochb8a8cc12014-11-26 15:28:44 +00002// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
Steve Blocka7e24c12009-10-30 11:49:00 +00004
5#ifndef V8_CHAR_PREDICATES_H_
6#define V8_CHAR_PREDICATES_H_
7
Ben Murdochb8a8cc12014-11-26 15:28:44 +00008#include "src/unicode.h"
Ben Murdoch257744e2011-11-30 15:57:28 +00009
Steve Blocka7e24c12009-10-30 11:49:00 +000010namespace v8 {
11namespace internal {
12
// Unicode character predicates as defined by ECMA-262, 3rd edition,
// used for lexical analysis.
15
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000016inline int AsciiAlphaToLower(uc32 c);
Steve Blocka7e24c12009-10-30 11:49:00 +000017inline bool IsCarriageReturn(uc32 c);
18inline bool IsLineFeed(uc32 c);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000019inline bool IsAsciiIdentifier(uc32 c);
20inline bool IsAlphaNumeric(uc32 c);
Steve Blocka7e24c12009-10-30 11:49:00 +000021inline bool IsDecimalDigit(uc32 c);
22inline bool IsHexDigit(uc32 c);
Ben Murdochb8a8cc12014-11-26 15:28:44 +000023inline bool IsOctalDigit(uc32 c);
24inline bool IsBinaryDigit(uc32 c);
Steve Blocka7e24c12009-10-30 11:49:00 +000025inline bool IsRegExpWord(uc32 c);
26inline bool IsRegExpNewline(uc32 c);
27
Emily Bernierd0a1eb72015-03-24 16:35:39 -040028
29struct SupplementaryPlanes {
30 static bool IsIDStart(uc32 c);
31 static bool IsIDPart(uc32 c);
32};
33
34
35// ES6 draft section 11.6
36// This includes '_', '$' and '\', and ID_Start according to
37// http://www.unicode.org/reports/tr31/, which consists of categories
38// 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', but excluding properties
39// 'Pattern_Syntax' or 'Pattern_White_Space'.
40// For code points in the SMPs, we can resort to ICU (if available).
Steve Blocka7e24c12009-10-30 11:49:00 +000041struct IdentifierStart {
42 static inline bool Is(uc32 c) {
Emily Bernierd0a1eb72015-03-24 16:35:39 -040043 if (c > 0xFFFF) return SupplementaryPlanes::IsIDStart(c);
44 return unibrow::ID_Start::Is(c);
Steve Blocka7e24c12009-10-30 11:49:00 +000045 }
46};
47
48
Emily Bernierd0a1eb72015-03-24 16:35:39 -040049// ES6 draft section 11.6
50// This includes \u200c and \u200d, and ID_Continue according to
51// http://www.unicode.org/reports/tr31/, which consists of ID_Start,
52// the categories 'Mn', 'Mc', 'Nd', 'Pc', but excluding properties
53// 'Pattern_Syntax' or 'Pattern_White_Space'.
54// For code points in the SMPs, we can resort to ICU (if available).
Steve Blocka7e24c12009-10-30 11:49:00 +000055struct IdentifierPart {
56 static inline bool Is(uc32 c) {
Emily Bernierd0a1eb72015-03-24 16:35:39 -040057 if (c > 0xFFFF) return SupplementaryPlanes::IsIDPart(c);
58 return unibrow::ID_Start::Is(c) || unibrow::ID_Continue::Is(c);
Steve Blocka7e24c12009-10-30 11:49:00 +000059 }
60};
61
Ben Murdochb8a8cc12014-11-26 15:28:44 +000062
Emily Bernierd0a1eb72015-03-24 16:35:39 -040063// ES6 draft section 11.2
64// This includes all code points of Unicode category 'Zs'.
65// \u180e stops being one as of Unicode 6.3.0, but ES6 adheres to Unicode 5.1,
66// so it is also included.
67// Further included are \u0009, \u000b, \u0020, \u00a0, \u000c, and \ufeff.
68// There are no category 'Zs' code points in the SMPs.
Ben Murdochb8a8cc12014-11-26 15:28:44 +000069struct WhiteSpace {
Emily Bernierd0a1eb72015-03-24 16:35:39 -040070 static inline bool Is(uc32 c) { return unibrow::WhiteSpace::Is(c); }
Ben Murdochb8a8cc12014-11-26 15:28:44 +000071};
72
73
// WhiteSpace and LineTerminator according to ES6 draft sections 11.2 and 11.3.
// LineTerminator consists of \u000a, \u000d, \u2028, and \u2029.
Ben Murdochb8a8cc12014-11-26 15:28:44 +000076struct WhiteSpaceOrLineTerminator {
77 static inline bool Is(uc32 c) {
78 return WhiteSpace::Is(c) || unibrow::LineTerminator::Is(c);
79 }
80};
81
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000082} // namespace internal
83} // namespace v8
Steve Blocka7e24c12009-10-30 11:49:00 +000084
85#endif // V8_CHAR_PREDICATES_H_