Torne (Richard Coles) | 51b2906 | 2013-11-28 11:56:03 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2003 Lars Knoll (knoll@kde.org) |
| 3 | * Copyright (C) 2004, 2005, 2006, 2008, 2009, 2010 Apple Inc. All rights reserved. |
| 4 | * Copyright (C) 2008 Eric Seidel <eric@webkit.org> |
| 5 | * Copyright (C) 2009 - 2010 Torch Mobile (Beijing) Co. Ltd. All rights reserved. |
| 6 | * |
| 7 | * This library is free software; you can redistribute it and/or |
| 8 | * modify it under the terms of the GNU Library General Public |
| 9 | * License as published by the Free Software Foundation; either |
| 10 | * version 2 of the License, or (at your option) any later version. |
| 11 | * |
| 12 | * This library is distributed in the hope that it will be useful, |
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 15 | * Library General Public License for more details. |
| 16 | * |
| 17 | * You should have received a copy of the GNU Library General Public License |
| 18 | * along with this library; see the file COPYING.LIB. If not, write to |
| 19 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| 20 | * Boston, MA 02110-1301, USA. |
| 21 | */ |
| 22 | |
| 23 | #ifndef CSSTokenizer_h |
| 24 | #define CSSTokenizer_h |
| 25 | |
| 26 | #include "wtf/Noncopyable.h" |
| 27 | #include "wtf/OwnPtr.h" |
| 28 | #include "wtf/text/WTFString.h" |
| 29 | |
| 30 | namespace WebCore { |
| 31 | |
Torne (Richard Coles) | 0938029 | 2014-02-21 12:17:33 +0000 | [diff] [blame] | 32 | class BisonCSSParser; |
Torne (Richard Coles) | 51b2906 | 2013-11-28 11:56:03 +0000 | [diff] [blame] | 33 | struct CSSParserLocation; |
| 34 | struct CSSParserString; |
| 35 | |
| 36 | class CSSTokenizer { |
| 37 | WTF_MAKE_NONCOPYABLE(CSSTokenizer); |
| 38 | public: |
| 39 | // FIXME: This should not be needed but there are still some ties between the 2 classes. |
Torne (Richard Coles) | 0938029 | 2014-02-21 12:17:33 +0000 | [diff] [blame] | 40 | friend class BisonCSSParser; |
Torne (Richard Coles) | 51b2906 | 2013-11-28 11:56:03 +0000 | [diff] [blame] | 41 | |
Torne (Richard Coles) | 0938029 | 2014-02-21 12:17:33 +0000 | [diff] [blame] | 42 | CSSTokenizer(BisonCSSParser& parser) |
Torne (Richard Coles) | 51b2906 | 2013-11-28 11:56:03 +0000 | [diff] [blame] | 43 | : m_parser(parser) |
| 44 | , m_parsedTextPrefixLength(0) |
| 45 | , m_parsedTextSuffixLength(0) |
| 46 | , m_parsingMode(NormalMode) |
| 47 | , m_is8BitSource(false) |
| 48 | , m_length(0) |
| 49 | , m_token(0) |
| 50 | , m_lineNumber(0) |
| 51 | , m_tokenStartLineNumber(0) |
| 52 | , m_internal(true) |
| 53 | { |
| 54 | m_tokenStart.ptr8 = 0; |
| 55 | } |
| 56 | |
| 57 | void setupTokenizer(const char* prefix, unsigned prefixLength, const String&, const char* suffix, unsigned suffixLength); |
| 58 | |
| 59 | CSSParserLocation currentLocation(); |
| 60 | |
| 61 | inline int lex(void* yylval) { return (this->*m_lexFunc)(yylval); } |
| 62 | |
| 63 | inline unsigned safeUserStringTokenOffset() |
| 64 | { |
| 65 | return std::min(tokenStartOffset(), static_cast<unsigned>(m_length - 1 - m_parsedTextSuffixLength)) - m_parsedTextPrefixLength; |
| 66 | } |
| 67 | |
| 68 | bool is8BitSource() const { return m_is8BitSource; } |
| 69 | |
| 70 | // FIXME: These 2 functions should be private so that we don't need the definitions below. |
| 71 | template <typename CharacterType> |
| 72 | inline CharacterType* tokenStart(); |
| 73 | |
| 74 | inline unsigned tokenStartOffset(); |
| 75 | |
| 76 | private: |
Ben Murdoch | aafa69c | 2014-04-03 12:30:15 +0100 | [diff] [blame] | 77 | UChar* allocateStringBuffer16(size_t len); |
Torne (Richard Coles) | 51b2906 | 2013-11-28 11:56:03 +0000 | [diff] [blame] | 78 | |
| 79 | template <typename CharacterType> |
| 80 | inline CharacterType*& currentCharacter(); |
| 81 | |
| 82 | template <typename CharacterType> |
| 83 | inline CharacterType* dataStart(); |
| 84 | |
| 85 | template <typename CharacterType> |
| 86 | inline void setTokenStart(CharacterType*); |
| 87 | |
| 88 | template <typename CharacterType> |
| 89 | inline bool isIdentifierStart(); |
| 90 | |
| 91 | template <typename CharacterType> |
| 92 | inline CSSParserLocation tokenLocation(); |
| 93 | |
| 94 | template <typename CharacterType> |
Ben Murdoch | aafa69c | 2014-04-03 12:30:15 +0100 | [diff] [blame] | 95 | static unsigned parseEscape(CharacterType*&); |
Torne (Richard Coles) | 51b2906 | 2013-11-28 11:56:03 +0000 | [diff] [blame] | 96 | template <typename DestCharacterType> |
Ben Murdoch | aafa69c | 2014-04-03 12:30:15 +0100 | [diff] [blame] | 97 | static inline void UnicodeToChars(DestCharacterType*&, unsigned); |
Torne (Richard Coles) | 51b2906 | 2013-11-28 11:56:03 +0000 | [diff] [blame] | 98 | |
Ben Murdoch | aafa69c | 2014-04-03 12:30:15 +0100 | [diff] [blame] | 99 | template <typename SrcCharacterType, typename DestCharacterType> |
| 100 | static inline bool parseIdentifierInternal(SrcCharacterType*&, DestCharacterType*&, bool&); |
| 101 | template <typename SrcCharacterType> |
| 102 | static size_t peekMaxIdentifierLen(SrcCharacterType*); |
Torne (Richard Coles) | 51b2906 | 2013-11-28 11:56:03 +0000 | [diff] [blame] | 103 | template <typename CharacterType> |
| 104 | inline void parseIdentifier(CharacterType*&, CSSParserString&, bool&); |
| 105 | |
Ben Murdoch | aafa69c | 2014-04-03 12:30:15 +0100 | [diff] [blame] | 106 | template <typename SrcCharacterType> |
| 107 | static size_t peekMaxStringLen(SrcCharacterType*, UChar quote); |
Torne (Richard Coles) | 51b2906 | 2013-11-28 11:56:03 +0000 | [diff] [blame] | 108 | template <typename SrcCharacterType, typename DestCharacterType> |
Ben Murdoch | aafa69c | 2014-04-03 12:30:15 +0100 | [diff] [blame] | 109 | static inline bool parseStringInternal(SrcCharacterType*&, DestCharacterType*&, UChar); |
Torne (Richard Coles) | 51b2906 | 2013-11-28 11:56:03 +0000 | [diff] [blame] | 110 | template <typename CharacterType> |
| 111 | inline void parseString(CharacterType*&, CSSParserString& resultString, UChar); |
| 112 | |
| 113 | template <typename CharacterType> |
| 114 | inline bool findURI(CharacterType*& start, CharacterType*& end, UChar& quote); |
Ben Murdoch | aafa69c | 2014-04-03 12:30:15 +0100 | [diff] [blame] | 115 | template <typename SrcCharacterType> |
| 116 | static size_t peekMaxURILen(SrcCharacterType*, UChar quote); |
Torne (Richard Coles) | 51b2906 | 2013-11-28 11:56:03 +0000 | [diff] [blame] | 117 | template <typename SrcCharacterType, typename DestCharacterType> |
Ben Murdoch | aafa69c | 2014-04-03 12:30:15 +0100 | [diff] [blame] | 118 | static inline bool parseURIInternal(SrcCharacterType*&, DestCharacterType*&, UChar quote); |
Torne (Richard Coles) | 51b2906 | 2013-11-28 11:56:03 +0000 | [diff] [blame] | 119 | template <typename CharacterType> |
| 120 | inline void parseURI(CSSParserString&); |
Ben Murdoch | aafa69c | 2014-04-03 12:30:15 +0100 | [diff] [blame] | 121 | |
Torne (Richard Coles) | 51b2906 | 2013-11-28 11:56:03 +0000 | [diff] [blame] | 122 | template <typename CharacterType> |
| 123 | inline bool parseUnicodeRange(); |
| 124 | template <typename CharacterType> |
| 125 | bool parseNthChild(); |
| 126 | template <typename CharacterType> |
| 127 | bool parseNthChildExtra(); |
| 128 | template <typename CharacterType> |
| 129 | inline bool detectFunctionTypeToken(int); |
| 130 | template <typename CharacterType> |
| 131 | inline void detectMediaQueryToken(int); |
| 132 | template <typename CharacterType> |
| 133 | inline void detectNumberToken(CharacterType*, int); |
| 134 | template <typename CharacterType> |
| 135 | inline void detectDashToken(int); |
| 136 | template <typename CharacterType> |
| 137 | inline void detectAtToken(int, bool); |
| 138 | template <typename CharacterType> |
| 139 | inline void detectSupportsToken(int); |
Torne (Richard Coles) | 51b2906 | 2013-11-28 11:56:03 +0000 | [diff] [blame] | 140 | |
| 141 | template <typename SourceCharacterType> |
| 142 | int realLex(void* yylval); |
| 143 | |
Torne (Richard Coles) | 0938029 | 2014-02-21 12:17:33 +0000 | [diff] [blame] | 144 | BisonCSSParser& m_parser; |
Torne (Richard Coles) | 51b2906 | 2013-11-28 11:56:03 +0000 | [diff] [blame] | 145 | |
| 146 | size_t m_parsedTextPrefixLength; |
| 147 | size_t m_parsedTextSuffixLength; |
| 148 | |
| 149 | enum ParsingMode { |
| 150 | NormalMode, |
| 151 | MediaQueryMode, |
| 152 | SupportsMode, |
| 153 | NthChildMode |
| 154 | }; |
| 155 | |
| 156 | ParsingMode m_parsingMode; |
| 157 | bool m_is8BitSource; |
| 158 | OwnPtr<LChar[]> m_dataStart8; |
| 159 | OwnPtr<UChar[]> m_dataStart16; |
| 160 | LChar* m_currentCharacter8; |
| 161 | UChar* m_currentCharacter16; |
Ben Murdoch | aafa69c | 2014-04-03 12:30:15 +0100 | [diff] [blame] | 162 | |
| 163 | // During parsing of an ASCII stylesheet we might locate escape |
| 164 | // sequences that expand into UTF-16 code points. Strings, |
| 165 | // identifiers and URIs containing such escape sequences are |
| 166 | // stored in m_cssStrings16 so that we don't have to store the |
| 167 | // whole stylesheet as UTF-16. |
| 168 | Vector<OwnPtr<UChar[]> > m_cssStrings16; |
Torne (Richard Coles) | 51b2906 | 2013-11-28 11:56:03 +0000 | [diff] [blame] | 169 | union { |
| 170 | LChar* ptr8; |
| 171 | UChar* ptr16; |
| 172 | } m_tokenStart; |
| 173 | unsigned m_length; |
| 174 | int m_token; |
| 175 | int m_lineNumber; |
| 176 | int m_tokenStartLineNumber; |
| 177 | |
| 178 | // FIXME: This boolean is misnamed. Also it would be nice if we could consolidate it |
| 179 | // with the CSSParserMode logic to determine if internal properties are allowed. |
| 180 | bool m_internal; |
| 181 | |
| 182 | int (CSSTokenizer::*m_lexFunc)(void*); |
| 183 | }; |
| 184 | |
| 185 | inline unsigned CSSTokenizer::tokenStartOffset() |
| 186 | { |
| 187 | if (is8BitSource()) |
| 188 | return m_tokenStart.ptr8 - m_dataStart8.get(); |
| 189 | return m_tokenStart.ptr16 - m_dataStart16.get(); |
| 190 | } |
| 191 | |
| 192 | template <> |
| 193 | inline LChar* CSSTokenizer::tokenStart<LChar>() |
| 194 | { |
| 195 | return m_tokenStart.ptr8; |
| 196 | } |
| 197 | |
| 198 | template <> |
| 199 | inline UChar* CSSTokenizer::tokenStart<UChar>() |
| 200 | { |
| 201 | return m_tokenStart.ptr16; |
| 202 | } |
| 203 | |
| 204 | } // namespace WebCore |
| 205 | |
| 206 | #endif // CSSTokenizer_h |