Raphael Isemann | 566afa0 | 2018-08-02 00:30:15 +0000 | [diff] [blame] | 1 | //===-- ClangHighlighter.cpp ------------------------------------*- C++ -*-===// |
| 2 | // |
Chandler Carruth | 2946cd7 | 2019-01-19 08:50:56 +0000 | [diff] [blame] | 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
Raphael Isemann | 566afa0 | 2018-08-02 00:30:15 +0000 | [diff] [blame] | 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "ClangHighlighter.h" |
| 10 | |
Jonas Devlieghere | 9764b65 | 2019-02-18 20:31:18 +0000 | [diff] [blame] | 11 | #include "lldb/Host/FileSystem.h" |
Raphael Isemann | 566afa0 | 2018-08-02 00:30:15 +0000 | [diff] [blame] | 12 | #include "lldb/Target/Language.h" |
| 13 | #include "lldb/Utility/AnsiTerminal.h" |
| 14 | #include "lldb/Utility/StreamString.h" |
| 15 | |
| 16 | #include "clang/Basic/SourceManager.h" |
| 17 | #include "clang/Lex/Lexer.h" |
| 18 | #include "llvm/ADT/StringSet.h" |
| 19 | #include "llvm/Support/MemoryBuffer.h" |
| 20 | |
| 21 | using namespace lldb_private; |
| 22 | |
| 23 | bool ClangHighlighter::isKeyword(llvm::StringRef token) const { |
| 24 | return keywords.find(token) != keywords.end(); |
| 25 | } |
| 26 | |
| 27 | ClangHighlighter::ClangHighlighter() { |
| 28 | #define KEYWORD(X, N) keywords.insert(#X); |
| 29 | #include "clang/Basic/TokenKinds.def" |
| 30 | } |
| 31 | |
| 32 | /// Determines which style should be applied to the given token. |
| 33 | /// \param highlighter |
| 34 | /// The current highlighter that should use the style. |
| 35 | /// \param token |
| 36 | /// The current token. |
| 37 | /// \param tok_str |
| 38 | /// The string in the source code the token represents. |
| 39 | /// \param options |
| 40 | /// The style we use for coloring the source code. |
| 41 | /// \param in_pp_directive |
| 42 | /// If we are currently in a preprocessor directive. NOTE: This is |
| 43 | /// passed by reference and will be updated if the current token starts |
| 44 | /// or ends a preprocessor directive. |
| 45 | /// \return |
| 46 | /// The ColorStyle that should be applied to the token. |
| 47 | static HighlightStyle::ColorStyle |
| 48 | determineClangStyle(const ClangHighlighter &highlighter, |
| 49 | const clang::Token &token, llvm::StringRef tok_str, |
| 50 | const HighlightStyle &options, bool &in_pp_directive) { |
| 51 | using namespace clang; |
| 52 | |
| 53 | if (token.is(tok::comment)) { |
| 54 | // If we were in a preprocessor directive before, we now left it. |
| 55 | in_pp_directive = false; |
| 56 | return options.comment; |
| 57 | } else if (in_pp_directive || token.getKind() == tok::hash) { |
| 58 | // Let's assume that the rest of the line is a PP directive. |
| 59 | in_pp_directive = true; |
| 60 | // Preprocessor directives are hard to match, so we have to hack this in. |
| 61 | return options.pp_directive; |
| 62 | } else if (tok::isStringLiteral(token.getKind())) |
| 63 | return options.string_literal; |
| 64 | else if (tok::isLiteral(token.getKind())) |
| 65 | return options.scalar_literal; |
| 66 | else if (highlighter.isKeyword(tok_str)) |
| 67 | return options.keyword; |
| 68 | else |
| 69 | switch (token.getKind()) { |
| 70 | case tok::raw_identifier: |
| 71 | case tok::identifier: |
| 72 | return options.identifier; |
| 73 | case tok::l_brace: |
| 74 | case tok::r_brace: |
| 75 | return options.braces; |
| 76 | case tok::l_square: |
| 77 | case tok::r_square: |
| 78 | return options.square_brackets; |
| 79 | case tok::l_paren: |
| 80 | case tok::r_paren: |
| 81 | return options.parentheses; |
| 82 | case tok::comma: |
| 83 | return options.comma; |
| 84 | case tok::coloncolon: |
| 85 | case tok::colon: |
| 86 | return options.colon; |
| 87 | |
| 88 | case tok::amp: |
| 89 | case tok::ampamp: |
| 90 | case tok::ampequal: |
| 91 | case tok::star: |
| 92 | case tok::starequal: |
| 93 | case tok::plus: |
| 94 | case tok::plusplus: |
| 95 | case tok::plusequal: |
| 96 | case tok::minus: |
| 97 | case tok::arrow: |
| 98 | case tok::minusminus: |
| 99 | case tok::minusequal: |
| 100 | case tok::tilde: |
| 101 | case tok::exclaim: |
| 102 | case tok::exclaimequal: |
| 103 | case tok::slash: |
| 104 | case tok::slashequal: |
| 105 | case tok::percent: |
| 106 | case tok::percentequal: |
| 107 | case tok::less: |
| 108 | case tok::lessless: |
| 109 | case tok::lessequal: |
| 110 | case tok::lesslessequal: |
| 111 | case tok::spaceship: |
| 112 | case tok::greater: |
| 113 | case tok::greatergreater: |
| 114 | case tok::greaterequal: |
| 115 | case tok::greatergreaterequal: |
| 116 | case tok::caret: |
| 117 | case tok::caretequal: |
| 118 | case tok::pipe: |
| 119 | case tok::pipepipe: |
| 120 | case tok::pipeequal: |
| 121 | case tok::question: |
| 122 | case tok::equal: |
| 123 | case tok::equalequal: |
| 124 | return options.operators; |
| 125 | default: |
| 126 | break; |
| 127 | } |
| 128 | return HighlightStyle::ColorStyle(); |
| 129 | } |
| 130 | |
Raphael Isemann | 2d437f6 | 2018-08-14 17:12:54 +0000 | [diff] [blame] | 131 | void ClangHighlighter::Highlight(const HighlightStyle &options, |
| 132 | llvm::StringRef line, |
Raphael Isemann | 2078632 | 2018-08-30 00:09:21 +0000 | [diff] [blame] | 133 | llvm::Optional<size_t> cursor_pos, |
Raphael Isemann | 2d437f6 | 2018-08-14 17:12:54 +0000 | [diff] [blame] | 134 | llvm::StringRef previous_lines, |
| 135 | Stream &result) const { |
Raphael Isemann | 566afa0 | 2018-08-02 00:30:15 +0000 | [diff] [blame] | 136 | using namespace clang; |
| 137 | |
Raphael Isemann | 566afa0 | 2018-08-02 00:30:15 +0000 | [diff] [blame] | 138 | FileSystemOptions file_opts; |
Jonas Devlieghere | 9764b65 | 2019-02-18 20:31:18 +0000 | [diff] [blame] | 139 | FileManager file_mgr(file_opts, |
| 140 | FileSystem::Instance().GetVirtualFileSystem()); |
Raphael Isemann | 566afa0 | 2018-08-02 00:30:15 +0000 | [diff] [blame] | 141 | |
| 142 | unsigned line_number = previous_lines.count('\n') + 1U; |
| 143 | |
| 144 | // Let's build the actual source code Clang needs and setup some utility |
| 145 | // objects. |
| 146 | std::string full_source = previous_lines.str() + line.str(); |
| 147 | llvm::IntrusiveRefCntPtr<DiagnosticIDs> diag_ids(new DiagnosticIDs()); |
| 148 | llvm::IntrusiveRefCntPtr<DiagnosticOptions> diags_opts( |
| 149 | new DiagnosticOptions()); |
| 150 | DiagnosticsEngine diags(diag_ids, diags_opts); |
| 151 | clang::SourceManager SM(diags, file_mgr); |
| 152 | auto buf = llvm::MemoryBuffer::getMemBuffer(full_source); |
| 153 | |
| 154 | FileID FID = SM.createFileID(clang::SourceManager::Unowned, buf.get()); |
| 155 | |
| 156 | // Let's just enable the latest ObjC and C++ which should get most tokens |
| 157 | // right. |
| 158 | LangOptions Opts; |
Erik Pilkington | fa98390 | 2018-10-30 20:31:30 +0000 | [diff] [blame] | 159 | Opts.ObjC = true; |
| 160 | // FIXME: This should probably set CPlusPlus, CPlusPlus11, ... too |
Raphael Isemann | 566afa0 | 2018-08-02 00:30:15 +0000 | [diff] [blame] | 161 | Opts.CPlusPlus17 = true; |
| 162 | Opts.LineComment = true; |
| 163 | |
| 164 | Lexer lex(FID, buf.get(), SM, Opts); |
| 165 | // The lexer should keep whitespace around. |
| 166 | lex.SetKeepWhitespaceMode(true); |
| 167 | |
| 168 | // Keeps track if we have entered a PP directive. |
| 169 | bool in_pp_directive = false; |
| 170 | |
| 171 | // True once we actually lexed the user provided line. |
| 172 | bool found_user_line = false; |
| 173 | |
Raphael Isemann | 2078632 | 2018-08-30 00:09:21 +0000 | [diff] [blame] | 174 | // True if we already highlighted the token under the cursor, false otherwise. |
| 175 | bool highlighted_cursor = false; |
Raphael Isemann | 566afa0 | 2018-08-02 00:30:15 +0000 | [diff] [blame] | 176 | Token token; |
| 177 | bool exit = false; |
| 178 | while (!exit) { |
| 179 | // Returns true if this is the last token we get from the lexer. |
| 180 | exit = lex.LexFromRawLexer(token); |
| 181 | |
| 182 | bool invalid = false; |
| 183 | unsigned current_line_number = |
| 184 | SM.getSpellingLineNumber(token.getLocation(), &invalid); |
| 185 | if (current_line_number != line_number) |
| 186 | continue; |
| 187 | found_user_line = true; |
| 188 | |
| 189 | // We don't need to print any tokens without a spelling line number. |
| 190 | if (invalid) |
| 191 | continue; |
| 192 | |
| 193 | // Same as above but with the column number. |
| 194 | invalid = false; |
| 195 | unsigned start = SM.getSpellingColumnNumber(token.getLocation(), &invalid); |
| 196 | if (invalid) |
| 197 | continue; |
| 198 | // Column numbers start at 1, but indexes in our string start at 0. |
| 199 | --start; |
| 200 | |
| 201 | // Annotations don't have a length, so let's skip them. |
| 202 | if (token.isAnnotation()) |
| 203 | continue; |
| 204 | |
| 205 | // Extract the token string from our source code. |
| 206 | llvm::StringRef tok_str = line.substr(start, token.getLength()); |
| 207 | |
| 208 | // If the token is just an empty string, we can skip all the work below. |
| 209 | if (tok_str.empty()) |
| 210 | continue; |
| 211 | |
Raphael Isemann | 2078632 | 2018-08-30 00:09:21 +0000 | [diff] [blame] | 212 | // If the cursor is inside this token, we have to apply the 'selected' |
| 213 | // highlight style before applying the actual token color. |
| 214 | llvm::StringRef to_print = tok_str; |
| 215 | StreamString storage; |
| 216 | auto end = start + token.getLength(); |
| 217 | if (cursor_pos && end > *cursor_pos && !highlighted_cursor) { |
| 218 | highlighted_cursor = true; |
| 219 | options.selected.Apply(storage, tok_str); |
| 220 | to_print = storage.GetString(); |
| 221 | } |
| 222 | |
Raphael Isemann | 566afa0 | 2018-08-02 00:30:15 +0000 | [diff] [blame] | 223 | // See how we are supposed to highlight this token. |
| 224 | HighlightStyle::ColorStyle color = |
| 225 | determineClangStyle(*this, token, tok_str, options, in_pp_directive); |
| 226 | |
Raphael Isemann | 2078632 | 2018-08-30 00:09:21 +0000 | [diff] [blame] | 227 | color.Apply(result, to_print); |
Raphael Isemann | 566afa0 | 2018-08-02 00:30:15 +0000 | [diff] [blame] | 228 | } |
| 229 | |
| 230 | // If we went over the whole file but couldn't find our own file, then |
| 231 | // somehow our setup was wrong. When we're in release mode we just give the |
| 232 | // user the normal line and pretend we don't know how to highlight it. In |
| 233 | // debug mode we bail out with an assert as this should never happen. |
| 234 | if (!found_user_line) { |
| 235 | result << line; |
Raphael Isemann | 566afa0 | 2018-08-02 00:30:15 +0000 | [diff] [blame] | 236 | assert(false && "We couldn't find the user line in the input file?"); |
| 237 | } |
Raphael Isemann | 566afa0 | 2018-08-02 00:30:15 +0000 | [diff] [blame] | 238 | } |