Sam McCall | b536a2a | 2017-12-19 12:23:48 +0000 | [diff] [blame] | 1 | //===--- SourceCode.h - Manipulating source code as strings -----*- C++ -*-===// |
| 2 | // |
Chandler Carruth | 2946cd7 | 2019-01-19 08:50:56 +0000 | [diff] [blame] | 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
Sam McCall | b536a2a | 2017-12-19 12:23:48 +0000 | [diff] [blame] | 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Various code that examines C++ source code without using heavy AST machinery |
| 10 | // (and often not even the lexer). To be used sparingly! |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SOURCECODE_H |
| 14 | #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SOURCECODE_H |
Sam McCall | a69698f | 2019-03-27 17:47:49 +0000 | [diff] [blame] | 15 | #include "Context.h" |
Sam McCall | b536a2a | 2017-12-19 12:23:48 +0000 | [diff] [blame] | 16 | #include "Protocol.h" |
Kadir Cetinkaya | 2f84d91 | 2018-08-08 08:59:29 +0000 | [diff] [blame] | 17 | #include "clang/Basic/Diagnostic.h" |
Ilya Biryukov | 4399878 | 2019-01-31 21:30:05 +0000 | [diff] [blame] | 18 | #include "clang/Basic/LangOptions.h" |
Marc-Andre Laperle | 63a1098 | 2018-02-21 02:39:08 +0000 | [diff] [blame] | 19 | #include "clang/Basic/SourceLocation.h" |
Kadir Cetinkaya | d08eab4 | 2018-11-27 16:08:53 +0000 | [diff] [blame] | 20 | #include "clang/Basic/SourceManager.h" |
Eric Liu | dd66277 | 2019-01-28 14:01:55 +0000 | [diff] [blame] | 21 | #include "clang/Format/Format.h" |
Eric Liu | 9133ecd | 2018-05-11 12:12:08 +0000 | [diff] [blame] | 22 | #include "clang/Tooling/Core/Replacement.h" |
Eric Liu | dd66277 | 2019-01-28 14:01:55 +0000 | [diff] [blame] | 23 | #include "llvm/ADT/StringRef.h" |
Kadir Cetinkaya | d08eab4 | 2018-11-27 16:08:53 +0000 | [diff] [blame] | 24 | #include "llvm/Support/SHA1.h" |
Sam McCall | b536a2a | 2017-12-19 12:23:48 +0000 | [diff] [blame] | 25 | |
| 26 | namespace clang { |
// Forward declaration of clang::SourceManager, which the declarations below
// take by const reference.
Marc-Andre Laperle | 63a1098 | 2018-02-21 02:39:08 +0000 | [diff] [blame] | 27 | class SourceManager; |
| 28 | |
Sam McCall | b536a2a | 2017-12-19 12:23:48 +0000 | [diff] [blame] | 29 | namespace clangd { |
| 30 | |
Kadir Cetinkaya | d08eab4 | 2018-11-27 16:08:53 +0000 | [diff] [blame] | 31 | // Digests of source code are generated in many different places. |
| 32 | // FileDigest names the type of those digests, so that details of the hashing |
| 33 | // function are not hard-coded at every place that needs to store a digest. |
// The type is deduced from the return type of llvm::SHA1::hash.
| 34 | using FileDigest = decltype(llvm::SHA1::hash({})); |
// Returns the FileDigest of \p Content.
// NOTE(review): given how FileDigest is defined this is presumably a SHA1 hash
// of the bytes of \p Content - the definition is not in this header; confirm.
| 35 | FileDigest digest(llvm::StringRef Content); |
// Returns the FileDigest for the file \p FID known to \p SM.
// NOTE(review): the Optional return suggests the digest can be unavailable
// (e.g. no content for \p FID) - confirm against the definition.
| 36 | llvm::Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID); |
| 37 | |
Sam McCall | a69698f | 2019-03-27 17:47:49 +0000 | [diff] [blame] | 38 | // Context variable that controls the behavior of the functions in this file |
| 39 | // that convert between LSP offsets and native clang byte offsets. |
| 40 | // If not set, the encoding defaults to UTF-16 for backwards-compatibility. |
// Declared extern here; the definition lives outside this header.
| 41 | extern Key<OffsetEncoding> kCurrentOffsetEncoding; |
| 42 | |
Sam McCall | 7189112 | 2018-10-23 11:51:53 +0000 | [diff] [blame] | 43 | // Returns the length of \p Code as LSP measures it: by default the number of |
| 44 | // UTF-16 code units needed to represent the string (LSP specifies string |
Sam McCall | a69698f | 2019-03-27 17:47:49 +0000 | [diff] [blame] | 45 | // lengths in UTF-16 code units). kCurrentOffsetEncoding may override UTF-16. |
Sam McCall | 7189112 | 2018-10-23 11:51:53 +0000 | [diff] [blame] | 46 | size_t lspLength(llvm::StringRef Code); |
| 47 | |
Sam McCall | b536a2a | 2017-12-19 12:23:48 +0000 | [diff] [blame] | 48 | /// Converts the [line, column] pair \p P into an offset in \p Code. |
Simon Marchi | 766338a | 2018-03-21 14:36:46 +0000 | [diff] [blame] | 49 | /// |
Sam McCall | a4962cc | 2018-04-27 11:59:28 +0000 | [diff] [blame] | 50 | /// If P.character exceeds the line length, returns the offset at end-of-line |
| 51 | /// (or, if \p AllowColumnsBeyondLineLength is false, returns an error instead). |
| 52 | /// If the line number is out of range, returns an error. |
Simon Marchi | 766338a | 2018-03-21 14:36:46 +0000 | [diff] [blame] | 53 | /// |
| 54 | /// On success, the returned value is in the range [0, Code.size()]. |
| 55 | llvm::Expected<size_t> |
| 56 | positionToOffset(llvm::StringRef Code, Position P, |
Fangrui Song | 8ebb854 | 2019-02-07 15:38:14 +0000 | [diff] [blame] | 57 | bool AllowColumnsBeyondLineLength = true); |
Sam McCall | b536a2a | 2017-12-19 12:23:48 +0000 | [diff] [blame] | 58 | |
| 59 | /// Converts \p Offset, an offset into \p Code, to a [line, column] pair. |
Sam McCall | a4962cc | 2018-04-27 11:59:28 +0000 | [diff] [blame] | 60 | /// Precondition: \p Offset must be in the range [0, Code.size()]. |
/// This is the opposite direction of positionToOffset.
Sam McCall | b536a2a | 2017-12-19 12:23:48 +0000 | [diff] [blame] | 61 | Position offsetToPosition(llvm::StringRef Code, size_t Offset); |
| 62 | |
Marc-Andre Laperle | 63a1098 | 2018-02-21 02:39:08 +0000 | [diff] [blame] | 63 | /// Converts the SourceLocation \p Loc into a [line, column] pair. |
Simon Marchi | 766338a | 2018-03-21 14:36:46 +0000 | [diff] [blame] | 64 | /// FIXME: This should return an error if the location is invalid. |
/// (The current return type, a plain Position, cannot signal an error.)
Marc-Andre Laperle | 63a1098 | 2018-02-21 02:39:08 +0000 | [diff] [blame] | 65 | Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc); |
| 66 | |
Ilya Biryukov | cce67a3 | 2019-01-29 14:17:36 +0000 | [diff] [blame] | 67 | /// Returns the file location corresponding to \p P. Callers should take care |
| 68 | /// to avoid comparing the result with expansion locations. |
/// NOTE(review): per the name, \p P is presumably a position in the main file
/// of \p SM, with an error returned when it cannot be mapped - confirm.
| 69 | llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM, |
| 70 | Position P); |
| 71 | |
Ilya Biryukov | 4399878 | 2019-01-31 21:30:05 +0000 | [diff] [blame] | 72 | /// Turns the token range \p R into a half-open range and checks its correctness. |
| 73 | /// The resulting range will have only valid source locations on both sides, |
| 74 | /// both of which are file locations. |
| 75 | /// |
| 76 | /// File locations always point to a particular offset in a file, i.e. they |
| 77 | /// never refer to a location inside a macro expansion. Turning locations from |
| 78 | /// macro expansions into file locations is ambiguous - one can use |
| 79 | /// SourceManager::{getExpansion|getFile|getSpelling}Loc. This function |
| 80 | /// calls SourceManager::getFileLoc on both ends of \p R to do the conversion. |
| 81 | /// |
| 82 | /// User input (e.g. cursor position) is expressed as a file location, so this |
| 83 | /// function can be viewed as a way to normalize the ranges used in the clang |
| 84 | /// AST so that they are comparable with ranges coming from the user input. |
///
/// NOTE(review): the Optional return presumably is empty when the correctness
/// check fails - confirm against the definition.
| 85 | llvm::Optional<SourceRange> toHalfOpenFileRange(const SourceManager &Mgr, |
| 86 | const LangOptions &LangOpts, |
| 87 | SourceRange R); |
| 88 | |
| 89 | /// Returns true iff \p R satisfies all of the following conditions: |
| 90 | /// - start and end locations are valid, |
| 91 | /// - start and end locations are file locations from the same file |
| 92 | /// (i.e. expansion locations are not taken into account), |
| 93 | /// - start offset <= end offset. |
| 94 | /// FIXME: introduce a type for source range with this invariant. |
| 95 | bool isValidFileRange(const SourceManager &Mgr, SourceRange R); |
| 96 | |
| 97 | /// Returns true iff \p L is contained in the half-open range \p R. |
/// Unlike halfOpenRangeTouches, a location equal to the end point of \p R does
/// not count as contained.
| 98 | /// EXPECTS: isValidFileRange(R) == true, L is a file location. |
| 99 | bool halfOpenRangeContains(const SourceManager &Mgr, SourceRange R, |
| 100 | SourceLocation L); |
| 101 | |
| 102 | /// Returns true iff \p L is contained in \p R or \p L is equal to the end point |
| 103 | /// of \p R (i.e. the end of the range is accepted in addition to its interior). |
| 104 | /// EXPECTS: isValidFileRange(R) == true, L is a file location. |
| 105 | bool halfOpenRangeTouches(const SourceManager &Mgr, SourceRange R, |
| 106 | SourceLocation L); |
| 107 | |
| 108 | /// Returns the source code covered by the source range \p R. |
| 109 | /// EXPECTS: isValidFileRange(R) == true. |
/// NOTE(review): the result is a non-owning llvm::StringRef, presumably into a
/// buffer owned by \p SM - confirm its lifetime against the definition.
| 110 | llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R); |
| 111 | |
Ilya Biryukov | 71028b8 | 2018-03-12 15:28:22 +0000 | [diff] [blame] | 112 | // Converts the half-open clang source range \p R to an LSP range. |
| 113 | // Note that clang also uses closed source ranges, which this can't handle! |
// Callers must therefore only pass ranges that are already half-open.
| 114 | Range halfOpenToRange(const SourceManager &SM, CharSourceRange R); |
| 115 | |
Sam McCall | a4962cc | 2018-04-27 11:59:28 +0000 | [diff] [blame] | 116 | // Converts \p Offset to a clang line/column (1-based, columns are bytes). |
| 117 | // The offset must be in range [0, Code.size()]. |
| 118 | // Prefer to use SourceManager if one is available. |
// NOTE(review): the returned pair is presumably {line, column}, following the
// order in the function name - confirm against the definition.
| 119 | std::pair<size_t, size_t> offsetToClangLineColumn(llvm::StringRef Code, |
Fangrui Song | 8ebb854 | 2019-02-07 15:38:14 +0000 | [diff] [blame] | 120 | size_t Offset); |
Sam McCall | a4962cc | 2018-04-27 11:59:28 +0000 | [diff] [blame] | 121 | |
/// Splits the qualified name \p QName into {scope, unqualified name}.
Marc-Andre Laperle | b387b6e | 2018-04-23 20:00:52 +0000 | [diff] [blame] | 122 | /// From "a::b::c", returns {"a::b::", "c"}. The scope is empty if there's no |
| 123 | /// qualifier. |
| 124 | std::pair<llvm::StringRef, llvm::StringRef> |
| 125 | splitQualifiedName(llvm::StringRef QName); |
| 126 | |
/// Converts the tooling::Replacement \p R into a TextEdit.
/// NOTE(review): \p Code is presumably the text that the offsets of \p R refer
/// to - the definition is not in this header; confirm.
Eric Liu | 9133ecd | 2018-05-11 12:12:08 +0000 | [diff] [blame] | 127 | TextEdit replacementToEdit(llvm::StringRef Code, const tooling::Replacement &R); |
| 128 | |
/// Converts the set of replacements \p Repls into a list of TextEdits.
/// NOTE(review): presumably one TextEdit per replacement, with \p Code being
/// the text the replacements apply to - confirm against the definition.
| 129 | std::vector<TextEdit> replacementsToEdits(llvm::StringRef Code, |
Fangrui Song | 8ebb854 | 2019-02-07 15:38:14 +0000 | [diff] [blame] | 130 | const tooling::Replacements &Repls); |
Eric Liu | 9133ecd | 2018-05-11 12:12:08 +0000 | [diff] [blame] | 131 | |
/// Converts the clang FixItHint \p FixIt into a TextEdit.
/// NOTE(review): \p M and \p L are presumably needed to map the hint's source
/// range to text positions - the definition is not in this header; confirm.
Kadir Cetinkaya | 2f84d91 | 2018-08-08 08:59:29 +0000 | [diff] [blame] | 132 | TextEdit toTextEdit(const FixItHint &FixIt, const SourceManager &M, |
Fangrui Song | 8ebb854 | 2019-02-07 15:38:14 +0000 | [diff] [blame] | 133 | const LangOptions &L); |
Kadir Cetinkaya | 2f84d91 | 2018-08-08 08:59:29 +0000 | [diff] [blame] | 134 | |
Kadir Cetinkaya | dd67793 | 2018-12-19 10:46:21 +0000 | [diff] [blame] | 135 | /// Gets the canonical path of \p F. Canonical means: |
Simon Marchi | 25f1f73 | 2018-08-10 22:27:53 +0000 | [diff] [blame] | 136 | /// |
| 137 | /// - Absolute path |
| 138 | /// - Symlinks resolved |
| 139 | /// - No "." or ".." component |
| 140 | /// - No duplicate or trailing directory separator |
| 141 | /// |
Kadir Cetinkaya | dd67793 | 2018-12-19 10:46:21 +0000 | [diff] [blame] | 142 | /// This function should be used when paths need to be used outside the |
| 143 | /// component that generates them, so that paths are normalized as much as |
| 144 | /// possible. |
/// NOTE(review): the Optional return presumably is empty when no canonical path
/// can be determined for \p F - confirm against the definition.
| 145 | llvm::Optional<std::string> getCanonicalPath(const FileEntry *F, |
Fangrui Song | 8ebb854 | 2019-02-07 15:38:14 +0000 | [diff] [blame] | 146 | const SourceManager &SourceMgr); |
Kadir Cetinkaya | a9c9d00 | 2018-08-13 08:23:01 +0000 | [diff] [blame] | 147 | |
/// Reports whether the ranges \p Left and \p Right are consecutive.
/// NOTE(review): presumably this means \p Right begins exactly where \p Left
/// ends; the definition is not visible in this header - confirm.
Haojian Wu | aa3ed5a | 2019-01-25 15:14:03 +0000 | [diff] [blame] | 148 | bool isRangeConsecutive(const Range &Left, const Range &Right); |
Eric Liu | dd66277 | 2019-01-28 14:01:55 +0000 | [diff] [blame] | 149 | |
/// Returns the format::FormatStyle to use for the file \p File.
/// NOTE(review): presumably the style is looked up through \p FS, with
/// \p Content being the text of the file; the definition (including any
/// fallback behavior) is not visible in this header - confirm.
| 150 | format::FormatStyle getFormatStyleForFile(llvm::StringRef File, |
| 151 | llvm::StringRef Content, |
| 152 | llvm::vfs::FileSystem *FS); |
| 153 | |
Haojian Wu | 12e194c | 2019-02-06 15:24:50 +0000 | [diff] [blame] | 154 | // Cleans up and formats the given replacements \p Replaces using \p Style. |
// The result is an llvm::Expected, so failure is reported as an error value.
// NOTE(review): \p Code is presumably the text the replacements apply to -
// confirm against the definition.
| 155 | llvm::Expected<tooling::Replacements> |
| 156 | cleanupAndFormat(llvm::StringRef Code, const tooling::Replacements &Replaces, |
| 157 | const format::FormatStyle &Style); |
| 158 | |
Eric Liu | 00d99bd | 2019-04-11 09:36:36 +0000 | [diff] [blame^] | 159 | /// Collects the identifiers in the source code \p Content, with their counts. |
/// The result maps each identifier to its count.
/// NOTE(review): \p Style is presumably used to decide how \p Content is lexed -
/// confirm against the definition.
| 160 | llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content, |
| 161 | const format::FormatStyle &Style); |
| 162 | |
Sam McCall | b536a2a | 2017-12-19 12:23:48 +0000 | [diff] [blame] | 163 | } // namespace clangd |
| 164 | } // namespace clang |
| 165 | #endif |