Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 1 | //===--- FuzzyMatch.h - Approximate identifier matching ---------*- C++-*-===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // To check for a match between a Pattern ('u_p') and a Word ('unique_ptr'), |
| 11 | // we consider the possible partial match states: |
| 12 | // |
| 13 | // u n i q u e _ p t r |
| 14 | // +--------------------- |
| 15 | // |A . . . . . . . . . . |
| 16 | // u| |
| 17 | // |. . . . . . . . . . . |
| 18 | // _| |
| 19 | // |. . . . . . . O . . . |
| 20 | // p| |
| 21 | // |. . . . . . . . . . B |
| 22 | // |
| 23 | // Each dot represents some prefix of the pattern being matched against some |
| 24 | // prefix of the word. |
| 25 | // - A is the initial state: '' matched against '' |
| 26 | // - O is an intermediate state: 'u_' matched against 'unique_' |
| 27 | // - B is the target state: 'u_p' matched against 'unique_ptr' |
| 28 | // |
| 29 | // We aim to find the best path from A->B. |
| 30 | // - Moving right (consuming a word character) |
| 31 | // Always legal: not all word characters must match. |
| 32 | // - Moving diagonally (consuming both a word and pattern character) |
| 33 | // Legal if the characters match. |
| 34 | // - Moving down (consuming a pattern character) is never legal. |
| 35 | // Never legal: all pattern characters must match something. |
Sam McCall | 77a719c | 2018-03-05 17:34:33 +0000 | [diff] [blame] | 36 | // Characters are matched case-insensitively. |
| 37 | // The first pattern character may only match the start of a word segment. |
Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 38 | // |
| 39 | // The scoring is based on heuristics: |
| 40 | // - when matching a character, apply a bonus or penalty depending on the |
| 41 | // match quality (does case match, do word segments align, etc) |
| 42 | // - when skipping a character, apply a penalty if it hurts the match |
| 43 | // (it starts a word segment, or splits the matched region, etc) |
| 44 | // |
| 45 | // These heuristics require the ability to "look backward" one character, to |
| 46 | // see whether it was matched or not. Therefore the dynamic-programming matrix |
| 47 | // has an extra dimension (last character matched). |
| 48 | // Each entry also has an additional flag indicating whether the last-but-one |
| 49 | // character matched, which is needed to trace back through the scoring table |
| 50 | // and reconstruct the match. |
| 51 | // |
| 52 | // We treat strings as byte-sequences, so only ASCII has first-class support. |
| 53 | // |
| 54 | // This algorithm was inspired by VS code's client-side filtering, and aims |
| 55 | // to be mostly-compatible. |
| 56 | // |
| 57 | //===----------------------------------------------------------------------===// |
| 58 | |
| 59 | #include "FuzzyMatch.h" |
| 60 | #include "llvm/ADT/Optional.h" |
| 61 | #include "llvm/Support/Format.h" |
| 62 | |
Sam McCall | 8fed634 | 2017-12-01 20:03:19 +0000 | [diff] [blame] | 63 | namespace clang { |
| 64 | namespace clangd { |
Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 65 | using namespace llvm; |
Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 66 | |
Sam McCall | a8c5d3a | 2017-12-02 02:28:29 +0000 | [diff] [blame] | 67 | constexpr int FuzzyMatcher::MaxPat; |
| 68 | constexpr int FuzzyMatcher::MaxWord; |
Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 69 | |
| 70 | static char lower(char C) { return C >= 'A' && C <= 'Z' ? C + ('a' - 'A') : C; } |
| 71 | // A "negative infinity" score that won't overflow. |
| 72 | // We use this to mark unreachable states and forbidden solutions. |
| 73 | // Score field is 15 bits wide, min value is -2^14, we use half of that. |
| 74 | static constexpr int AwfulScore = -(1 << 13); |
| 75 | static bool isAwful(int S) { return S < AwfulScore / 2; } |
| 76 | static constexpr int PerfectBonus = 3; // Perfect per-pattern-char score. |
| 77 | |
| 78 | FuzzyMatcher::FuzzyMatcher(StringRef Pattern) |
Sam McCall | 77a719c | 2018-03-05 17:34:33 +0000 | [diff] [blame] | 79 | : PatN(std::min<int>(MaxPat, Pattern.size())), |
Sam McCall | 5668cd3 | 2018-01-17 15:25:55 +0000 | [diff] [blame] | 80 | ScoreScale(PatN ? float{1} / (PerfectBonus * PatN) : 0), WordN(0) { |
Sam McCall | eb74ab8 | 2018-01-19 15:03:49 +0000 | [diff] [blame] | 81 | std::copy(Pattern.begin(), Pattern.begin() + PatN, Pat); |
Sam McCall | 77a719c | 2018-03-05 17:34:33 +0000 | [diff] [blame] | 82 | for (int I = 0; I < PatN; ++I) |
Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 83 | LowPat[I] = lower(Pat[I]); |
Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 84 | Scores[0][0][Miss] = {0, Miss}; |
| 85 | Scores[0][0][Match] = {AwfulScore, Miss}; |
| 86 | for (int P = 0; P <= PatN; ++P) |
| 87 | for (int W = 0; W < P; ++W) |
| 88 | for (Action A : {Miss, Match}) |
| 89 | Scores[P][W][A] = {AwfulScore, Miss}; |
Sam McCall | f7d1805 | 2018-07-20 08:01:37 +0000 | [diff] [blame] | 90 | PatTypeSet = |
| 91 | calculateRoles(StringRef(Pat, PatN), makeMutableArrayRef(PatRole, PatN)); |
Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 92 | } |
| 93 | |
| 94 | Optional<float> FuzzyMatcher::match(StringRef Word) { |
Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 95 | if (!(WordContainsPattern = init(Word))) |
| 96 | return None; |
Sam McCall | 09b4be5 | 2018-01-13 16:46:26 +0000 | [diff] [blame] | 97 | if (!PatN) |
| 98 | return 1; |
Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 99 | buildGraph(); |
| 100 | auto Best = std::max(Scores[PatN][WordN][Miss].Score, |
| 101 | Scores[PatN][WordN][Match].Score); |
| 102 | if (isAwful(Best)) |
| 103 | return None; |
Sam McCall | bc7cbb7 | 2018-06-06 12:38:37 +0000 | [diff] [blame] | 104 | float Score = |
| 105 | ScoreScale * std::min(PerfectBonus * PatN, std::max<int>(0, Best)); |
| 106 | // If the pattern is as long as the word, we have an exact string match, |
| 107 | // since every pattern character must match something. |
| 108 | if (WordN == PatN) |
| 109 | Score *= 2; // May not be perfect 2 if case differs in a significant way. |
| 110 | return Score; |
Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 111 | } |
| 112 | |
Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 113 | // We get CharTypes from a lookup table. Each is 2 bits, 4 fit in each byte. |
| 114 | // The top 6 bits of the char select the byte, the bottom 2 select the offset. |
| 115 | // e.g. 'q' = 010100 01 = byte 28 (55), bits 3-2 (01) -> Lower. |
| 116 | constexpr static uint8_t CharTypes[] = { |
| 117 | 0x00, 0x00, 0x00, 0x00, // Control characters |
| 118 | 0x00, 0x00, 0x00, 0x00, // Control characters |
| 119 | 0xff, 0xff, 0xff, 0xff, // Punctuation |
| 120 | 0x55, 0x55, 0xf5, 0xff, // Numbers->Lower, more Punctuation. |
| 121 | 0xab, 0xaa, 0xaa, 0xaa, // @ and A-O |
| 122 | 0xaa, 0xaa, 0xea, 0xff, // P-Z, more Punctuation. |
| 123 | 0x57, 0x55, 0x55, 0x55, // ` and a-o |
| 124 | 0x55, 0x55, 0xd5, 0x3f, // p-z, Punctuation, DEL. |
| 125 | 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, // Bytes over 127 -> Lower. |
| 126 | 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, // (probably UTF-8). |
| 127 | 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, |
| 128 | 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, |
| 129 | }; |
| 130 | |
Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 131 | // The Role can be determined from the Type of a character and its neighbors: |
| 132 | // |
| 133 | // Example | Chars | Type | Role |
| 134 | // ---------+--------------+----- |
| 135 | // F(o)oBar | Foo | Ull | Tail |
| 136 | // Foo(B)ar | oBa | lUl | Head |
| 137 | // (f)oo | ^fo | Ell | Head |
| 138 | // H(T)TP | HTT | UUU | Tail |
| 139 | // |
| 140 | // Our lookup table maps a 6 bit key (Prev, Curr, Next) to a 2-bit Role. |
| 141 | // A byte packs 4 Roles. (Prev, Curr) selects a byte, Next selects the offset. |
| 142 | // e.g. Lower, Upper, Lower -> 01 10 01 -> byte 6 (aa), bits 3-2 (10) -> Head. |
| 143 | constexpr static uint8_t CharRoles[] = { |
| 144 | // clang-format off |
| 145 | // Curr= Empty Lower Upper Separ |
| 146 | /* Prev=Empty */ 0x00, 0xaa, 0xaa, 0xff, // At start, Lower|Upper->Head |
| 147 | /* Prev=Lower */ 0x00, 0x55, 0xaa, 0xff, // In word, Upper->Head;Lower->Tail |
| 148 | /* Prev=Upper */ 0x00, 0x55, 0x59, 0xff, // Ditto, but U(U)U->Tail |
| 149 | /* Prev=Separ */ 0x00, 0xaa, 0xaa, 0xff, // After separator, like at start |
| 150 | // clang-format on |
| 151 | }; |
| 152 | |
| 153 | template <typename T> static T packedLookup(const uint8_t *Data, int I) { |
| 154 | return static_cast<T>((Data[I >> 2] >> ((I & 3) * 2)) & 3); |
| 155 | } |
Sam McCall | f7d1805 | 2018-07-20 08:01:37 +0000 | [diff] [blame] | 156 | CharTypeSet calculateRoles(StringRef Text, MutableArrayRef<CharRole> Roles) { |
| 157 | assert(Text.size() == Roles.size()); |
| 158 | if (Text.size() == 0) |
| 159 | return 0; |
Sam McCall | 77a719c | 2018-03-05 17:34:33 +0000 | [diff] [blame] | 160 | CharType Type = packedLookup<CharType>(CharTypes, Text[0]); |
Sam McCall | f7d1805 | 2018-07-20 08:01:37 +0000 | [diff] [blame] | 161 | CharTypeSet TypeSet = 1 << Type; |
Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 162 | // Types holds a sliding window of (Prev, Curr, Next) types. |
| 163 | // Initial value is (Empty, Empty, type of Text[0]). |
Sam McCall | 77a719c | 2018-03-05 17:34:33 +0000 | [diff] [blame] | 164 | int Types = Type; |
Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 165 | // Rotate slides in the type of the next character. |
| 166 | auto Rotate = [&](CharType T) { Types = ((Types << 2) | T) & 0x3f; }; |
Sam McCall | f7d1805 | 2018-07-20 08:01:37 +0000 | [diff] [blame] | 167 | for (unsigned I = 0; I < Text.size() - 1; ++I) { |
Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 168 | // For each character, rotate in the next, and look up the role. |
Sam McCall | 77a719c | 2018-03-05 17:34:33 +0000 | [diff] [blame] | 169 | Type = packedLookup<CharType>(CharTypes, Text[I + 1]); |
| 170 | TypeSet |= 1 << Type; |
| 171 | Rotate(Type); |
Sam McCall | f7d1805 | 2018-07-20 08:01:37 +0000 | [diff] [blame] | 172 | Roles[I] = packedLookup<CharRole>(CharRoles, Types); |
Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 173 | } |
| 174 | // For the last character, the "next character" is Empty. |
| 175 | Rotate(Empty); |
Sam McCall | f7d1805 | 2018-07-20 08:01:37 +0000 | [diff] [blame] | 176 | Roles[Text.size() - 1] = packedLookup<CharRole>(CharRoles, Types); |
| 177 | return TypeSet; |
Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 178 | } |
| 179 | |
| 180 | // Sets up the data structures matching Word. |
| 181 | // Returns false if we can cheaply determine that no match is possible. |
| 182 | bool FuzzyMatcher::init(StringRef NewWord) { |
| 183 | WordN = std::min<int>(MaxWord, NewWord.size()); |
| 184 | if (PatN > WordN) |
| 185 | return false; |
Sam McCall | eb74ab8 | 2018-01-19 15:03:49 +0000 | [diff] [blame] | 186 | std::copy(NewWord.begin(), NewWord.begin() + WordN, Word); |
Sam McCall | 09b4be5 | 2018-01-13 16:46:26 +0000 | [diff] [blame] | 187 | if (PatN == 0) |
| 188 | return true; |
Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 189 | for (int I = 0; I < WordN; ++I) |
| 190 | LowWord[I] = lower(Word[I]); |
| 191 | |
| 192 | // Cheap subsequence check. |
| 193 | for (int W = 0, P = 0; P != PatN; ++W) { |
| 194 | if (W == WordN) |
| 195 | return false; |
| 196 | if (LowWord[W] == LowPat[P]) |
| 197 | ++P; |
| 198 | } |
| 199 | |
Sam McCall | 77a719c | 2018-03-05 17:34:33 +0000 | [diff] [blame] | 200 | // FIXME: some words are hard to tokenize algorithmically. |
| 201 | // e.g. vsprintf is V S Print F, and should match [pri] but not [int]. |
| 202 | // We could add a tokenization dictionary for common stdlib names. |
Sam McCall | f7d1805 | 2018-07-20 08:01:37 +0000 | [diff] [blame] | 203 | WordTypeSet = calculateRoles(StringRef(Word, WordN), |
| 204 | makeMutableArrayRef(WordRole, WordN)); |
Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 205 | return true; |
| 206 | } |
| 207 | |
| 208 | // The forwards pass finds the mappings of Pattern onto Word. |
| 209 | // Score = best score achieved matching Word[..W] against Pat[..P]. |
| 210 | // Unlike other tables, indices range from 0 to N *inclusive* |
| 211 | // Matched = whether we chose to match Word[W] with Pat[P] or not. |
| 212 | // |
| 213 | // Points are mostly assigned to matched characters, with 1 being a good score |
| 214 | // and 3 being a great one. So we treat the score range as [0, 3 * PatN]. |
| 215 | // This range is not strict: we can apply larger bonuses/penalties, or penalize |
| 216 | // non-matched characters. |
| 217 | void FuzzyMatcher::buildGraph() { |
| 218 | for (int W = 0; W < WordN; ++W) { |
| 219 | Scores[0][W + 1][Miss] = {Scores[0][W][Miss].Score - skipPenalty(W, Miss), |
| 220 | Miss}; |
| 221 | Scores[0][W + 1][Match] = {AwfulScore, Miss}; |
| 222 | } |
| 223 | for (int P = 0; P < PatN; ++P) { |
| 224 | for (int W = P; W < WordN; ++W) { |
| 225 | auto &Score = Scores[P + 1][W + 1], &PreMiss = Scores[P + 1][W]; |
| 226 | |
| 227 | auto MatchMissScore = PreMiss[Match].Score; |
| 228 | auto MissMissScore = PreMiss[Miss].Score; |
| 229 | if (P < PatN - 1) { // Skipping trailing characters is always free. |
| 230 | MatchMissScore -= skipPenalty(W, Match); |
| 231 | MissMissScore -= skipPenalty(W, Miss); |
| 232 | } |
| 233 | Score[Miss] = (MatchMissScore > MissMissScore) |
| 234 | ? ScoreInfo{MatchMissScore, Match} |
| 235 | : ScoreInfo{MissMissScore, Miss}; |
| 236 | |
Sam McCall | 8b2dcc1 | 2018-06-14 13:50:30 +0000 | [diff] [blame] | 237 | auto &PreMatch = Scores[P][W]; |
| 238 | auto MatchMatchScore = |
| 239 | allowMatch(P, W, Match) |
| 240 | ? PreMatch[Match].Score + matchBonus(P, W, Match) |
| 241 | : AwfulScore; |
| 242 | auto MissMatchScore = allowMatch(P, W, Miss) |
| 243 | ? PreMatch[Miss].Score + matchBonus(P, W, Miss) |
| 244 | : AwfulScore; |
| 245 | Score[Match] = (MatchMatchScore > MissMatchScore) |
| 246 | ? ScoreInfo{MatchMatchScore, Match} |
| 247 | : ScoreInfo{MissMatchScore, Miss}; |
Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 248 | } |
| 249 | } |
| 250 | } |
| 251 | |
Sam McCall | 8b2dcc1 | 2018-06-14 13:50:30 +0000 | [diff] [blame] | 252 | bool FuzzyMatcher::allowMatch(int P, int W, Action Last) const { |
Sam McCall | 77a719c | 2018-03-05 17:34:33 +0000 | [diff] [blame] | 253 | if (LowPat[P] != LowWord[W]) |
| 254 | return false; |
Sam McCall | 8b2dcc1 | 2018-06-14 13:50:30 +0000 | [diff] [blame] | 255 | // We require a "strong" match: |
| 256 | // - for the first pattern character. [foo] !~ "barefoot" |
| 257 | // - after a gap. [pat] !~ "patnther" |
| 258 | if (Last == Miss) { |
| 259 | // We're banning matches outright, so conservatively accept some other cases |
| 260 | // where our segmentation might be wrong: |
| 261 | // - allow matching B in ABCDef (but not in NDEBUG) |
| 262 | // - we'd like to accept print in sprintf, but too many false positives |
| 263 | if (WordRole[W] == Tail && |
| 264 | (Word[W] == LowWord[W] || !(WordTypeSet & 1 << Lower))) |
| 265 | return false; |
| 266 | } |
| 267 | return true; |
Sam McCall | 77a719c | 2018-03-05 17:34:33 +0000 | [diff] [blame] | 268 | } |
| 269 | |
| 270 | int FuzzyMatcher::skipPenalty(int W, Action Last) const { |
Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 271 | int S = 0; |
| 272 | if (WordRole[W] == Head) // Skipping a segment. |
| 273 | S += 1; |
| 274 | if (Last == Match) // Non-consecutive match. |
| 275 | S += 2; // We'd rather skip a segment than split our match. |
| 276 | return S; |
| 277 | } |
| 278 | |
Sam McCall | 77a719c | 2018-03-05 17:34:33 +0000 | [diff] [blame] | 279 | int FuzzyMatcher::matchBonus(int P, int W, Action Last) const { |
Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 280 | assert(LowPat[P] == LowWord[W]); |
| 281 | int S = 1; |
| 282 | // Bonus: pattern so far is a (case-insensitive) prefix of the word. |
| 283 | if (P == W) // We can't skip pattern characters, so we must have matched all. |
| 284 | ++S; |
| 285 | // Bonus: case matches, or a Head in the pattern aligns with one in the word. |
Sam McCall | 77a719c | 2018-03-05 17:34:33 +0000 | [diff] [blame] | 286 | if ((Pat[P] == Word[W] && ((PatTypeSet & 1 << Upper) || P == W)) || |
Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 287 | (PatRole[P] == Head && WordRole[W] == Head)) |
| 288 | ++S; |
| 289 | // Penalty: matching inside a segment (and previous char wasn't matched). |
| 290 | if (WordRole[W] == Tail && P && Last == Miss) |
| 291 | S -= 3; |
| 292 | // Penalty: a Head in the pattern matches in the middle of a word segment. |
| 293 | if (PatRole[P] == Head && WordRole[W] == Tail) |
| 294 | --S; |
| 295 | // Penalty: matching the first pattern character in the middle of a segment. |
| 296 | if (P == 0 && WordRole[W] == Tail) |
| 297 | S -= 4; |
| 298 | assert(S <= PerfectBonus); |
| 299 | return S; |
| 300 | } |
| 301 | |
| 302 | llvm::SmallString<256> FuzzyMatcher::dumpLast(llvm::raw_ostream &OS) const { |
| 303 | llvm::SmallString<256> Result; |
| 304 | OS << "=== Match \"" << StringRef(Word, WordN) << "\" against [" |
| 305 | << StringRef(Pat, PatN) << "] ===\n"; |
Sam McCall | 09b4be5 | 2018-01-13 16:46:26 +0000 | [diff] [blame] | 306 | if (PatN == 0) { |
| 307 | OS << "Pattern is empty: perfect match.\n"; |
| 308 | return Result = StringRef(Word, WordN); |
| 309 | } |
| 310 | if (WordN == 0) { |
| 311 | OS << "Word is empty: no match.\n"; |
| 312 | return Result; |
| 313 | } |
Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 314 | if (!WordContainsPattern) { |
| 315 | OS << "Substring check failed.\n"; |
| 316 | return Result; |
| 317 | } else if (isAwful(std::max(Scores[PatN][WordN][Match].Score, |
| 318 | Scores[PatN][WordN][Miss].Score))) { |
| 319 | OS << "Substring check passed, but all matches are forbidden\n"; |
| 320 | } |
Sam McCall | 77a719c | 2018-03-05 17:34:33 +0000 | [diff] [blame] | 321 | if (!(PatTypeSet & 1 << Upper)) |
Sam McCall | 8749641 | 2017-12-01 17:08:02 +0000 | [diff] [blame] | 322 | OS << "Lowercase query, so scoring ignores case\n"; |
| 323 | |
| 324 | // Traverse Matched table backwards to reconstruct the Pattern/Word mapping. |
| 325 | // The Score table has cumulative scores, subtracting along this path gives |
| 326 | // us the per-letter scores. |
| 327 | Action Last = |
| 328 | (Scores[PatN][WordN][Match].Score > Scores[PatN][WordN][Miss].Score) |
| 329 | ? Match |
| 330 | : Miss; |
| 331 | int S[MaxWord]; |
| 332 | Action A[MaxWord]; |
| 333 | for (int W = WordN - 1, P = PatN - 1; W >= 0; --W) { |
| 334 | A[W] = Last; |
| 335 | const auto &Cell = Scores[P + 1][W + 1][Last]; |
| 336 | if (Last == Match) |
| 337 | --P; |
| 338 | const auto &Prev = Scores[P + 1][W][Cell.Prev]; |
| 339 | S[W] = Cell.Score - Prev.Score; |
| 340 | Last = Cell.Prev; |
| 341 | } |
| 342 | for (int I = 0; I < WordN; ++I) { |
| 343 | if (A[I] == Match && (I == 0 || A[I - 1] == Miss)) |
| 344 | Result.push_back('['); |
| 345 | if (A[I] == Miss && I > 0 && A[I - 1] == Match) |
| 346 | Result.push_back(']'); |
| 347 | Result.push_back(Word[I]); |
| 348 | } |
| 349 | if (A[WordN - 1] == Match) |
| 350 | Result.push_back(']'); |
| 351 | |
| 352 | for (char C : StringRef(Word, WordN)) |
| 353 | OS << " " << C << " "; |
| 354 | OS << "\n"; |
| 355 | for (int I = 0, J = 0; I < WordN; I++) |
| 356 | OS << " " << (A[I] == Match ? Pat[J++] : ' ') << " "; |
| 357 | OS << "\n"; |
| 358 | for (int I = 0; I < WordN; I++) |
| 359 | OS << format("%2d ", S[I]); |
| 360 | OS << "\n"; |
| 361 | |
| 362 | OS << "\nSegmentation:"; |
| 363 | OS << "\n'" << StringRef(Word, WordN) << "'\n "; |
| 364 | for (int I = 0; I < WordN; ++I) |
| 365 | OS << "?-+ "[static_cast<int>(WordRole[I])]; |
| 366 | OS << "\n[" << StringRef(Pat, PatN) << "]\n "; |
| 367 | for (int I = 0; I < PatN; ++I) |
| 368 | OS << "?-+ "[static_cast<int>(PatRole[I])]; |
| 369 | OS << "\n"; |
| 370 | |
| 371 | OS << "\nScoring table (last-Miss, last-Match):\n"; |
| 372 | OS << " | "; |
| 373 | for (char C : StringRef(Word, WordN)) |
| 374 | OS << " " << C << " "; |
| 375 | OS << "\n"; |
| 376 | OS << "-+----" << std::string(WordN * 4, '-') << "\n"; |
| 377 | for (int I = 0; I <= PatN; ++I) { |
| 378 | for (Action A : {Miss, Match}) { |
| 379 | OS << ((I && A == Miss) ? Pat[I - 1] : ' ') << "|"; |
| 380 | for (int J = 0; J <= WordN; ++J) { |
| 381 | if (!isAwful(Scores[I][J][A].Score)) |
| 382 | OS << format("%3d%c", Scores[I][J][A].Score, |
| 383 | Scores[I][J][A].Prev == Match ? '*' : ' '); |
| 384 | else |
| 385 | OS << " "; |
| 386 | } |
| 387 | OS << "\n"; |
| 388 | } |
| 389 | } |
| 390 | |
| 391 | return Result; |
| 392 | } |
Sam McCall | 8fed634 | 2017-12-01 20:03:19 +0000 | [diff] [blame] | 393 | |
| 394 | } // namespace clangd |
| 395 | } // namespace clang |