blob: 13a25e28311811dce123b2836a0f1459e9c3c353 [file] [log] [blame]
Sam McCall87496412017-12-01 17:08:02 +00001//===--- FuzzyMatch.h - Approximate identifier matching ---------*- C++-*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements fuzzy-matching of strings against identifiers.
11// It indicates both the existence and quality of a match:
12// 'eb' matches both 'emplace_back' and 'embed', the former has a better score.
13//
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FUZZYMATCH_H
17#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FUZZYMATCH_H
18
19#include "llvm/ADT/Optional.h"
20#include "llvm/ADT/SmallString.h"
21#include "llvm/ADT/StringRef.h"
22#include "llvm/Support/raw_ostream.h"
23
24namespace clang {
25namespace clangd {
26
27// A matcher capable of matching and scoring strings against a single pattern.
28// It's optimized for matching against many strings - match() does not allocate.
29class FuzzyMatcher {
30public:
31 // Characters beyond MaxPat are ignored.
32 FuzzyMatcher(llvm::StringRef Pattern);
33
34 // If Word matches the pattern, return a score in [0,1] (higher is better).
35 // Characters beyond MaxWord are ignored.
36 llvm::Optional<float> match(llvm::StringRef Word);
37
Sam McCall545a20d2018-01-19 14:34:02 +000038 llvm::StringRef pattern() const { return llvm::StringRef(Pat, PatN); }
39 bool empty() const { return PatN == 0; }
Sam McCall84652cc2018-01-12 16:16:09 +000040
Sam McCall87496412017-12-01 17:08:02 +000041 // Dump internal state from the last match() to the stream, for debugging.
42 // Returns the pattern with [] around matched characters, e.g.
43 // [u_p] + "unique_ptr" --> "[u]nique[_p]tr"
44 llvm::SmallString<256> dumpLast(llvm::raw_ostream &) const;
45
46private:
47 // We truncate the pattern and the word to bound the cost of matching.
48 constexpr static int MaxPat = 63, MaxWord = 127;
Sam McCall8e97cca2017-12-02 03:35:19 +000049 enum CharRole : unsigned char; // For segmentation.
50 enum CharType : unsigned char; // For segmentation.
Sam McCall3ea96402017-12-02 04:15:55 +000051 // Action should be an enum, but this causes bitfield problems:
52 // - for MSVC the enum type must be explicitly unsigned for correctness
53 // - GCC 4.8 complains not all values fit if the type is unsigned
54 using Action = bool;
55 constexpr static Action Miss = false, Match = true;
Sam McCall87496412017-12-01 17:08:02 +000056
57 bool init(llvm::StringRef Word);
58 void buildGraph();
Sam McCall77a719c2018-03-05 17:34:33 +000059 void calculateRoles(const char *Text, CharRole *Out, int &Types, int N);
60 bool allowMatch(int P, int W) const;
61 int skipPenalty(int W, Action Last) const;
62 int matchBonus(int P, int W, Action Last) const;
Sam McCall87496412017-12-01 17:08:02 +000063
64 // Pattern data is initialized by the constructor, then constant.
65 char Pat[MaxPat]; // Pattern data
66 int PatN; // Length
67 char LowPat[MaxPat]; // Pattern in lowercase
68 CharRole PatRole[MaxPat]; // Pattern segmentation info
Sam McCall77a719c2018-03-05 17:34:33 +000069 int PatTypeSet; // Bitmask of 1<<CharType
Sam McCall87496412017-12-01 17:08:02 +000070 float ScoreScale; // Normalizes scores for the pattern length.
71
72 // Word data is initialized on each call to match(), mostly by init().
73 char Word[MaxWord]; // Word data
74 int WordN; // Length
75 char LowWord[MaxWord]; // Word in lowercase
76 CharRole WordRole[MaxWord]; // Word segmentation info
Sam McCall77a719c2018-03-05 17:34:33 +000077 int WordTypeSet; // Bitmask of 1<<CharType
Sam McCall87496412017-12-01 17:08:02 +000078 bool WordContainsPattern; // Simple substring check
79
80 // Cumulative best-match score table.
81 // Boundary conditions are filled in by the constructor.
82 // The rest is repopulated for each match(), by buildGraph().
83 struct ScoreInfo {
84 signed int Score : 15;
85 Action Prev : 1;
86 };
87 ScoreInfo Scores[MaxPat + 1][MaxWord + 1][/* Last Action */ 2];
88};
89
90} // namespace clangd
91} // namespace clang
92
93#endif