blob: b905e130cfd995c034a569df15812d682c926c1b [file] [log] [blame]
Jan Tattermuschf209c582015-08-24 17:47:35 -07001<?
2// Contributed to the Sandcastle Help File Builder project by Thomas Levesque
3
/// <summary>
/// Holds one search hit: the page it refers to and the score used to
/// order it among the other hits.
/// </summary>
class Ranking
{
    /// <summary>The filename of the matching page</summary>
    public $filename;

    /// <summary>The title of the matching page</summary>
    public $pageTitle;

    /// <summary>The ranking score assigned to the page</summary>
    public $rank;

    /// <summary>
    /// Constructor
    /// </summary>
    /// <param name="file">The filename of the page</param>
    /// <param name="title">The title of the page</param>
    /// <param name="rank">The ranking score of the page</param>
    public function __construct($file, $title, $rank)
    {
        $this->rank = $rank;
        $this->pageTitle = $title;
        $this->filename = $file;
    }
}
17
18
/// <summary>
/// Break the search text into a list of distinct, lowercase keywords.
/// </summary>
/// <remarks>
/// The text is split on runs of non-word characters.  Words shorter than
/// three characters, words starting with a digit, and words already in the
/// list are dropped.  The remaining words keep their order of first
/// appearance.
/// </remarks>
/// <param name="keywords">The keywords to parse</param>
/// <returns>A list containing the words for which to search</returns>
function ParseKeywords($keywords)
{
    $keywordList = array();

    foreach(preg_split("/[^\w]+/", $keywords) as $token)
    {
        $candidate = strtolower($token);

        // Too short to be a useful search term (this also discards the
        // empty strings the split can produce).
        if(strlen($candidate) <= 2)
        {
            continue;
        }

        // Words with a leading digit are not searched for
        if(ctype_digit($candidate[0]))
        {
            continue;
        }

        // Each keyword is only listed once
        if(in_array($candidate, $keywordList))
        {
            continue;
        }

        $keywordList[] = $candidate;
    }

    return $keywordList;
}
41
42
/// <summary>
/// Search for the specified keywords and return the results as a block of
/// HTML.  Only files that contain every keyword are reported.
/// </summary>
/// <param name="keywords">The keywords for which to search</param>
/// <param name="fileInfo">The file list.  Each entry is a string holding
/// the page title, the filename, and the page's word count separated by
/// NUL characters.</param>
/// <param name="wordDictionary">The dictionary used to find the words.
/// Each word maps to a list of integers in which the bits above the low
/// 16 hold the file index and the low 16 bits hold the number of
/// occurrences in that file.</param>
/// <param name="sortByTitle">True to sort by title, false to sort by
/// ranking</param>
/// <returns>A block of HTML representing the search results, or a
/// "Nothing found" message if no file contains all of the keywords.</returns>
function Search($keywords, $fileInfo, $wordDictionary, $sortByTitle)
{
    $matches = array();
    $matchingFileIndices = array();
    $isFirst = true;

    foreach($keywords as $word)
    {
        // Every keyword must be present, so one unknown word ends the search
        if(!array_key_exists($word, $wordDictionary))
        {
            return "<strong>Nothing found</strong>";
        }

        $occurrences = $wordDictionary[$word];
        $matches[$word] = $occurrences;

        // Get the file indices for this keyword, both in order and as a
        // keyed set for constant-time membership tests.
        $fileList = array();
        $fileSet = array();

        foreach($occurrences as $entry)
        {
            $fileIndex = $entry >> 16;
            $fileList[] = $fileIndex;
            $fileSet[$fileIndex] = true;
        }

        if($isFirst)
        {
            $isFirst = false;
            $matchingFileIndices = $fileList;
        }
        else
        {
            // After the first match, keep only the files that also
            // contain this keyword, preserving their order.
            $remaining = array();

            foreach($matchingFileIndices as $fileIndex)
            {
                if(isset($fileSet[$fileIndex]))
                {
                    $remaining[] = $fileIndex;
                }
            }

            $matchingFileIndices = $remaining;
        }
    }

    if(count($matchingFileIndices) == 0)
    {
        return "<strong>Nothing found</strong>";
    }

    // Rank the files based on the number of times the words occur.  Note
    // that only the first 100 matching files are ranked; the cap is applied
    // before the results are sorted.
    $rankings = array();

    foreach($matchingFileIndices as $index)
    {
        // Split out the title, filename, and word count
        $fileIndex = explode("\x00", $fileInfo[$index]);

        $title = $fileIndex[0];
        $filename = $fileIndex[1];
        $wordCount = intval($fileIndex[2]);
        $matchCount = 0;

        // Total the occurrences of every keyword in this file
        foreach($keywords as $word)
        {
            foreach($matches[$word] as $entry)
            {
                if(($entry >> 16) == $index)
                {
                    $matchCount += $entry & 0xFFFF;
                }
            }
        }

        // Matches per thousand words.  A file recorded with no words gets
        // a rank of zero rather than triggering a division by zero.
        $rank = ($wordCount > 0) ? $matchCount * 1000 / $wordCount : 0;

        $rankings[] = new Ranking($filename, $title, $rank);

        if(count($rankings) > 99)
        {
            break;
        }
    }

    // Sort by rank in descending order or by page title in ascending order
    if($sortByTitle)
    {
        usort($rankings, "cmprankbytitle");
    }
    else
    {
        usort($rankings, "cmprank");
    }

    // Format the file list and return the results
    $sb = "<ol>";

    foreach($rankings as $r)
    {
        $f = $r->filename;
        $t = $r->pageTitle;
        $sb .= "<li><a href=\"$f\" target=\"_blank\">$t</a></li>";
    }

    $sb .= "</ol>";

    // Tell the user how many matches were left out by the result cap
    $omitted = count($matchingFileIndices) - count($rankings);

    if($omitted > 0)
    {
        $sb .= "<p>Omitted $omitted more results</p>";
    }

    return $sb;
}
162
/// <summary>
/// Comparison callback that orders rankings by rank, highest first.
/// </summary>
/// <remarks>
/// An explicit -1/0/1 result is returned instead of the difference of the
/// ranks.  The ranks may be fractional and usort() casts the callback's
/// result to an integer, so a difference smaller than one would otherwise
/// be treated as "equal".
/// </remarks>
/// <param name="x">The first ranking</param>
/// <param name="y">The second ranking</param>
/// <returns>A negative value if x sorts before y, a positive value if x
/// sorts after y, or zero if their ranks are equal</returns>
function cmprank($x, $y)
{
    if($x->rank == $y->rank)
    {
        return 0;
    }

    return ($x->rank < $y->rank) ? 1 : -1;
}
167
/// <summary>
/// Comparison callback that orders rankings by page title in ascending
/// order using a binary-safe, case-sensitive string comparison.
/// </summary>
/// <param name="x">The first ranking</param>
/// <param name="y">The second ranking</param>
/// <returns>The result of strcmp() on the two page titles</returns>
function cmprankbytitle($x, $y)
{
    $leftTitle = $x->pageTitle;
    $rightTitle = $y->pageTitle;

    return strcmp($leftTitle, $rightTitle);
}
172
173?>