blob: 9471a1d0af906e29965f7143169caccc69f1aa46 [file] [log] [blame]
Marshall Clow29ae2ba2017-10-04 22:23:03 +00001// -*- C++ -*-
2//===------------------------- fuzzing.cpp -------------------------------===//
3//
4// The LLVM Compiler Infrastructure
5//
6// This file is dual licensed under the MIT and the University of Illinois Open
7// Source Licenses. See LICENSE.TXT for details.
8//
9//===----------------------------------------------------------------------===//
10
11// A set of routines to use when fuzzing the algorithms in libc++
12// Each one tests a single algorithm.
13//
14// They all have the form of:
15// int `algorithm`(const uint8_t *data, size_t size);
16//
17// They perform the operation, and then check to see if the results are correct.
18// If so, they return zero, and non-zero otherwise.
19//
20// For example, sort calls std::sort, then checks two things:
21// (1) The resulting vector is sorted
22// (2) The resulting vector contains the same elements as the original data.
23
24
25
26#include "fuzzing.h"
27#include <vector>
28#include <algorithm>
Marshall Clowf45f32b2017-10-23 23:19:30 +000029#include <functional>
Marshall Clowc85f85a2017-10-12 14:48:09 +000030#include <regex>
Marshall Clow56a3f4a2017-11-08 20:25:47 +000031#include <cassert>
Marshall Clow29ae2ba2017-10-04 22:23:03 +000032
Marshall Clowf45f32b2017-10-23 23:19:30 +000033#include <iostream>
34
35// If we had C++14, we could use the four iterator version of is_permutation and equal
Marshall Clow29ae2ba2017-10-04 22:23:03 +000036
37namespace fuzzing {
38
39// This is a struct we can use to test the stable_XXX algorithms.
40// perform the operation on the key, then check the order of the payload.
41
42struct stable_test {
43 uint8_t key;
Marshall Clow2ac694b2017-10-18 20:40:57 +000044 size_t payload;
Marshall Clow29ae2ba2017-10-04 22:23:03 +000045
46 stable_test(uint8_t k) : key(k), payload(0) {}
Marshall Clow2ac694b2017-10-18 20:40:57 +000047 stable_test(uint8_t k, size_t p) : key(k), payload(p) {}
Marshall Clow29ae2ba2017-10-04 22:23:03 +000048 };
49
50void swap(stable_test &lhs, stable_test &rhs)
51{
52 using std::swap;
53 swap(lhs.key, rhs.key);
54 swap(lhs.payload, rhs.payload);
55}
56
57struct key_less
58{
59 bool operator () (const stable_test &lhs, const stable_test &rhs) const
60 {
61 return lhs.key < rhs.key;
62 }
63};
64
65struct payload_less
66{
67 bool operator () (const stable_test &lhs, const stable_test &rhs) const
68 {
69 return lhs.payload < rhs.payload;
70 }
71};
72
73struct total_less
74{
75 bool operator () (const stable_test &lhs, const stable_test &rhs) const
76 {
77 return lhs.key == rhs.key ? lhs.payload < rhs.payload : lhs.key < rhs.key;
78 }
79};
80
81bool operator==(const stable_test &lhs, const stable_test &rhs)
82{
83 return lhs.key == rhs.key && lhs.payload == rhs.payload;
84}
85
86
87template<typename T>
88struct is_even
89{
90 bool operator () (const T &t) const
91 {
92 return t % 2 == 0;
93 }
94};
95
96
97template<>
98struct is_even<stable_test>
99{
100 bool operator () (const stable_test &t) const
101 {
102 return t.key % 2 == 0;
103 }
104};
105
Marshall Clow772a6d42017-10-30 19:51:58 +0000106typedef std::vector<uint8_t> Vec;
107typedef std::vector<stable_test> StableVec;
Marshall Clow29ae2ba2017-10-04 22:23:03 +0000108
Marshall Clow772a6d42017-10-30 19:51:58 +0000109// == sort ==
Marshall Clow29ae2ba2017-10-04 22:23:03 +0000110int sort(const uint8_t *data, size_t size)
111{
Marshall Clow772a6d42017-10-30 19:51:58 +0000112 Vec working(data, data + size);
Marshall Clow29ae2ba2017-10-04 22:23:03 +0000113 std::sort(working.begin(), working.end());
114
115 if (!std::is_sorted(working.begin(), working.end())) return 1;
116 if (!std::is_permutation(data, data + size, working.begin())) return 99;
117 return 0;
118}
119
120
121// == stable_sort ==
Marshall Clow29ae2ba2017-10-04 22:23:03 +0000122int stable_sort(const uint8_t *data, size_t size)
123{
Marshall Clow772a6d42017-10-30 19:51:58 +0000124 StableVec input;
Marshall Clow29ae2ba2017-10-04 22:23:03 +0000125 for (size_t i = 0; i < size; ++i)
126 input.push_back(stable_test(data[i], i));
Marshall Clow772a6d42017-10-30 19:51:58 +0000127 StableVec working = input;
Marshall Clow29ae2ba2017-10-04 22:23:03 +0000128 std::stable_sort(working.begin(), working.end(), key_less());
129
130 if (!std::is_sorted(working.begin(), working.end(), key_less())) return 1;
131 auto iter = working.begin();
132 while (iter != working.end())
133 {
134 auto range = std::equal_range(iter, working.end(), *iter, key_less());
135 if (!std::is_sorted(range.first, range.second, total_less())) return 2;
136 iter = range.second;
137 }
138 if (!std::is_permutation(input.begin(), input.end(), working.begin())) return 99;
139 return 0;
140}
141
142// == partition ==
Marshall Clow29ae2ba2017-10-04 22:23:03 +0000143int partition(const uint8_t *data, size_t size)
144{
Marshall Clow772a6d42017-10-30 19:51:58 +0000145 Vec working(data, data + size);
Marshall Clow29ae2ba2017-10-04 22:23:03 +0000146 auto iter = std::partition(working.begin(), working.end(), is_even<uint8_t>());
147
148 if (!std::all_of (working.begin(), iter, is_even<uint8_t>())) return 1;
149 if (!std::none_of(iter, working.end(), is_even<uint8_t>())) return 2;
150 if (!std::is_permutation(data, data + size, working.begin())) return 99;
151 return 0;
152}
153
154
Marshall Clow772a6d42017-10-30 19:51:58 +0000155// == partition_copy ==
156int partition_copy(const uint8_t *data, size_t size)
157{
158 Vec v1, v2;
159 auto iter = std::partition_copy(data, data + size,
160 std::back_inserter<Vec>(v1), std::back_inserter<Vec>(v2),
161 is_even<uint8_t>());
Marshall Clow29ae2ba2017-10-04 22:23:03 +0000162
Marshall Clow772a6d42017-10-30 19:51:58 +0000163// The two vectors should add up to the original size
164 if (v1.size() + v2.size() != size) return 1;
165
166// All of the even values should be in the first vector, and none in the second
167 if (!std::all_of (v1.begin(), v1.end(), is_even<uint8_t>())) return 2;
168 if (!std::none_of(v2.begin(), v2.end(), is_even<uint8_t>())) return 3;
169
170// Every value in both vectors has to be in the original
171 for (auto v: v1)
172 if (std::find(data, data + size, v) == data + size) return 4;
173
174 for (auto v: v2)
175 if (std::find(data, data + size, v) == data + size) return 5;
176
177 return 0;
178}
179
180// == stable_partition ==
Marshall Clow29ae2ba2017-10-04 22:23:03 +0000181int stable_partition (const uint8_t *data, size_t size)
182{
Marshall Clow772a6d42017-10-30 19:51:58 +0000183 StableVec input;
Marshall Clow29ae2ba2017-10-04 22:23:03 +0000184 for (size_t i = 0; i < size; ++i)
185 input.push_back(stable_test(data[i], i));
Marshall Clow772a6d42017-10-30 19:51:58 +0000186 StableVec working = input;
Marshall Clow29ae2ba2017-10-04 22:23:03 +0000187 auto iter = std::stable_partition(working.begin(), working.end(), is_even<stable_test>());
188
189 if (!std::all_of (working.begin(), iter, is_even<stable_test>())) return 1;
190 if (!std::none_of(iter, working.end(), is_even<stable_test>())) return 2;
191 if (!std::is_sorted(working.begin(), iter, payload_less())) return 3;
192 if (!std::is_sorted(iter, working.end(), payload_less())) return 4;
193 if (!std::is_permutation(input.begin(), input.end(), working.begin())) return 99;
194 return 0;
195}
196
197// == nth_element ==
198// use the first element as a position into the data
199int nth_element (const uint8_t *data, size_t size)
200{
201 if (size <= 1) return 0;
202 const size_t partition_point = data[0] % size;
Marshall Clow772a6d42017-10-30 19:51:58 +0000203 Vec working(data + 1, data + size);
Marshall Clow29ae2ba2017-10-04 22:23:03 +0000204 const auto partition_iter = working.begin() + partition_point;
205 std::nth_element(working.begin(), partition_iter, working.end());
206
207// nth may be the end iterator, in this case nth_element has no effect.
208 if (partition_iter == working.end())
209 {
210 if (!std::equal(data + 1, data + size, working.begin())) return 98;
211 }
212 else
213 {
214 const uint8_t nth = *partition_iter;
215 if (!std::all_of(working.begin(), partition_iter, [=](uint8_t v) { return v <= nth; }))
216 return 1;
217 if (!std::all_of(partition_iter, working.end(), [=](uint8_t v) { return v >= nth; }))
218 return 2;
219 if (!std::is_permutation(data + 1, data + size, working.begin())) return 99;
220 }
221
222 return 0;
223}
224
225// == partial_sort ==
226// use the first element as a position into the data
227int partial_sort (const uint8_t *data, size_t size)
228{
229 if (size <= 1) return 0;
230 const size_t sort_point = data[0] % size;
Marshall Clow772a6d42017-10-30 19:51:58 +0000231 Vec working(data + 1, data + size);
Marshall Clow29ae2ba2017-10-04 22:23:03 +0000232 const auto sort_iter = working.begin() + sort_point;
233 std::partial_sort(working.begin(), sort_iter, working.end());
234
235 if (sort_iter != working.end())
236 {
237 const uint8_t nth = *std::min_element(sort_iter, working.end());
238 if (!std::all_of(working.begin(), sort_iter, [=](uint8_t v) { return v <= nth; }))
239 return 1;
240 if (!std::all_of(sort_iter, working.end(), [=](uint8_t v) { return v >= nth; }))
241 return 2;
242 }
243 if (!std::is_sorted(working.begin(), sort_iter)) return 3;
244 if (!std::is_permutation(data + 1, data + size, working.begin())) return 99;
245
246 return 0;
247}
248
Marshall Clowc85f85a2017-10-12 14:48:09 +0000249
Marshall Clow772a6d42017-10-30 19:51:58 +0000250// == partial_sort_copy ==
251// use the first element as a count
252int partial_sort_copy (const uint8_t *data, size_t size)
253{
254 if (size <= 1) return 0;
255 const size_t num_results = data[0] % size;
256 Vec results(num_results);
257 (void) std::partial_sort_copy(data + 1, data + size, results.begin(), results.end());
Marshall Clowc85f85a2017-10-12 14:48:09 +0000258
Marshall Clow772a6d42017-10-30 19:51:58 +0000259// The results have to be sorted
260 if (!std::is_sorted(results.begin(), results.end())) return 1;
261// All the values in results have to be in the original data
262 for (auto v: results)
263 if (std::find(data + 1, data + size, v) == data + size) return 2;
264
265// The things in results have to be the smallest N in the original data
266 Vec sorted(data + 1, data + size);
267 std::sort(sorted.begin(), sorted.end());
268 if (!std::equal(results.begin(), results.end(), sorted.begin())) return 3;
269 return 0;
270}
271
272// The second sequence has been "uniqued"
273template <typename Iter1, typename Iter2>
274static bool compare_unique(Iter1 first1, Iter1 last1, Iter2 first2, Iter2 last2)
275{
276 assert(first1 != last1 && first2 != last2);
277 if (*first1 != *first2) return false;
278
279 uint8_t last_value = *first1;
280 ++first1; ++first2;
281 while(first1 != last1 && first2 != last2)
282 {
283 // Skip over dups in the first sequence
284 while (*first1 == last_value)
285 if (++first1 == last1) return false;
286 if (*first1 != *first2) return false;
287 last_value = *first1;
288 ++first1; ++first2;
289 }
290
291// Still stuff left in the 'uniqued' sequence - oops
292 if (first1 == last1 && first2 != last2) return false;
293
294// Still stuff left in the original sequence - better be all the same
295 while (first1 != last1)
296 {
297 if (*first1 != last_value) return false;
298 ++first1;
299 }
300 return true;
301}
302
303// == unique ==
304int unique (const uint8_t *data, size_t size)
305{
306 Vec working(data, data + size);
307 std::sort(working.begin(), working.end());
308 Vec results = working;
309 Vec::iterator new_end = std::unique(results.begin(), results.end());
310 Vec::iterator it; // scratch iterator
311
312// Check the size of the unique'd sequence.
313// it should only be zero if the input sequence was empty.
314 if (results.begin() == new_end)
315 return working.size() == 0 ? 0 : 1;
316
317// 'results' is sorted
318 if (!std::is_sorted(results.begin(), new_end)) return 2;
319
320// All the elements in 'results' must be different
321 it = results.begin();
322 uint8_t prev_value = *it++;
323 for (; it != new_end; ++it)
324 {
325 if (*it == prev_value) return 3;
326 prev_value = *it;
327 }
328
329// Every element in 'results' must be in 'working'
330 for (it = results.begin(); it != new_end; ++it)
331 if (std::find(working.begin(), working.end(), *it) == working.end())
332 return 4;
333
334// Every element in 'working' must be in 'results'
335 for (auto v : working)
336 if (std::find(results.begin(), new_end, v) == new_end)
337 return 5;
338
339 return 0;
340}
341
342// == unique_copy ==
343int unique_copy (const uint8_t *data, size_t size)
344{
345 Vec working(data, data + size);
346 std::sort(working.begin(), working.end());
347 Vec results;
348 (void) std::unique_copy(working.begin(), working.end(),
349 std::back_inserter<Vec>(results));
350 Vec::iterator it; // scratch iterator
351
352// Check the size of the unique'd sequence.
353// it should only be zero if the input sequence was empty.
354 if (results.size() == 0)
355 return working.size() == 0 ? 0 : 1;
356
357// 'results' is sorted
358 if (!std::is_sorted(results.begin(), results.end())) return 2;
359
360// All the elements in 'results' must be different
361 it = results.begin();
362 uint8_t prev_value = *it++;
363 for (; it != results.end(); ++it)
364 {
365 if (*it == prev_value) return 3;
366 prev_value = *it;
367 }
368
369// Every element in 'results' must be in 'working'
370 for (auto v : results)
371 if (std::find(working.begin(), working.end(), v) == working.end())
372 return 4;
373
374// Every element in 'working' must be in 'results'
375 for (auto v : working)
376 if (std::find(results.begin(), results.end(), v) == results.end())
377 return 5;
378
379 return 0;
380}
381
382
383// -- regex fuzzers
Marshall Clowc85f85a2017-10-12 14:48:09 +0000384static int regex_helper(const uint8_t *data, size_t size, std::regex::flag_type flag)
385{
386 if (size > 0)
387 {
388 try
389 {
390 std::string s((const char *)data, size);
391 std::regex re(s, flag);
392 return std::regex_match(s, re) ? 1 : 0;
393 }
394 catch (std::regex_error &ex) {}
395 }
396 return 0;
397}
398
399
400int regex_ECMAScript (const uint8_t *data, size_t size)
401{
402 (void) regex_helper(data, size, std::regex_constants::ECMAScript);
403 return 0;
404}
405
406int regex_POSIX (const uint8_t *data, size_t size)
407{
408 (void) regex_helper(data, size, std::regex_constants::basic);
409 return 0;
410}
411
412int regex_extended (const uint8_t *data, size_t size)
413{
414 (void) regex_helper(data, size, std::regex_constants::extended);
415 return 0;
416}
417
418int regex_awk (const uint8_t *data, size_t size)
419{
420 (void) regex_helper(data, size, std::regex_constants::awk);
421 return 0;
422}
423
424int regex_grep (const uint8_t *data, size_t size)
425{
426 (void) regex_helper(data, size, std::regex_constants::grep);
427 return 0;
428}
429
430int regex_egrep (const uint8_t *data, size_t size)
431{
432 (void) regex_helper(data, size, std::regex_constants::egrep);
433 return 0;
434}
435
Marshall Clowf45f32b2017-10-23 23:19:30 +0000436// -- heap fuzzers
437int make_heap (const uint8_t *data, size_t size)
438{
Marshall Clow772a6d42017-10-30 19:51:58 +0000439 Vec working(data, data + size);
Marshall Clowf45f32b2017-10-23 23:19:30 +0000440 std::make_heap(working.begin(), working.end());
441
442 if (!std::is_heap(working.begin(), working.end())) return 1;
443 if (!std::is_permutation(data, data + size, working.begin())) return 99;
444 return 0;
445}
446
447int push_heap (const uint8_t *data, size_t size)
448{
449 if (size < 2) return 0;
450
451// Make a heap from the first half of the data
Marshall Clow772a6d42017-10-30 19:51:58 +0000452 Vec working(data, data + size);
Marshall Clowf45f32b2017-10-23 23:19:30 +0000453 auto iter = working.begin() + (size / 2);
454 std::make_heap(working.begin(), iter);
455 if (!std::is_heap(working.begin(), iter)) return 1;
456
457// Now push the rest onto the heap, one at a time
458 ++iter;
459 for (; iter != working.end(); ++iter) {
460 std::push_heap(working.begin(), iter);
461 if (!std::is_heap(working.begin(), iter)) return 2;
462 }
463
464 if (!std::is_permutation(data, data + size, working.begin())) return 99;
465 return 0;
466}
467
468int pop_heap (const uint8_t *data, size_t size)
469{
470 if (size < 2) return 0;
Marshall Clow772a6d42017-10-30 19:51:58 +0000471 Vec working(data, data + size);
Marshall Clowf45f32b2017-10-23 23:19:30 +0000472 std::make_heap(working.begin(), working.end());
473
474// Pop things off, one at a time
475 auto iter = --working.end();
476 while (iter != working.begin()) {
477 std::pop_heap(working.begin(), iter);
478 if (!std::is_heap(working.begin(), --iter)) return 2;
479 }
480
481 return 0;
482}
483
484
485// -- search fuzzers
486int search (const uint8_t *data, size_t size)
487{
488 if (size < 2) return 0;
489
490 const size_t pat_size = data[0] * (size - 1) / std::numeric_limits<uint8_t>::max();
491 assert(pat_size <= size - 1);
492 const uint8_t *pat_begin = data + 1;
493 const uint8_t *pat_end = pat_begin + pat_size;
494 const uint8_t *data_end = data + size;
495 assert(pat_end <= data_end);
496// std::cerr << "data[0] = " << size_t(data[0]) << " ";
497// std::cerr << "Pattern size = " << pat_size << "; corpus is " << size - 1 << std::endl;
498 auto it = std::search(pat_end, data_end, pat_begin, pat_end);
499 if (it != data_end) // not found
500 if (!std::equal(pat_begin, pat_end, it))
501 return 1;
502 return 0;
503}
504
505template <typename S>
506static int search_helper (const uint8_t *data, size_t size)
507{
508 if (size < 2) return 0;
509
510 const size_t pat_size = data[0] * (size - 1) / std::numeric_limits<uint8_t>::max();
511 const uint8_t *pat_begin = data + 1;
512 const uint8_t *pat_end = pat_begin + pat_size;
513 const uint8_t *data_end = data + size;
514
515 auto it = std::search(pat_end, data_end, S(pat_begin, pat_end));
516 if (it != data_end) // not found
517 if (!std::equal(pat_begin, pat_end, it))
518 return 1;
519 return 0;
520}
521
522// These are still in std::experimental
523// int search_boyer_moore (const uint8_t *data, size_t size)
524// {
525// return search_helper<std::boyer_moore_searcher<const uint8_t *>>(data, size);
526// }
527//
528// int search_boyer_moore_horspool (const uint8_t *data, size_t size)
529// {
530// return search_helper<std::boyer_moore_horspool_searcher<const uint8_t *>>(data, size);
531// }
532
Marshall Clow772a6d42017-10-30 19:51:58 +0000533
534// -- set operation fuzzers
535template <typename S>
536static void set_helper (const uint8_t *data, size_t size, Vec &v1, Vec &v2)
537{
538 assert(size > 1);
539
540 const size_t pat_size = data[0] * (size - 1) / std::numeric_limits<uint8_t>::max();
541 const uint8_t *pat_begin = data + 1;
542 const uint8_t *pat_end = pat_begin + pat_size;
543 const uint8_t *data_end = data + size;
544 v1.assign(pat_begin, pat_end);
545 v2.assign(pat_end, data_end);
546
547 std::sort(v1.begin(), v1.end());
548 std::sort(v2.begin(), v2.end());
549}
550
Marshall Clow29ae2ba2017-10-04 22:23:03 +0000551} // namespace fuzzing