blob: f6af14e35e656bfa39566c6257b43aebb2e63e38 [file] [log] [blame]
Wyatt Hepler80c6ee52020-01-03 09:54:58 -08001// Copyright 2020 The Pigweed Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License"); you may not
4// use this file except in compliance with the License. You may obtain a copy of
5// the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12// License for the specific language governing permissions and limitations under
13// the License.
14
Wyatt Heplereb020a12020-10-28 14:01:51 -070015// This tests the C hashing macros and C++ hashing functions.
16
17#include "pw_tokenizer/hash.h"
18
Wyatt Hepler80c6ee52020-01-03 09:54:58 -080019#include <cstddef>
20#include <cstdint>
21
22#include "gtest/gtest.h"
23#include "pw_preprocessor/util.h"
24#include "pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_128_hash_macro.h"
Wyatt Hepler63c9bee2021-02-23 11:37:15 -080025#include "pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_256_hash_macro.h"
Wyatt Hepler80c6ee52020-01-03 09:54:58 -080026#include "pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_80_hash_macro.h"
27#include "pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_96_hash_macro.h"
Wyatt Hepler80c6ee52020-01-03 09:54:58 -080028#include "pw_tokenizer_private/generated_hash_test_cases.h"
29
30namespace pw::tokenizer {
31namespace {
32
33// Make sure the array test cases file isn't empty.
34static_assert(PW_ARRAY_SIZE(kHashTests) > 10,
35 "The test cases array is suspiciously small");
36
37constexpr bool CheckGeneratedCases() {
38 for (const auto [string, hash_length, python_hash, macro_hash] : kHashTests) {
39 const uint32_t calculated_hash =
40 PwTokenizer65599FixedLengthHash(string, hash_length);
41 if (calculated_hash != python_hash || calculated_hash != macro_hash) {
42 return false;
43 }
44 }
45
46 return true;
47}
48
49static_assert(CheckGeneratedCases(),
50 "Hashes in the generated test cases must match");
Wyatt Hepler80c6ee52020-01-03 09:54:58 -080051TEST(Hashing, GeneratedCasesAtRuntime) {
52 for (const auto [string, hash_length, python_hash, macro_hash] : kHashTests) {
53 const uint32_t calculated_hash =
54 PwTokenizer65599FixedLengthHash(string, hash_length);
55 EXPECT_EQ(calculated_hash, python_hash);
56 EXPECT_EQ(calculated_hash, macro_hash);
57 }
58}
59
60// Gets the size of the string, excluding the null terminator. A uint32_t is
61// used instead of a size_t since the hash calculation requires a uint32_t.
Ewout van Bekkum5ea33402021-03-31 11:00:02 -070062template <uint32_t kSizeWithNull>
63constexpr uint32_t StringLength(const char (&)[kSizeWithNull]) {
64 static_assert(kSizeWithNull > 0u);
65 return kSizeWithNull - 1; // subtract the null terminator
Wyatt Hepler80c6ee52020-01-03 09:54:58 -080066}
67
Aaron Green1f1c2512020-04-16 10:05:43 -070068TEST(Hashing, Runtime) PW_NO_SANITIZE("unsigned-integer-overflow") {
Wyatt Hepler80c6ee52020-01-03 09:54:58 -080069 // Coefficients for the hash terms; k1 is 1.
70 static constexpr uint32_t k2 = k65599HashConstant;
71 static constexpr uint32_t k3 = k65599HashConstant * k2;
72 static constexpr uint32_t k4 = k65599HashConstant * k3;
73 static constexpr uint32_t k5 = k65599HashConstant * k4;
74
75 // Hash a few things at hash length 4
Aaron Green1f1c2512020-04-16 10:05:43 -070076 // The coefficient calculation is done modulo 0x100000000, so the unsigned
77 // integer overflows of the hash terms are intentional.
Wyatt Hepler80c6ee52020-01-03 09:54:58 -080078 EXPECT_EQ(PwTokenizer65599FixedLengthHash("", 4), StringLength(""));
79 EXPECT_EQ(PwTokenizer65599FixedLengthHash("1", 4),
80 StringLength("1") + k2 * '1');
81 EXPECT_EQ(PwTokenizer65599FixedLengthHash("12", 4),
82 StringLength("12") + k2 * '1' + k3 * '2');
83 EXPECT_EQ(PwTokenizer65599FixedLengthHash("123", 4),
84 StringLength("123") + k2 * '1' + k3 * '2' + k4 * '3');
85 EXPECT_EQ(PwTokenizer65599FixedLengthHash("1234", 4),
86 StringLength("1234") + k2 * '1' + k3 * '2' + k4 * '3' + k5 * '4');
87 EXPECT_EQ(PwTokenizer65599FixedLengthHash("12345", 4),
88 StringLength("12345") + k2 * '1' + k3 * '2' + k4 * '3' + k5 * '4');
89 EXPECT_EQ(PwTokenizer65599FixedLengthHash("123456", 4),
90 StringLength("123456") + k2 * '1' + k3 * '2' + k4 * '3' + k5 * '4');
91}
92
Wyatt Hepler63c9bee2021-02-23 11:37:15 -080093#define _CHECK_HASH_LENGTH(string, length) \
94 static_assert(PwTokenizer65599FixedLengthHash( \
95 std::string_view(string, sizeof(string) - 1), length) == \
96 PW_TOKENIZER_65599_FIXED_LENGTH_##length##_HASH(string), \
97 #length "-byte hash mismatch!")
98
Wyatt Hepler80c6ee52020-01-03 09:54:58 -080099// Use std::string_view so that \0 can appear in strings.
Wyatt Hepler63c9bee2021-02-23 11:37:15 -0800100#define TEST_SUPPORTED_HASHES(string_literal) \
101 _CHECK_HASH_LENGTH(string_literal, 80); \
102 _CHECK_HASH_LENGTH(string_literal, 96); \
103 _CHECK_HASH_LENGTH(string_literal, 128); \
104 _CHECK_HASH_LENGTH(string_literal, 256); \
105 static_assert( \
106 PwTokenizer65599FixedLengthHash( \
107 std::string_view(string_literal, sizeof(string_literal) - 1), \
108 sizeof(string_literal) - 1) == Hash(string_literal), \
109 "Hash function mismatch!"); \
110 EXPECT_EQ(PwTokenizer65599FixedLengthHash( \
111 std::string_view(string_literal, sizeof(string_literal) - 1), \
112 sizeof(string_literal) - 1), \
113 pw_tokenizer_65599FixedLengthHash(string_literal, \
114 sizeof(string_literal) - 1, \
Wyatt Hepler656fac62020-10-28 14:30:59 -0700115 sizeof(string_literal) - 1))
Wyatt Hepler80c6ee52020-01-03 09:54:58 -0800116
117TEST(HashMacro, Empty) { TEST_SUPPORTED_HASHES(""); }
118
119TEST(HashMacro, NullTerminatorsAreIncludedInHash) {
120 using namespace std::literals; // use an sv literal to include the \0
121
122 TEST_SUPPORTED_HASHES("hello\0there");
123 TEST_SUPPORTED_HASHES("\0");
124 TEST_SUPPORTED_HASHES("\0\0");
125 TEST_SUPPORTED_HASHES("\0\0\0");
126
127 static_assert(
128 PwTokenizer65599FixedLengthHash(""sv, 80) !=
129 PwTokenizer65599FixedLengthHash("\0"sv, 80),
130 "Hashes of \\0 strings should differ since the lengths are included");
131 static_assert(
132 PwTokenizer65599FixedLengthHash("\0"sv, 80) !=
133 PwTokenizer65599FixedLengthHash("\0\0"sv, 80),
134 "Hashes of \\0 strings should differ since the lengths are included");
135 static_assert(
136 PwTokenizer65599FixedLengthHash("\0\0"sv, 80) !=
137 PwTokenizer65599FixedLengthHash("\0\0\0"sv, 80),
138 "Hashes of \\0 strings should differ since the lengths are included");
139}
140
141TEST(HashMacro, OneChar) {
142 TEST_SUPPORTED_HASHES("a");
143 TEST_SUPPORTED_HASHES("c");
144 TEST_SUPPORTED_HASHES("A");
145 TEST_SUPPORTED_HASHES("B");
146 TEST_SUPPORTED_HASHES("C");
147 TEST_SUPPORTED_HASHES("$");
148 TEST_SUPPORTED_HASHES("%");
149 TEST_SUPPORTED_HASHES("^");
150 TEST_SUPPORTED_HASHES("\xa1");
151 TEST_SUPPORTED_HASHES("\xff");
152}
153
154TEST(HashMacro, Phrases) {
155 TEST_SUPPORTED_HASHES("WASD");
156 TEST_SUPPORTED_HASHES("hello, world");
157 TEST_SUPPORTED_HASHES("Let the wookie win.");
158 TEST_SUPPORTED_HASHES("\x01 more test, just for \xffun");
159}
160
161TEST(HashMacro, HashesChange) {
162 static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("") !=
163 PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("\0"));
164 static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("a") !=
165 PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("b"));
166 static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("z") !=
167 PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("aa"));
168 static_assert(
169 PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("make sure hashes change") !=
170 PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("MAKE SURE HASHES CHANGE"));
171}
172
173// Define string literals to test boundary conditions.
174#define LITERAL_79 \
175 "0123456789ABCDEF" \
176 "0123456789ABCDEF" \
177 "0123456789ABCDEF" \
178 "0123456789ABCDEF" \
179 "0123456789ABCDE"
180#define LITERAL_80 LITERAL_79 "F"
181
182#define LITERAL_96 LITERAL_80 "0123456789ABCDEF"
183
184#define LITERAL_128 \
185 LITERAL_96 \
186 "0123456789ABCDEF" \
187 "0123456789ABCDEF"
188
189static_assert(sizeof(LITERAL_79) - 1 == 79);
190static_assert(sizeof(LITERAL_80) - 1 == 80);
191static_assert(sizeof(LITERAL_96) - 1 == 96);
192static_assert(sizeof(LITERAL_128) - 1 == 128);
193
194TEST(HashMacro, HashNearFixedHashLength_80) {
195 // 79, 80, 81, 82 bytes
196 TEST_SUPPORTED_HASHES(LITERAL_79);
197 TEST_SUPPORTED_HASHES(LITERAL_80);
198 TEST_SUPPORTED_HASHES(LITERAL_80 "!");
199 TEST_SUPPORTED_HASHES(LITERAL_80 "!!");
200}
201
202TEST(HashMacro, HashNearFixedHashLength_96) {
203 // 95, 96, 97, 98 bytes
204 TEST_SUPPORTED_HASHES(LITERAL_80 "0123456789ABCDE");
205 TEST_SUPPORTED_HASHES(LITERAL_96);
206 TEST_SUPPORTED_HASHES(LITERAL_96 "!");
207 TEST_SUPPORTED_HASHES(LITERAL_96 "XY");
208}
209
210TEST(HashMacro, HashNearFixedHashLength_128) {
211 // 127, 128, 129, 130 bytes
212 TEST_SUPPORTED_HASHES(LITERAL_96
213 "0123456789ABCDEF"
214 "0123456789ABCDE");
215 TEST_SUPPORTED_HASHES(LITERAL_128);
216 TEST_SUPPORTED_HASHES(LITERAL_128 "!");
217 TEST_SUPPORTED_HASHES(LITERAL_128 "AB");
218}
219
220TEST(HashMacro, HashVeryLongStrings) {
221 TEST_SUPPORTED_HASHES(
222 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
223 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
224 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
225 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
226 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0");
227 TEST_SUPPORTED_HASHES(LITERAL_128 LITERAL_128 LITERAL_128);
228}
229
230TEST(HashMacro, ExtraCharactersAreIgnored) {
231 static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH(LITERAL_80 "?") ==
232 PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH(LITERAL_80 "."));
233
234 static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH(LITERAL_80 "ABCD") ==
235 PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH(LITERAL_80 "zyxw"));
236
237 static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_96_HASH(LITERAL_96 "?") ==
238 PW_TOKENIZER_65599_FIXED_LENGTH_96_HASH(LITERAL_96 "."));
239
240 static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_96_HASH(LITERAL_96 "ABCD") ==
241 PW_TOKENIZER_65599_FIXED_LENGTH_96_HASH(LITERAL_96 "zyxw"));
242
243 static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_128_HASH(LITERAL_128 "?") ==
244 PW_TOKENIZER_65599_FIXED_LENGTH_128_HASH(LITERAL_128 "."));
245
246 static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_128_HASH(LITERAL_128 "ABCD") ==
247 PW_TOKENIZER_65599_FIXED_LENGTH_128_HASH(LITERAL_128 "zyxw"));
248}
249
250} // namespace
251} // namespace pw::tokenizer