#!/usr/bin/env python3
# Copyright 2020 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Tests for detokenize."""

import base64
import datetime as dt
import io
import os
import struct
import tempfile
import unittest
from unittest import mock

from pw_tokenizer import database
from pw_tokenizer import detokenize
from pw_tokenizer import elf_reader
from pw_tokenizer import tokens


# This function is not part of this test. It was used to generate the binary
# strings for EMPTY_ELF and ELF_WITH_TOKENIZER_SECTIONS. It takes a path and
# returns a Python byte string suitable for copying into Python source code.
def path_to_byte_string(path):
    with open(path, 'rb') as fd:
        data = fd.read()

    output = []
    indices = iter(range(len(data)))

    while True:
        line = ''

        while len(line) < 70:
            try:
                i = next(indices)
            except StopIteration:
                break

            line += repr(data[i:i + 1])[2:-1].replace("'", r'\'')

        if not line:
            return ''.join(output)

        output.append(" b'{}'\n".format(line))


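# Example of regenerating one of the binary strings below (a sketch; the ELF
# path is hypothetical):
#
#   print(path_to_byte_string('out/pw_tokenizer/tokenize_test.elf'))

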
# This is an empty ELF file. It was created from the ELF file for
# tokenize_test.cc with the command:
#
#   arm-none-eabi-objcopy -S --only-section NO_SECTIONS_PLEASE <ELF> <OUTPUT>
#
# The resulting ELF was converted to a Python binary string using the
# path_to_byte_string function above.
EMPTY_ELF = (
    b'\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00(\x00\x01'
    b'\x00\x00\x00\xd1\x83\x00\x084\x00\x00\x00\xe0\x00\x00\x00\x00\x04\x00\x05'
    b'4\x00 \x00\x05\x00(\x00\x02\x00\x01\x00\x01\x00\x00\x00\xd4\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x07\x00'
    b'\x00\x00\x00\x00\x01\x00\x01\x00\x00\x00\xd4\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00'
    b'\x01\x00\x01\x00\x00\x00\xd4\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x01\x00\x01\x00'
    b'\x00\x00\xd4\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x01\x00\x01\x00\x00\x00\xd4\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    b'\x06\x00\x00\x00\x00\x00\x01\x00\x00.shstrtab\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01'
    b'\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd4\x00\x00'
    b'\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00'
    b'\x00\x00\x00')

# This is an ELF file with only the .pw_tokenized and .pw_tokenizer_info
# sections. It was created from the ELF file for tokenize_test.cc with the
# command:
#
#   arm-none-eabi-objcopy -S --only-section ".pw_tokenize*" <ELF> <OUTPUT>
#
# The resulting ELF was converted to a Python binary string using the
# path_to_byte_string function above. The file is also included in the repo as
# example_binary_with_tokenized_strings.elf.
ELF_WITH_TOKENIZER_SECTIONS = (
    b'\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00(\x00\x01'
    b'\x00\x00\x00!G\x00\x084\x00\x00\x00\xd4\x02\x00\x00\x00\x04\x00\x054\x00'
    b' \x00\x04\x00(\x00\x04\x00\x03\x00\x01\x00\x00\x00\xb4\x00\x00\x00\x00'
    b'\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00'
    b'\x00\x00\x00\x01\x00\x01\x00\x00\x00\xb4\x00\x00\x00\x00\x02\x00\x08\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x01'
    b'\x00\x01\x00\x00\x00\xb4\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x01\x00\x01\x00\x00\x00'
    b'\xb4\x00\x00\x00\x18D\x00 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x06\x00\x00\x00\x00\x00\x01\x00Hello %s! %hd %e\x00\x00\x00\x00%u'
    b'%d%02x%X%hu%hhd%d%ld%lu%lld%llu%c%c%c\x00%u%d%02x%X%hu%hhd%d%ld%lu%lld'
    b'%llu%c%c%c\x00Won\'t fit : %s%d\x00\x00\x00\x00%llx\x00\x00\x00\x00%ld'
    b'\x00%d\x00\x00%ld\x00The answer is: %s\x00\x00\x00The answer is: %s\x00'
    b'\x00\x00The answer is: %s\x00\x00\x00The answer is: %s\x00\x00\x00The '
    b'answer is: %s\x00\x00\x00The answer is: %s\x00\x00\x00The answer is: %'
    b's\x00\x00\x00The answer is: %s\x00\x00\x00%u %d\x00\x00\x00The answer:'
    b' "%s"\x00\x00\x00\x00Jello, world!\x00\x00\x00Jello!\x00\x00Jello?\x00'
    b'\x00%s there are %x (%.2f) of them%c\x00\x00\x00\x00The answer is: %s\x00'
    b'\x00\x00\x00\x00\x00\x00[:-)\x00\x00\x00\x00>:-[]\x00\x00\x00%llu\x00\x00'
    b'\x00\x00The answer was: %s\x00\x00The answer is: %s\x00\x00.shstrtab\x00'
    b'.pw_tokenized.default\x00.pw_tokenized.TEST_DOMAIN\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x0b\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\xb4\x00\x00\x00\xb9\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04'
    b'\x00\x00\x00\x00\x00\x00\x00!\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00p\x02\x00\x00&\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x04\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x03\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x96\x02\x00\x00;\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00')

# 0x2e668cd6 is 'Jello, world!' (which is also used in database_test.py).
JELLO_WORLD_TOKEN = b'\xd6\x8c\x66\x2e'
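# A sketch of how the token bytes relate to the string, assuming this entry
# was created with the default token hash: the token is the hash packed as a
# little-endian 32-bit integer, i.e.
#
#   struct.pack('<I', tokens.default_hash('Jello, world!')) == JELLO_WORLD_TOKEN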


class DetokenizeTest(unittest.TestCase):
    """Tests the detokenize.Detokenizer."""
    def test_simple(self):
        detok = detokenize.Detokenizer(
            tokens.Database([
                tokens.TokenizedStringEntry(0xcdab, '%02d %s %c%%',
                                            dt.datetime.now())
            ]))
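        # The encoded message is the 4-byte little-endian token (0xcdab)
        # followed by the arguments. pw_tokenizer encodes integer args as
        # zigzag varints and string args with a length-prefix byte, so
        # 0x02 decodes to 1 ('%02d' -> '01'), b'\x03Two' to 'Two', and
        # 0x66 to 51 ('%c' -> '3').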
        self.assertEqual(str(detok.detokenize(b'\xab\xcd\0\0\x02\x03Two\x66')),
                         '01 Two 3%')

    def test_detokenize_extra_data_is_unsuccessful(self):
        detok = detokenize.Detokenizer(
            tokens.Database([
                tokens.TokenizedStringEntry(1, 'no args', dt.datetime(1, 1, 1))
            ]))

        result = detok.detokenize(b'\x01\0\0\0\x04args')
        self.assertEqual(len(result.failures), 1)
        string, args, remaining = result.failures[0]
        self.assertEqual('no args', string)
        self.assertFalse(args)
        self.assertEqual(b'\x04args', remaining)
        self.assertEqual('no args', str(result))

    def test_detokenize_missing_data_is_unsuccessful(self):
        detok = detokenize.Detokenizer(
            tokens.Database(
                [tokens.TokenizedStringEntry(2, '%s', dt.datetime(1, 1, 1))]))

        result = detok.detokenize(b'\x02\0\0\0')
        string, args, remaining = result.failures[0]
        self.assertEqual('%s', string)
        self.assertEqual(len(args), 1)
        self.assertEqual(b'', remaining)
        self.assertEqual(len(result.failures), 1)
        self.assertEqual('%s', str(result))

    def test_detokenize_missing_data_with_errors_is_unsuccessful(self):
        detok = detokenize.Detokenizer(tokens.Database(
            [tokens.TokenizedStringEntry(2, '%s', dt.datetime(1, 1, 1))]),
                                       show_errors=True)

        result = detok.detokenize(b'\x02\0\0\0')
        string, args, remaining = result.failures[0]
        self.assertIn('%s MISSING', string)
        self.assertEqual(len(args), 1)
        self.assertEqual(b'', remaining)
        self.assertEqual(len(result.failures), 1)
        self.assertIn('%s MISSING', str(result))

    def test_unparsed_data(self):
        detok = detokenize.Detokenizer(
            tokens.Database([
                tokens.TokenizedStringEntry(1, 'no args',
                                            dt.datetime(100, 1, 1)),
            ]))
        result = detok.detokenize(b'\x01\0\0\0o_o')
        self.assertFalse(result.ok())
        self.assertEqual('no args', str(result))
        self.assertIn('o_o', repr(result))
        self.assertIn('decoding failed', result.error_message())

    def test_empty_db(self):
        detok = detokenize.Detokenizer(io.BytesIO(EMPTY_ELF))
        self.assertFalse(detok.detokenize(b'\x12\x34\0\0').ok())
        self.assertIn('unknown token',
                      detok.detokenize(b'1234').error_message())
        self.assertIn('unknown token', repr(detok.detokenize(b'1234')))
        self.assertEqual('', str(detok.detokenize(b'1234')))

        self.assertIsNone(detok.detokenize(b'').token)

    def test_empty_db_show_errors(self):
        detok = detokenize.Detokenizer(io.BytesIO(EMPTY_ELF), show_errors=True)
        self.assertFalse(detok.detokenize(b'\x12\x34\0\0').ok())
        self.assertIn('unknown token',
                      detok.detokenize(b'1234').error_message())
        self.assertIn('unknown token', repr(detok.detokenize(b'1234')))
        self.assertIn('unknown token', str(detok.detokenize(b'1234')))

        self.assertIsNone(detok.detokenize(b'').token)

    def test_missing_token_show_errors(self):
        detok = detokenize.Detokenizer(io.BytesIO(EMPTY_ELF), show_errors=True)
        self.assertIn('missing token', detok.detokenize(b'').error_message())
        self.assertIn('missing token', str(detok.detokenize(b'')))
        self.assertIn('missing token', repr(detok.detokenize(b'123')))

        self.assertIn('missing token', detok.detokenize(b'1').error_message())
        self.assertIn('missing token', str(detok.detokenize(b'1')))
        self.assertIn('missing token', repr(detok.detokenize(b'1')))

        self.assertIn('missing token',
                      detok.detokenize(b'123').error_message())
        self.assertIn('missing token', str(detok.detokenize(b'123')))
        self.assertIn('missing token', repr(detok.detokenize(b'123')))

    def test_missing_token(self):
        detok = detokenize.Detokenizer(io.BytesIO(EMPTY_ELF))
        self.assertIn('missing token', detok.detokenize(b'').error_message())
        self.assertEqual('', str(detok.detokenize(b'')))
        self.assertIn('missing token', repr(detok.detokenize(b'123')))

        self.assertIn('missing token', detok.detokenize(b'1').error_message())
        self.assertEqual('', str(detok.detokenize(b'1')))
        self.assertIn('missing token', repr(detok.detokenize(b'1')))

        self.assertIn('missing token',
                      detok.detokenize(b'123').error_message())
        self.assertEqual('', str(detok.detokenize(b'123')))
        self.assertIn('missing token', repr(detok.detokenize(b'123')))

    def test_decode_from_elf_data(self):
        detok = detokenize.Detokenizer(io.BytesIO(ELF_WITH_TOKENIZER_SECTIONS))

        self.assertTrue(detok.detokenize(JELLO_WORLD_TOKEN).ok())
        self.assertEqual(str(detok.detokenize(JELLO_WORLD_TOKEN)),
                         'Jello, world!')

        undecoded_args = detok.detokenize(JELLO_WORLD_TOKEN + b'some junk')
        self.assertFalse(undecoded_args.ok())
        self.assertEqual(str(undecoded_args), 'Jello, world!')

        self.assertTrue(detok.detokenize(b'\0\0\0\0').ok())
        self.assertEqual(str(detok.detokenize(b'\0\0\0\0')), '')

    def test_decode_from_elf_file(self):
        detok = detokenize.Detokenizer(io.BytesIO(ELF_WITH_TOKENIZER_SECTIONS))
        expected_tokens = frozenset(detok.database.token_to_entries.keys())

        elf = tempfile.NamedTemporaryFile('wb', delete=False)
        try:
            elf.write(ELF_WITH_TOKENIZER_SECTIONS)
            elf.close()

            # Open ELF by file object
            with open(elf.name, 'rb') as fd:
                detok = detokenize.Detokenizer(fd)

            self.assertEqual(expected_tokens,
                             frozenset(detok.database.token_to_entries.keys()))

            # Open ELF by path
            detok = detokenize.Detokenizer(elf.name)
            self.assertEqual(expected_tokens,
                             frozenset(detok.database.token_to_entries.keys()))

            # Open ELF by elf_reader.Elf
            with open(elf.name, 'rb') as fd:
                detok = detokenize.Detokenizer(elf_reader.Elf(fd))

            self.assertEqual(expected_tokens,
                             frozenset(detok.database.token_to_entries.keys()))
        finally:
            os.unlink(elf.name)

    def test_decode_from_csv_file(self):
        detok = detokenize.Detokenizer(io.BytesIO(ELF_WITH_TOKENIZER_SECTIONS))
        expected_tokens = frozenset(detok.database.token_to_entries.keys())

        csv_database = str(detok.database)
        self.assertEqual(len(csv_database.splitlines()), 17)

        csv_file = tempfile.NamedTemporaryFile('w', delete=False)
        try:
            csv_file.write(csv_database)
            csv_file.close()

            # Open CSV by path
            detok = detokenize.Detokenizer(csv_file.name)
            self.assertEqual(expected_tokens,
                             frozenset(detok.database.token_to_entries.keys()))

            # Open CSV by file object
            with open(csv_file.name) as fd:
                detok = detokenize.Detokenizer(fd)

            self.assertEqual(expected_tokens,
                             frozenset(detok.database.token_to_entries.keys()))
        finally:
            os.unlink(csv_file.name)

    def test_create_detokenizer_with_token_database(self):
        detok = detokenize.Detokenizer(io.BytesIO(ELF_WITH_TOKENIZER_SECTIONS))
        expected_tokens = frozenset(detok.database.token_to_entries.keys())

        detok = detokenize.Detokenizer(detok.database)
        self.assertEqual(expected_tokens,
                         frozenset(detok.database.token_to_entries.keys()))


class DetokenizeWithCollisions(unittest.TestCase):
    """Tests collision resolution."""
    def setUp(self):
        super().setUp()
        token = 0xbaad

        # Database with several conflicting tokens.
        self.detok = detokenize.Detokenizer(tokens.Database([
            tokens.TokenizedStringEntry(token, 'REMOVED', dt.datetime(9, 1, 1)),
            tokens.TokenizedStringEntry(token, 'newer'),
            tokens.TokenizedStringEntry(token, 'A: %d', dt.datetime(30, 5, 9)),
            tokens.TokenizedStringEntry(token, 'B: %c', dt.datetime(30, 5, 10)),
            tokens.TokenizedStringEntry(token, 'C: %s'),
            tokens.TokenizedStringEntry(token, '%d%u'),
            tokens.TokenizedStringEntry(token, '%s%u %d'),
            tokens.TokenizedStringEntry(1, '%s'),
            tokens.TokenizedStringEntry(1, '%d'),
            tokens.TokenizedStringEntry(2, 'Three %s %s %s'),
            tokens.TokenizedStringEntry(2, 'Five %d %d %d %d %s'),
        ]))  # yapf: disable
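
    # The tests below rely on how the detokenizer ranks these colliding
    # entries: successful decodes beat failed ones, entries without a
    # removal date count as more recently present, and results that decode
    # all of the data and fill more arguments rank higher.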

    def test_collision_no_args_favors_most_recently_present(self):
        no_args = self.detok.detokenize(b'\xad\xba\0\0')
        self.assertFalse(no_args.ok())
        self.assertEqual(len(no_args.successes), 2)
        self.assertEqual(len(no_args.failures), 5)
        self.assertEqual(len(no_args.matches()), 7)
        self.assertEqual(str(no_args), 'newer')
        self.assertEqual(len(no_args.best_result()[1]), 0)
        self.assertEqual(no_args.best_result()[0], 'newer')

    def test_collision_one_integer_arg_favors_most_recently_present(self):
        multiple_correct = self.detok.detokenize(b'\xad\xba\0\0\x7a')
        self.assertFalse(multiple_correct.ok())
        self.assertIn('ERROR', repr(multiple_correct))
        self.assertEqual(len(multiple_correct.successes), 2)
        self.assertEqual(len(multiple_correct.failures), 5)
        self.assertEqual(len(multiple_correct.matches()), 7)
        self.assertEqual(str(multiple_correct), 'B: =')

    def test_collision_one_integer_arg_favors_successful_decode(self):
        # One string decodes successfully, since the arg is out of range
        # for %c.
        int_arg = self.detok.detokenize(b'\xad\xba\0\0\xfe\xff\xff\xff\x0f')
        self.assertTrue(int_arg.ok())
        self.assertEqual(str(int_arg), 'A: 2147483647')

    def test_collision_one_string_arg_favors_successful_decode(self):
        # One string decodes successfully, since decoding the argument as an
        # integer does not decode all the data.
        string_arg = self.detok.detokenize(b'\xad\xba\0\0\x02Hi')
        self.assertTrue(string_arg.ok())
        self.assertEqual(str(string_arg), 'C: Hi')

    def test_collision_one_string_arg_favors_decoding_all_data(self):
        result = self.detok.detokenize(b'\1\0\0\0\x83hi')
        self.assertEqual(len(result.failures), 2)
        # Should resolve to the string since %d would leave one byte behind.
        self.assertEqual(str(result), '%s')

    def test_collision_multiple_args_favors_decoding_more_arguments(self):
        result = self.detok.detokenize(b'\2\0\0\0\1\2\1\4\5')
        self.assertEqual(len(result.matches()), 2)
        self.assertEqual(result.matches()[0][0], 'Five -1 1 -1 2 %s')
        self.assertEqual(result.matches()[1][0], 'Three \2 \4 %s')

    def test_collision_multiple_args_favors_decoding_all_arguments(self):
        unambiguous = self.detok.detokenize(b'\xad\xba\0\0\x01#\x00\x01')
        self.assertTrue(unambiguous.ok())
        self.assertEqual(len(unambiguous.matches()), 7)
        self.assertEqual('#0 -1', str(unambiguous))
        self.assertIn('#0 -1', repr(unambiguous))


@mock.patch('os.path.getmtime')
class AutoUpdatingDetokenizerTest(unittest.TestCase):
    """Tests the AutoUpdatingDetokenizer class."""
    def test_update(self, mock_getmtime):
        """Tests that the database is reloaded when the file changes."""

        db = database.load_token_database(
            io.BytesIO(ELF_WITH_TOKENIZER_SECTIONS))
        self.assertEqual(len(db), 17)

        the_time = [100]

        def move_back_time_if_file_exists(path):
            if os.path.exists(path):
                the_time[0] -= 1
                return the_time[0]

            raise FileNotFoundError

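        # Mock a file whose modification time decreases on every check, so
        # each poll sees a changed mtime and reloads the database.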
        mock_getmtime.side_effect = move_back_time_if_file_exists

        file = tempfile.NamedTemporaryFile('wb', delete=False)
        try:
            file.close()

            detok = detokenize.AutoUpdatingDetokenizer(file.name,
                                                       min_poll_period_s=0)
            self.assertFalse(detok.detokenize(JELLO_WORLD_TOKEN).ok())

            with open(file.name, 'wb') as fd:
                tokens.write_binary(db, fd)

            self.assertTrue(detok.detokenize(JELLO_WORLD_TOKEN).ok())
        finally:
            os.unlink(file.name)

        # The database stays around if the file is deleted.
        self.assertTrue(detok.detokenize(JELLO_WORLD_TOKEN).ok())

    def test_no_update_if_time_is_same(self, mock_getmtime):
        mock_getmtime.return_value = 100

        file = tempfile.NamedTemporaryFile('wb', delete=False)
        try:
            tokens.write_csv(
                database.load_token_database(
                    io.BytesIO(ELF_WITH_TOKENIZER_SECTIONS)), file)
            file.close()

            detok = detokenize.AutoUpdatingDetokenizer(file,
                                                       min_poll_period_s=0)
            self.assertTrue(detok.detokenize(JELLO_WORLD_TOKEN).ok())

            # Empty the database, but keep the mock modified time the same.
            with open(file.name, 'wb'):
                pass

            self.assertTrue(detok.detokenize(JELLO_WORLD_TOKEN).ok())
            self.assertTrue(detok.detokenize(JELLO_WORLD_TOKEN).ok())

            # Move back time so the now-empty file is reloaded.
            mock_getmtime.return_value = 50
            self.assertFalse(detok.detokenize(JELLO_WORLD_TOKEN).ok())
        finally:
            os.unlink(file.name)


def _next_char(message):
    return bytes(b + 1 for b in message)
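# For example, _next_char(b'abc') == b'bcd'.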


class PrefixedMessageDecoderTest(unittest.TestCase):
    def setUp(self):
        super().setUp()
        self.decode = detokenize.PrefixedMessageDecoder('$', 'abcdefg')

    def test_transform_single_message(self):
        self.assertEqual(
            b'%bcde',
            b''.join(self.decode.transform(io.BytesIO(b'$abcd'), _next_char)))

    def test_transform_message_amidst_other_only_affects_message(self):
        self.assertEqual(
            b'%%WHAT?%bcd%WHY? is this %ok %', b''.join(
                self.decode.transform(
                    io.BytesIO(b'$$WHAT?$abc$WHY? is this $ok $'),
                    _next_char)))

    def test_transform_empty_message(self):
        self.assertEqual(
            b'%1%',
            b''.join(self.decode.transform(io.BytesIO(b'$1$'), _next_char)))

    def test_transform_sequential_messages(self):
        self.assertEqual(
            b'%bcd%efghh', b''.join(
                self.decode.transform(io.BytesIO(b'$abc$defgh'), _next_char)))


class DetokenizeBase64(unittest.TestCase):
    """Tests detokenizing Base64 messages."""

    JELLO = b'$' + base64.b64encode(JELLO_WORLD_TOKEN)

    RECURSION_STRING = f'The secret message is "{JELLO.decode()}"'
    RECURSION = b'$' + base64.b64encode(
        struct.pack('I', tokens.default_hash(RECURSION_STRING)))

    RECURSION_STRING_2 = f"'{RECURSION.decode()}', said the spy."
    RECURSION_2 = b'$' + base64.b64encode(
        struct.pack('I', tokens.default_hash(RECURSION_STRING_2)))
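
    # RECURSION detokenizes to RECURSION_STRING, which itself contains the
    # Base64 token for 'Jello, world!', so Base64 detokenization must recurse
    # into its own output (see the RECURSION cases in TEST_CASES below).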

    TEST_CASES = (
        (b'', b''),
        (JELLO, b'Jello, world!'),
        (b'Hello ' + JELLO + b'?', b'Hello Jello, world!?'),
        (b'$' + JELLO, b'$Jello, world!'),
        (JELLO + JELLO, b'Jello, world!Jello, world!'),
        (JELLO + b'$' + JELLO, b'Jello, world!$Jello, world!'),
        (b'$3141', b'$3141'),
        (JELLO + b'$3141', b'Jello, world!$3141'),
        (RECURSION, b'The secret message is "Jello, world!"'),
        (RECURSION_2,
         b'\'The secret message is "Jello, world!"\', said the spy.'),
    )

    def setUp(self):
        super().setUp()
        db = database.load_token_database(
            io.BytesIO(ELF_WITH_TOKENIZER_SECTIONS))
        db.add([self.RECURSION_STRING, self.RECURSION_STRING_2])
        self.detok = detokenize.Detokenizer(db)

    def test_detokenize_base64_live(self):
        for data, expected in self.TEST_CASES:
            output = io.BytesIO()
            detokenize.detokenize_base64_live(self.detok, io.BytesIO(data),
                                              output, '$')

            self.assertEqual(expected, output.getvalue())

    def test_detokenize_base64_to_file(self):
        for data, expected in self.TEST_CASES:
            output = io.BytesIO()
            detokenize.detokenize_base64_to_file(self.detok, data, output, '$')

            self.assertEqual(expected, output.getvalue())

    def test_detokenize_base64(self):
        for data, expected in self.TEST_CASES:
            self.assertEqual(
                expected, detokenize.detokenize_base64(self.detok, data, b'$'))


class DetokenizeBase64InfiniteRecursion(unittest.TestCase):
    """Tests that infinite Base64 token recursion resolves."""
    def setUp(self):
        super().setUp()
        self.detok = detokenize.Detokenizer(
            tokens.Database([
                tokens.TokenizedStringEntry(0, '$AAAAAA=='),  # token for 0
                tokens.TokenizedStringEntry(1, '$AgAAAA=='),  # token for 2
                tokens.TokenizedStringEntry(2, '$AwAAAA=='),  # token for 3
                tokens.TokenizedStringEntry(3, '$AgAAAA=='),  # token for 2
            ]))
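
        # Token 0 expands to its own Base64 token, and tokens 1, 2, and 3
        # form a cycle (1 -> 2 -> 3 -> 2 -> ...), so detokenization would
        # never terminate without a recursion limit.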

    def test_detokenize_self_recursion(self):
        for depth in range(5):
            self.assertEqual(
                detokenize.detokenize_base64(self.detok,
                                             b'This one is deep: $AAAAAA==',
                                             recursion=depth),
                b'This one is deep: $AAAAAA==')

    def test_detokenize_self_recursion_default(self):
        self.assertEqual(
            detokenize.detokenize_base64(self.detok,
                                         b'This one is deep: $AAAAAA=='),
            b'This one is deep: $AAAAAA==')

    def test_detokenize_cyclic_recursion_even(self):
        self.assertEqual(
            detokenize.detokenize_base64(self.detok,
                                         b'I said "$AQAAAA=="',
                                         recursion=2), b'I said "$AgAAAA=="')

    def test_detokenize_cyclic_recursion_odd(self):
        self.assertEqual(
            detokenize.detokenize_base64(self.detok,
                                         b'I said "$AQAAAA=="',
                                         recursion=3), b'I said "$AwAAAA=="')


if __name__ == '__main__':
    unittest.main()