#!/usr/bin/env python
#
# Copyright (C) 2019 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""ELF file checker.

This command ensures all undefined symbols in an ELF file can be resolved to
global (or weak) symbols defined in shared objects specified in DT_NEEDED
entries.
"""
23
24from __future__ import print_function
25
26import argparse
27import collections
28import os
29import os.path
30import re
31import struct
32import subprocess
33import sys
34
35
# Magic bytes expected in the `ei_magic` field of an ELF header.
_ELF_MAGIC = b'\x7fELF'


# Machine IDs (`e_machine` values) recognized by this checker.
_EM_386 = 3
_EM_ARM = 40
_EM_X86_64 = 62
_EM_AARCH64 = 183

_KNOWN_MACHINES = {_EM_386, _EM_ARM, _EM_X86_64, _EM_AARCH64}


# Leading ELF header fields as (field name, `struct` format code) pairs, in
# the order they are unpacked.
_ELF_HEADER_STRUCT = (
    ('ei_magic', '4s'),
    ('ei_class', 'B'),
    ('ei_data', 'B'),
    ('ei_version', 'B'),
    ('ei_osabi', 'B'),
    ('ei_pad', '8s'),
    ('e_type', 'H'),
    ('e_machine', 'H'),
    ('e_version', 'I'),
)

# Concatenated format codes, usable with `struct.calcsize`/`struct.unpack`.
_ELF_HEADER_STRUCT_FMT = ''.join(
    _field[1] for _field in _ELF_HEADER_STRUCT)


# One attribute per entry of `_ELF_HEADER_STRUCT`, in the same order.
ELFHeader = collections.namedtuple(
    'ELFHeader', tuple(_field[0] for _field in _ELF_HEADER_STRUCT))


# Parsed view of an ELF file: its soname, needed libraries, imported and
# exported symbols, and the `ELFHeader` read from the file.
ELF = collections.namedtuple(
    'ELF',
    ['dt_soname', 'dt_needed', 'imported', 'exported', 'header'])
71
72
def _get_os_name():
    """Get the host OS name.

    Returns:
      'linux' or 'darwin'.

    Raises:
      ValueError: If `sys.platform` is neither a Linux nor a Darwin platform.
    """
    # Python 2 reports 'linux2' whereas Python 3.3+ reports 'linux'; accept
    # both spellings so the lookup works under either interpreter.
    if sys.platform.startswith('linux'):
        return 'linux'
    if sys.platform == 'darwin':
        return 'darwin'
    raise ValueError(sys.platform + ' is not supported')
80
81
def _get_build_top():
    """Find the build top of the source tree ($ANDROID_BUILD_TOP).

    Walks from the current working directory towards the filesystem root and
    returns the first directory that contains a `.repo` entry, or None if no
    such directory exists.
    """
    candidate = os.path.abspath(os.getcwd())
    while True:
        if os.path.exists(os.path.join(candidate, '.repo')):
            return candidate
        parent = os.path.dirname(candidate)
        if parent == candidate:
            # Reached the root without finding `.repo`.
            return None
        candidate = parent
92
93
def _select_latest_llvm_version(versions):
    """Select the latest LLVM prebuilts version from a set of versions.

    Only names starting with `clang-r<digits>` are considered. The name with
    the highest numeric revision wins; equal revisions are ordered by plain
    string comparison of the names. Returns None if nothing matches.
    """
    pattern = re.compile('clang-r([0-9]+)([a-z]?)')
    matches = (pattern.match(name) for name in versions)
    # Rank by (revision, name) so that tuple comparison yields the ordering
    # described above.
    ranked = [(int(m.group(1)), m.string) for m in matches if m]
    if not ranked:
        return None
    return max(ranked)[1]
109
110
def _get_latest_llvm_version(llvm_dir):
    """Find the latest LLVM prebuilts version from `llvm_dir`.

    The directory entries of `llvm_dir` are the candidate version names.
    """
    candidates = os.listdir(llvm_dir)
    return _select_latest_llvm_version(candidates)
114
115
def _get_llvm_dir():
    """Find the path to LLVM prebuilts.

    The prebuilts base directory comes from $LLVM_PREBUILTS_BASE (default:
    `prebuilts/clang/host`) and is joined onto the build top. The version
    comes from $LLVM_PREBUILTS_VERSION (default: the latest version found in
    the host-specific prebuilts directory).

    Returns:
      The path to the versioned LLVM prebuilts directory, or None if the
      build top, the prebuilts directory, or a usable version cannot be found.

    Raises:
      ValueError: If the host OS is not supported by `_get_os_name()`.
    """
    build_top = _get_build_top()
    if build_top is None:
        # Not inside a source tree: there is nothing to join the prebuilts
        # path onto, and os.path.join(None, ...) would raise.
        return None

    llvm_prebuilts_base = os.environ.get('LLVM_PREBUILTS_BASE')
    if not llvm_prebuilts_base:
        llvm_prebuilts_base = os.path.join('prebuilts', 'clang', 'host')

    llvm_dir = os.path.join(
        build_top, llvm_prebuilts_base, _get_os_name() + '-x86')

    if not os.path.exists(llvm_dir):
        return None

    llvm_prebuilts_version = os.environ.get('LLVM_PREBUILTS_VERSION')
    if not llvm_prebuilts_version:
        llvm_prebuilts_version = _get_latest_llvm_version(llvm_dir)
    if not llvm_prebuilts_version:
        # The directory exists but holds no recognizable version.
        return None

    llvm_dir = os.path.join(llvm_dir, llvm_prebuilts_version)

    if not os.path.exists(llvm_dir):
        return None

    return llvm_dir
140
141
def _get_llvm_readobj():
    """Find the path to llvm-readobj executable.

    Returns:
      The path to `bin/llvm-readobj` inside the LLVM prebuilts directory if
      that file exists; otherwise the bare command name 'llvm-readobj'.
    """
    fallback = 'llvm-readobj'

    llvm_dir = _get_llvm_dir()
    if llvm_dir is None:
        # No prebuilts directory: os.path.join(None, ...) would raise, so go
        # straight to the fallback.
        return fallback

    llvm_readobj = os.path.join(llvm_dir, 'bin', 'llvm-readobj')
    return llvm_readobj if os.path.exists(llvm_readobj) else fallback
147
148
class ELFError(ValueError):
    """Base class for errors raised while parsing an ELF file."""
152
153
class ELFInvalidMagicError(ELFError):
    """Raised when a file does not start with the ELF magic word."""

    def __init__(self):
        # The message is fixed; callers construct this error without arguments.
        ELFError.__init__(self, 'bad ELF magic')
158
159
class ELFParser(object):
    """ELF file parser.

    The leading ELF header fields are read directly from the file. The
    dynamic table and the dynamic symbols are obtained by running
    llvm-readobj and parsing its text output.
    """

    @classmethod
    def _read_elf_header(cls, elf_file_path):
        """Read the leading ELF header fields from the start of the file.

        Returns:
          An `ELFHeader`, or None if the file is too short to unpack.

        Raises:
          IOError/OSError: If the file cannot be opened or read.
        """
        with open(elf_file_path, 'rb') as elf_file:
            buf = elf_file.read(struct.calcsize(_ELF_HEADER_STRUCT_FMT))
        try:
            return ELFHeader(*struct.unpack(_ELF_HEADER_STRUCT_FMT, buf))
        except struct.error:
            return None

    @classmethod
    def open(cls, elf_file_path, llvm_readobj):
        """Open and parse the ELF file.

        Args:
          elf_file_path: Path to the ELF file.
          llvm_readobj: Path (or command name) of the llvm-readobj executable.

        Returns:
          An `ELF` tuple describing the file.

        Raises:
          ELFInvalidMagicError: If the file does not start with the ELF magic.
          subprocess.CalledProcessError: If llvm-readobj exits with an error.
        """
        # Parse the ELF header for simple sanity checks.
        header = cls._read_elf_header(elf_file_path)
        if not header or header.ei_magic != _ELF_MAGIC:
            raise ELFInvalidMagicError()

        # Run llvm-readobj and parse the output.
        return cls._read_llvm_readobj(elf_file_path, header, llvm_readobj)

    @classmethod
    def _find_prefix(cls, pattern, lines_it):
        """Iterate `lines_it` until finding a string that starts with `pattern`.

        The matching line is consumed. Returns True if such a line was found
        and False if the iterator was exhausted first.
        """
        for line in lines_it:
            if line.startswith(pattern):
                return True
        return False

    @classmethod
    def _read_llvm_readobj(cls, elf_file_path, header, llvm_readobj):
        """Run llvm-readobj and parse the output.

        Raises:
          subprocess.CalledProcessError: If llvm-readobj exits with a non-zero
            status. Without this check a failed run would yield empty output
            and every later check would pass vacuously.
        """
        # Double-dash spelling of the long options; see the llvm-readobj
        # command guide.
        cmd = [llvm_readobj, '--dynamic-table', '--dyn-symbols', elf_file_path]
        proc = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, _ = proc.communicate()
        if proc.returncode != 0:
            raise subprocess.CalledProcessError(proc.returncode, cmd, out)

        # Python 3 pipes yield bytes, but the patterns below are text; decode
        # once here. Under Python 2 `out` is already `str` and is left as is.
        if not isinstance(out, str):
            out = out.decode('utf-8')

        lines = out.splitlines()
        return cls._parse_llvm_readobj(elf_file_path, header, lines)

    @classmethod
    def _parse_llvm_readobj(cls, elf_file_path, header, lines):
        """Parse the output of llvm-readobj.

        Each section is located with its own pass over `lines`, so the result
        does not depend on the order in which the sections are printed.
        """
        lines = list(lines)
        imported, exported = cls._parse_dynamic_symbols(iter(lines))
        dt_soname, dt_needed = cls._parse_dynamic_table(
            elf_file_path, iter(lines))
        return ELF(dt_soname, dt_needed, imported, exported, header)

    _DYNAMIC_SECTION_START_PATTERN = 'DynamicSection ['

    # Entry lines are indented; any indentation width is accepted.
    _DYNAMIC_SECTION_NEEDED_PATTERN = re.compile(
        r'^\s+0x[0-9a-fA-F]+\s+NEEDED\s+Shared library: \[(.*)\]$')

    _DYNAMIC_SECTION_SONAME_PATTERN = re.compile(
        r'^\s+0x[0-9a-fA-F]+\s+SONAME\s+Library soname: \[(.*)\]$')

    _DYNAMIC_SECTION_END_PATTERN = ']'

    @classmethod
    def _parse_dynamic_table(cls, elf_file_path, lines_it):
        """Parse the dynamic table section.

        Returns:
          A `(dt_soname, dt_needed)` tuple. `dt_soname` defaults to the base
          name of `elf_file_path` when no SONAME entry is found; `dt_needed`
          lists the NEEDED entries in order of appearance.
        """
        dt_soname = os.path.basename(elf_file_path)
        dt_needed = []

        dynamic = cls._find_prefix(
            cls._DYNAMIC_SECTION_START_PATTERN, lines_it)
        if not dynamic:
            return (dt_soname, dt_needed)

        for line in lines_it:
            if line == cls._DYNAMIC_SECTION_END_PATTERN:
                break

            match = cls._DYNAMIC_SECTION_NEEDED_PATTERN.match(line)
            if match:
                dt_needed.append(match.group(1))
                continue

            match = cls._DYNAMIC_SECTION_SONAME_PATTERN.match(line)
            if match:
                dt_soname = match.group(1)
                continue

        return (dt_soname, dt_needed)

    _DYNAMIC_SYMBOLS_START_PATTERN = 'DynamicSymbols ['
    _DYNAMIC_SYMBOLS_END_PATTERN = ']'

    # The entry start/end markers are compared modulo surrounding whitespace.
    _SYMBOL_ENTRY_START_PATTERN = ' Symbol {'
    _SYMBOL_ENTRY_PATTERN = re.compile(r'^\s+([A-Za-z0-9_]+): (.*)$')
    # Trailing ` (<decimal>)` or ` (0x<hex>)` annotation after a value.
    _SYMBOL_ENTRY_PAREN_PATTERN = re.compile(
        r'\s+\((?:(?:\d+)|(?:0x[0-9a-fA-F]+))\)$')
    _SYMBOL_ENTRY_END_PATTERN = ' }'

    @classmethod
    def _parse_symbol_name(cls, name_with_version):
        """Split `name_with_version` into name and version.

        The split happens at the last occurrence of `@@` or `@`. A name
        without any `@` has an empty version.

        Returns:
          A `(name, version)` tuple of strings.
        """
        name, separator, version = name_with_version.rpartition('@')
        if not separator:
            # Unversioned symbol printed without a trailing `@`.
            return (name_with_version, '')
        if name.endswith('@'):
            # `name@@version`: drop the first `@` of the `@@` separator.
            name = name[:-1]
        return (name, version)

    @classmethod
    def _parse_dynamic_symbols(cls, lines_it):
        """Parse dynamic symbol table and collect imported and exported symbols.

        Undefined symbols are imports unless their binding is `Weak`; defined
        symbols are exports unless their binding is `Local`. Symbols with an
        empty name are ignored.

        Returns:
          An `(imported, exported)` tuple of dicts mapping a symbol name to
          the set of versions seen for it.
        """
        imported = collections.defaultdict(set)
        exported = collections.defaultdict(set)

        for symbol in cls._parse_dynamic_symbols_internal(lines_it):
            name, version = cls._parse_symbol_name(symbol['Name'])
            if not name:
                continue
            if symbol['Section'] == 'Undefined':
                if symbol['Binding'] != 'Weak':
                    imported[name].add(version)
            else:
                if symbol['Binding'] != 'Local':
                    exported[name].add(version)

        # Freeze the returned imported/exported dict.
        return (dict(imported), dict(exported))

    @classmethod
    def _parse_dynamic_symbols_internal(cls, lines_it):
        """Parse symbol entries and yield each symbol.

        Every yielded symbol is a dict mapping a field name to its value with
        any trailing parenthesized number removed.
        """
        if not cls._find_prefix(cls._DYNAMIC_SYMBOLS_START_PATTERN, lines_it):
            return

        # Compare the entry delimiters modulo surrounding whitespace so the
        # indentation width of the llvm-readobj output does not matter.
        entry_start = cls._SYMBOL_ENTRY_START_PATTERN.strip()
        entry_end = cls._SYMBOL_ENTRY_END_PATTERN.strip()

        symbol = None
        for line in lines_it:
            # The section terminator is matched exactly (column 0) so that an
            # indented `]` inside an entry does not end the section.
            if line == cls._DYNAMIC_SYMBOLS_END_PATTERN:
                return

            stripped = line.strip()
            if stripped == entry_start:
                symbol = {}
                continue

            if stripped == entry_end:
                if symbol is not None:
                    yield symbol
                symbol = None
                continue

            if symbol is None:
                # Stray line outside of any `Symbol { ... }` entry.
                continue

            match = cls._SYMBOL_ENTRY_PATTERN.match(line)
            if match:
                key = match.group(1)
                value = cls._SYMBOL_ENTRY_PAREN_PATTERN.sub(
                    '', match.group(2))
                symbol[key] = value
319
320
class Checker(object):
    """ELF file checker that checks DT_SONAME, DT_NEEDED, and symbols.

    Diagnostics are written to stderr, prefixed with the path of the file
    under test. A failed check terminates the process with exit status 2.
    """

    def __init__(self, llvm_readobj):
        """Create a checker that runs `llvm_readobj` to inspect ELF files."""
        self._file_path = ''
        self._file_under_test = None
        self._shared_libs = []

        self._llvm_readobj = llvm_readobj

    # Use ANSI colors only when stderr is a terminal.
    if sys.stderr.isatty():
        _ERROR_TAG = '\033[0;1;31merror:\033[m'  # Red error
        _NOTE_TAG = '\033[0;1;30mnote:\033[m'  # Black note
    else:
        _ERROR_TAG = 'error:'
        _NOTE_TAG = 'note:'

    @staticmethod
    def _to_text(value):
        """Return `value` as text, decoding it from UTF-8 if it is bytes.

        Keeps the messages working whether the parsed names are byte strings
        (Python 2 `str`) or already text (Python 3 `str`).
        """
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    def _error(self, *args):
        """Emit an error to stderr."""
        print(self._file_path + ': ' + self._ERROR_TAG, *args, file=sys.stderr)

    def _note(self, *args):
        """Emit a note to stderr."""
        print(self._file_path + ': ' + self._NOTE_TAG, *args, file=sys.stderr)

    def _load_elf_file(self, path, skip_bad_elf_magic):
        """Load an ELF file from the `path`.

        Exits with status 2 if the file cannot be opened. If the ELF magic is
        invalid, exits with status 0 when `skip_bad_elf_magic` is set and with
        status 2 otherwise. Any other error is reported and re-raised.
        """
        try:
            return ELFParser.open(path, self._llvm_readobj)
        except (IOError, OSError):
            self._error('Failed to open "{}".'.format(path))
            sys.exit(2)
        except ELFInvalidMagicError:
            if skip_bad_elf_magic:
                sys.exit(0)
            else:
                self._error(
                    'File "{}" must have a valid ELF magic word.'.format(path))
                sys.exit(2)
        except Exception:
            # Report which file was being opened, then let the error surface.
            # KeyboardInterrupt/SystemExit pass through untouched.
            self._error(
                'An unknown error occurred while opening "{}".'.format(path))
            raise

    def load_file_under_test(self, path, skip_bad_elf_magic,
                             skip_unknown_elf_machine):
        """Load file-under-test (either an executable or a shared lib).

        Exits with status 0 if `skip_unknown_elf_machine` is set and the
        file's machine ID is not one of the known machines.
        """
        self._file_path = path
        self._file_under_test = self._load_elf_file(path, skip_bad_elf_magic)

        if skip_unknown_elf_machine and \
                self._file_under_test.header.e_machine not in _KNOWN_MACHINES:
            sys.exit(0)

    def load_shared_libs(self, shared_lib_paths):
        """Load shared libraries."""
        for path in shared_lib_paths:
            self._shared_libs.append(self._load_elf_file(path, False))

    def check_dt_soname(self, soname):
        """Check whether DT_SONAME matches installation file name."""
        if self._file_under_test.dt_soname != soname:
            self._error('DT_SONAME "{}" must be equal to the file name "{}".'
                        .format(self._file_under_test.dt_soname, soname))
            sys.exit(2)

    def check_dt_needed(self):
        """Check whether all DT_NEEDED entries are specified in the build
        system.

        Reports every missing entry, prints fix suggestions, and exits with
        status 2 if any entry is missing.
        """
        missing_shared_libs = False

        # Collect the DT_SONAMEs from shared libs specified in the build system.
        specified_sonames = {lib.dt_soname for lib in self._shared_libs}

        # Check whether all DT_NEEDED entries are specified.
        for lib in self._file_under_test.dt_needed:
            if lib not in specified_sonames:
                self._error('DT_NEEDED "{}" is not specified in shared_libs.'
                            .format(self._to_text(lib)))
                missing_shared_libs = True

        if missing_shared_libs:
            dt_needed = sorted(set(self._file_under_test.dt_needed))
            modules = [re.sub('\\.so$', '', lib) for lib in dt_needed]

            self._note()
            self._note('Fix suggestions:')
            self._note(
                ' Android.bp: shared_libs: [' +
                ', '.join('"' + module + '"' for module in modules) + '],')
            self._note(
                ' Android.mk: LOCAL_SHARED_LIBRARIES := ' + ' '.join(modules))

            self._note()
            self._note('If the fix above doesn\'t work, bypass this check with:')
            self._note(' Android.bp: check_elf_files: false,')
            self._note(' Android.mk: LOCAL_CHECK_ELF_FILES := false')

            sys.exit(2)

    @staticmethod
    def _find_symbol(lib, name, version):
        """Check whether the symbol name and version matches a definition in
        lib.

        An empty `version` matches any exported version of `name`.
        """
        try:
            lib_sym_vers = lib.exported[name]
        except KeyError:
            return False
        if version == '':  # Symbol version is not requested
            return True
        return version in lib_sym_vers

    @classmethod
    def _find_symbol_from_libs(cls, libs, name, version):
        """Check whether the symbol name and version is defined in one of the
        shared libraries in libs.

        Returns the first matching library, or None if there is none.
        """
        for lib in libs:
            if cls._find_symbol(lib, name, version):
                return lib
        return None

    def check_symbols(self):
        """Check whether all undefined symbols are resolved to a definition.

        Definitions are looked up in the file under test itself and in the
        loaded shared libraries. Reports every unresolved symbol and exits
        with status 2 if there is any.
        """
        all_elf_files = [self._file_under_test] + self._shared_libs
        missing_symbols = []
        # `items()` works on both Python 2 and 3 (`iteritems()` is Python 2
        # only).
        for sym, imported_vers in self._file_under_test.imported.items():
            for imported_ver in imported_vers:
                lib = self._find_symbol_from_libs(
                    all_elf_files, sym, imported_ver)
                if not lib:
                    missing_symbols.append((sym, imported_ver))

        if missing_symbols:
            for sym, ver in sorted(missing_symbols):
                sym = self._to_text(sym)
                if ver:
                    sym += '@' + self._to_text(ver)
                self._error('Unresolved symbol: {}'.format(sym))

            self._note()
            self._note('Some dependencies might be changed, thus the symbol(s) '
                       'above cannot be resolved.')
            self._note('Please re-build the prebuilt file: "{}".'
                       .format(self._file_path))

            self._note()
            self._note('If this is a new prebuilt file and it is designed to have '
                       'unresolved symbols, add one of the following properties:')
            self._note(' Android.bp: allow_undefined_symbols: true,')
            self._note(' Android.mk: LOCAL_ALLOW_UNDEFINED_SYMBOLS := true')

            sys.exit(2)
482
483
def _parse_args():
    """Parse command line options.

    Returns the `argparse` namespace holding the input file, the optional
    soname, the shared library paths, the check switches, and the optional
    llvm-readobj path.
    """
    arg_parser = argparse.ArgumentParser()

    # Input file
    arg_parser.add_argument(
        'file', help='Path to the input file to be checked')
    arg_parser.add_argument(
        '--soname', help='Shared object name of the input file')

    # Shared library dependencies
    arg_parser.add_argument(
        '--shared-lib', action='append', default=[],
        help='Path to shared library dependencies')

    # Check options
    check_switches = (
        ('--skip-bad-elf-magic',
         'Ignore the input file without the ELF magic word'),
        ('--skip-unknown-elf-machine',
         'Ignore the input file with unknown machine ID'),
        ('--allow-undefined-symbols',
         'Ignore unresolved undefined symbols'),
    )
    for switch, description in check_switches:
        arg_parser.add_argument(
            switch, action='store_true', help=description)

    # Other options
    arg_parser.add_argument(
        '--llvm-readobj', help='Path to the llvm-readobj executable')

    return arg_parser.parse_args()
511
512
def main():
    """Main function.

    Parses the command line, loads the file under test and its shared
    library dependencies, then runs the DT_SONAME, DT_NEEDED and symbol
    checks that the options ask for.
    """
    args = _parse_args()

    # Fall back to auto-detection when no executable was given.
    llvm_readobj = args.llvm_readobj or _get_llvm_readobj()

    # Load ELF files
    checker = Checker(llvm_readobj)
    checker.load_file_under_test(
        args.file, args.skip_bad_elf_magic, args.skip_unknown_elf_machine)
    checker.load_shared_libs(args.shared_lib)

    # Run checks
    if args.soname:
        checker.check_dt_soname(args.soname)

    checker.check_dt_needed()

    if args.allow_undefined_symbols:
        return
    checker.check_symbols()
535
536
# Run the checker only when this file is executed as a script.
if __name__ == '__main__':
    main()