blob: a609b09ed0ebf3ffb997553cdbc42be5ad0071c1 [file] [log] [blame]
Greg Clayton6a23d212013-09-04 17:31:40 +00001#!/usr/bin/python
2
3import cmd
4import dict_utils
5import file_extract
6import optparse
7import re
8import struct
9import string
10import StringIO
11import sys
12import uuid
13
14# Mach header "magic" constants
15MH_MAGIC = 0xfeedface
16MH_CIGAM = 0xcefaedfe
17MH_MAGIC_64 = 0xfeedfacf
18MH_CIGAM_64 = 0xcffaedfe
19FAT_MAGIC = 0xcafebabe
20FAT_CIGAM = 0xbebafeca
21
# Mach header "filetype" constants
23MH_OBJECT = 0x00000001
24MH_EXECUTE = 0x00000002
25MH_FVMLIB = 0x00000003
26MH_CORE = 0x00000004
27MH_PRELOAD = 0x00000005
28MH_DYLIB = 0x00000006
29MH_DYLINKER = 0x00000007
30MH_BUNDLE = 0x00000008
31MH_DYLIB_STUB = 0x00000009
32MH_DSYM = 0x0000000a
33MH_KEXT_BUNDLE = 0x0000000b
34
# Mach header "flag" constant bits
36MH_NOUNDEFS = 0x00000001
37MH_INCRLINK = 0x00000002
38MH_DYLDLINK = 0x00000004
39MH_BINDATLOAD = 0x00000008
40MH_PREBOUND = 0x00000010
41MH_SPLIT_SEGS = 0x00000020
42MH_LAZY_INIT = 0x00000040
43MH_TWOLEVEL = 0x00000080
44MH_FORCE_FLAT = 0x00000100
45MH_NOMULTIDEFS = 0x00000200
46MH_NOFIXPREBINDING = 0x00000400
47MH_PREBINDABLE = 0x00000800
48MH_ALLMODSBOUND = 0x00001000
49MH_SUBSECTIONS_VIA_SYMBOLS = 0x00002000
50MH_CANONICAL = 0x00004000
51MH_WEAK_DEFINES = 0x00008000
52MH_BINDS_TO_WEAK = 0x00010000
53MH_ALLOW_STACK_EXECUTION = 0x00020000
54MH_ROOT_SAFE = 0x00040000
55MH_SETUID_SAFE = 0x00080000
56MH_NO_REEXPORTED_DYLIBS = 0x00100000
57MH_PIE = 0x00200000
58MH_DEAD_STRIPPABLE_DYLIB = 0x00400000
59MH_HAS_TLV_DESCRIPTORS = 0x00800000
60MH_NO_HEAP_EXECUTION = 0x01000000
61
62# Mach load command constants
63LC_REQ_DYLD = 0x80000000
64LC_SEGMENT = 0x00000001
65LC_SYMTAB = 0x00000002
66LC_SYMSEG = 0x00000003
67LC_THREAD = 0x00000004
68LC_UNIXTHREAD = 0x00000005
69LC_LOADFVMLIB = 0x00000006
70LC_IDFVMLIB = 0x00000007
71LC_IDENT = 0x00000008
72LC_FVMFILE = 0x00000009
73LC_PREPAGE = 0x0000000a
74LC_DYSYMTAB = 0x0000000b
75LC_LOAD_DYLIB = 0x0000000c
76LC_ID_DYLIB = 0x0000000d
77LC_LOAD_DYLINKER = 0x0000000e
78LC_ID_DYLINKER = 0x0000000f
79LC_PREBOUND_DYLIB = 0x00000010
80LC_ROUTINES = 0x00000011
81LC_SUB_FRAMEWORK = 0x00000012
82LC_SUB_UMBRELLA = 0x00000013
83LC_SUB_CLIENT = 0x00000014
84LC_SUB_LIBRARY = 0x00000015
85LC_TWOLEVEL_HINTS = 0x00000016
86LC_PREBIND_CKSUM = 0x00000017
87LC_LOAD_WEAK_DYLIB = 0x00000018 | LC_REQ_DYLD
88LC_SEGMENT_64 = 0x00000019
89LC_ROUTINES_64 = 0x0000001a
90LC_UUID = 0x0000001b
91LC_RPATH = 0x0000001c | LC_REQ_DYLD
92LC_CODE_SIGNATURE = 0x0000001d
93LC_SEGMENT_SPLIT_INFO = 0x0000001e
94LC_REEXPORT_DYLIB = 0x0000001f | LC_REQ_DYLD
95LC_LAZY_LOAD_DYLIB = 0x00000020
96LC_ENCRYPTION_INFO = 0x00000021
97LC_DYLD_INFO = 0x00000022
98LC_DYLD_INFO_ONLY = 0x00000022 | LC_REQ_DYLD
99LC_LOAD_UPWARD_DYLIB = 0x00000023 | LC_REQ_DYLD
100LC_VERSION_MIN_MACOSX = 0x00000024
101LC_VERSION_MIN_IPHONEOS = 0x00000025
102LC_FUNCTION_STARTS = 0x00000026
103LC_DYLD_ENVIRONMENT = 0x00000027
104
105# Mach CPU constants
106CPU_ARCH_MASK = 0xff000000
107CPU_ARCH_ABI64 = 0x01000000
108CPU_TYPE_ANY = 0xffffffff
109CPU_TYPE_VAX = 1
110CPU_TYPE_MC680x0 = 6
111CPU_TYPE_I386 = 7
112CPU_TYPE_X86_64 = CPU_TYPE_I386 | CPU_ARCH_ABI64
113CPU_TYPE_MIPS = 8
114CPU_TYPE_MC98000 = 10
115CPU_TYPE_HPPA = 11
116CPU_TYPE_ARM = 12
117CPU_TYPE_MC88000 = 13
118CPU_TYPE_SPARC = 14
119CPU_TYPE_I860 = 15
120CPU_TYPE_ALPHA = 16
121CPU_TYPE_POWERPC = 18
122CPU_TYPE_POWERPC64 = CPU_TYPE_POWERPC | CPU_ARCH_ABI64
123
124# VM protection constants
125VM_PROT_READ = 1
126VM_PROT_WRITE = 2
127VM_PROT_EXECUTE = 4
128
# Bit masks for the "Mach.NList.type" field
130N_STAB = 0xe0
131N_PEXT = 0x10
132N_TYPE = 0x0e
133N_EXT = 0x01
134
135# Values for nlist N_TYPE bits of the "Mach.NList.type" field.
136N_UNDF = 0x0
137N_ABS = 0x2
138N_SECT = 0xe
139N_PBUD = 0xc
140N_INDR = 0xa
141
142# Section indexes for the "Mach.NList.sect_idx" fields
143NO_SECT = 0
144MAX_SECT = 255
145
146# Stab defines
147N_GSYM = 0x20
148N_FNAME = 0x22
149N_FUN = 0x24
150N_STSYM = 0x26
151N_LCSYM = 0x28
152N_BNSYM = 0x2e
153N_OPT = 0x3c
154N_RSYM = 0x40
155N_SLINE = 0x44
156N_ENSYM = 0x4e
157N_SSYM = 0x60
158N_SO = 0x64
159N_OSO = 0x66
160N_LSYM = 0x80
161N_BINCL = 0x82
162N_SOL = 0x84
163N_PARAMS = 0x86
164N_VERSION = 0x88
165N_OLEVEL = 0x8A
166N_PSYM = 0xa0
167N_EINCL = 0xa2
168N_ENTRY = 0xa4
169N_LBRAC = 0xc0
170N_EXCL = 0xc2
171N_RBRAC = 0xe0
172N_BCOMM = 0xe2
173N_ECOMM = 0xe4
174N_ECOML = 0xe8
175N_LENG = 0xfe
176
177vm_prot_names = [ '---', 'r--', '-w-', 'rw-', '--x', 'r-x', '-wx', 'rwx' ]
178
def dump_memory(base_addr, data, hex_bytes_len, num_per_line):
    """Print a hex + ASCII dump of "data" to stdout.

    Each output line has the form
    "0x<address>: <hex bytes>  <printable ASCII>", where non-printable bytes
    are shown as '.'. A short final line is padded so the ASCII column stays
    aligned, and one blank line is printed after the dump.

    base_addr     -- address displayed for the first byte of "data".
    data          -- the bytes to dump (anything bytearray() accepts).
    hex_bytes_len -- number of hex digits (two per byte) to dump, or -1 to
                     dump everything. Values past the end of "data" are
                     clamped instead of raising.
    num_per_line  -- number of bytes shown on each line; must be positive.

    Raises ValueError if "num_per_line" is not positive.
    """
    if num_per_line <= 0:
        raise ValueError('num_per_line must be positive')
    byte_values = bytearray(data)
    if hex_bytes_len == -1:
        num_bytes = len(byte_values)
    else:
        # Two hex digits per byte; an odd digit count still shows the whole
        # final byte. Never read past the end of the data.
        num_bytes = min(max((hex_bytes_len + 1) // 2, 0), len(byte_values))
    for line_start in range(0, num_bytes, num_per_line):
        chunk = byte_values[line_start:min(line_start + num_per_line, num_bytes)]
        hex_text = ' '.join(['%2.2x' % b for b in chunk])
        ascii_text = ''.join([chr(b) if 32 <= b < 127 else '.' for b in chunk])
        # Each missing byte on a short line would have used three columns.
        padding = (num_per_line - len(chunk)) * 3
        # The address is the byte offset, not the hex-digit index.
        print('0x%8.8x: %s%*s  %s' % (base_addr + line_start, hex_text, padding, '', ascii_text))
    print('')
208
209
class TerminalColors:
    '''Simple terminal colors class.

    Every method returns an ANSI escape sequence as a string (nothing is
    written to the terminal), or an empty string when the instance was
    created with "enabled" set to a false value.
    '''

    def __init__(self, enabled = True):
        # TODO: discover terminal type from "file" and disable if
        # it can't handle the color codes
        self.enabled = enabled

    def _sgr(self, code):
        '''Return the escape sequence for numeric attribute "code", or '' when disabled.'''
        if self.enabled:
            return "\x1b[%um" % code
        return ''

    def _color(self, index, fg):
        '''Return the sequence selecting color "index" as foreground (30 + index)
        if "fg" tests True, otherwise as background (40 + index).'''
        if fg:
            return self._sgr(30 + index)
        return self._sgr(40 + index)

    def reset(self):
        '''Reset all terminal colors and formatting.'''
        return self._sgr(0)

    def bold(self, on = True):
        '''Enable or disable bold depending on the "on" parameter.'''
        return self._sgr(1 if on else 22)

    def italics(self, on = True):
        '''Enable or disable italics depending on the "on" parameter.'''
        return self._sgr(3 if on else 23)

    def underline(self, on = True):
        '''Enable or disable underline depending on the "on" parameter.'''
        return self._sgr(4 if on else 24)

    def inverse(self, on = True):
        '''Enable or disable inverse depending on the "on" parameter.'''
        return self._sgr(7 if on else 27)

    def strike(self, on = True):
        '''Enable or disable strike through depending on the "on" parameter.'''
        return self._sgr(9 if on else 29)

    def black(self, fg = True):
        '''Set the foreground or background color to black.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(0, fg)

    def red(self, fg = True):
        '''Set the foreground or background color to red.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(1, fg)

    def green(self, fg = True):
        '''Set the foreground or background color to green.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(2, fg)

    def yellow(self, fg = True):
        '''Set the foreground or background color to yellow.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        # Foreground is 33 and background is 43, matching the other colors.
        return self._color(3, fg)

    def blue(self, fg = True):
        '''Set the foreground or background color to blue.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(4, fg)

    def magenta(self, fg = True):
        '''Set the foreground or background color to magenta.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(5, fg)

    def cyan(self, fg = True):
        '''Set the foreground or background color to cyan.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(6, fg)

    def white(self, fg = True):
        '''Set the foreground or background color to white.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(7, fg)

    def default(self, fg = True):
        '''Set the foreground or background color to the default.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(9, fg)
357
def swap_unpack_char():
    """Returns the struct unpack prefix for the non-native endian-ness.

    '<' (little endian) is returned on a big endian host and '>' (big
    endian) on a little endian host.
    """
    host_is_big_endian = (sys.byteorder == 'big')
    if host_is_big_endian:
        return '<'
    return '>'
363
364
def dump_hex_bytes(addr, s, bytes_per_line=16):
    """Print the characters of "s" as upper case hex bytes.

    Each line starts with the address of its first byte ("addr" plus the
    offset into "s") and holds up to "bytes_per_line" bytes. The final line
    is always printed, even when it is empty.
    """
    row = ''
    for offset, ch in enumerate(s):
        starts_new_row = (offset % bytes_per_line) == 0
        if starts_new_row:
            # Flush the previous row before starting the next one.
            if row:
                print(row)
            row = '%#8.8x: ' % (addr + offset)
        row += "%02X " % ord(ch)
    print(row)
376
def dump_hex_byte_string_diff(addr, a, b, bytes_per_line=16):
    """Print a hex dump covering the longer of "a" and "b", coloring differences.

    The byte shown at each offset comes from "a" when "a" has one there and
    from "b" otherwise. Offsets where the two inputs differ (including
    offsets past the end of the shorter input) are wrapped in the red /
    default foreground escape sequences from TerminalColors.
    """
    colors = TerminalColors (True)
    len_a = len(a)
    len_b = len(b)
    row = ''
    for offset in range(max(len_a, len_b)):
        byte_a = a[offset] if offset < len_a else None
        byte_b = b[offset] if offset < len_b else None
        # Prefer the byte from "a"; fall back to "b" when "a" has none.
        shown = byte_a
        if offset < len_b and not shown:
            shown = byte_b
        differs = byte_a != byte_b
        if (offset % bytes_per_line) == 0:
            if row:
                print(row)
            row = '%#8.8x: ' % (addr + offset)
        if differs:
            row += colors.red()
        row += "%02X " % ord(shown)
        if differs:
            row += colors.default()

    print(row)
411
412class Mach:
413 """Class that does everything mach-o related"""
414
415 class Arch:
416 """Class that implements mach-o architectures"""
417
418 def __init__(self, c=0, s=0):
419 self.cpu=c
420 self.sub=s
421
422 def set_cpu_type(self, c):
423 self.cpu=c
424 def set_cpu_subtype(self, s):
425 self.sub=s
426 def set_arch(self, c, s):
427 self.cpu=c
428 self.sub=s
429 def is_64_bit(self):
430 return (self.cpu & CPU_ARCH_ABI64) != 0
431
432 cpu_infos = [
433 [ "arm" , CPU_TYPE_ARM , CPU_TYPE_ANY ],
434 [ "arm" , CPU_TYPE_ARM , 0 ],
435 [ "armv4" , CPU_TYPE_ARM , 5 ],
436 [ "armv6" , CPU_TYPE_ARM , 6 ],
437 [ "armv5" , CPU_TYPE_ARM , 7 ],
438 [ "xscale" , CPU_TYPE_ARM , 8 ],
439 [ "armv7" , CPU_TYPE_ARM , 9 ],
440 [ "armv7f" , CPU_TYPE_ARM , 10 ],
441 [ "armv7s" , CPU_TYPE_ARM , 11 ],
442 [ "armv7k" , CPU_TYPE_ARM , 12 ],
443 [ "armv7m" , CPU_TYPE_ARM , 15 ],
444 [ "armv7em" , CPU_TYPE_ARM , 16 ],
445 [ "ppc" , CPU_TYPE_POWERPC , CPU_TYPE_ANY ],
446 [ "ppc" , CPU_TYPE_POWERPC , 0 ],
447 [ "ppc601" , CPU_TYPE_POWERPC , 1 ],
448 [ "ppc602" , CPU_TYPE_POWERPC , 2 ],
449 [ "ppc603" , CPU_TYPE_POWERPC , 3 ],
450 [ "ppc603e" , CPU_TYPE_POWERPC , 4 ],
451 [ "ppc603ev" , CPU_TYPE_POWERPC , 5 ],
452 [ "ppc604" , CPU_TYPE_POWERPC , 6 ],
453 [ "ppc604e" , CPU_TYPE_POWERPC , 7 ],
454 [ "ppc620" , CPU_TYPE_POWERPC , 8 ],
455 [ "ppc750" , CPU_TYPE_POWERPC , 9 ],
456 [ "ppc7400" , CPU_TYPE_POWERPC , 10 ],
457 [ "ppc7450" , CPU_TYPE_POWERPC , 11 ],
458 [ "ppc970" , CPU_TYPE_POWERPC , 100 ],
459 [ "ppc64" , CPU_TYPE_POWERPC64 , 0 ],
460 [ "ppc970-64" , CPU_TYPE_POWERPC64 , 100 ],
461 [ "i386" , CPU_TYPE_I386 , 3 ],
462 [ "i486" , CPU_TYPE_I386 , 4 ],
463 [ "i486sx" , CPU_TYPE_I386 , 0x84 ],
464 [ "i386" , CPU_TYPE_I386 , CPU_TYPE_ANY ],
465 [ "x86_64" , CPU_TYPE_X86_64 , 3 ],
466 [ "x86_64" , CPU_TYPE_X86_64 , CPU_TYPE_ANY ],
467 ]
468
469 def __str__(self):
470 for info in self.cpu_infos:
471 if self.cpu == info[1] and (self.sub & 0x00ffffff) == info[2]:
472 return info[0]
473 return "{0}.{1}".format(self.cpu,self.sub)
474
475
476 class Magic(dict_utils.Enum):
477
478 enum = {
479 'MH_MAGIC' : MH_MAGIC,
480 'MH_CIGAM' : MH_CIGAM,
481 'MH_MAGIC_64' : MH_MAGIC_64,
482 'MH_CIGAM_64' : MH_CIGAM_64,
483 'FAT_MAGIC' : FAT_MAGIC,
484 'FAT_CIGAM' : FAT_CIGAM
485 }
486
487 def __init__(self, initial_value = 0):
488 dict_utils.Enum.__init__(self, initial_value, self.enum)
489
490 def is_skinny_mach_file(self):
491 return self.value == MH_MAGIC or self.value == MH_CIGAM or self.value == MH_MAGIC_64 or self.value == MH_CIGAM_64
492
493 def is_universal_mach_file(self):
494 return self.value == FAT_MAGIC or self.value == FAT_CIGAM
495
496 def unpack(self, data):
497 data.set_byte_order('native')
498 self.value = data.get_uint32();
499
500 def get_byte_order(self):
501 if self.value == MH_CIGAM or self.value == MH_CIGAM_64 or self.value == FAT_CIGAM:
502 return swap_unpack_char()
503 else:
504 return '='
505
506 def is_64_bit(self):
507 return self.value == MH_MAGIC_64 or self.value == MH_CIGAM_64
508
509 def __init__(self):
510 self.magic = Mach.Magic()
511 self.content = None
512 self.path = None
513
514 def extract (self, path, extractor):
515 self.path = path;
516 self.unpack(extractor)
517
518 def parse(self, path):
519 self.path = path;
520 try:
521 f = open(self.path)
522 file_extractor = file_extract.FileExtract(f, '=')
523 self.unpack(file_extractor)
524 #f.close()
525 except IOError as (errno, strerror):
526 print "I/O error({0}): {1}".format(errno, strerror)
527 except ValueError:
528 print "Could not convert data to an integer."
529 except:
530 print "Unexpected error:", sys.exc_info()[0]
531 raise
532
533 def compare(self, rhs):
534 self.content.compare(rhs.content)
535
536 def dump(self, options = None):
537 self.content.dump(options)
538
539 def dump_header(self, dump_description = True, options = None):
540 self.content.dump_header(dump_description, options)
541
542 def dump_load_commands(self, dump_description = True, options = None):
543 self.content.dump_load_commands(dump_description, options)
544
545 def dump_sections(self, dump_description = True, options = None):
546 self.content.dump_sections(dump_description, options)
547
548 def dump_section_contents(self, options):
549 self.content.dump_section_contents(options)
550
551 def dump_symtab(self, dump_description = True, options = None):
552 self.content.dump_symtab(dump_description, options)
553
554 def dump_symbol_names_matching_regex(self, regex, file=None):
555 self.content.dump_symbol_names_matching_regex(regex, file)
556
557 def description(self):
558 return self.content.description()
559
560 def unpack(self, data):
561 self.magic.unpack(data)
562 if self.magic.is_skinny_mach_file():
563 self.content = Mach.Skinny(self.path)
564 elif self.magic.is_universal_mach_file():
565 self.content = Mach.Universal(self.path)
566 else:
567 self.content = None
568
569 if self.content != None:
570 self.content.unpack(data, self.magic)
571
572 def is_valid(self):
573 return self.content != None
574
575 class Universal:
576
577 def __init__(self, path):
578 self.path = path
579 self.type = 'universal'
580 self.file_off = 0
581 self.magic = None
582 self.nfat_arch = 0
583 self.archs = list()
584
585 def description(self):
586 s = '%#8.8x: %s (' % (self.file_off, self.path)
587 archs_string = ''
588 for arch in self.archs:
589 if len(archs_string):
590 archs_string += ', '
591 archs_string += '%s' % arch.arch
592 s += archs_string
593 s += ')'
594 return s
595
596 def unpack(self, data, magic = None):
597 self.file_off = data.tell()
598 if magic is None:
599 self.magic = Mach.Magic()
600 self.magic.unpack(data)
601 else:
602 self.magic = magic
603 self.file_off = self.file_off - 4
604 # Universal headers are always in big endian
605 data.set_byte_order('big')
606 self.nfat_arch = data.get_uint32()
607 for i in range(self.nfat_arch):
608 self.archs.append(Mach.Universal.ArchInfo())
609 self.archs[i].unpack(data)
610 for i in range(self.nfat_arch):
611 self.archs[i].mach = Mach.Skinny(self.path)
612 data.seek (self.archs[i].offset, 0)
613 skinny_magic = Mach.Magic()
614 skinny_magic.unpack (data)
615 self.archs[i].mach.unpack(data, skinny_magic)
616
617 def compare(self, rhs):
618 print 'error: comparing two universal files is not supported yet'
619 return False
620
621 def dump(self, options):
622 if options.dump_header:
623 print
624 print "Universal Mach File: magic = %s, nfat_arch = %u" % (self.magic, self.nfat_arch)
625 print
626 if self.nfat_arch > 0:
627 if options.dump_header:
628 self.archs[0].dump_header(True, options)
629 for i in range(self.nfat_arch):
630 self.archs[i].dump_flat(options)
631 if options.dump_header:
632 print
633 for i in range(self.nfat_arch):
634 self.archs[i].mach.dump(options)
635
636 def dump_header(self, dump_description = True, options = None):
637 if dump_description:
638 print self.description()
639 for i in range(self.nfat_arch):
640 self.archs[i].mach.dump_header(True, options)
641 print
642
643 def dump_load_commands(self, dump_description = True, options = None):
644 if dump_description:
645 print self.description()
646 for i in range(self.nfat_arch):
647 self.archs[i].mach.dump_load_commands(True, options)
648 print
649
650 def dump_sections(self, dump_description = True, options = None):
651 if dump_description:
652 print self.description()
653 for i in range(self.nfat_arch):
654 self.archs[i].mach.dump_sections(True, options)
655 print
656
657 def dump_section_contents(self, options):
658 for i in range(self.nfat_arch):
659 self.archs[i].mach.dump_section_contents(options)
660 print
661
662 def dump_symtab(self, dump_description = True, options = None):
663 if dump_description:
664 print self.description()
665 for i in range(self.nfat_arch):
666 self.archs[i].mach.dump_symtab(True, options)
667 print
668
669 def dump_symbol_names_matching_regex(self, regex, file=None):
670 for i in range(self.nfat_arch):
671 self.archs[i].mach.dump_symbol_names_matching_regex(regex, file)
672
673 class ArchInfo:
674
675 def __init__(self):
676 self.arch = Mach.Arch(0,0)
677 self.offset = 0
678 self.size = 0
679 self.align = 0
680 self.mach = None
681
682 def unpack(self, data):
683 # Universal headers are always in big endian
684 data.set_byte_order('big')
685 self.arch.cpu, self.arch.sub, self.offset, self.size, self.align = data.get_n_uint32(5)
686
687 def dump_header(self, dump_description = True, options = None):
688 if options.verbose:
689 print "CPU SUBTYPE OFFSET SIZE ALIGN"
690 print "---------- ---------- ---------- ---------- ----------"
691 else:
692 print "ARCH FILEOFFSET FILESIZE ALIGN"
693 print "---------- ---------- ---------- ----------"
694 def dump_flat(self, options):
695 if options.verbose:
696 print "%#8.8x %#8.8x %#8.8x %#8.8x %#8.8x" % (self.arch.cpu, self.arch.sub, self.offset, self.size, self.align)
697 else:
698 print "%-10s %#8.8x %#8.8x %#8.8x" % (self.arch, self.offset, self.size, self.align)
699 def dump(self):
700 print " cputype: %#8.8x" % self.arch.cpu
701 print "cpusubtype: %#8.8x" % self.arch.sub
702 print " offset: %#8.8x" % self.offset
703 print " size: %#8.8x" % self.size
704 print " align: %#8.8x" % self.align
705 def __str__(self):
706 return "Mach.Universal.ArchInfo: %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x" % (self.arch.cpu, self.arch.sub, self.offset, self.size, self.align)
707 def __repr__(self):
708 return "Mach.Universal.ArchInfo: %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x" % (self.arch.cpu, self.arch.sub, self.offset, self.size, self.align)
709
710 class Flags:
711
712 def __init__(self, b):
713 self.bits = b
714
715 def __str__(self):
716 s = ''
717 if self.bits & MH_NOUNDEFS:
718 s += 'MH_NOUNDEFS | '
719 if self.bits & MH_INCRLINK:
720 s += 'MH_INCRLINK | '
721 if self.bits & MH_DYLDLINK:
722 s += 'MH_DYLDLINK | '
723 if self.bits & MH_BINDATLOAD:
724 s += 'MH_BINDATLOAD | '
725 if self.bits & MH_PREBOUND:
726 s += 'MH_PREBOUND | '
727 if self.bits & MH_SPLIT_SEGS:
728 s += 'MH_SPLIT_SEGS | '
729 if self.bits & MH_LAZY_INIT:
730 s += 'MH_LAZY_INIT | '
731 if self.bits & MH_TWOLEVEL:
732 s += 'MH_TWOLEVEL | '
733 if self.bits & MH_FORCE_FLAT:
734 s += 'MH_FORCE_FLAT | '
735 if self.bits & MH_NOMULTIDEFS:
736 s += 'MH_NOMULTIDEFS | '
737 if self.bits & MH_NOFIXPREBINDING:
738 s += 'MH_NOFIXPREBINDING | '
739 if self.bits & MH_PREBINDABLE:
740 s += 'MH_PREBINDABLE | '
741 if self.bits & MH_ALLMODSBOUND:
742 s += 'MH_ALLMODSBOUND | '
743 if self.bits & MH_SUBSECTIONS_VIA_SYMBOLS:
744 s += 'MH_SUBSECTIONS_VIA_SYMBOLS | '
745 if self.bits & MH_CANONICAL:
746 s += 'MH_CANONICAL | '
747 if self.bits & MH_WEAK_DEFINES:
748 s += 'MH_WEAK_DEFINES | '
749 if self.bits & MH_BINDS_TO_WEAK:
750 s += 'MH_BINDS_TO_WEAK | '
751 if self.bits & MH_ALLOW_STACK_EXECUTION:
752 s += 'MH_ALLOW_STACK_EXECUTION | '
753 if self.bits & MH_ROOT_SAFE:
754 s += 'MH_ROOT_SAFE | '
755 if self.bits & MH_SETUID_SAFE:
756 s += 'MH_SETUID_SAFE | '
757 if self.bits & MH_NO_REEXPORTED_DYLIBS:
758 s += 'MH_NO_REEXPORTED_DYLIBS | '
759 if self.bits & MH_PIE:
760 s += 'MH_PIE | '
761 if self.bits & MH_DEAD_STRIPPABLE_DYLIB:
762 s += 'MH_DEAD_STRIPPABLE_DYLIB | '
763 if self.bits & MH_HAS_TLV_DESCRIPTORS:
764 s += 'MH_HAS_TLV_DESCRIPTORS | '
765 if self.bits & MH_NO_HEAP_EXECUTION:
766 s += 'MH_NO_HEAP_EXECUTION | '
767 # Strip the trailing " |" if we have any flags
768 if len(s) > 0:
769 s = s[0:-2]
770 return s
771
772 class FileType(dict_utils.Enum):
773
774 enum = {
775 'MH_OBJECT' : MH_OBJECT ,
776 'MH_EXECUTE' : MH_EXECUTE ,
777 'MH_FVMLIB' : MH_FVMLIB ,
778 'MH_CORE' : MH_CORE ,
779 'MH_PRELOAD' : MH_PRELOAD ,
780 'MH_DYLIB' : MH_DYLIB ,
781 'MH_DYLINKER' : MH_DYLINKER ,
782 'MH_BUNDLE' : MH_BUNDLE ,
783 'MH_DYLIB_STUB' : MH_DYLIB_STUB ,
784 'MH_DSYM' : MH_DSYM ,
785 'MH_KEXT_BUNDLE' : MH_KEXT_BUNDLE
786 }
787
788 def __init__(self, initial_value = 0):
789 dict_utils.Enum.__init__(self, initial_value, self.enum)
790
791 class Skinny:
792
793 def __init__(self, path):
794 self.path = path
795 self.type = 'skinny'
796 self.data = None
797 self.file_off = 0
798 self.magic = 0
799 self.arch = Mach.Arch(0,0)
800 self.filetype = Mach.FileType(0)
801 self.ncmds = 0
802 self.sizeofcmds = 0
803 self.flags = Mach.Flags(0)
804 self.uuid = None
805 self.commands = list()
806 self.segments = list()
807 self.sections = list()
808 self.symbols = list()
809 self.sections.append(Mach.Section())
810
811 def description(self):
812 return '%#8.8x: %s (%s)' % (self.file_off, self.path, self.arch)
813
814 def unpack(self, data, magic = None):
815 self.data = data
816 self.file_off = data.tell()
817 if magic is None:
818 self.magic = Mach.Magic()
819 self.magic.unpack(data)
820 else:
821 self.magic = magic
822 self.file_off = self.file_off - 4
823 data.set_byte_order(self.magic.get_byte_order())
824 self.arch.cpu, self.arch.sub, self.filetype.value, self.ncmds, self.sizeofcmds, bits = data.get_n_uint32(6)
825 self.flags.bits = bits
826
827 if self.is_64_bit():
828 data.get_uint32() # Skip reserved word in mach_header_64
829
830 for i in range(0,self.ncmds):
831 lc = self.unpack_load_command (data)
832 self.commands.append (lc)
833
834 def get_data(self):
835 if self.data:
836 self.data.set_byte_order(self.magic.get_byte_order())
837 return self.data
838 return None
839
840 def unpack_load_command (self, data):
841 lc = Mach.LoadCommand()
842 lc.unpack (self, data)
843 lc_command = lc.command.get_enum_value();
844 if (lc_command == LC_SEGMENT or
845 lc_command == LC_SEGMENT_64):
846 lc = Mach.SegmentLoadCommand(lc)
847 lc.unpack(self, data)
848 elif (lc_command == LC_LOAD_DYLIB or
849 lc_command == LC_ID_DYLIB or
850 lc_command == LC_LOAD_WEAK_DYLIB or
851 lc_command == LC_REEXPORT_DYLIB):
852 lc = Mach.DylibLoadCommand(lc)
853 lc.unpack(self, data)
854 elif (lc_command == LC_LOAD_DYLINKER or
855 lc_command == LC_SUB_FRAMEWORK or
856 lc_command == LC_SUB_CLIENT or
857 lc_command == LC_SUB_UMBRELLA or
858 lc_command == LC_SUB_LIBRARY or
859 lc_command == LC_ID_DYLINKER or
860 lc_command == LC_RPATH):
861 lc = Mach.LoadDYLDLoadCommand(lc)
862 lc.unpack(self, data)
863 elif (lc_command == LC_DYLD_INFO_ONLY):
864 lc = Mach.DYLDInfoOnlyLoadCommand(lc)
865 lc.unpack(self, data)
866 elif (lc_command == LC_SYMTAB):
867 lc = Mach.SymtabLoadCommand(lc)
868 lc.unpack(self, data)
869 elif (lc_command == LC_DYSYMTAB):
870 lc = Mach.DYLDSymtabLoadCommand(lc)
871 lc.unpack(self, data)
872 elif (lc_command == LC_UUID):
873 lc = Mach.UUIDLoadCommand(lc)
874 lc.unpack(self, data)
875 elif (lc_command == LC_CODE_SIGNATURE or
876 lc_command == LC_SEGMENT_SPLIT_INFO or
877 lc_command == LC_FUNCTION_STARTS):
878 lc = Mach.DataBlobLoadCommand(lc)
879 lc.unpack(self, data)
880 elif (lc_command == LC_UNIXTHREAD):
881 lc = Mach.UnixThreadLoadCommand(lc)
882 lc.unpack(self, data)
883 elif (lc_command == LC_ENCRYPTION_INFO):
884 lc = Mach.EncryptionInfoLoadCommand(lc)
885 lc.unpack(self, data)
886 lc.skip(data)
887 return lc
888
889 def compare(self, rhs):
890 print "\nComparing:"
891 print "a) %s %s" % (self.arch, self.path)
892 print "b) %s %s" % (rhs.arch, rhs.path)
893 result = True
894 if self.type == rhs.type:
895 for lhs_section in self.sections[1:]:
896 rhs_section = rhs.get_section_by_section(lhs_section)
897 if rhs_section:
898 print 'comparing %s.%s...' % (lhs_section.segname, lhs_section.sectname),
899 sys.stdout.flush()
900 lhs_data = lhs_section.get_contents (self)
901 rhs_data = rhs_section.get_contents (rhs)
902 if lhs_data and rhs_data:
903 if lhs_data == rhs_data:
904 print 'ok'
905 else:
906 lhs_data_len = len(lhs_data)
907 rhs_data_len = len(rhs_data)
908 # if lhs_data_len < rhs_data_len:
909 # if lhs_data == rhs_data[0:lhs_data_len]:
910 # print 'section data for %s matches the first %u bytes' % (lhs_section.sectname, lhs_data_len)
911 # else:
912 # # TODO: check padding
913 # result = False
914 # elif lhs_data_len > rhs_data_len:
915 # if lhs_data[0:rhs_data_len] == rhs_data:
916 # print 'section data for %s matches the first %u bytes' % (lhs_section.sectname, lhs_data_len)
917 # else:
918 # # TODO: check padding
919 # result = False
920 # else:
921 result = False
922 print 'error: sections differ'
923 #print 'a) %s' % (lhs_section)
924 # dump_hex_byte_string_diff(0, lhs_data, rhs_data)
925 #print 'b) %s' % (rhs_section)
926 # dump_hex_byte_string_diff(0, rhs_data, lhs_data)
927 elif lhs_data and not rhs_data:
928 print 'error: section data missing from b:'
929 print 'a) %s' % (lhs_section)
930 print 'b) %s' % (rhs_section)
931 result = False
932 elif not lhs_data and rhs_data:
933 print 'error: section data missing from a:'
934 print 'a) %s' % (lhs_section)
935 print 'b) %s' % (rhs_section)
936 result = False
937 elif lhs_section.offset or rhs_section.offset:
938 print 'error: section data missing for both a and b:'
939 print 'a) %s' % (lhs_section)
940 print 'b) %s' % (rhs_section)
941 result = False
942 else:
943 print 'ok'
944 else:
945 result = False
946 print 'error: section %s is missing in %s' % (lhs_section.sectname, rhs.path)
947 else:
948 print 'error: comaparing a %s mach-o file with a %s mach-o file is not supported' % (self.type, rhs.type)
949 result = False
950 if not result:
951 print 'error: mach files differ'
952 return result
        def dump_header(self, dump_description = True, options = None):
            # Prints the two column-heading lines that go with the rows
            # printed by dump_flat(); "options.verbose" selects the numeric
            # layout. "dump_description" is not used here.
            # NOTE(review): dump_header is defined again further down with
            # the same signature. If both definitions are in the same class,
            # as it appears, the later one replaces this one and this body
            # is never called. Presumably this was meant to have a
            # different name -- TODO confirm.
            if options.verbose:
                print "MAGIC CPU SUBTYPE FILETYPE NUM CMDS SIZE CMDS FLAGS"
                print "---------- ---------- ---------- ---------- -------- ---------- ----------"
            else:
                print "MAGIC ARCH FILETYPE NUM CMDS SIZE CMDS FLAGS"
                print "------------ ---------- -------------- -------- ---------- ----------"
960
961 def dump_flat(self, options):
962 if options.verbose:
963 print "%#8.8x %#8.8x %#8.8x %#8.8x %#8u %#8.8x %#8.8x" % (self.magic, self.arch.cpu , self.arch.sub, self.filetype.value, self.ncmds, self.sizeofcmds, self.flags.bits)
964 else:
965 print "%-12s %-10s %-14s %#8u %#8.8x %s" % (self.magic, self.arch, self.filetype, self.ncmds, self.sizeofcmds, self.flags)
966
967 def dump(self, options):
968 if options.dump_header:
969 self.dump_header(True, options)
970 if options.dump_load_commands:
971 self.dump_load_commands(False, options)
972 if options.dump_sections:
973 self.dump_sections(False, options)
974 if options.section_names:
975 self.dump_section_contents(options)
976 if options.dump_symtab:
977 self.get_symtab()
978 if len(self.symbols):
979 self.dump_sections(False, options)
980 else:
981 print "No symbols"
982 if options.find_mangled:
983 self.dump_symbol_names_matching_regex (re.compile('^_?_Z'))
984
985 def dump_header(self, dump_description = True, options = None):
986 if dump_description:
987 print self.description()
988 print "Mach Header"
989 print " magic: %#8.8x %s" % (self.magic.value, self.magic)
990 print " cputype: %#8.8x %s" % (self.arch.cpu, self.arch)
991 print " cpusubtype: %#8.8x" % self.arch.sub
992 print " filetype: %#8.8x %s" % (self.filetype.get_enum_value(), self.filetype.get_enum_name())
993 print " ncmds: %#8.8x %u" % (self.ncmds, self.ncmds)
994 print " sizeofcmds: %#8.8x" % self.sizeofcmds
995 print " flags: %#8.8x %s" % (self.flags.bits, self.flags)
996
997 def dump_load_commands(self, dump_description = True, options = None):
998 if dump_description:
999 print self.description()
1000 for lc in self.commands:
1001 print lc
1002
1003 def get_section_by_name (self, name):
1004 for section in self.sections:
1005 if section.sectname and section.sectname == name:
1006 return section
1007 return None
1008
1009 def get_section_by_section (self, other_section):
1010 for section in self.sections:
1011 if section.sectname == other_section.sectname and section.segname == other_section.segname:
1012 return section
1013 return None
1014
1015 def dump_sections(self, dump_description = True, options = None):
1016 if dump_description:
1017 print self.description()
1018 num_sections = len(self.sections)
1019 if num_sections > 1:
1020 self.sections[1].dump_header()
1021 for sect_idx in range(1,num_sections):
1022 print "%s" % self.sections[sect_idx]
1023
def dump_section_contents(self, options):
    '''Dump, or save to disk, the contents of every requested section.

    The following attributes of options are read:
    section_names   -- names looked up with self.get_section_by_name().
    outfile         -- when set, the first section found is written to this
                       path; later sections are skipped with an error.
    extract_modules -- with outfile set, treat the section as a table of
                       modules and write only the module payloads.
    max_count       -- handed to dump_memory() when dumping to stdout.
    '''
    saved_section_to_disk = False
    for sectname in options.section_names:
        section = self.get_section_by_name(sectname)
        if not section:
            print('error: no section named "%s" was found' % (sectname))
            continue
        sect_bytes = section.get_contents(self)
        if not options.outfile:
            print('section %s:\n' % (sectname))
            section.dump_header()
            print('%s\n' % (section))
            dump_memory(0, sect_bytes, options.max_count, 16)
            continue
        if saved_section_to_disk:
            print("error: you can only save a single section to disk at a time, skipping section '%s'" % (sectname))
            continue
        # Section contents are raw bytes, so the file is opened in binary
        # mode; the with-block closes it even if parsing or writing raises.
        with open(options.outfile, 'wb') as outfile:
            if options.extract_modules:
                data = file_extract.FileExtract(StringIO.StringIO(sect_bytes), self.data.byte_order)
                data.get_uint32()  # version, not used
                num_modules = data.get_uint32()
                for i in range(num_modules):
                    data_offset = data.get_uint64()
                    data_size = data.get_uint64()
                    data.get_uint32()  # name_offset, not used
                    data.get_uint32()  # language, not used
                    data.get_uint32()  # flags, not used
                    # NOTE(review): the stream position is not restored
                    # after this seek, so the next entry is read from just
                    # past this payload -- confirm that matches the layout.
                    data.seek(data_offset)
                    outfile.write(data.read_size(data_size))
            else:
                print("Saving section %s to '%s'" % (sectname, options.outfile))
                outfile.write(sect_bytes)
        saved_section_to_disk = True
1064
def get_segment(self, segname):
    '''Return the segment named segname, or None when there is none.

    When the image has exactly one segment and that segment's name is the
    empty string, it is returned regardless of segname.
    '''
    segments = self.segments
    if len(segments) == 1:
        only_segment = segments[0]
        if only_segment.segname == '':
            return only_segment
    return next((seg for seg in segments if seg.segname == segname), None)
1072
def get_first_load_command(self, lc_enum_value):
    '''Return the first load command whose command.value equals
    lc_enum_value, or None when no load command matches.
    '''
    for load_command in self.commands:
        if load_command.command.value != lc_enum_value:
            continue
        return load_command
    return None
1078
def get_symtab(self):
    '''Fill self.symbols from the LC_SYMTAB load command.

    Nothing happens when self.data is falsy or self.symbols is already
    populated.  When there is no LC_SYMTAB load command, "no LC_SYMTAB" is
    printed and self.symbols is left untouched.  Returns None.
    '''
    if not self.data or self.symbols:
        return
    lc_symtab = self.get_first_load_command(LC_SYMTAB)
    if not lc_symtab:
        print("no LC_SYMTAB")
        return

    symtab_offset = self.file_off
    if self.data.is_in_memory():
        linkedit_segment = self.get_segment('__LINKEDIT')
        if linkedit_segment:
            # Rebase the symbol table's file offset onto the address at
            # which __LINKEDIT is mapped.  Only symoff belongs here: the
            # string table offset (stroff) must not replace it, otherwise
            # the nlist entries would be read from the string table.
            symtab_offset = (linkedit_segment.vmaddr + lc_symtab.symoff -
                             linkedit_segment.fileoff)
    else:
        symtab_offset += lc_symtab.symoff

    self.data.seek(symtab_offset)
    for _ in range(lc_symtab.nsyms):
        nlist = Mach.NList()
        nlist.unpack(self, self.data, lc_symtab)
        self.symbols.append(nlist)
1102
def dump_symtab(self, dump_description=True, options=None):
    '''Load the symbol table if needed and print one indexed line per symbol.

    dump_description -- when true, self.description() is printed first.
    options          -- accepted but not consulted by this method.
    '''
    self.get_symtab()
    if dump_description:
        print(self.description())
    rows = ('[%5u] %s' % entry for entry in enumerate(self.symbols))
    for row in rows:
        print(row)
1109
def dump_symbol_names_matching_regex(self, regex, file=None):
    '''Print the name of every symbol that regex.search() matches.

    regex -- object with a search() method, e.g. a compiled pattern.
    file  -- optional writable object; each matching name is also written
             to it followed by a newline.
    Symbols with an empty or unset name are skipped.
    '''
    self.get_symtab()
    for symbol in self.symbols:
        name = symbol.name
        if not name or not regex.search(name):
            continue
        print(name)
        if file:
            file.write('%s\n' % (name))
1117
def is_64_bit(self):
    '''Return whatever self.magic.is_64_bit() reports for this image.'''
    magic = self.magic
    return magic.is_64_bit()
1120
class LoadCommand:
    '''Generic load command: a command id, a byte length and the offset in
    the data stream at which the command starts.  The specialised load
    command classes build on this one.
    '''

    class Command(dict_utils.Enum):
        '''Enum that maps load command names to their LC_* values.'''

        enum = {
            'LC_SEGMENT': LC_SEGMENT,
            'LC_SYMTAB': LC_SYMTAB,
            'LC_SYMSEG': LC_SYMSEG,
            'LC_THREAD': LC_THREAD,
            'LC_UNIXTHREAD': LC_UNIXTHREAD,
            'LC_LOADFVMLIB': LC_LOADFVMLIB,
            'LC_IDFVMLIB': LC_IDFVMLIB,
            'LC_IDENT': LC_IDENT,
            'LC_FVMFILE': LC_FVMFILE,
            'LC_PREPAGE': LC_PREPAGE,
            'LC_DYSYMTAB': LC_DYSYMTAB,
            'LC_LOAD_DYLIB': LC_LOAD_DYLIB,
            'LC_ID_DYLIB': LC_ID_DYLIB,
            'LC_LOAD_DYLINKER': LC_LOAD_DYLINKER,
            'LC_ID_DYLINKER': LC_ID_DYLINKER,
            'LC_PREBOUND_DYLIB': LC_PREBOUND_DYLIB,
            'LC_ROUTINES': LC_ROUTINES,
            'LC_SUB_FRAMEWORK': LC_SUB_FRAMEWORK,
            'LC_SUB_UMBRELLA': LC_SUB_UMBRELLA,
            'LC_SUB_CLIENT': LC_SUB_CLIENT,
            'LC_SUB_LIBRARY': LC_SUB_LIBRARY,
            'LC_TWOLEVEL_HINTS': LC_TWOLEVEL_HINTS,
            'LC_PREBIND_CKSUM': LC_PREBIND_CKSUM,
            'LC_LOAD_WEAK_DYLIB': LC_LOAD_WEAK_DYLIB,
            'LC_SEGMENT_64': LC_SEGMENT_64,
            'LC_ROUTINES_64': LC_ROUTINES_64,
            'LC_UUID': LC_UUID,
            'LC_RPATH': LC_RPATH,
            'LC_CODE_SIGNATURE': LC_CODE_SIGNATURE,
            'LC_SEGMENT_SPLIT_INFO': LC_SEGMENT_SPLIT_INFO,
            'LC_REEXPORT_DYLIB': LC_REEXPORT_DYLIB,
            'LC_LAZY_LOAD_DYLIB': LC_LAZY_LOAD_DYLIB,
            'LC_ENCRYPTION_INFO': LC_ENCRYPTION_INFO,
            'LC_DYLD_INFO': LC_DYLD_INFO,
            'LC_DYLD_INFO_ONLY': LC_DYLD_INFO_ONLY,
            'LC_LOAD_UPWARD_DYLIB': LC_LOAD_UPWARD_DYLIB,
            'LC_VERSION_MIN_MACOSX': LC_VERSION_MIN_MACOSX,
            'LC_VERSION_MIN_IPHONEOS': LC_VERSION_MIN_IPHONEOS,
            'LC_FUNCTION_STARTS': LC_FUNCTION_STARTS,
            'LC_DYLD_ENVIRONMENT': LC_DYLD_ENVIRONMENT,
        }

        def __init__(self, initial_value=0):
            dict_utils.Enum.__init__(self, initial_value, self.enum)

    def __init__(self, c=None, l=0, o=0):
        '''c -- Command enum to adopt (a fresh Command(0) when None).
        l -- length of the load command in bytes.
        o -- offset of the load command in the data stream.
        '''
        # Identity test: "!=" would go through whatever comparison the
        # enum type defines instead of simply checking for "no argument".
        if c is not None:
            self.command = c
        else:
            self.command = Mach.LoadCommand.Command(0)
        self.length = l
        self.file_off = o

    def unpack(self, mach_file, data):
        '''Record the current stream position, then read the command id and
        the length as two 32 bit unsigned integers.  mach_file is unused.
        '''
        self.file_off = data.tell()
        self.command.value, self.length = data.get_n_uint32(2)

    def skip(self, data):
        '''Seek data to the first byte after this load command.'''
        data.seek(self.file_off + self.length, 0)

    def __str__(self):
        lc_name = self.command.get_enum_name()
        return '%#8.8x: <%#4.4x> %-24s' % (self.file_off, self.length, lc_name)
1188
class Section:
    '''One section record, in either the 32 bit or the 64 bit layout.'''

    def __init__(self):
        self.index = 0
        self.is_64 = False
        self.sectname = None
        self.segname = None
        self.addr = 0
        self.size = 0
        self.offset = 0
        self.align = 0
        self.reloff = 0
        self.nreloc = 0
        self.flags = 0
        self.reserved1 = 0
        self.reserved2 = 0
        self.reserved3 = 0

    def unpack(self, is_64, data):
        '''Read the section record from data.

        is_64 -- true to read 64 bit addr/size and the extra reserved3
                 field, false to read the 32 bit layout (reserved3 is then
                 left unchanged).
        '''
        self.is_64 = is_64
        self.sectname = data.get_fixed_length_c_string(16, '', True)
        self.segname = data.get_fixed_length_c_string(16, '', True)
        if self.is_64:
            self.addr, self.size = data.get_n_uint64(2)
            (self.offset, self.align, self.reloff, self.nreloc, self.flags,
             self.reserved1, self.reserved2,
             self.reserved3) = data.get_n_uint32(8)
        else:
            self.addr, self.size = data.get_n_uint32(2)
            (self.offset, self.align, self.reloff, self.nreloc, self.flags,
             self.reserved1, self.reserved2) = data.get_n_uint32(7)

    def dump_header(self):
        '''Print the two column-header lines that go with __str__().'''
        if self.is_64:
            print("INDEX ADDRESS SIZE OFFSET ALIGN RELOFF NRELOC FLAGS RESERVED1 RESERVED2 RESERVED3 NAME")
            print("===== ------------------ ------------------ ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------------------")
        else:
            print("INDEX ADDRESS SIZE OFFSET ALIGN RELOFF NRELOC FLAGS RESERVED1 RESERVED2 NAME")
            print("===== ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------------------")

    def __str__(self):
        if self.is_64:
            return "[%3u] %#16.16x %#16.16x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %s.%s" % (
                self.index, self.addr, self.size, self.offset, self.align,
                self.reloff, self.nreloc, self.flags, self.reserved1,
                self.reserved2, self.reserved3, self.segname, self.sectname)
        return "[%3u] %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %s.%s" % (
            self.index, self.addr, self.size, self.offset, self.align,
            self.reloff, self.nreloc, self.flags, self.reserved1,
            self.reserved2, self.segname, self.sectname)

    def get_contents(self, mach_file):
        '''Get the section contents as a python string.

        Returns None when the section is empty, when mach_file has no
        segment named self.segname, when that segment has no file data, or
        when mach_file.get_data() returns nothing.
        '''
        if self.size <= 0:
            return None
        segment = mach_file.get_segment(self.segname)
        # get_segment() may return None; treat that like "no contents"
        # rather than failing on the attribute access.
        if segment is None or segment.filesize <= 0:
            return None
        data = mach_file.get_data()
        if not data:
            return None
        data.push_offset_and_seek(mach_file.file_off + self.offset)
        try:
            contents = data.read_size(self.size)
        finally:
            # Always restore the caller's stream position.
            data.pop_offset_and_seek()
        return contents
1244
class DylibLoadCommand(LoadCommand):
    '''Load command that carries a library name, a timestamp and the
    current and compatibility versions.
    '''

    def __init__(self, lc):
        '''Adopt the command, length and file offset of the generic lc.'''
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.name = None
        self.timestamp = 0
        self.current_version = 0
        self.compatibility_version = 0

    def unpack(self, mach_file, data):
        '''Read the fixed fields and then the name string.  mach_file is
        unused.
        '''
        (name_offset, self.timestamp, self.current_version,
         self.compatibility_version) = data.get_n_uint32(4)
        # name_offset is relative to the start of the load command, and the
        # string runs to the end of the command, so its size follows from
        # the offset actually stored rather than from an assumed 24.
        data.seek(self.file_off + name_offset, 0)
        self.name = data.get_fixed_length_c_string(self.length - name_offset)

    def __str__(self):
        s = Mach.LoadCommand.__str__(self)
        s += "%#8.8x %#8.8x %#8.8x " % (self.timestamp, self.current_version, self.compatibility_version)
        s += self.name
        return s
1264
class LoadDYLDLoadCommand(LoadCommand):
    '''Load command whose only payload is a name string.'''

    def __init__(self, lc):
        '''Adopt the command, length and file offset of the generic lc.'''
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.name = None

    def unpack(self, mach_file, data):
        '''Read the name offset and then the name string.  mach_file is
        unused.
        '''
        name_offset = data.get_uint32()
        # Honour the stored offset (relative to the start of the load
        # command) instead of assuming the name starts at byte 12; the two
        # agree whenever the offset is 12.
        data.seek(self.file_off + name_offset, 0)
        self.name = data.get_fixed_length_c_string(self.length - name_offset)

    def __str__(self):
        s = Mach.LoadCommand.__str__(self)
        s += "%s" % self.name
        return s
1278
class UnixThreadLoadCommand(LoadCommand):
    '''Load command that holds thread register state.'''

    class ThreadState:
        '''A flavor, a register count and that many 32 bit register values.'''

        def __init__(self):
            self.flavor = 0
            self.count = 0
            self.register_values = list()

        def unpack(self, data):
            '''Read flavor and count, then count 32 bit register values.'''
            self.flavor, self.count = data.get_n_uint32(2)
            self.register_values = data.get_n_uint32(self.count)

        def __str__(self):
            pieces = ["flavor = %u, count = %u, regs =" % (self.flavor, self.count)]
            for position, register_value in enumerate(self.register_values):
                # Start a new output line before every eighth register.
                if position % 8 == 0:
                    pieces.append("\n ")
                pieces.append(" %#8.8x" % register_value)
            return ''.join(pieces)

    def __init__(self, lc):
        '''Adopt the command, length and file offset of the generic lc.'''
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.reg_sets = list()

    def unpack(self, mach_file, data):
        '''Read one ThreadState from data and append it to self.reg_sets.
        Only a single state is decoded per call; mach_file is unused.
        '''
        thread_state = Mach.UnixThreadLoadCommand.ThreadState()
        thread_state.unpack(data)
        self.reg_sets.append(thread_state)

    def __str__(self):
        header = Mach.LoadCommand.__str__(self)
        return header + ''.join(["%s" % reg_set for reg_set in self.reg_sets])
1314
class DYLDInfoOnlyLoadCommand(LoadCommand):
    '''Load command holding five offset/size pairs: rebase, bind, weak
    bind, lazy bind and export.
    '''

    def __init__(self, lc):
        '''Adopt the command, length and file offset of the generic lc.'''
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.rebase_off = 0
        self.rebase_size = 0
        self.bind_off = 0
        self.bind_size = 0
        self.weak_bind_off = 0
        self.weak_bind_size = 0
        self.lazy_bind_off = 0
        self.lazy_bind_size = 0
        self.export_off = 0
        self.export_size = 0

    def unpack(self, mach_file, data):
        '''Read the ten 32 bit fields from data.  mach_file is unused (the
        byte order lookup that used to be here was never consulted).
        '''
        (self.rebase_off, self.rebase_size,
         self.bind_off, self.bind_size,
         self.weak_bind_off, self.weak_bind_size,
         self.lazy_bind_off, self.lazy_bind_size,
         self.export_off, self.export_size) = data.get_n_uint32(10)

    def __str__(self):
        # Every pair, the last one included, is followed by ", ".
        s = Mach.LoadCommand.__str__(self)
        s += "rebase_off = %#8.8x, rebase_size = %u, " % (self.rebase_off, self.rebase_size)
        s += "bind_off = %#8.8x, bind_size = %u, " % (self.bind_off, self.bind_size)
        s += "weak_bind_off = %#8.8x, weak_bind_size = %u, " % (self.weak_bind_off, self.weak_bind_size)
        s += "lazy_bind_off = %#8.8x, lazy_bind_size = %u, " % (self.lazy_bind_off, self.lazy_bind_size)
        s += "export_off = %#8.8x, export_size = %u, " % (self.export_off, self.export_size)
        return s
1341
class DYLDSymtabLoadCommand(LoadCommand):
    '''Load command holding eighteen 32 bit index, count and offset fields
    (ilocalsym through nlocrel).
    '''

    def __init__(self, lc):
        '''Adopt the command, length and file offset of the generic lc.'''
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.ilocalsym = 0
        self.nlocalsym = 0
        self.iextdefsym = 0
        self.nextdefsym = 0
        self.iundefsym = 0
        self.nundefsym = 0
        self.tocoff = 0
        self.ntoc = 0
        self.modtaboff = 0
        self.nmodtab = 0
        self.extrefsymoff = 0
        self.nextrefsyms = 0
        self.indirectsymoff = 0
        self.nindirectsyms = 0
        self.extreloff = 0
        self.nextrel = 0
        self.locreloff = 0
        self.nlocrel = 0

    def unpack(self, mach_file, data):
        '''Read the eighteen 32 bit fields from data.  mach_file is unused
        (the byte order lookup that used to be here was never consulted).
        '''
        (self.ilocalsym, self.nlocalsym,
         self.iextdefsym, self.nextdefsym,
         self.iundefsym, self.nundefsym,
         self.tocoff, self.ntoc,
         self.modtaboff, self.nmodtab,
         self.extrefsymoff, self.nextrefsyms,
         self.indirectsymoff, self.nindirectsyms,
         self.extreloff, self.nextrel,
         self.locreloff, self.nlocrel) = data.get_n_uint32(18)

    def __str__(self):
        # One field pair per output line; the last line has no newline.
        s = Mach.LoadCommand.__str__(self)
        s += "ilocalsym = %-10u, nlocalsym = %u\n" % (self.ilocalsym, self.nlocalsym)
        s += " iextdefsym = %-10u, nextdefsym = %u\n" % (self.iextdefsym, self.nextdefsym)
        s += " iundefsym = %-10u, nundefsym = %u\n" % (self.iundefsym, self.nundefsym)
        s += " tocoff = %#8.8x, ntoc = %u\n" % (self.tocoff, self.ntoc)
        s += " modtaboff = %#8.8x, nmodtab = %u\n" % (self.modtaboff, self.nmodtab)
        s += " extrefsymoff = %#8.8x, nextrefsyms = %u\n" % (self.extrefsymoff, self.nextrefsyms)
        s += " indirectsymoff = %#8.8x, nindirectsyms = %u\n" % (self.indirectsymoff, self.nindirectsyms)
        s += " extreloff = %#8.8x, nextrel = %u\n" % (self.extreloff, self.nextrel)
        s += " locreloff = %#8.8x, nlocrel = %u" % (self.locreloff, self.nlocrel)
        return s
1389
class SymtabLoadCommand(LoadCommand):
    '''Load command giving the offset and count of the symbol entries and
    the offset and size of the string table.
    '''

    def __init__(self, lc):
        '''Adopt the command, length and file offset of the generic lc.'''
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.symoff = 0
        self.nsyms = 0
        self.stroff = 0
        self.strsize = 0

    def unpack(self, mach_file, data):
        '''Read symoff, nsyms, stroff and strsize as 32 bit values.
        mach_file is unused (the byte order lookup that used to be here was
        never consulted).
        '''
        (self.symoff, self.nsyms,
         self.stroff, self.strsize) = data.get_n_uint32(4)

    def __str__(self):
        s = Mach.LoadCommand.__str__(self)
        s += "symoff = %#8.8x, nsyms = %u, stroff = %#8.8x, strsize = %u" % (self.symoff, self.nsyms, self.stroff, self.strsize)
        return s
1406
1407
class UUIDLoadCommand(LoadCommand):
    '''Load command that carries a 16 byte UUID.'''

    def __init__(self, lc):
        '''Adopt the command, length and file offset of the generic lc.'''
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.uuid = None

    def unpack(self, mach_file, data):
        '''Read 16 bytes, turn them into a uuid.UUID and store it on both
        this load command and mach_file.
        '''
        raw_bytes = data.get_n_uint8(16)
        hex_digits = ''.join(['%2.2x' % byte for byte in raw_bytes])
        self.uuid = uuid.UUID(hex_digits)
        mach_file.uuid = self.uuid

    def __str__(self):
        return Mach.LoadCommand.__str__(self) + str(self.uuid)
1425
class DataBlobLoadCommand(LoadCommand):
    '''Load command that describes a blob of data by offset and size.'''

    def __init__(self, lc):
        '''Adopt the command, length and file offset of the generic lc.'''
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.dataoff = 0
        self.datasize = 0

    def unpack(self, mach_file, data):
        '''Read dataoff and datasize as 32 bit values.  mach_file is unused
        (the byte order lookup that used to be here was never consulted).
        '''
        self.dataoff, self.datasize = data.get_n_uint32(2)

    def __str__(self):
        s = Mach.LoadCommand.__str__(self)
        s += "dataoff = %#8.8x, datasize = %u" % (self.dataoff, self.datasize)
        return s
1440
class EncryptionInfoLoadCommand(LoadCommand):
    '''Load command holding cryptoff, cryptsize and cryptid.'''

    def __init__(self, lc):
        '''Adopt the command, length and file offset of the generic lc.'''
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.cryptoff = 0
        self.cryptsize = 0
        self.cryptid = 0

    def unpack(self, mach_file, data):
        '''Read cryptoff, cryptsize and cryptid as 32 bit values.
        mach_file is unused (the byte order lookup that used to be here was
        never consulted).
        '''
        self.cryptoff, self.cryptsize, self.cryptid = data.get_n_uint32(3)

    def __str__(self):
        # The range is printed half-open: [cryptoff, cryptoff + cryptsize).
        s = Mach.LoadCommand.__str__(self)
        s += "file-range = [%#8.8x - %#8.8x), cryptsize = %u, cryptid = %u" % (self.cryptoff, self.cryptoff + self.cryptsize, self.cryptsize, self.cryptid)
        return s
1456
class SegmentLoadCommand(LoadCommand):
    '''Segment load command (32 or 64 bit) together with its sections.'''

    def __init__(self, lc):
        '''Adopt the command, length and file offset of the generic lc.'''
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.segname = None
        self.vmaddr = 0
        self.vmsize = 0
        self.fileoff = 0
        self.filesize = 0
        self.maxprot = 0
        self.initprot = 0
        self.nsects = 0
        self.flags = 0

    def unpack(self, mach_file, data):
        '''Read the segment fields, append this segment to
        mach_file.segments, then read self.nsects sections and append each
        to mach_file.sections.  A section's index is its position in that
        list.
        '''
        is_64 = self.command.get_enum_value() == LC_SEGMENT_64
        self.segname = data.get_fixed_length_c_string(16, '', True)
        # The four address/size fields are 64 bit wide only for
        # LC_SEGMENT_64.
        read_words = data.get_n_uint64 if is_64 else data.get_n_uint32
        self.vmaddr, self.vmsize, self.fileoff, self.filesize = read_words(4)
        (self.maxprot, self.initprot,
         self.nsects, self.flags) = data.get_n_uint32(4)
        mach_file.segments.append(self)
        for _ in range(self.nsects):
            new_section = Mach.Section()
            new_section.unpack(is_64, data)
            new_section.index = len(mach_file.sections)
            mach_file.sections.append(new_section)

    def __str__(self):
        header = Mach.LoadCommand.__str__(self)
        if self.command.get_enum_value() == LC_SEGMENT:
            range_format = "%#8.8x %#8.8x %#8.8x %#8.8x "
        else:
            range_format = "%#16.16x %#16.16x %#16.16x %#16.16x "
        ranges = range_format % (self.vmaddr, self.vmsize, self.fileoff, self.filesize)
        protections = "%s %s %3u %#8.8x" % (vm_prot_names[self.maxprot], vm_prot_names[self.initprot], self.nsects, self.flags)
        return header + ranges + protections + ' ' + self.segname
1496
class NList:
    '''One symbol table entry: string table offset, type byte, section
    index, desc, value and the symbol name read from the string table.
    '''

    class Type:
        '''Wrapper around the type byte that knows how to describe it.'''

        class Stab(dict_utils.Enum):
            '''Enum that maps stab names to their N_* values.'''

            enum = {
                'N_GSYM': N_GSYM,
                'N_FNAME': N_FNAME,
                'N_FUN': N_FUN,
                'N_STSYM': N_STSYM,
                'N_LCSYM': N_LCSYM,
                'N_BNSYM': N_BNSYM,
                'N_OPT': N_OPT,
                'N_RSYM': N_RSYM,
                'N_SLINE': N_SLINE,
                'N_ENSYM': N_ENSYM,
                'N_SSYM': N_SSYM,
                'N_SO': N_SO,
                'N_OSO': N_OSO,
                'N_LSYM': N_LSYM,
                'N_BINCL': N_BINCL,
                'N_SOL': N_SOL,
                'N_PARAMS': N_PARAMS,
                'N_VERSION': N_VERSION,
                'N_OLEVEL': N_OLEVEL,
                'N_PSYM': N_PSYM,
                'N_EINCL': N_EINCL,
                'N_ENTRY': N_ENTRY,
                'N_LBRAC': N_LBRAC,
                'N_EXCL': N_EXCL,
                'N_RBRAC': N_RBRAC,
                'N_BCOMM': N_BCOMM,
                'N_ECOMM': N_ECOMM,
                'N_ECOML': N_ECOML,
                'N_LENG': N_LENG,
            }

            def __init__(self, magic=0):
                dict_utils.Enum.__init__(self, magic, self.enum)

        def __init__(self, t=0):
            self.value = t

        def __str__(self):
            '''Describe the type byte: the stab name when any N_STAB bit is
            set, otherwise the N_TYPE name plus PEXT/EXT markers.
            '''
            n_type = self.value
            if n_type & N_STAB:
                return '%s' % Mach.NList.Type.Stab(n_type)
            basic_type = n_type & N_TYPE
            type_str = "??? (%#2.2x)" % basic_type
            # First match wins, in the same order as the constants are
            # listed here.
            known_types = (
                (N_UNDF, 'N_UNDF'),
                (N_ABS, 'N_ABS '),
                (N_SECT, 'N_SECT'),
                (N_PBUD, 'N_PBUD'),
                (N_INDR, 'N_INDR'),
            )
            for known_value, label in known_types:
                if basic_type == known_value:
                    type_str = label
                    break
            if n_type & N_PEXT:
                type_str += ' | PEXT'
            if n_type & N_EXT:
                type_str += ' | EXT '
            return type_str

    def __init__(self):
        self.index = 0
        self.name_offset = 0
        self.name = 0  # stays 0 until unpack() reads the name string
        self.type = Mach.NList.Type()
        self.sect_idx = 0
        self.desc = 0
        self.value = 0

    def unpack(self, mach_file, data, symtab_lc):
        '''Read one entry from data and look up its name.

        mach_file -- supplies symbols (for the index), is_64_bit() and
                     file_off.
        symtab_lc -- supplies stroff, the string table offset.
        The value field is 64 bit wide when mach_file.is_64_bit() is true,
        32 bit otherwise.  The stream position is restored after the name
        has been read.
        '''
        self.index = len(mach_file.symbols)
        self.name_offset = data.get_uint32()
        self.type.value, self.sect_idx = data.get_n_uint8(2)
        self.desc = data.get_uint16()
        if mach_file.is_64_bit():
            self.value = data.get_uint64()
        else:
            self.value = data.get_uint32()
        data.push_offset_and_seek(mach_file.file_off + symtab_lc.stroff + self.name_offset)
        self.name = data.get_c_string()
        data.pop_offset_and_seek()

    def __str__(self):
        name_display = ''
        # Truthiness instead of len(): the default name is the integer 0,
        # which has no length, so printing an entry that was never unpacked
        # used to raise TypeError.
        if self.name:
            name_display = ' "%s"' % self.name
        return '%#8.8x %#2.2x (%-20s) %#2.2x %#4.4x %16.16x%s' % (self.name_offset, self.type.value, self.type, self.sect_idx, self.desc, self.value, name_display)
1593
1594
class Interactive(cmd.Cmd):
    '''Interactive command interpreter to mach-o files.'''

    def __init__(self, mach, options):
        '''mach    -- parsed mach-o object the commands operate on; its
                      path is shown in the prompt.
        options -- passed through to every dump method.
        '''
        cmd.Cmd.__init__(self)
        self.mach = mach
        self.options = options
        self.intro = 'Interactive mach-o command interpreter'
        self.prompt = 'mach-o: %s %% ' % mach.path

    def default(self, line):
        '''Catch all for unknown command, which will exit the interpreter.'''
        print("uknown command: %s" % line)
        return True

    def do_q(self, line):
        '''Quit command'''
        return True

    # "quit" behaves exactly like "q", including its help text.
    do_quit = do_q

    def do_header(self, line):
        '''Dump mach-o file headers'''
        self.mach.dump_header(True, self.options)
        return False

    def do_load(self, line):
        '''Dump all mach-o load commands'''
        self.mach.dump_load_commands(True, self.options)
        return False

    def do_sections(self, line):
        '''Dump all mach-o sections'''
        self.mach.dump_sections(True, self.options)
        return False

    def do_symtab(self, line):
        '''Dump all mach-o symbols in the symbol table'''
        self.mach.dump_symtab(True, self.options)
        return False
1637
if __name__ == '__main__':
    # Command line driver: parse the options, then compare two mach files
    # or dump (or interactively explore) each file given.
    parser = optparse.OptionParser(description='A script that parses skinny and universal mach-o files.')
    parser.add_option('--arch', '-a', type='string', metavar='arch', dest='archs', action='append', help='specify one or more architectures by name')
    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', help='display verbose debug info', default=False)
    parser.add_option('-H', '--header', action='store_true', dest='dump_header', help='dump the mach-o file header', default=False)
    parser.add_option('-l', '--load-commands', action='store_true', dest='dump_load_commands', help='dump the mach-o load commands', default=False)
    parser.add_option('-s', '--symtab', action='store_true', dest='dump_symtab', help='dump the mach-o symbol table', default=False)
    parser.add_option('-S', '--sections', action='store_true', dest='dump_sections', help='dump the mach-o sections', default=False)
    parser.add_option('--section', type='string', metavar='sectname', dest='section_names', action='append', help='Specify one or more section names to dump', default=[])
    parser.add_option('-o', '--out', type='string', dest='outfile', help="Used in conjunction with the --section=NAME option to save a single section's data to disk.", default=False)
    parser.add_option('-i', '--interactive', action='store_true', dest='interactive', help='enable interactive mode', default=False)
    parser.add_option('-m', '--mangled', action='store_true', dest='find_mangled', help='dump all mangled names in a mach file', default=False)
    parser.add_option('-c', '--compare', action='store_true', dest='compare', help='compare two mach files', default=False)
    parser.add_option('-M', '--extract-modules', action='store_true', dest='extract_modules', help='Extract modules from file', default=False)
    parser.add_option('-C', '--count', type='int', dest='max_count', help='Sets the max byte count when dumping section data', default=-1)

    (options, mach_files) = parser.parse_args()
    if options.extract_modules:
        # Module extraction always reads the __apple_ast section and needs
        # a file to write to.  sys.exit() is used because the bare exit()
        # helper is only provided by the site module.
        if options.section_names:
            print("error: can't use --section option with the --extract-modules option")
            sys.exit(1)
        if not options.outfile:
            # The option is spelled --out (see add_option above).
            print("error: the --out=FILE option must be specified with the --extract-modules option")
            sys.exit(1)
        options.section_names.append("__apple_ast")
    if options.compare:
        if len(mach_files) == 2:
            mach_a = Mach()
            mach_b = Mach()
            mach_a.parse(mach_files[0])
            mach_b.parse(mach_files[1])
            mach_a.compare(mach_b)
        else:
            print('error: --compare takes two mach files as arguments')
    else:
        # With no dump option given, show the header and load commands.
        if not (options.dump_header or options.dump_load_commands or options.dump_symtab or options.dump_sections or options.find_mangled or options.section_names):
            options.dump_header = True
            options.dump_load_commands = True
        if options.verbose:
            print('options %s' % (options,))
            print('mach_files %s' % (mach_files,))
        for path in mach_files:
            mach = Mach()
            mach.parse(path)
            if options.interactive:
                interpreter = Mach.Interactive(mach, options)
                interpreter.cmdloop()
            else:
                mach.dump(options)
1687