// Copyright 2011 Google Inc. All Rights Reserved.
2
3#ifndef ART_SRC_DEX_FILE_H_
4#define ART_SRC_DEX_FILE_H_
5
Brian Carlstrom7e49dca2011-07-22 18:07:34 -07006#include <map>
7
Brian Carlstrom578bbdc2011-07-21 14:07:47 -07008#include "globals.h"
Brian Carlstrom7e49dca2011-07-22 18:07:34 -07009#include "leb128.h"
10#include "logging.h"
11#include "scoped_ptr.h"
12#include "stringpiece.h"
13#include "strutil.h"
Carl Shapiro1fb86202011-06-27 17:43:13 -070014
15namespace art {
16
Carl Shapiro5fafe2b2011-07-09 15:34:41 -070017union JValue;
Carl Shapiro0e5d75d2011-07-06 18:28:37 -070018
// TODO: move all of the macro functionality into the DexCache class.
Brian Carlstromf615a612011-07-23 12:50:34 -070020class DexFile {
Carl Shapiro1fb86202011-06-27 17:43:13 -070021 public:
Brian Carlstrom7e49dca2011-07-22 18:07:34 -070022 static const byte kDexMagic[];
23 static const byte kDexMagicVersion[];
24 static const size_t kSha1DigestSize = 20;
Carl Shapiro80d4dde2011-06-28 16:24:07 -070025
Brian Carlstrom7e49dca2011-07-22 18:07:34 -070026 static const byte kEncodedValueTypeMask = 0x1f; // 0b11111
27 static const byte kEncodedValueArgShift = 5;
28
29 // The value of an invalid index.
30 static const uint32_t kDexNoIndex = 0xFFFFFFFF;
31
32 enum ValueType {
33 kByte = 0x00,
34 kShort = 0x02,
35 kChar = 0x03,
36 kInt = 0x04,
37 kLong = 0x06,
38 kFloat = 0x10,
39 kDouble = 0x11,
40 kString = 0x17,
41 kType = 0x18,
42 kField = 0x19,
43 kMethod = 0x1a,
44 kEnum = 0x1b,
45 kArray = 0x1c,
46 kAnnotation = 0x1d,
47 kNull = 0x1e,
48 kBoolean = 0x1f
Brian Carlstrom578bbdc2011-07-21 14:07:47 -070049 };
Carl Shapiro1fb86202011-06-27 17:43:13 -070050
Brian Carlstrom7e49dca2011-07-22 18:07:34 -070051 // Raw header_item.
52 struct Header {
53 uint8_t magic_[8];
54 uint32_t checksum_;
55 uint8_t signature_[kSha1DigestSize];
56 uint32_t file_size_; // length of entire file
57 uint32_t header_size_; // offset to start of next section
58 uint32_t endian_tag_;
59 uint32_t link_size_;
60 uint32_t link_off_;
61 uint32_t map_off_;
62 uint32_t string_ids_size_;
63 uint32_t string_ids_off_;
64 uint32_t type_ids_size_;
65 uint32_t type_ids_off_;
66 uint32_t proto_ids_size_;
67 uint32_t proto_ids_off_;
68 uint32_t field_ids_size_;
69 uint32_t field_ids_off_;
70 uint32_t method_ids_size_;
71 uint32_t method_ids_off_;
72 uint32_t class_defs_size_;
73 uint32_t class_defs_off_;
74 uint32_t data_size_;
75 uint32_t data_off_;
76 };
Carl Shapiro1fb86202011-06-27 17:43:13 -070077
Brian Carlstrom7e49dca2011-07-22 18:07:34 -070078 // Raw string_id_item.
79 struct StringId {
80 uint32_t string_data_off_; // offset in bytes from the base address
81 };
82
83 // Raw type_id_item.
84 struct TypeId {
85 uint32_t descriptor_idx_; // index into string_ids
86 };
87
88 // Raw field_id_item.
89 struct FieldId {
Brian Carlstrom4a96b602011-07-26 16:40:23 -070090 uint16_t class_idx_; // index into type_ids_ list for defining class
91 uint16_t type_idx_; // index into type_ids_ for field type
92 uint32_t name_idx_; // index into string_ids_ for field name
Brian Carlstrom7e49dca2011-07-22 18:07:34 -070093 };
94
95 // Raw method_id_item.
96 struct MethodId {
Brian Carlstrom4a96b602011-07-26 16:40:23 -070097 uint16_t class_idx_; // index into type_ids_ list for defining class
98 uint16_t proto_idx_; // index into proto_ids_ for method prototype
99 uint32_t name_idx_; // index into string_ids_ for method name
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700100 };
101
102 // Raw proto_id_item.
103 struct ProtoId {
104 uint32_t shorty_idx_; // index into string_ids for shorty descriptor
105 uint32_t return_type_idx_; // index into type_ids list for return type
106 uint32_t parameters_off_; // file offset to type_list for parameter types
107 };
108
109 // Raw class_def_item.
110 struct ClassDef {
Brian Carlstrom4a96b602011-07-26 16:40:23 -0700111 uint32_t class_idx_; // index into type_ids_ for this class
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700112 uint32_t access_flags_;
Brian Carlstrom4a96b602011-07-26 16:40:23 -0700113 uint32_t superclass_idx_; // index into type_ids_ for superclass
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700114 uint32_t interfaces_off_; // file offset to TypeList
Brian Carlstrom4a96b602011-07-26 16:40:23 -0700115 uint32_t source_file_idx_; // index into string_ids_ for source file name
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700116 uint32_t annotations_off_; // file offset to annotations_directory_item
117 uint32_t class_data_off_; // file offset to class_data_item
118 uint32_t static_values_off_; // file offset to EncodedArray
119 };
120
121 // Raw type_item.
122 struct TypeItem {
123 uint16_t type_idx_; // index into type_ids section
124 };
125
126 // Raw type_list.
127 class TypeList {
128 public:
129 uint32_t Size() const {
130 return size_;
131 }
132
133 const TypeItem& GetTypeItem(uint32_t idx) const {
134 CHECK_LT(idx, this->size_);
135 return this->list_[idx];
136 }
137
138 private:
139 uint32_t size_; // size of the list, in entries
140 TypeItem list_[1]; // elements of the list
141 };
142
143 class ParameterIterator { // TODO: stream
144 public:
Brian Carlstromf615a612011-07-23 12:50:34 -0700145 ParameterIterator(const DexFile& dex_file, const ProtoId& proto_id)
146 : dex_file_(dex_file), size_(0), pos_(0) {
147 type_list_ = dex_file_.GetProtoParameters(proto_id);
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700148 if (type_list_ != NULL) {
149 size_ = type_list_->Size();
150 }
151 }
152 bool HasNext() const { return pos_ != size_; }
153 void Next() { ++pos_; }
154 const char* GetDescriptor() {
155 uint32_t type_idx = type_list_->GetTypeItem(pos_).type_idx_;
Brian Carlstromf615a612011-07-23 12:50:34 -0700156 return dex_file_.dexStringByTypeIdx(type_idx);
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700157 }
158 private:
Brian Carlstromf615a612011-07-23 12:50:34 -0700159 const DexFile& dex_file_;
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700160 const TypeList* type_list_;
161 uint32_t size_;
162 uint32_t pos_;
163 DISALLOW_IMPLICIT_CONSTRUCTORS(ParameterIterator);
164 };
165
166 ParameterIterator* GetParameterIterator(const ProtoId& proto_id) const {
167 return new ParameterIterator(*this, proto_id);
Brian Carlstrom578bbdc2011-07-21 14:07:47 -0700168 }
169
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700170 const char* GetReturnTypeDescriptor(const ProtoId& proto_id) const {
171 return dexStringByTypeIdx(proto_id.return_type_idx_);
Carl Shapiro1fb86202011-06-27 17:43:13 -0700172 }
173
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700174 // Raw code_item.
175 struct CodeItem {
176 uint16_t registers_size_;
177 uint16_t ins_size_;
178 uint16_t outs_size_;
179 uint16_t tries_size_;
180 uint32_t debug_info_off_; // file offset to debug info stream
181 uint32_t insns_size_; // size of the insns array, in 2 byte code units
182 uint16_t insns_[1];
183 };
184
185 // Partially decoded form of class_data_item.
186 struct ClassDataHeader {
187 uint32_t static_fields_size_; // the number of static fields
188 uint32_t instance_fields_size_; // the number of instance fields
189 uint32_t direct_methods_size_; // the number of direct methods
190 uint32_t virtual_methods_size_; // the number of virtual methods
191 };
192
193 // Decoded form of encoded_field.
194 struct Field {
195 uint32_t field_idx_; // index into the field_ids list for the identity of this field
196 uint32_t access_flags_; // access flags for the field
197 };
198
199 // Decoded form of encoded_method.
200 struct Method {
201 uint32_t method_idx_;
202 uint32_t access_flags_;
203 uint32_t code_off_;
204 };
205
206 // Opens a .dex file from the file system.
Brian Carlstromb0460ea2011-07-29 10:08:05 -0700207 static DexFile* OpenFile(const std::string& filename);
208
209 // Opens a .jar, .zip, or .apk file from the file system.
210 static DexFile* OpenZip(const std::string& filename);
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700211
212 // Opens a .dex file from a new allocated pointer
Brian Carlstromf615a612011-07-23 12:50:34 -0700213 static DexFile* OpenPtr(byte* ptr, size_t length);
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700214
215 // Closes a .dex file.
Brian Carlstromf615a612011-07-23 12:50:34 -0700216 virtual ~DexFile();
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700217
218 const Header& GetHeader() {
219 CHECK(header_ != NULL);
220 return *header_;
Carl Shapiro1fb86202011-06-27 17:43:13 -0700221 }
222
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700223 // Looks up a class definition by its class descriptor.
224 const ClassDef* FindClassDef(const StringPiece& descriptor) const;
225
226 // Returns the number of string identifiers in the .dex file.
227 size_t NumStringIds() const {
228 CHECK(header_ != NULL);
229 return header_->string_ids_size_;
Carl Shapiro0e5d75d2011-07-06 18:28:37 -0700230 }
231
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700232 // Returns the number of type identifiers in the .dex file.
233 size_t NumTypeIds() const {
234 CHECK(header_ != NULL);
235 return header_->type_ids_size_;
Carl Shapiro5fafe2b2011-07-09 15:34:41 -0700236 }
237
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700238 // Returns the number of prototype identifiers in the .dex file.
239 size_t NumProtoIds() const {
240 CHECK(header_ != NULL);
241 return header_->proto_ids_size_;
Carl Shapiro5fafe2b2011-07-09 15:34:41 -0700242 }
243
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700244 // Returns the number of field identifiers in the .dex file.
245 size_t NumFieldIds() const {
246 CHECK(header_ != NULL);
247 return header_->field_ids_size_;
Carl Shapiro0e5d75d2011-07-06 18:28:37 -0700248 }
249
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700250 // Returns the number of method identifiers in the .dex file.
251 size_t NumMethodIds() const {
252 CHECK(header_ != NULL);
253 return header_->method_ids_size_;
Brian Carlstrom578bbdc2011-07-21 14:07:47 -0700254 }
255
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700256 // Returns the number of class definitions in the .dex file.
257 size_t NumClassDefs() const {
258 CHECK(header_ != NULL);
259 return header_->class_defs_size_;
Brian Carlstrom578bbdc2011-07-21 14:07:47 -0700260 }
261
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700262 // Returns a pointer to the memory mapped class data.
263 // TODO: return a stream
264 const byte* GetClassData(const ClassDef& class_def) const {
265 if (class_def.class_data_off_ == 0) {
266 return NULL;
267 } else {
268 return base_ + class_def.class_data_off_;
269 }
Brian Carlstrom578bbdc2011-07-21 14:07:47 -0700270 }
271
Brian Carlstromf615a612011-07-23 12:50:34 -0700272 // Decodes the header section from the class data bytes.
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700273 ClassDataHeader ReadClassDataHeader(const byte** class_data) const {
274 CHECK(class_data != NULL);
275 ClassDataHeader header;
276 memset(&header, 0, sizeof(ClassDataHeader));
277 if (*class_data != NULL) {
278 header.static_fields_size_ = DecodeUnsignedLeb128(class_data);
279 header.instance_fields_size_ = DecodeUnsignedLeb128(class_data);
280 header.direct_methods_size_ = DecodeUnsignedLeb128(class_data);
281 header.virtual_methods_size_ = DecodeUnsignedLeb128(class_data);
282 }
283 return header;
Brian Carlstrom578bbdc2011-07-21 14:07:47 -0700284 }
285
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700286 // Returns the class descriptor string of a class definition.
287 const char* GetClassDescriptor(const ClassDef& class_def) const {
288 return dexStringByTypeIdx(class_def.class_idx_);
289 }
290
291 // Returns the StringId at the specified index.
292 const StringId& GetStringId(uint32_t idx) const {
293 CHECK_LT(idx, NumStringIds());
294 return string_ids_[idx];
295 }
296
297 // Returns the TypeId at the specified index.
298 const TypeId& GetTypeId(uint32_t idx) const {
299 CHECK_LT(idx, NumTypeIds());
300 return type_ids_[idx];
301 }
302
303 // Returns the FieldId at the specified index.
304 const FieldId& GetFieldId(uint32_t idx) const {
305 CHECK_LT(idx, NumFieldIds());
306 return field_ids_[idx];
307 }
308
309 // Returns the MethodId at the specified index.
310 const MethodId& GetMethodId(uint32_t idx) const {
311 CHECK_LT(idx, NumMethodIds());
312 return method_ids_[idx];
313 }
314
315 // Returns the ProtoId at the specified index.
316 const ProtoId& GetProtoId(uint32_t idx) const {
317 CHECK_LT(idx, NumProtoIds());
318 return proto_ids_[idx];
319 }
320
321 // Returns the ClassDef at the specified index.
322 const ClassDef& GetClassDef(uint32_t idx) const {
323 CHECK_LT(idx, NumClassDefs());
324 return class_defs_[idx];
325 }
326
327 const TypeList* GetInterfacesList(const ClassDef& class_def) const {
328 if (class_def.interfaces_off_ == 0) {
329 return NULL;
330 } else {
331 const byte* addr = base_ + class_def.interfaces_off_;
332 return reinterpret_cast<const TypeList*>(addr);
333 }
334 }
335
336 const CodeItem* GetCodeItem(const Method& method) const {
337 if (method.code_off_ == 0) {
338 return NULL; // native or abstract method
339 } else {
340 const byte* addr = base_ + method.code_off_;
341 return reinterpret_cast<const CodeItem*>(addr);
342 }
343 }
344
345 // Returns the short form method descriptor for the given prototype.
346 const char* GetShorty(uint32_t proto_idx) const {
347 const ProtoId& proto_id = GetProtoId(proto_idx);
348 return dexStringById(proto_id.shorty_idx_);
349 }
350
351 const TypeList* GetProtoParameters(const ProtoId& proto_id) const {
352 if (proto_id.parameters_off_ == 0) {
353 return NULL;
354 } else {
355 const byte* addr = base_ + proto_id.parameters_off_;
356 return reinterpret_cast<const TypeList*>(addr);
357 }
358 }
359
Carl Shapiro419ec7b2011-08-03 14:48:33 -0700360 char* CreateMethodDescriptor(uint32_t proto_idx,
361 int32_t* unicode_length) const;
362
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700363 const byte* GetEncodedArray(const ClassDef& class_def) const {
364 if (class_def.static_values_off_ == 0) {
365 return 0;
366 } else {
367 return base_ + class_def.static_values_off_;
368 }
369 }
370
371 int32_t GetStringLength(const StringId& string_id) const {
372 const byte* ptr = base_ + string_id.string_data_off_;
373 return DecodeUnsignedLeb128(&ptr);
374 }
375
376 ValueType ReadEncodedValue(const byte** encoded_value, JValue* value) const;
377
378 // From libdex...
379
380 // Returns a pointer to the UTF-8 string data referred to by the
381 // given string_id.
Carl Shapiro419ec7b2011-08-03 14:48:33 -0700382 const char* GetStringData(const StringId& string_id, int32_t* length) const {
383 CHECK(length != NULL);
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700384 const byte* ptr = base_ + string_id.string_data_off_;
Carl Shapiro419ec7b2011-08-03 14:48:33 -0700385 *length = DecodeUnsignedLeb128(&ptr);
Brian Carlstrom0b138b22011-07-27 15:19:17 -0700386 return reinterpret_cast<const char*>(ptr);
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700387 }
388
Carl Shapiro419ec7b2011-08-03 14:48:33 -0700389 const char* GetStringData(const StringId& string_id) const {
390 int32_t length;
391 return GetStringData(string_id, &length);
392 }
393
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700394 // return the UTF-8 encoded string with the specified string_id index
Carl Shapiro419ec7b2011-08-03 14:48:33 -0700395 const char* dexStringById(uint32_t idx, int32_t* unicode_length) const {
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700396 const StringId& string_id = GetStringId(idx);
Carl Shapiro419ec7b2011-08-03 14:48:33 -0700397 return GetStringData(string_id, unicode_length);
398 }
399
400 const char* dexStringById(uint32_t idx) const {
401 int32_t unicode_length;
402 return dexStringById(idx, &unicode_length);
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700403 }
404
405 // Get the descriptor string associated with a given type index.
Carl Shapiro419ec7b2011-08-03 14:48:33 -0700406 const char* dexStringByTypeIdx(uint32_t idx, int32_t* unicode_length) const {
407 const TypeId& type_id = GetTypeId(idx);
408 return dexStringById(type_id.descriptor_idx_, unicode_length);
409 }
410
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700411 const char* dexStringByTypeIdx(uint32_t idx) const {
412 const TypeId& type_id = GetTypeId(idx);
413 return dexStringById(type_id.descriptor_idx_);
414 }
415
416 // TODO: encoded_field is actually a stream of bytes
417 void dexReadClassDataField(const byte** encoded_field,
Brian Carlstromf615a612011-07-23 12:50:34 -0700418 DexFile::Field* field,
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700419 uint32_t* last_idx) const {
420 uint32_t idx = *last_idx + DecodeUnsignedLeb128(encoded_field);
421 field->access_flags_ = DecodeUnsignedLeb128(encoded_field);
422 field->field_idx_ = idx;
423 *last_idx = idx;
424 }
425
426 // TODO: encoded_method is actually a stream of bytes
427 void dexReadClassDataMethod(const byte** encoded_method,
Brian Carlstromf615a612011-07-23 12:50:34 -0700428 DexFile::Method* method,
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700429 uint32_t* last_idx) const {
430 uint32_t idx = *last_idx + DecodeUnsignedLeb128(encoded_method);
431 method->access_flags_ = DecodeUnsignedLeb128(encoded_method);
432 method->code_off_ = DecodeUnsignedLeb128(encoded_method);
433 method->method_idx_ = idx;
434 *last_idx = idx;
435 }
436
437
438 // TODO: const reference
439 uint32_t dexGetIndexForClassDef(const ClassDef* class_def) const {
440 CHECK_GE(class_def, class_defs_);
441 CHECK_LT(class_def, class_defs_ + header_->class_defs_size_);
442 return class_def - class_defs_;
443 }
444
445 const char* dexGetSourceFile(const ClassDef& class_def) const {
446 if (class_def.source_file_idx_ == 0xffffffff) {
447 return NULL;
448 } else {
449 return dexStringById(class_def.source_file_idx_);
450 }
Carl Shapiro0e5d75d2011-07-06 18:28:37 -0700451 }
452
Carl Shapiro1fb86202011-06-27 17:43:13 -0700453 private:
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700454 // Helper class to deallocate underlying storage.
455 class Closer {
456 public:
457 virtual ~Closer();
458 };
459
460 // Helper class to deallocate mmap-backed .dex files.
461 class MmapCloser : public Closer {
462 public:
463 MmapCloser(void* addr, size_t length);
464 virtual ~MmapCloser();
465 private:
466 void* addr_;
467 size_t length_;
468 };
469
470 // Helper class for deallocating new/delete-backed .dex files.
471 class PtrCloser : public Closer {
472 public:
473 PtrCloser(byte* addr);
474 virtual ~PtrCloser();
475 private:
476 byte* addr_;
477 };
478
479 // Opens a .dex file at a the given address.
Brian Carlstromf615a612011-07-23 12:50:34 -0700480 static DexFile* Open(const byte* dex_file, size_t length, Closer* closer);
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700481
Brian Carlstromf615a612011-07-23 12:50:34 -0700482 DexFile(const byte* addr, size_t length, Closer* closer)
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700483 : base_(addr),
484 length_(length),
485 closer_(closer),
486 header_(0),
487 string_ids_(0),
488 type_ids_(0),
489 field_ids_(0),
490 method_ids_(0),
491 proto_ids_(0),
492 class_defs_(0) {}
493
494 // Top-level initializer that calls other Init methods.
495 bool Init();
496
497 // Caches pointers into to the various file sections.
498 void InitMembers();
499
500 // Builds the index of descriptors to class definitions.
501 void InitIndex();
502
503 // Returns true if the byte string equals the magic value.
504 bool CheckMagic(const byte* magic);
505
506 // Returns true if the header magic is of the expected value.
507 bool IsMagicValid();
508
509 // The index of descriptors to class definitions.
Brian Carlstromf615a612011-07-23 12:50:34 -0700510 typedef std::map<const StringPiece, const DexFile::ClassDef*> Index;
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700511 Index index_;
512
513 // The base address of the memory mapping.
514 const byte* base_;
515
516 // The size of the underlying memory allocation in bytes.
517 size_t length_;
518
519 // Helper object to free the underlying allocation.
520 scoped_ptr<Closer> closer_;
521
522 // Points to the header section.
523 const Header* header_;
524
525 // Points to the base of the string identifier list.
526 const StringId* string_ids_;
527
528 // Points to the base of the type identifier list.
529 const TypeId* type_ids_;
530
531 // Points to the base of the field identifier list.
532 const FieldId* field_ids_;
533
534 // Points to the base of the method identifier list.
535 const MethodId* method_ids_;
536
537 // Points to the base of the prototype identifier list.
538 const ProtoId* proto_ids_;
539
540 // Points to the base of the class definition list.
541 const ClassDef* class_defs_;
Carl Shapiro1fb86202011-06-27 17:43:13 -0700542};
543
544} // namespace art
545
546#endif // ART_SRC_DEX_FILE_H_