blob: 7e158e415d0a66656ea5d51165af27057a71e857 [file] [log] [blame]
Carl Shapiro1fb86202011-06-27 17:43:13 -07001// Copyright 2011 Google Inc. All Rights Reserved.
2
3#ifndef ART_SRC_RAW_DEX_FILE_H_
4#define ART_SRC_RAW_DEX_FILE_H_
5
6#include "src/globals.h"
7#include "src/leb128.h"
8#include "src/logging.h"
Carl Shapiro80d4dde2011-06-28 16:24:07 -07009#include "src/scoped_ptr.h"
Carl Shapiro1fb86202011-06-27 17:43:13 -070010#include "src/strutil.h"
11
12#include <map>
13
14namespace art {
15
16class RawDexFile {
17 public:
18 static const byte kDexMagic[];
19 static const byte kDexMagicVersion[];
20 static const size_t kSha1DigestSize = 20;
21
22 // Raw header_item.
23 struct Header {
Carl Shapiro80d4dde2011-06-28 16:24:07 -070024 uint8_t magic_[8];
25 uint32_t checksum_;
26 uint8_t signature_[kSha1DigestSize];
Carl Shapiro1fb86202011-06-27 17:43:13 -070027 uint32_t file_size_; // length of entire file
28 uint32_t header_size_; // offset to start of next section
29 uint32_t endian_tag_;
30 uint32_t link_size_;
31 uint32_t link_off_;
32 uint32_t map_off_;
33 uint32_t string_ids_size_;
34 uint32_t string_ids_off_;
35 uint32_t type_ids_size_;
36 uint32_t type_ids_off_;
37 uint32_t proto_ids_size_;
38 uint32_t proto_ids_off_;
39 uint32_t field_ids_size_;
40 uint32_t field_ids_off_;
41 uint32_t method_ids_size_;
42 uint32_t method_ids_off_;
43 uint32_t class_defs_size_;
44 uint32_t class_defs_off_;
45 uint32_t data_size_;
46 uint32_t data_off_;
47 };
48
49 // Raw string_id_item.
50 struct StringId {
51 uint32_t string_data_off_; // offset in bytes from the base address
52 };
53
54 // Raw type_id_item.
55 struct TypeId {
56 uint32_t descriptor_idx_; // index into string_ids
57 };
58
59 // Raw field_id_item.
60 struct FieldId {
Carl Shapiro3ee755d2011-06-28 12:11:04 -070061 uint16_t class_idx_; // index into typeIds list for defining class
62 uint16_t type_idx_; // index into typeIds for field type
Carl Shapiro1fb86202011-06-27 17:43:13 -070063 uint32_t name_idx_; // index into stringIds for field name
64 };
65
66 // Raw method_id_item.
67 struct MethodId {
Carl Shapiro3ee755d2011-06-28 12:11:04 -070068 uint16_t class_idx_; // index into typeIds list for defining class
69 uint16_t proto_idx_; // index into protoIds for method prototype
Carl Shapiro1fb86202011-06-27 17:43:13 -070070 uint32_t name_idx_; // index into stringIds for method name
71 };
72
73 // Raw proto_id_item.
74 struct ProtoId {
75 uint32_t shorty_idx_; // index into string_ids for shorty descriptor
76 uint32_t return_type_idx_; // index into type_ids list for return type
77 uint32_t parameters_off_; // file offset to type_list for parameter types
78 };
79
80 // Raw class_def_item.
81 struct ClassDef {
82 uint32_t class_idx_; // index into typeIds for this class
83 uint32_t access_flags_;
84 uint32_t superclass_idx_; // index into typeIds for superclass
85 uint32_t interfaces_off_; // file offset to TypeList
86 uint32_t source_file_idx_; // index into stringIds for source file name
87 uint32_t annotations_off_; // file offset to annotations_directory_item
88 uint32_t class_data_off_; // file offset to class_data_item
89 uint32_t static_values_off_; // file offset to EncodedArray
90 };
91
92 // Raw type_item.
93 struct TypeItem {
94 uint16_t type_idx_; // index into type_ids section
95 };
96
97 // Raw type_list.
98 class TypeList {
99 public:
100 uint32_t Size() const {
101 return size_;
102 }
103
104 const TypeItem& GetTypeItem(uint32_t idx) const {
105 CHECK_LT(idx, this->size_);
106 return this->list_[idx];
107 }
108
109 private:
110 uint32_t size_; // size of the list, in entries
111 TypeItem list_[1]; // elements of the list
112 };
113
114 // Raw code_item.
115 struct Code {
116 uint16_t registers_size_;
117 uint16_t ins_size_;
118 uint16_t outs_size_;
119 uint16_t tries_size_;
120 uint32_t debug_info_off_; // file offset to debug info stream
121 uint32_t insns_size_; // size of the insns array, in 2 byte code units
122 uint16_t insns_[1];
123 };
124
125 // Partially decoded form of class_data_item.
126 struct ClassDataHeader {
127 uint32_t static_fields_size_; // the number of static fields
128 uint32_t instance_fields_size_; // the number of instance fields
129 uint32_t direct_methods_size_; // the number of direct methods
130 uint32_t virtual_methods_size_; // the number of virtual methods
131 };
132
133 // Decoded form of encoded_field.
134 struct Field {
135 uint32_t field_idx_; // index into the field_ids list for the identity of this field
136 uint32_t access_flags_; // access flags for the field
137 };
138
139 // Decoded form of encoded_method.
140 struct Method {
141 uint32_t method_idx_;
142 uint32_t access_flags_;
143 uint32_t code_off_;
144 };
145
Carl Shapiro80d4dde2011-06-28 16:24:07 -0700146 // Helper class to deallocate underlying storage.
147 class Closer {
148 public:
149 virtual ~Closer();
150 };
151
152 // Opens a .dex file from the file system.
153 static RawDexFile* OpenFile(const char* filename);
154
155 // Opens a .dex file from a base64 encoded array.
Carl Shapiroa506cb02011-06-28 22:53:46 -0700156 // TODO: move this into the RawDexFile unit test
Carl Shapiro80d4dde2011-06-28 16:24:07 -0700157 static RawDexFile* OpenBase64(const char* base64);
158
159 // Opens a .dex file at a the given address.
Carl Shapiroa506cb02011-06-28 22:53:46 -0700160 static RawDexFile* Open(const byte* dex_file, size_t length, Closer* closer);
Carl Shapiro1fb86202011-06-27 17:43:13 -0700161
162 // Closes a .dex file.
Carl Shapiro80d4dde2011-06-28 16:24:07 -0700163 virtual ~RawDexFile();
Carl Shapiro1fb86202011-06-27 17:43:13 -0700164
Carl Shapiro3ee755d2011-06-28 12:11:04 -0700165 const Header& GetHeader() {
166 CHECK(header_ != NULL);
167 return *header_;
168 }
169
Carl Shapiro1fb86202011-06-27 17:43:13 -0700170 // Looks up a class definition by its class descriptor.
171 const ClassDef* FindClassDef(const char* descriptor);
172
173 // Returns the number of string identifiers in the .dex file.
174 size_t NumStringIds() const {
175 CHECK(header_ != NULL);
176 return header_->string_ids_size_;
177 }
178
179 // Returns the number of type identifiers in the .dex file.
180 size_t NumTypeIds() const {
181 CHECK(header_ != NULL);
182 return header_->type_ids_size_;
183 }
184
185 // Returns the number of prototype identifiers in the .dex file.
186 size_t NumProtoIds() const {
187 CHECK(header_ != NULL);
188 return header_->proto_ids_size_;
189 }
190
191 // Returns the number of field identifiers in the .dex file.
192 size_t NumFieldIds() const {
193 CHECK(header_ != NULL);
194 return header_->field_ids_size_;
195 }
196
197 // Returns the number of method identifiers in the .dex file.
198 size_t NumMethodIds() const {
199 CHECK(header_ != NULL);
200 return header_->method_ids_size_;
201 }
202
203 // Returns the number of class definitions in the .dex file.
204 size_t NumClassDefs() const {
205 CHECK(header_ != NULL);
206 return header_->class_defs_size_;
207 }
208
209 // Returns a pointer to the memory mapped class data.
210 const byte* GetClassData(const ClassDef& class_def) const {
211 if (class_def.class_data_off_ == 0) {
212 LG << "class_def.class_data_off_ == 0";
213 return NULL;
214 }
215 return base_ + class_def.class_data_off_;
216 }
217
218 // Decodes the header section from the raw class data bytes.
Carl Shapiro3ee755d2011-06-28 12:11:04 -0700219 ClassDataHeader ReadClassDataHeader(const byte** class_data) {
220 ClassDataHeader header;
221 header.static_fields_size_ = DecodeUnsignedLeb128(class_data);
222 header.instance_fields_size_ = DecodeUnsignedLeb128(class_data);
223 header.direct_methods_size_ = DecodeUnsignedLeb128(class_data);
224 header.virtual_methods_size_ = DecodeUnsignedLeb128(class_data);
225 return header;
Carl Shapiro1fb86202011-06-27 17:43:13 -0700226 }
227
228 // Returns the class descriptor string of a class definition.
229 const char* GetClassDescriptor(const ClassDef& class_def) const {
230 return dexStringByTypeIdx(class_def.class_idx_);
231 }
232
233 // Returns the StringId at the specified index.
234 const StringId& GetStringId(uint32_t idx) const {
235 CHECK_LT(idx, NumStringIds());
236 return string_ids_[idx];
237 }
238
239 // Returns the TypeId at the specified index.
240 const TypeId& GetTypeId(uint32_t idx) const {
241 CHECK_LT(idx, NumTypeIds());
242 return type_ids_[idx];
243 }
244
245 // Returns the FieldId at the specified index.
246 const FieldId& GetFieldId(uint32_t idx) const {
247 CHECK_LT(idx, NumFieldIds());
248 return field_ids_[idx];
249 }
250
251 // Returns the MethodId at the specified index.
252 const MethodId& GetMethodId(uint32_t idx) const {
253 CHECK_LT(idx, NumMethodIds());
254 return method_ids_[idx];
255 }
256
257 // Returns the ProtoId at the specified index.
258 const ProtoId& GetProtoId(uint32_t idx) const {
259 CHECK_LT(idx, NumProtoIds());
260 return proto_ids_[idx];
261 }
262
263 // Returns the ClassDef at the specified index.
264 const ClassDef& GetClassDef(uint32_t idx) const {
265 CHECK_LT(idx, NumClassDefs());
266 return class_defs_[idx];
267 }
268
269 const TypeList* GetInterfacesList(const ClassDef& class_def) {
270 if (class_def.interfaces_off_ == 0) {
271 return NULL;
272 } else {
273 const byte* addr = base_ + class_def.interfaces_off_;
274 return reinterpret_cast<const TypeList*>(addr);
275 }
276 }
277
Carl Shapiro3ee755d2011-06-28 12:11:04 -0700278 const Code* GetCode(const Method& method) const {
279 if (method.code_off_ == 0) {
280 return NULL; // native or abstract method
281 } else {
282 const byte* addr = base_ + method.code_off_;
283 return reinterpret_cast<const Code*>(addr);
284 }
285 }
286
287 // Returns the short form method descriptor for the given prototype.
288 const char* GetShorty(uint32_t proto_idx) {
289 const ProtoId& proto_id = GetProtoId(proto_idx);
290 return dexStringById(proto_id.shorty_idx_);
291 }
292
Carl Shapiro1fb86202011-06-27 17:43:13 -0700293 // From libdex...
294
295 // Returns a pointer to the UTF-8 string data referred to by the
296 // given string_id.
297 const char* dexGetStringData(const StringId& string_id) const {
298 const byte* ptr = base_ + string_id.string_data_off_;
299 // Skip the uleb128 length.
300 while (*(ptr++) > 0x7f) /* empty */ ;
301 return (const char*) ptr;
302 }
303
304 // return the UTF-8 encoded string with the specified string_id index
305 const char* dexStringById(uint32_t idx) const {
306 const StringId& string_id = GetStringId(idx);
307 return dexGetStringData(string_id);
308 }
309
310 // Get the descriptor string associated with a given type index.
311 const char* dexStringByTypeIdx(uint32_t idx) const {
312 const TypeId& type_id = GetTypeId(idx);
313 return dexStringById(type_id.descriptor_idx_);
314 }
315
Carl Shapiro3ee755d2011-06-28 12:11:04 -0700316 void dexReadClassDataField(const byte** encoded_field,
317 RawDexFile::Field* field,
318 uint32_t* last_idx) const {
319 uint32_t idx = *last_idx + DecodeUnsignedLeb128(encoded_field);
320 field->access_flags_ = DecodeUnsignedLeb128(encoded_field);
321 field->field_idx_ = idx;
322 *last_idx = idx;
323 }
324
325 void dexReadClassDataMethod(const byte** encoded_method,
326 RawDexFile::Method* method,
327 uint32_t* last_idx) const {
328 uint32_t idx = *last_idx + DecodeUnsignedLeb128(encoded_method);
329 method->access_flags_ = DecodeUnsignedLeb128(encoded_method);
330 method->code_off_ = DecodeUnsignedLeb128(encoded_method);
331 method->method_idx_ = idx;
332 *last_idx = idx;
333 }
334
335
336 // TODO: const reference
337 uint32_t dexGetIndexForClassDef(const ClassDef* class_def) const {
338 CHECK_GE(class_def, class_defs_);
339 CHECK_LT(class_def, class_defs_ + header_->class_defs_size_);
340 return class_def - class_defs_;
341 }
342
343 const char* dexGetSourceFile(const ClassDef& class_def) const {
344 if (class_def.source_file_idx_ == 0xffffffff) {
345 return NULL;
346 } else {
347 return dexStringById(class_def.source_file_idx_);
348 }
349 }
350
Carl Shapiro1fb86202011-06-27 17:43:13 -0700351 private:
Carl Shapiroa506cb02011-06-28 22:53:46 -0700352 RawDexFile(const byte* addr, size_t length, Closer* closer)
Carl Shapiro1fb86202011-06-27 17:43:13 -0700353 : base_(addr),
Carl Shapiroa506cb02011-06-28 22:53:46 -0700354 length_(length),
Carl Shapiro80d4dde2011-06-28 16:24:07 -0700355 closer_(closer),
Carl Shapiro1fb86202011-06-27 17:43:13 -0700356 header_(0),
357 string_ids_(0),
358 type_ids_(0),
359 field_ids_(0),
360 method_ids_(0),
361 proto_ids_(0),
362 class_defs_(0) {}
363
364 // Top-level initializer that calls other Init methods.
365 bool Init();
366
367 // Caches pointers into to the various file sections.
368 void InitMembers();
369
370 // Builds the index of descriptors to class definitions.
371 void InitIndex();
372
373 // Returns true if the byte string equals the magic value.
374 bool CheckMagic(const byte* magic);
375
376 // Returns true if the header magic is of the expected value.
377 bool IsMagicValid();
378
379 // The index of descriptors to class definitions.
380 typedef std::map<const char*, const RawDexFile::ClassDef*, CStringLt> Index;
381 Index index_;
382
383 // The base address of the memory mapping.
384 const byte* base_;
385
Carl Shapiroa506cb02011-06-28 22:53:46 -0700386 // The size of the underlying memory allocation in bytes.
387 size_t length_;
388
Carl Shapiro80d4dde2011-06-28 16:24:07 -0700389 // Helper object to free the underlying allocation.
390 scoped_ptr<Closer> closer_;
Carl Shapiro1fb86202011-06-27 17:43:13 -0700391
392 // Points to the header section.
393 const Header* header_;
394
395 // Points to the base of the string identifier list.
396 const StringId* string_ids_;
397
398 // Points to the base of the type identifier list.
399 const TypeId* type_ids_;
400
401 // Points to the base of the field identifier list.
402 const FieldId* field_ids_;
403
404 // Points to the base of the method identifier list.
405 const MethodId* method_ids_;
406
407 // Points to the base of the prototype identifier list.
408 const ProtoId* proto_ids_;
409
410 // Points to the base of the class definition list.
411 const ClassDef* class_defs_;
412};
413
414} // namespace art
415
416#endif // ART_SRC_RAW_DEX_FILE_H_