blob: a30b9a085bc15412a59310e0cfd4cc0241e494be [file] [log] [blame]
Narayan Kamath7462f022013-11-21 13:05:04 +00001/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Read-only access to Zip archives, with minimal heap allocation.
19 */
Narayan Kamath7462f022013-11-21 13:05:04 +000020
21#include <assert.h>
22#include <errno.h>
Mark Salyzyn99ef9912014-03-14 14:26:22 -070023#include <fcntl.h>
24#include <inttypes.h>
Narayan Kamath7462f022013-11-21 13:05:04 +000025#include <limits.h>
26#include <log/log.h>
Narayan Kamath7462f022013-11-21 13:05:04 +000027#include <stdlib.h>
28#include <string.h>
Narayan Kamath7462f022013-11-21 13:05:04 +000029#include <unistd.h>
Mark Salyzyn51d562d2014-05-05 14:38:05 -070030#include <utils/Compat.h>
Narayan Kamatheaf98852013-12-11 14:51:51 +000031#include <utils/FileMap.h>
Mark Salyzyn99ef9912014-03-14 14:26:22 -070032#include <zlib.h>
Narayan Kamath7462f022013-11-21 13:05:04 +000033
34#include <JNIHelp.h> // TEMP_FAILURE_RETRY may or may not be in unistd
35
Mark Salyzyn99ef9912014-03-14 14:26:22 -070036#include "ziparchive/zip_archive.h"
37
// This is for Windows. If we don't open a file in binary mode, weird
// things will happen.
40#ifndef O_BINARY
41#define O_BINARY 0
42#endif
43
44/*
45 * Zip file constants.
46 */
// End Of Central Directory (EOCD) record. All offsets below are in bytes,
// measured from the first byte of the record's signature.
static const uint32_t kEOCDSignature = 0x06054b50;
// Size of the fixed part of the EOCD record: the 4 byte signature followed by
// 18 bytes of fields. The variable-length archive comment comes right after,
// which is why this equals kEOCDComment.
static const uint32_t kEOCDLen = 22;
static const uint32_t kEOCDNumEntries = 8;   // number of entries in the archive
static const uint32_t kEOCDSize = 12;        // size of the central directory
static const uint32_t kEOCDFileOffset = 16;  // offset to central directory
static const uint32_t kEOCDCommentLen = 20;  // length of the EOCD comment
static const uint32_t kEOCDComment = 22;     // offset of the EOCD comment
Narayan Kamath7462f022013-11-21 13:05:04 +000054
55static const uint32_t kMaxCommentLen = 65535; // longest possible in ushort
56static const uint32_t kMaxEOCDSearch = (kMaxCommentLen + kEOCDLen);
57
// Local File Header (LFH). Offsets are relative to the start of the header.
static const uint32_t kLFHSignature = 0x04034b50;
static const uint32_t kLFHLen = 30;        // fixed part only, no name / extra
static const uint32_t kLFHGPBFlags = 6;    // general purpose bit flags
static const uint32_t kLFHCRC = 14;        // crc32
static const uint32_t kLFHCompLen = 18;    // compressed length
static const uint32_t kLFHUncompLen = 22;  // uncompressed length
static const uint32_t kLFHNameLen = 26;    // filename length
static const uint32_t kLFHExtraLen = 28;   // extra field length

// Central Directory Entry (CDE). Offsets are relative to the start of the
// entry.
static const uint32_t kCDESignature = 0x02014b50;
static const uint32_t kCDELen = 46;          // fixed part only, no name / extra / comment
static const uint32_t kCDEMethod = 10;       // compression method
static const uint32_t kCDEModWhen = 12;      // modification timestamp
static const uint32_t kCDECRC = 16;          // entry crc32
static const uint32_t kCDECompLen = 20;      // compressed length
static const uint32_t kCDEUncompLen = 24;    // uncompressed length
static const uint32_t kCDENameLen = 28;      // filename length
static const uint32_t kCDEExtraLen = 30;     // extra field length
static const uint32_t kCDECommentLen = 32;   // comment length
static const uint32_t kCDELocalOffset = 42;  // offset of the local file header

// Data Descriptor (DD). The leading signature is *OPTIONAL*; the field
// offsets below are relative to the first byte after it (or to the start of
// the descriptor when there is no signature).
static const uint32_t kDDOptSignature = 0x08074b50;
static const uint32_t kDDSignatureLen = 4;
static const uint32_t kDDLen = 12;       // size without the signature
static const uint32_t kDDMaxLen = 16;    // size with the signature
static const uint32_t kDDCrc32 = 0;      // crc32
static const uint32_t kDDCompLen = 4;    // compressed length
static const uint32_t kDDUncompLen = 8;  // uncompressed length

// General purpose bit flag that is set when an entry has a data descriptor.
static const uint32_t kGPBDDFlagMask = 0x0008;

static const uint32_t kMaxErrorLen = 1024;
90
// Human readable descriptions of the error codes defined in this file. The
// message for error code |c| (zero or negative) is stored at index |-c|.
static const char* kErrorMessages[] = {
  "Unknown return code.",          //   0
  "Iteration ended",               //  -1, kIterationEnd
  "Zlib error",                    //  -2, kZlibError
  "Invalid file",                  //  -3, kInvalidFile
  "Invalid handle",                //  -4, kInvalidHandle
  "Duplicate entries in archive",  //  -5, kDuplicateEntry
  "Empty archive",                 //  -6, kEmptyArchive
  "Entry not found",               //  -7, kEntryNotFound
  "Invalid offset",                //  -8, kInvalidOffset
  "Inconsistent information",      //  -9, kInconsistentInformation
  "Invalid entry name",            // -10, kInvalidEntryName
  "I/O Error",                     // -11, kIoError
  "File mapping failed"            // -12, kMmapFailed
};
106
// Error codes are zero or negative. Valid codes lie strictly between
// kErrorMessageLowerBound and kErrorMessageUpperBound.
static const int32_t kErrorMessageUpperBound = 0;

static const int32_t kIterationEnd = -1;

// Zlib reported an error while inflating a stream from this file. This
// usually means the file is corrupt.
static const int32_t kZlibError = -2;

// The input cannot be handled as a zip archive, typically because it is too
// small, too large or lacks a valid signature.
static const int32_t kInvalidFile = -3;

// An invalid iteration or ziparchive handle was supplied as an argument.
static const int32_t kInvalidHandle = -4;

// Two (or more) entries in the zip archive share the same name.
static const int32_t kDuplicateEntry = -5;

// The zip archive has no entries.
static const int32_t kEmptyArchive = -6;

// The requested entry does not exist in the archive.
static const int32_t kEntryNotFound = -7;

// The zip archive contains an invalid local file header pointer.
static const int32_t kInvalidOffset = -8;

// The zip archive contains inconsistent entry information: either the
// central directory and the local file header disagree, or the actual
// uncompressed length or crc32 differ from their declared values.
static const int32_t kInconsistentInformation = -9;

// An invalid entry name was encountered.
static const int32_t kInvalidEntryName = -10;

// An I/O related system call (read, lseek, ftruncate, map) failed.
static const int32_t kIoError = -11;

// The central directory or the entry contents could not be mmapped.
static const int32_t kMmapFailed = -12;

static const int32_t kErrorMessageLowerBound = -13;

static const char kTempMappingFileName[] = "zip: ExtractFileToFile";
Narayan Kamath7462f022013-11-21 13:05:04 +0000154
155/*
156 * A Read-only Zip archive.
157 *
158 * We want "open" and "find entry by name" to be fast operations, and
159 * we want to use as little memory as possible. We memory-map the zip
160 * central directory, and load a hash table with pointers to the filenames
161 * (which aren't null-terminated). The other fields are at a fixed offset
162 * from the filename, so we don't need to extract those (but we do need
163 * to byte-read and endian-swap them every time we want them).
164 *
165 * It's possible that somebody has handed us a massive (~1GB) zip archive,
166 * so we can't expect to mmap the entire file.
167 *
168 * To speed comparisons when doing a lookup by name, we could make the mapping
169 * "private" (copy-on-write) and null-terminate the filenames after verifying
170 * the record structure. However, this requires a private mapping of
171 * every page that the Central Directory touches. Easier to tuck a copy
172 * of the string length into the hash table entry.
173 */
174struct ZipArchive {
175 /* open Zip archive */
176 int fd;
177
178 /* mapped central directory area */
179 off64_t directory_offset;
Narayan Kamatheaf98852013-12-11 14:51:51 +0000180 android::FileMap* directory_map;
Narayan Kamath7462f022013-11-21 13:05:04 +0000181
182 /* number of entries in the Zip archive */
183 uint16_t num_entries;
184
185 /*
186 * We know how many entries are in the Zip archive, so we can have a
187 * fixed-size hash table. We define a load factor of 0.75 and overallocat
188 * so the maximum number entries can never be higher than
189 * ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t.
190 */
191 uint32_t hash_table_size;
192 ZipEntryName* hash_table;
193};
194
195// Returns 0 on success and negative values on failure.
Narayan Kamatheaf98852013-12-11 14:51:51 +0000196static android::FileMap* MapFileSegment(const int fd, const off64_t start,
197 const size_t length, const bool read_only,
198 const char* debug_file_name) {
199 android::FileMap* file_map = new android::FileMap;
200 const bool success = file_map->create(debug_file_name, fd, start, length, read_only);
201 if (!success) {
202 file_map->release();
203 return NULL;
Narayan Kamath7462f022013-11-21 13:05:04 +0000204 }
205
Narayan Kamatheaf98852013-12-11 14:51:51 +0000206 return file_map;
Narayan Kamath7462f022013-11-21 13:05:04 +0000207}
208
209static int32_t CopyFileToFile(int fd, uint8_t* begin, const uint32_t length, uint64_t *crc_out) {
210 static const uint32_t kBufSize = 32768;
211 uint8_t buf[kBufSize];
212
213 uint32_t count = 0;
214 uint64_t crc = 0;
Narayan Kamath58aaf462013-12-10 16:47:14 +0000215 while (count < length) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000216 uint32_t remaining = length - count;
217
218 // Safe conversion because kBufSize is narrow enough for a 32 bit signed
219 // value.
220 ssize_t get_size = (remaining > kBufSize) ? kBufSize : remaining;
221 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, buf, get_size));
222
223 if (actual != get_size) {
Mark Salyzyn51d562d2014-05-05 14:38:05 -0700224 ALOGW("CopyFileToFile: copy read failed (" ZD " vs " ZD ")", actual, get_size);
Narayan Kamath7462f022013-11-21 13:05:04 +0000225 return kIoError;
226 }
227
228 memcpy(begin + count, buf, get_size);
229 crc = crc32(crc, buf, get_size);
230 count += get_size;
231 }
232
233 *crc_out = crc;
234
235 return 0;
236}
237
/*
 * Rounds |val| up to the nearest power of 2 (powers of 2 are returned
 * unchanged).
 *
 * Based on http://graphics.stanford.edu/~seander/bithacks.html.
 */
static uint32_t RoundUpPower2(uint32_t val) {
  // Copy the highest set bit of (val - 1) into every lower bit position, then
  // add one to carry into the next power of 2.
  uint32_t smeared = val - 1;
  for (unsigned shift = 1; shift <= 16; shift <<= 1) {
    smeared |= smeared >> shift;
  }

  return smeared + 1;
}
254
/*
 * Hashes the |len| bytes at |str| (which need not be null-terminated) by
 * multiplying the running hash by 31 and adding each character in turn.
 */
static uint32_t ComputeHash(const char* str, uint16_t len) {
  uint32_t hash = 0;

  for (uint16_t i = 0; i < len; ++i) {
    hash = hash * 31 + str[i];
  }

  return hash;
}
264
265/*
266 * Convert a ZipEntry to a hash table index, verifying that it's in a
267 * valid range.
268 */
269static int64_t EntryToIndex(const ZipEntryName* hash_table,
270 const uint32_t hash_table_size,
271 const char* name, uint16_t length) {
272 const uint32_t hash = ComputeHash(name, length);
273
274 // NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
275 uint32_t ent = hash & (hash_table_size - 1);
276 while (hash_table[ent].name != NULL) {
277 if (hash_table[ent].name_length == length &&
278 memcmp(hash_table[ent].name, name, length) == 0) {
279 return ent;
280 }
281
282 ent = (ent + 1) & (hash_table_size - 1);
283 }
284
Colin Crossf4b0b792014-02-06 20:07:15 -0800285 ALOGV("Zip: Unable to find entry %.*s", length, name);
Narayan Kamath7462f022013-11-21 13:05:04 +0000286 return kEntryNotFound;
287}
288
289/*
290 * Add a new entry to the hash table.
291 */
292static int32_t AddToHash(ZipEntryName *hash_table, const uint64_t hash_table_size,
293 const char* name, uint16_t length) {
294 const uint64_t hash = ComputeHash(name, length);
295 uint32_t ent = hash & (hash_table_size - 1);
296
297 /*
298 * We over-allocated the table, so we're guaranteed to find an empty slot.
299 * Further, we guarantee that the hashtable size is not 0.
300 */
301 while (hash_table[ent].name != NULL) {
302 if (hash_table[ent].name_length == length &&
303 memcmp(hash_table[ent].name, name, length) == 0) {
304 // We've found a duplicate entry. We don't accept it
305 ALOGW("Zip: Found duplicate entry %.*s", length, name);
306 return kDuplicateEntry;
307 }
308 ent = (ent + 1) & (hash_table_size - 1);
309 }
310
311 hash_table[ent].name = name;
312 hash_table[ent].name_length = length;
313 return 0;
314}
315
/*
 * Decodes the 16 bit little-endian value stored at |src|.
 */
static uint16_t get2LE(const uint8_t* src) {
  const uint16_t low = src[0];
  const uint16_t high = src[1];
  return (high << 8) | low;
}
322
/*
 * Decodes the 32 bit little-endian value stored at |src|.
 *
 * Every byte is widened to uint32_t before it is shifted. Without the cast
 * the byte would be promoted to a signed int, and shifting a value >= 0x80
 * left by 24 bits would overflow into the sign bit.
 */
static uint32_t get4LE(const uint8_t* src) {
  return ((uint32_t) src[0]) |
         ((uint32_t) src[1] << 8) |
         ((uint32_t) src[2] << 16) |
         ((uint32_t) src[3] << 24);
}
336
337static int32_t MapCentralDirectory0(int fd, const char* debug_file_name,
338 ZipArchive* archive, off64_t file_length,
339 uint32_t read_amount, uint8_t* scan_buffer) {
340 const off64_t search_start = file_length - read_amount;
341
342 if (lseek64(fd, search_start, SEEK_SET) != search_start) {
Mark Salyzyn96c5c992014-05-08 19:16:40 -0700343 ALOGW("Zip: seek %" PRId64 " failed: %s", (int64_t)search_start, strerror(errno));
Narayan Kamath7462f022013-11-21 13:05:04 +0000344 return kIoError;
345 }
346 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, scan_buffer, read_amount));
347 if (actual != (ssize_t) read_amount) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700348 ALOGW("Zip: read %" PRIu32 " failed: %s", read_amount, strerror(errno));
Narayan Kamath7462f022013-11-21 13:05:04 +0000349 return kIoError;
350 }
351
352 /*
353 * Scan backward for the EOCD magic. In an archive without a trailing
354 * comment, we'll find it on the first try. (We may want to consider
355 * doing an initial minimal read; if we don't find it, retry with a
356 * second read as above.)
357 */
358 int i;
359 for (i = read_amount - kEOCDLen; i >= 0; i--) {
360 if (scan_buffer[i] == 0x50 && get4LE(&scan_buffer[i]) == kEOCDSignature) {
361 ALOGV("+++ Found EOCD at buf+%d", i);
362 break;
363 }
364 }
365 if (i < 0) {
366 ALOGD("Zip: EOCD not found, %s is not zip", debug_file_name);
367 return kInvalidFile;
368 }
369
370 const off64_t eocd_offset = search_start + i;
371 const uint8_t* eocd_ptr = scan_buffer + i;
372
373 assert(eocd_offset < file_length);
374
375 /*
376 * Grab the CD offset and size, and the number of entries in the
377 * archive. Verify that they look reasonable. Widen dir_size and
378 * dir_offset to the file offset type.
379 */
380 const uint16_t num_entries = get2LE(eocd_ptr + kEOCDNumEntries);
381 const off64_t dir_size = get4LE(eocd_ptr + kEOCDSize);
382 const off64_t dir_offset = get4LE(eocd_ptr + kEOCDFileOffset);
Narayan Kamath4f6b4992014-06-03 13:59:23 +0100383 const uint16_t comment_length = get2LE(eocd_ptr + kEOCDCommentLen);
384
385 if (eocd_offset + comment_length + kEOCDCommentOffset != file_length) {
386 ALOGW("Zip: %" PRId64 " extraneous bytes at the end of the central directory",
387 (int64_t) (file_length - (eocd_offset + comment_length + kEOCDCommentOffset)));
388 return kInvalidFile;
389 }
Narayan Kamath7462f022013-11-21 13:05:04 +0000390
391 if (dir_offset + dir_size > eocd_offset) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700392 ALOGW("Zip: bad offsets (dir %" PRId64 ", size %" PRId64 ", eocd %" PRId64 ")",
Mark Salyzyn96c5c992014-05-08 19:16:40 -0700393 (int64_t)dir_offset, (int64_t)dir_size, (int64_t)eocd_offset);
Narayan Kamath7462f022013-11-21 13:05:04 +0000394 return kInvalidOffset;
395 }
396 if (num_entries == 0) {
397 ALOGW("Zip: empty archive?");
398 return kEmptyArchive;
399 }
400
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700401 ALOGV("+++ num_entries=%d dir_size=%" PRId64 " dir_offset=%" PRId64,
Mark Salyzyn96c5c992014-05-08 19:16:40 -0700402 num_entries, (int64_t)dir_size, (int64_t)dir_offset);
Narayan Kamath7462f022013-11-21 13:05:04 +0000403
404 /*
405 * It all looks good. Create a mapping for the CD, and set the fields
406 * in archive.
407 */
Narayan Kamatheaf98852013-12-11 14:51:51 +0000408 android::FileMap* map = MapFileSegment(fd, dir_offset, dir_size,
409 true /* read only */, debug_file_name);
410 if (map == NULL) {
411 archive->directory_map = NULL;
412 return kMmapFailed;
Narayan Kamath7462f022013-11-21 13:05:04 +0000413 }
414
Narayan Kamatheaf98852013-12-11 14:51:51 +0000415 archive->directory_map = map;
Narayan Kamath7462f022013-11-21 13:05:04 +0000416 archive->num_entries = num_entries;
417 archive->directory_offset = dir_offset;
418
419 return 0;
420}
421
422/*
423 * Find the zip Central Directory and memory-map it.
424 *
425 * On success, returns 0 after populating fields from the EOCD area:
426 * directory_offset
427 * directory_map
428 * num_entries
429 */
430static int32_t MapCentralDirectory(int fd, const char* debug_file_name,
431 ZipArchive* archive) {
432
433 // Test file length. We use lseek64 to make sure the file
434 // is small enough to be a zip file (Its size must be less than
435 // 0xffffffff bytes).
436 off64_t file_length = lseek64(fd, 0, SEEK_END);
437 if (file_length == -1) {
438 ALOGV("Zip: lseek on fd %d failed", fd);
439 return kInvalidFile;
440 }
441
442 if (file_length > (off64_t) 0xffffffff) {
Mark Salyzyn56a90a02014-05-08 17:20:55 -0700443 ALOGV("Zip: zip file too long %" PRId64, (int64_t)file_length);
Narayan Kamath7462f022013-11-21 13:05:04 +0000444 return kInvalidFile;
445 }
446
447 if (file_length < (int64_t) kEOCDLen) {
Mark Salyzyn56a90a02014-05-08 17:20:55 -0700448 ALOGV("Zip: length %" PRId64 " is too small to be zip", (int64_t)file_length);
Narayan Kamath7462f022013-11-21 13:05:04 +0000449 return kInvalidFile;
450 }
451
452 /*
453 * Perform the traditional EOCD snipe hunt.
454 *
455 * We're searching for the End of Central Directory magic number,
456 * which appears at the start of the EOCD block. It's followed by
457 * 18 bytes of EOCD stuff and up to 64KB of archive comment. We
458 * need to read the last part of the file into a buffer, dig through
459 * it to find the magic number, parse some values out, and use those
460 * to determine the extent of the CD.
461 *
462 * We start by pulling in the last part of the file.
463 */
464 uint32_t read_amount = kMaxEOCDSearch;
465 if (file_length < (off64_t) read_amount) {
466 read_amount = file_length;
467 }
468
469 uint8_t* scan_buffer = (uint8_t*) malloc(read_amount);
470 int32_t result = MapCentralDirectory0(fd, debug_file_name, archive,
471 file_length, read_amount, scan_buffer);
472
473 free(scan_buffer);
474 return result;
475}
476
477/*
478 * Parses the Zip archive's Central Directory. Allocates and populates the
479 * hash table.
480 *
481 * Returns 0 on success.
482 */
483static int32_t ParseZipArchive(ZipArchive* archive) {
484 int32_t result = -1;
Narayan Kamatheaf98852013-12-11 14:51:51 +0000485 const uint8_t* cd_ptr = (const uint8_t*) archive->directory_map->getDataPtr();
486 size_t cd_length = archive->directory_map->getDataLength();
Narayan Kamath7462f022013-11-21 13:05:04 +0000487 uint16_t num_entries = archive->num_entries;
488
489 /*
490 * Create hash table. We have a minimum 75% load factor, possibly as
491 * low as 50% after we round off to a power of 2. There must be at
492 * least one unused entry to avoid an infinite loop during creation.
493 */
494 archive->hash_table_size = RoundUpPower2(1 + (num_entries * 4) / 3);
495 archive->hash_table = (ZipEntryName*) calloc(archive->hash_table_size,
496 sizeof(ZipEntryName));
497
498 /*
499 * Walk through the central directory, adding entries to the hash
500 * table and verifying values.
501 */
502 const uint8_t* ptr = cd_ptr;
503 for (uint16_t i = 0; i < num_entries; i++) {
504 if (get4LE(ptr) != kCDESignature) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700505 ALOGW("Zip: missed a central dir sig (at %" PRIu16 ")", i);
Narayan Kamath7462f022013-11-21 13:05:04 +0000506 goto bail;
507 }
508
509 if (ptr + kCDELen > cd_ptr + cd_length) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700510 ALOGW("Zip: ran off the end (at %" PRIu16 ")", i);
Narayan Kamath7462f022013-11-21 13:05:04 +0000511 goto bail;
512 }
513
514 const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset);
515 if (local_header_offset >= archive->directory_offset) {
Mark Salyzyn56a90a02014-05-08 17:20:55 -0700516 ALOGW("Zip: bad LFH offset %" PRId64 " at entry %" PRIu16, (int64_t)local_header_offset, i);
Narayan Kamath7462f022013-11-21 13:05:04 +0000517 goto bail;
518 }
519
520 const uint16_t file_name_length = get2LE(ptr + kCDENameLen);
521 const uint16_t extra_length = get2LE(ptr + kCDEExtraLen);
522 const uint16_t comment_length = get2LE(ptr + kCDECommentLen);
523
524 /* add the CDE filename to the hash table */
525 const int add_result = AddToHash(archive->hash_table,
526 archive->hash_table_size, (const char*) ptr + kCDELen, file_name_length);
527 if (add_result) {
528 ALOGW("Zip: Error adding entry to hash table %d", add_result);
529 result = add_result;
530 goto bail;
531 }
532
533 ptr += kCDELen + file_name_length + extra_length + comment_length;
534 if ((size_t)(ptr - cd_ptr) > cd_length) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700535 ALOGW("Zip: bad CD advance (%tu vs %zu) at entry %" PRIu16,
536 ptr - cd_ptr, cd_length, i);
Narayan Kamath7462f022013-11-21 13:05:04 +0000537 goto bail;
538 }
539 }
Mark Salyzyn088bf902014-05-08 16:02:20 -0700540 ALOGV("+++ zip good scan %" PRIu16 " entries", num_entries);
Narayan Kamath7462f022013-11-21 13:05:04 +0000541
542 result = 0;
543
544bail:
545 return result;
546}
547
548static int32_t OpenArchiveInternal(ZipArchive* archive,
549 const char* debug_file_name) {
550 int32_t result = -1;
551 if ((result = MapCentralDirectory(archive->fd, debug_file_name, archive))) {
552 return result;
553 }
554
555 if ((result = ParseZipArchive(archive))) {
556 return result;
557 }
558
559 return 0;
560}
561
562int32_t OpenArchiveFd(int fd, const char* debug_file_name,
563 ZipArchiveHandle* handle) {
564 ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive));
565 memset(archive, 0, sizeof(*archive));
566 *handle = archive;
567
568 archive->fd = fd;
569
570 return OpenArchiveInternal(archive, debug_file_name);
571}
572
573int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle) {
574 ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive));
575 memset(archive, 0, sizeof(*archive));
576 *handle = archive;
577
578 const int fd = open(fileName, O_RDONLY | O_BINARY, 0);
579 if (fd < 0) {
580 ALOGW("Unable to open '%s': %s", fileName, strerror(errno));
581 return kIoError;
582 } else {
583 archive->fd = fd;
584 }
585
586 return OpenArchiveInternal(archive, fileName);
587}
588
589/*
590 * Close a ZipArchive, closing the file and freeing the contents.
591 */
592void CloseArchive(ZipArchiveHandle handle) {
593 ZipArchive* archive = (ZipArchive*) handle;
594 ALOGV("Closing archive %p", archive);
595
596 if (archive->fd >= 0) {
597 close(archive->fd);
598 }
599
Narayan Kamatheaf98852013-12-11 14:51:51 +0000600 if (archive->directory_map != NULL) {
601 archive->directory_map->release();
602 }
Narayan Kamath7462f022013-11-21 13:05:04 +0000603 free(archive->hash_table);
Mathieu Chartier5f98b122014-03-04 17:39:38 -0800604 free(archive);
Narayan Kamath7462f022013-11-21 13:05:04 +0000605}
606
607static int32_t UpdateEntryFromDataDescriptor(int fd,
608 ZipEntry *entry) {
609 uint8_t ddBuf[kDDMaxLen];
610 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, ddBuf, sizeof(ddBuf)));
611 if (actual != sizeof(ddBuf)) {
612 return kIoError;
613 }
614
615 const uint32_t ddSignature = get4LE(ddBuf);
616 uint16_t ddOffset = 0;
617 if (ddSignature == kDDOptSignature) {
618 ddOffset = 4;
619 }
620
621 entry->crc32 = get4LE(ddBuf + ddOffset + kDDCrc32);
622 entry->compressed_length = get4LE(ddBuf + ddOffset + kDDCompLen);
623 entry->uncompressed_length = get4LE(ddBuf + ddOffset + kDDUncompLen);
624
625 return 0;
626}
627
628// Attempts to read |len| bytes into |buf| at offset |off|.
629//
630// This method uses pread64 on platforms that support it and
631// lseek64 + read on platforms that don't. This implies that
632// callers should not rely on the |fd| offset being incremented
633// as a side effect of this call.
634static inline ssize_t ReadAtOffset(int fd, uint8_t* buf, size_t len,
635 off64_t off) {
636#ifdef HAVE_PREAD
637 return TEMP_FAILURE_RETRY(pread64(fd, buf, len, off));
638#else
639 // The only supported platform that doesn't support pread at the moment
640 // is Windows. Only recent versions of windows support unix like forks,
641 // and even there the semantics are quite different.
642 if (lseek64(fd, off, SEEK_SET) != off) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700643 ALOGW("Zip: failed seek to offset %" PRId64, off);
Narayan Kamath7462f022013-11-21 13:05:04 +0000644 return kIoError;
645 }
646
647 return TEMP_FAILURE_RETRY(read(fd, buf, len));
648#endif // HAVE_PREAD
649}
650
651static int32_t FindEntry(const ZipArchive* archive, const int ent,
652 ZipEntry* data) {
653 const uint16_t nameLen = archive->hash_table[ent].name_length;
654 const char* name = archive->hash_table[ent].name;
655
656 // Recover the start of the central directory entry from the filename
657 // pointer. The filename is the first entry past the fixed-size data,
658 // so we can just subtract back from that.
659 const unsigned char* ptr = (const unsigned char*) name;
660 ptr -= kCDELen;
661
662 // This is the base of our mmapped region, we have to sanity check that
663 // the name that's in the hash table is a pointer to a location within
664 // this mapped region.
665 const unsigned char* base_ptr = (const unsigned char*)
Narayan Kamatheaf98852013-12-11 14:51:51 +0000666 archive->directory_map->getDataPtr();
667 if (ptr < base_ptr || ptr > base_ptr + archive->directory_map->getDataLength()) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000668 ALOGW("Zip: Invalid entry pointer");
669 return kInvalidOffset;
670 }
671
672 // The offset of the start of the central directory in the zipfile.
673 // We keep this lying around so that we can sanity check all our lengths
674 // and our per-file structures.
675 const off64_t cd_offset = archive->directory_offset;
676
677 // Fill out the compression method, modification time, crc32
678 // and other interesting attributes from the central directory. These
679 // will later be compared against values from the local file header.
680 data->method = get2LE(ptr + kCDEMethod);
681 data->mod_time = get4LE(ptr + kCDEModWhen);
682 data->crc32 = get4LE(ptr + kCDECRC);
683 data->compressed_length = get4LE(ptr + kCDECompLen);
684 data->uncompressed_length = get4LE(ptr + kCDEUncompLen);
685
686 // Figure out the local header offset from the central directory. The
687 // actual file data will begin after the local header and the name /
688 // extra comments.
689 const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset);
690 if (local_header_offset + (off64_t) kLFHLen >= cd_offset) {
691 ALOGW("Zip: bad local hdr offset in zip");
692 return kInvalidOffset;
693 }
694
695 uint8_t lfh_buf[kLFHLen];
696 ssize_t actual = ReadAtOffset(archive->fd, lfh_buf, sizeof(lfh_buf),
697 local_header_offset);
698 if (actual != sizeof(lfh_buf)) {
Mark Salyzyn56a90a02014-05-08 17:20:55 -0700699 ALOGW("Zip: failed reading lfh name from offset %" PRId64, (int64_t)local_header_offset);
Narayan Kamath7462f022013-11-21 13:05:04 +0000700 return kIoError;
701 }
702
703 if (get4LE(lfh_buf) != kLFHSignature) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700704 ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64,
Mark Salyzyn56a90a02014-05-08 17:20:55 -0700705 (int64_t)local_header_offset);
Narayan Kamath7462f022013-11-21 13:05:04 +0000706 return kInvalidOffset;
707 }
708
709 // Paranoia: Match the values specified in the local file header
710 // to those specified in the central directory.
711 const uint16_t lfhGpbFlags = get2LE(lfh_buf + kLFHGPBFlags);
712 const uint16_t lfhNameLen = get2LE(lfh_buf + kLFHNameLen);
713 const uint16_t lfhExtraLen = get2LE(lfh_buf + kLFHExtraLen);
714
715 if ((lfhGpbFlags & kGPBDDFlagMask) == 0) {
716 const uint32_t lfhCrc = get4LE(lfh_buf + kLFHCRC);
717 const uint32_t lfhCompLen = get4LE(lfh_buf + kLFHCompLen);
718 const uint32_t lfhUncompLen = get4LE(lfh_buf + kLFHUncompLen);
719
720 data->has_data_descriptor = 0;
721 if (data->compressed_length != lfhCompLen || data->uncompressed_length != lfhUncompLen
722 || data->crc32 != lfhCrc) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700723 ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu32 ", %" PRIu32
724 ", %" PRIx32 "}, was {%" PRIu32 ", %" PRIu32 ", %" PRIx32 "}",
Narayan Kamath7462f022013-11-21 13:05:04 +0000725 data->compressed_length, data->uncompressed_length, data->crc32,
726 lfhCompLen, lfhUncompLen, lfhCrc);
727 return kInconsistentInformation;
728 }
729 } else {
730 data->has_data_descriptor = 1;
731 }
732
733 // Check that the local file header name matches the declared
734 // name in the central directory.
735 if (lfhNameLen == nameLen) {
736 const off64_t name_offset = local_header_offset + kLFHLen;
737 if (name_offset + lfhNameLen >= cd_offset) {
738 ALOGW("Zip: Invalid declared length");
739 return kInvalidOffset;
740 }
741
742 uint8_t* name_buf = (uint8_t*) malloc(nameLen);
743 ssize_t actual = ReadAtOffset(archive->fd, name_buf, nameLen,
744 name_offset);
745
746 if (actual != nameLen) {
Mark Salyzyn56a90a02014-05-08 17:20:55 -0700747 ALOGW("Zip: failed reading lfh name from offset %" PRId64, (int64_t)name_offset);
Narayan Kamath7462f022013-11-21 13:05:04 +0000748 free(name_buf);
749 return kIoError;
750 }
751
752 if (memcmp(name, name_buf, nameLen)) {
753 free(name_buf);
754 return kInconsistentInformation;
755 }
756
757 free(name_buf);
758 } else {
759 ALOGW("Zip: lfh name did not match central directory.");
760 return kInconsistentInformation;
761 }
762
763 const off64_t data_offset = local_header_offset + kLFHLen + lfhNameLen + lfhExtraLen;
Narayan Kamath48953a12014-01-24 12:32:39 +0000764 if (data_offset > cd_offset) {
Mark Salyzyn56a90a02014-05-08 17:20:55 -0700765 ALOGW("Zip: bad data offset %" PRId64 " in zip", (int64_t)data_offset);
Narayan Kamath7462f022013-11-21 13:05:04 +0000766 return kInvalidOffset;
767 }
768
769 if ((off64_t)(data_offset + data->compressed_length) > cd_offset) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700770 ALOGW("Zip: bad compressed length in zip (%" PRId64 " + %" PRIu32 " > %" PRId64 ")",
Mark Salyzyn56a90a02014-05-08 17:20:55 -0700771 (int64_t)data_offset, data->compressed_length, (int64_t)cd_offset);
Narayan Kamath7462f022013-11-21 13:05:04 +0000772 return kInvalidOffset;
773 }
774
775 if (data->method == kCompressStored &&
776 (off64_t)(data_offset + data->uncompressed_length) > cd_offset) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700777 ALOGW("Zip: bad uncompressed length in zip (%" PRId64 " + %" PRIu32 " > %" PRId64 ")",
Mark Salyzyn96c5c992014-05-08 19:16:40 -0700778 (int64_t)data_offset, data->uncompressed_length, (int64_t)cd_offset);
Narayan Kamath7462f022013-11-21 13:05:04 +0000779 return kInvalidOffset;
780 }
781
782 data->offset = data_offset;
783 return 0;
784}
785
786struct IterationHandle {
787 uint32_t position;
788 const char* prefix;
789 uint16_t prefix_len;
790 ZipArchive* archive;
791};
792
793int32_t StartIteration(ZipArchiveHandle handle, void** cookie_ptr, const char* prefix) {
794 ZipArchive* archive = (ZipArchive *) handle;
795
796 if (archive == NULL || archive->hash_table == NULL) {
797 ALOGW("Zip: Invalid ZipArchiveHandle");
798 return kInvalidHandle;
799 }
800
801 IterationHandle* cookie = (IterationHandle*) malloc(sizeof(IterationHandle));
802 cookie->position = 0;
803 cookie->prefix = prefix;
804 cookie->archive = archive;
805 if (prefix != NULL) {
806 cookie->prefix_len = strlen(prefix);
807 }
808
809 *cookie_ptr = cookie ;
810 return 0;
811}
812
813int32_t FindEntry(const ZipArchiveHandle handle, const char* entryName,
814 ZipEntry* data) {
815 const ZipArchive* archive = (ZipArchive*) handle;
816 const int nameLen = strlen(entryName);
817 if (nameLen == 0 || nameLen > 65535) {
818 ALOGW("Zip: Invalid filename %s", entryName);
819 return kInvalidEntryName;
820 }
821
822 const int64_t ent = EntryToIndex(archive->hash_table,
823 archive->hash_table_size, entryName, nameLen);
824
825 if (ent < 0) {
Narayan Kamatha1ff8012013-12-31 10:27:59 +0000826 ALOGV("Zip: Could not find entry %.*s", nameLen, entryName);
Narayan Kamath7462f022013-11-21 13:05:04 +0000827 return ent;
828 }
829
830 return FindEntry(archive, ent, data);
831}
832
833int32_t Next(void* cookie, ZipEntry* data, ZipEntryName* name) {
834 IterationHandle* handle = (IterationHandle *) cookie;
835 if (handle == NULL) {
836 return kInvalidHandle;
837 }
838
839 ZipArchive* archive = handle->archive;
840 if (archive == NULL || archive->hash_table == NULL) {
841 ALOGW("Zip: Invalid ZipArchiveHandle");
842 return kInvalidHandle;
843 }
844
845 const uint32_t currentOffset = handle->position;
846 const uint32_t hash_table_length = archive->hash_table_size;
847 const ZipEntryName *hash_table = archive->hash_table;
848
849 for (uint32_t i = currentOffset; i < hash_table_length; ++i) {
850 if (hash_table[i].name != NULL &&
851 (handle->prefix == NULL ||
852 (memcmp(handle->prefix, hash_table[i].name, handle->prefix_len) == 0))) {
853 handle->position = (i + 1);
854 const int error = FindEntry(archive, i, data);
855 if (!error) {
856 name->name = hash_table[i].name;
857 name->name_length = hash_table[i].name_length;
858 }
859
860 return error;
861 }
862 }
863
864 handle->position = 0;
865 return kIterationEnd;
866}
867
868static int32_t InflateToFile(int fd, const ZipEntry* entry,
869 uint8_t* begin, uint32_t length,
870 uint64_t* crc_out) {
871 int32_t result = -1;
872 const uint32_t kBufSize = 32768;
873 uint8_t read_buf[kBufSize];
874 uint8_t write_buf[kBufSize];
875 z_stream zstream;
876 int zerr;
877
878 /*
879 * Initialize the zlib stream struct.
880 */
881 memset(&zstream, 0, sizeof(zstream));
882 zstream.zalloc = Z_NULL;
883 zstream.zfree = Z_NULL;
884 zstream.opaque = Z_NULL;
885 zstream.next_in = NULL;
886 zstream.avail_in = 0;
887 zstream.next_out = (Bytef*) write_buf;
888 zstream.avail_out = kBufSize;
889 zstream.data_type = Z_UNKNOWN;
890
891 /*
892 * Use the undocumented "negative window bits" feature to tell zlib
893 * that there's no zlib header waiting for it.
894 */
895 zerr = inflateInit2(&zstream, -MAX_WBITS);
896 if (zerr != Z_OK) {
897 if (zerr == Z_VERSION_ERROR) {
898 ALOGE("Installed zlib is not compatible with linked version (%s)",
899 ZLIB_VERSION);
900 } else {
901 ALOGW("Call to inflateInit2 failed (zerr=%d)", zerr);
902 }
903
904 return kZlibError;
905 }
906
907 const uint32_t uncompressed_length = entry->uncompressed_length;
908
909 uint32_t compressed_length = entry->compressed_length;
910 uint32_t write_count = 0;
911 do {
912 /* read as much as we can */
913 if (zstream.avail_in == 0) {
Mark Salyzyn51d562d2014-05-05 14:38:05 -0700914 const ZD_TYPE getSize = (compressed_length > kBufSize) ? kBufSize : compressed_length;
915 const ZD_TYPE actual = TEMP_FAILURE_RETRY(read(fd, read_buf, getSize));
Narayan Kamath7462f022013-11-21 13:05:04 +0000916 if (actual != getSize) {
Mark Salyzyn51d562d2014-05-05 14:38:05 -0700917 ALOGW("Zip: inflate read failed (" ZD " vs " ZD ")", actual, getSize);
Narayan Kamath7462f022013-11-21 13:05:04 +0000918 result = kIoError;
919 goto z_bail;
920 }
921
922 compressed_length -= getSize;
923
924 zstream.next_in = read_buf;
925 zstream.avail_in = getSize;
926 }
927
928 /* uncompress the data */
929 zerr = inflate(&zstream, Z_NO_FLUSH);
930 if (zerr != Z_OK && zerr != Z_STREAM_END) {
931 ALOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)",
932 zerr, zstream.next_in, zstream.avail_in,
933 zstream.next_out, zstream.avail_out);
934 result = kZlibError;
935 goto z_bail;
936 }
937
938 /* write when we're full or when we're done */
939 if (zstream.avail_out == 0 ||
940 (zerr == Z_STREAM_END && zstream.avail_out != kBufSize)) {
941 const size_t write_size = zstream.next_out - write_buf;
942 // The file might have declared a bogus length.
943 if (write_size + write_count > length) {
944 goto z_bail;
945 }
946 memcpy(begin + write_count, write_buf, write_size);
947 write_count += write_size;
948
949 zstream.next_out = write_buf;
950 zstream.avail_out = kBufSize;
951 }
952 } while (zerr == Z_OK);
953
954 assert(zerr == Z_STREAM_END); /* other errors should've been caught */
955
956 // stream.adler holds the crc32 value for such streams.
957 *crc_out = zstream.adler;
958
959 if (zstream.total_out != uncompressed_length || compressed_length != 0) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700960 ALOGW("Zip: size mismatch on inflated file (%lu vs %" PRIu32 ")",
Narayan Kamath7462f022013-11-21 13:05:04 +0000961 zstream.total_out, uncompressed_length);
962 result = kInconsistentInformation;
963 goto z_bail;
964 }
965
966 result = 0;
967
968z_bail:
969 inflateEnd(&zstream); /* free up any allocated structures */
970
971 return result;
972}
973
974int32_t ExtractToMemory(ZipArchiveHandle handle,
975 ZipEntry* entry, uint8_t* begin, uint32_t size) {
976 ZipArchive* archive = (ZipArchive*) handle;
977 const uint16_t method = entry->method;
978 off64_t data_offset = entry->offset;
979
980 if (lseek64(archive->fd, data_offset, SEEK_SET) != data_offset) {
Mark Salyzyn56a90a02014-05-08 17:20:55 -0700981 ALOGW("Zip: lseek to data at %" PRId64 " failed", (int64_t)data_offset);
Narayan Kamath7462f022013-11-21 13:05:04 +0000982 return kIoError;
983 }
984
985 // this should default to kUnknownCompressionMethod.
986 int32_t return_value = -1;
987 uint64_t crc = 0;
988 if (method == kCompressStored) {
989 return_value = CopyFileToFile(archive->fd, begin, size, &crc);
990 } else if (method == kCompressDeflated) {
991 return_value = InflateToFile(archive->fd, entry, begin, size, &crc);
992 }
993
994 if (!return_value && entry->has_data_descriptor) {
995 return_value = UpdateEntryFromDataDescriptor(archive->fd, entry);
996 if (return_value) {
997 return return_value;
998 }
999 }
1000
1001 // TODO: Fix this check by passing the right flags to inflate2 so that
1002 // it calculates the CRC for us.
1003 if (entry->crc32 != crc && false) {
Mark Salyzyn088bf902014-05-08 16:02:20 -07001004 ALOGW("Zip: crc mismatch: expected %" PRIu32 ", was %" PRIu64, entry->crc32, crc);
Narayan Kamath7462f022013-11-21 13:05:04 +00001005 return kInconsistentInformation;
1006 }
1007
1008 return return_value;
1009}
1010
1011int32_t ExtractEntryToFile(ZipArchiveHandle handle,
1012 ZipEntry* entry, int fd) {
1013 const int32_t declared_length = entry->uncompressed_length;
1014
Narayan Kamath00a258c2013-12-13 16:06:19 +00001015 const off64_t current_offset = lseek64(fd, 0, SEEK_CUR);
1016 if (current_offset == -1) {
1017 ALOGW("Zip: unable to seek to current location on fd %d: %s", fd,
1018 strerror(errno));
Narayan Kamath7462f022013-11-21 13:05:04 +00001019 return kIoError;
1020 }
1021
Narayan Kamath00a258c2013-12-13 16:06:19 +00001022 int result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length + current_offset));
1023 if (result == -1) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -07001024 ALOGW("Zip: unable to truncate file to %" PRId64 ": %s",
Mark Salyzyn56a90a02014-05-08 17:20:55 -07001025 (int64_t)(declared_length + current_offset), strerror(errno));
Narayan Kamath00a258c2013-12-13 16:06:19 +00001026 return kIoError;
1027 }
1028
Narayan Kamath48953a12014-01-24 12:32:39 +00001029 // Don't attempt to map a region of length 0. We still need the
1030 // ftruncate() though, since the API guarantees that we will truncate
1031 // the file to the end of the uncompressed output.
1032 if (declared_length == 0) {
1033 return 0;
1034 }
1035
Narayan Kamath00a258c2013-12-13 16:06:19 +00001036 android::FileMap* map = MapFileSegment(fd, current_offset, declared_length,
Narayan Kamatheaf98852013-12-11 14:51:51 +00001037 false, kTempMappingFileName);
1038 if (map == NULL) {
1039 return kMmapFailed;
Narayan Kamath7462f022013-11-21 13:05:04 +00001040 }
1041
Narayan Kamatheaf98852013-12-11 14:51:51 +00001042 const int32_t error = ExtractToMemory(handle, entry,
1043 reinterpret_cast<uint8_t*>(map->getDataPtr()),
1044 map->getDataLength());
1045 map->release();
Narayan Kamath7462f022013-11-21 13:05:04 +00001046 return error;
1047}
1048
1049const char* ErrorCodeString(int32_t error_code) {
1050 if (error_code > kErrorMessageLowerBound && error_code < kErrorMessageUpperBound) {
1051 return kErrorMessages[error_code * -1];
1052 }
1053
1054 return kErrorMessages[0];
1055}
1056
1057int GetFileDescriptor(const ZipArchiveHandle handle) {
1058 return ((ZipArchive*) handle)->fd;
1059}
1060