blob: aebddc87c0f666b94b1de23642326d0bcf7adfd0 [file] [log] [blame]
Narayan Kamath7462f022013-11-21 13:05:04 +00001/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Read-only access to Zip archives, with minimal heap allocation.
19 */
Narayan Kamath7462f022013-11-21 13:05:04 +000020
21#include <assert.h>
22#include <errno.h>
Mark Salyzyn99ef9912014-03-14 14:26:22 -070023#include <fcntl.h>
24#include <inttypes.h>
Narayan Kamath7462f022013-11-21 13:05:04 +000025#include <limits.h>
26#include <log/log.h>
Narayan Kamath7462f022013-11-21 13:05:04 +000027#include <stdlib.h>
28#include <string.h>
Narayan Kamath7462f022013-11-21 13:05:04 +000029#include <unistd.h>
Narayan Kamatheaf98852013-12-11 14:51:51 +000030#include <utils/FileMap.h>
Mark Salyzyn99ef9912014-03-14 14:26:22 -070031#include <zlib.h>
Narayan Kamath7462f022013-11-21 13:05:04 +000032
33#include <JNIHelp.h> // TEMP_FAILURE_RETRY may or may not be in unistd
34
Mark Salyzyn99ef9912014-03-14 14:26:22 -070035#include "ziparchive/zip_archive.h"
36
// This is for Windows. If we don't open a file in binary mode, weird
// things will happen.
39#ifndef O_BINARY
40#define O_BINARY 0
41#endif
42
43/*
44 * Zip file constants.
45 */
// End Of Central Directory (EOCD) record.
static const uint32_t kEOCDSignature = 0x06054b50;
// Size of the fixed part of the EOCD record: the 4-byte signature followed by
// 18 bytes of fields. The scanner reads a 4-byte value at kEOCDFileOffset, so
// anything smaller than this would let it read past the end of its buffer.
static const uint32_t kEOCDLen = 22;
static const uint32_t kEOCDNumEntries = 8;   // offset to #of entries in file
static const uint32_t kEOCDSize = 12;        // offset to size of the central directory
static const uint32_t kEOCDFileOffset = 16;  // offset to central directory

static const uint32_t kMaxCommentLen = 65535;  // longest possible in ushort
// Farthest back from the end of the file that the EOCD signature can start.
static const uint32_t kMaxEOCDSearch = (kMaxCommentLen + kEOCDLen);
54
// ---- Local File Header (LFH) ----
static const uint32_t kLFHSignature = 0x04034b50;
// Length of the header, excluding variable-len fields.
static const uint32_t kLFHLen = 30;
// Byte offsets of individual fields within the header.
static const uint32_t kLFHGPBFlags = 6;    // general purpose bit flags
static const uint32_t kLFHCRC = 14;        // CRC
static const uint32_t kLFHCompLen = 18;    // compressed length
static const uint32_t kLFHUncompLen = 22;  // uncompressed length
static const uint32_t kLFHNameLen = 26;    // filename length
static const uint32_t kLFHExtraLen = 28;   // extra length

// ---- Central Directory Entry (CDE) ----
static const uint32_t kCDESignature = 0x02014b50;
// Length of the entry, excluding variable-len fields.
static const uint32_t kCDELen = 46;
// Byte offsets of individual fields within the entry.
static const uint32_t kCDEMethod = 10;       // compression method
static const uint32_t kCDEModWhen = 12;      // modification timestamp
static const uint32_t kCDECRC = 16;          // entry CRC
static const uint32_t kCDECompLen = 20;      // compressed length
static const uint32_t kCDEUncompLen = 24;    // uncompressed length
static const uint32_t kCDENameLen = 28;      // filename length
static const uint32_t kCDEExtraLen = 30;     // extra length
static const uint32_t kCDECommentLen = 32;   // comment length
static const uint32_t kCDELocalOffset = 42;  // local hdr

// ---- Data Descriptor (DD) ----
// The signature is *OPTIONAL*: a descriptor is 16 bytes with it and 12
// bytes without it.
static const uint32_t kDDOptSignature = 0x08074b50;
static const uint32_t kDDSignatureLen = 4;
static const uint32_t kDDLen = 12;
static const uint32_t kDDMaxLen = 16;
// Byte offsets of the fields, not counting the optional signature.
static const uint32_t kDDCrc32 = 0;      // crc32
static const uint32_t kDDCompLen = 4;    // compressed length
static const uint32_t kDDUncompLen = 8;  // uncompressed length

// General purpose bit flag mask that signifies that the entry has a DD.
static const uint32_t kGPBDDFlagMask = 0x0008;

static const uint32_t kMaxErrorLen = 1024;
87
// Human readable descriptions of the error codes declared below. The entry
// at position N lines up with error code -N (presumably the table is indexed
// by the negated code; the lookup routine is not visible in this part of the
// file).
static const char* kErrorMessages[] = {
  "Unknown return code.",
  "Iteration ended",
  "Zlib error",
  "Invalid file",
  "Invalid handle",
  "Duplicate entries in archive",
  "Empty archive",
  "Entry not found",
  "Invalid offset",
  "Inconsistent information",
  "Invalid entry name",
  "I/O Error",
  "File mapping failed"
};

// Error codes are negative and lie strictly between the two bounds.
static const int32_t kErrorMessageUpperBound = 0;

// Returned by Next() once every hash table slot has been visited.
static const int32_t kIterationEnd = -1;

// A Zlib error was hit while inflating a stream from this file; this
// usually indicates file corruption.
static const int32_t kZlibError = -2;

// The input cannot be processed as a zip archive, usually because it is
// too small, too large or does not have a valid signature.
static const int32_t kInvalidFile = -3;

// An invalid iteration / ziparchive handle was passed in as an input
// argument.
static const int32_t kInvalidHandle = -4;

// Two (or possibly more) entries in the archive share the same name.
static const int32_t kDuplicateEntry = -5;

// The archive contains no entries.
static const int32_t kEmptyArchive = -6;

// The specified entry was not found in the archive.
static const int32_t kEntryNotFound = -7;

// The archive contained an invalid local file header pointer.
static const int32_t kInvalidOffset = -8;

// The archive contained inconsistent entry information: either the central
// directory & local file header did not agree, or the actual uncompressed
// length or crc32 do not match their declared values.
static const int32_t kInconsistentInformation = -9;

// An invalid entry name was encountered.
static const int32_t kInvalidEntryName = -10;

// An I/O related system call (read, lseek, ftruncate, map) failed.
static const int32_t kIoError = -11;

// The central directory or entry contents could not be mmapped.
static const int32_t kMmapFailed = -12;

static const int32_t kErrorMessageLowerBound = -13;

// Debug name handed to file mappings created for extraction.
static const char kTempMappingFileName[] = "zip: ExtractFileToFile";
Narayan Kamath7462f022013-11-21 13:05:04 +0000151
152/*
153 * A Read-only Zip archive.
154 *
155 * We want "open" and "find entry by name" to be fast operations, and
156 * we want to use as little memory as possible. We memory-map the zip
157 * central directory, and load a hash table with pointers to the filenames
158 * (which aren't null-terminated). The other fields are at a fixed offset
159 * from the filename, so we don't need to extract those (but we do need
160 * to byte-read and endian-swap them every time we want them).
161 *
162 * It's possible that somebody has handed us a massive (~1GB) zip archive,
163 * so we can't expect to mmap the entire file.
164 *
165 * To speed comparisons when doing a lookup by name, we could make the mapping
166 * "private" (copy-on-write) and null-terminate the filenames after verifying
167 * the record structure. However, this requires a private mapping of
168 * every page that the Central Directory touches. Easier to tuck a copy
169 * of the string length into the hash table entry.
170 */
171struct ZipArchive {
172 /* open Zip archive */
173 int fd;
174
175 /* mapped central directory area */
176 off64_t directory_offset;
Narayan Kamatheaf98852013-12-11 14:51:51 +0000177 android::FileMap* directory_map;
Narayan Kamath7462f022013-11-21 13:05:04 +0000178
179 /* number of entries in the Zip archive */
180 uint16_t num_entries;
181
182 /*
183 * We know how many entries are in the Zip archive, so we can have a
184 * fixed-size hash table. We define a load factor of 0.75 and overallocat
185 * so the maximum number entries can never be higher than
186 * ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t.
187 */
188 uint32_t hash_table_size;
189 ZipEntryName* hash_table;
190};
191
192// Returns 0 on success and negative values on failure.
Narayan Kamatheaf98852013-12-11 14:51:51 +0000193static android::FileMap* MapFileSegment(const int fd, const off64_t start,
194 const size_t length, const bool read_only,
195 const char* debug_file_name) {
196 android::FileMap* file_map = new android::FileMap;
197 const bool success = file_map->create(debug_file_name, fd, start, length, read_only);
198 if (!success) {
199 file_map->release();
200 return NULL;
Narayan Kamath7462f022013-11-21 13:05:04 +0000201 }
202
Narayan Kamatheaf98852013-12-11 14:51:51 +0000203 return file_map;
Narayan Kamath7462f022013-11-21 13:05:04 +0000204}
205
206static int32_t CopyFileToFile(int fd, uint8_t* begin, const uint32_t length, uint64_t *crc_out) {
207 static const uint32_t kBufSize = 32768;
208 uint8_t buf[kBufSize];
209
210 uint32_t count = 0;
211 uint64_t crc = 0;
Narayan Kamath58aaf462013-12-10 16:47:14 +0000212 while (count < length) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000213 uint32_t remaining = length - count;
214
215 // Safe conversion because kBufSize is narrow enough for a 32 bit signed
216 // value.
217 ssize_t get_size = (remaining > kBufSize) ? kBufSize : remaining;
218 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, buf, get_size));
219
220 if (actual != get_size) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700221 ALOGW("CopyFileToFile: copy read failed (%zd vs %zd)", actual, get_size);
Narayan Kamath7462f022013-11-21 13:05:04 +0000222 return kIoError;
223 }
224
225 memcpy(begin + count, buf, get_size);
226 crc = crc32(crc, buf, get_size);
227 count += get_size;
228 }
229
230 *crc_out = crc;
231
232 return 0;
233}
234
/*
 * Rounds |val| up to the next power of 2; a value that already is a power
 * of 2 is returned unchanged.
 *
 * Works by copying the highest set bit of (val - 1) into every lower bit
 * position and then adding one. Inputs of 0 and inputs above 2^31 wrap
 * around to 0.
 *
 * Found on http://graphics.stanford.edu/~seander/bithacks.html.
 */
static uint32_t RoundUpPower2(uint32_t val) {
  uint32_t smeared = val - 1;
  for (unsigned int shift = 1; shift <= 16; shift *= 2) {
    smeared |= smeared >> shift;
  }

  return smeared + 1;
}
251
// Hashes the |len| characters starting at |str| (no terminator required)
// with the multiply-by-31 scheme: hash = hash * 31 + character. The sum
// wraps modulo 2^32.
static uint32_t ComputeHash(const char* str, uint16_t len) {
  uint32_t hash = 0;

  const char* const end = str + len;
  for (const char* cursor = str; cursor != end; ++cursor) {
    hash = hash * 31 + *cursor;
  }

  return hash;
}
261
262/*
263 * Convert a ZipEntry to a hash table index, verifying that it's in a
264 * valid range.
265 */
266static int64_t EntryToIndex(const ZipEntryName* hash_table,
267 const uint32_t hash_table_size,
268 const char* name, uint16_t length) {
269 const uint32_t hash = ComputeHash(name, length);
270
271 // NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
272 uint32_t ent = hash & (hash_table_size - 1);
273 while (hash_table[ent].name != NULL) {
274 if (hash_table[ent].name_length == length &&
275 memcmp(hash_table[ent].name, name, length) == 0) {
276 return ent;
277 }
278
279 ent = (ent + 1) & (hash_table_size - 1);
280 }
281
Colin Crossf4b0b792014-02-06 20:07:15 -0800282 ALOGV("Zip: Unable to find entry %.*s", length, name);
Narayan Kamath7462f022013-11-21 13:05:04 +0000283 return kEntryNotFound;
284}
285
286/*
287 * Add a new entry to the hash table.
288 */
289static int32_t AddToHash(ZipEntryName *hash_table, const uint64_t hash_table_size,
290 const char* name, uint16_t length) {
291 const uint64_t hash = ComputeHash(name, length);
292 uint32_t ent = hash & (hash_table_size - 1);
293
294 /*
295 * We over-allocated the table, so we're guaranteed to find an empty slot.
296 * Further, we guarantee that the hashtable size is not 0.
297 */
298 while (hash_table[ent].name != NULL) {
299 if (hash_table[ent].name_length == length &&
300 memcmp(hash_table[ent].name, name, length) == 0) {
301 // We've found a duplicate entry. We don't accept it
302 ALOGW("Zip: Found duplicate entry %.*s", length, name);
303 return kDuplicateEntry;
304 }
305 ent = (ent + 1) & (hash_table_size - 1);
306 }
307
308 hash_table[ent].name = name;
309 hash_table[ent].name_length = length;
310 return 0;
311}
312
/*
 * Reads an unsigned 16 bit little-endian value from the two bytes at |src|.
 */
static uint16_t get2LE(const uint8_t* src) {
  const unsigned int low_byte = src[0];
  const unsigned int high_byte = src[1];
  return low_byte | (high_byte << 8);
}
319
/*
 * Reads an unsigned 32 bit little-endian value from the four bytes at |src|.
 *
 * Each byte is widened to uint32_t before shifting. Shifting the promoted
 * (signed) int instead would move a set top bit of src[3] into the sign
 * bit, which is undefined behavior.
 */
static uint32_t get4LE(const uint8_t* src) {
  return ((uint32_t) src[0]) |
         ((uint32_t) src[1] << 8) |
         ((uint32_t) src[2] << 16) |
         ((uint32_t) src[3] << 24);
}
333
334static int32_t MapCentralDirectory0(int fd, const char* debug_file_name,
335 ZipArchive* archive, off64_t file_length,
336 uint32_t read_amount, uint8_t* scan_buffer) {
337 const off64_t search_start = file_length - read_amount;
338
339 if (lseek64(fd, search_start, SEEK_SET) != search_start) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700340 ALOGW("Zip: seek %" PRId64 " failed: %s", search_start, strerror(errno));
Narayan Kamath7462f022013-11-21 13:05:04 +0000341 return kIoError;
342 }
343 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, scan_buffer, read_amount));
344 if (actual != (ssize_t) read_amount) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700345 ALOGW("Zip: read %u failed: %s", read_amount, strerror(errno));
Narayan Kamath7462f022013-11-21 13:05:04 +0000346 return kIoError;
347 }
348
349 /*
350 * Scan backward for the EOCD magic. In an archive without a trailing
351 * comment, we'll find it on the first try. (We may want to consider
352 * doing an initial minimal read; if we don't find it, retry with a
353 * second read as above.)
354 */
355 int i;
356 for (i = read_amount - kEOCDLen; i >= 0; i--) {
357 if (scan_buffer[i] == 0x50 && get4LE(&scan_buffer[i]) == kEOCDSignature) {
358 ALOGV("+++ Found EOCD at buf+%d", i);
359 break;
360 }
361 }
362 if (i < 0) {
363 ALOGD("Zip: EOCD not found, %s is not zip", debug_file_name);
364 return kInvalidFile;
365 }
366
367 const off64_t eocd_offset = search_start + i;
368 const uint8_t* eocd_ptr = scan_buffer + i;
369
370 assert(eocd_offset < file_length);
371
372 /*
373 * Grab the CD offset and size, and the number of entries in the
374 * archive. Verify that they look reasonable. Widen dir_size and
375 * dir_offset to the file offset type.
376 */
377 const uint16_t num_entries = get2LE(eocd_ptr + kEOCDNumEntries);
378 const off64_t dir_size = get4LE(eocd_ptr + kEOCDSize);
379 const off64_t dir_offset = get4LE(eocd_ptr + kEOCDFileOffset);
380
381 if (dir_offset + dir_size > eocd_offset) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700382 ALOGW("Zip: bad offsets (dir %" PRId64 ", size %" PRId64 ", eocd %" PRId64 ")",
Narayan Kamath7462f022013-11-21 13:05:04 +0000383 dir_offset, dir_size, eocd_offset);
384 return kInvalidOffset;
385 }
386 if (num_entries == 0) {
387 ALOGW("Zip: empty archive?");
388 return kEmptyArchive;
389 }
390
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700391 ALOGV("+++ num_entries=%d dir_size=%" PRId64 " dir_offset=%" PRId64,
392 num_entries, dir_size, dir_offset);
Narayan Kamath7462f022013-11-21 13:05:04 +0000393
394 /*
395 * It all looks good. Create a mapping for the CD, and set the fields
396 * in archive.
397 */
Narayan Kamatheaf98852013-12-11 14:51:51 +0000398 android::FileMap* map = MapFileSegment(fd, dir_offset, dir_size,
399 true /* read only */, debug_file_name);
400 if (map == NULL) {
401 archive->directory_map = NULL;
402 return kMmapFailed;
Narayan Kamath7462f022013-11-21 13:05:04 +0000403 }
404
Narayan Kamatheaf98852013-12-11 14:51:51 +0000405 archive->directory_map = map;
Narayan Kamath7462f022013-11-21 13:05:04 +0000406 archive->num_entries = num_entries;
407 archive->directory_offset = dir_offset;
408
409 return 0;
410}
411
412/*
413 * Find the zip Central Directory and memory-map it.
414 *
415 * On success, returns 0 after populating fields from the EOCD area:
416 * directory_offset
417 * directory_map
418 * num_entries
419 */
420static int32_t MapCentralDirectory(int fd, const char* debug_file_name,
421 ZipArchive* archive) {
422
423 // Test file length. We use lseek64 to make sure the file
424 // is small enough to be a zip file (Its size must be less than
425 // 0xffffffff bytes).
426 off64_t file_length = lseek64(fd, 0, SEEK_END);
427 if (file_length == -1) {
428 ALOGV("Zip: lseek on fd %d failed", fd);
429 return kInvalidFile;
430 }
431
432 if (file_length > (off64_t) 0xffffffff) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700433 ALOGV("Zip: zip file too long %" PRId64, file_length);
Narayan Kamath7462f022013-11-21 13:05:04 +0000434 return kInvalidFile;
435 }
436
437 if (file_length < (int64_t) kEOCDLen) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700438 ALOGV("Zip: length %" PRId64 " is too small to be zip", file_length);
Narayan Kamath7462f022013-11-21 13:05:04 +0000439 return kInvalidFile;
440 }
441
442 /*
443 * Perform the traditional EOCD snipe hunt.
444 *
445 * We're searching for the End of Central Directory magic number,
446 * which appears at the start of the EOCD block. It's followed by
447 * 18 bytes of EOCD stuff and up to 64KB of archive comment. We
448 * need to read the last part of the file into a buffer, dig through
449 * it to find the magic number, parse some values out, and use those
450 * to determine the extent of the CD.
451 *
452 * We start by pulling in the last part of the file.
453 */
454 uint32_t read_amount = kMaxEOCDSearch;
455 if (file_length < (off64_t) read_amount) {
456 read_amount = file_length;
457 }
458
459 uint8_t* scan_buffer = (uint8_t*) malloc(read_amount);
460 int32_t result = MapCentralDirectory0(fd, debug_file_name, archive,
461 file_length, read_amount, scan_buffer);
462
463 free(scan_buffer);
464 return result;
465}
466
467/*
468 * Parses the Zip archive's Central Directory. Allocates and populates the
469 * hash table.
470 *
471 * Returns 0 on success.
472 */
473static int32_t ParseZipArchive(ZipArchive* archive) {
474 int32_t result = -1;
Narayan Kamatheaf98852013-12-11 14:51:51 +0000475 const uint8_t* cd_ptr = (const uint8_t*) archive->directory_map->getDataPtr();
476 size_t cd_length = archive->directory_map->getDataLength();
Narayan Kamath7462f022013-11-21 13:05:04 +0000477 uint16_t num_entries = archive->num_entries;
478
479 /*
480 * Create hash table. We have a minimum 75% load factor, possibly as
481 * low as 50% after we round off to a power of 2. There must be at
482 * least one unused entry to avoid an infinite loop during creation.
483 */
484 archive->hash_table_size = RoundUpPower2(1 + (num_entries * 4) / 3);
485 archive->hash_table = (ZipEntryName*) calloc(archive->hash_table_size,
486 sizeof(ZipEntryName));
487
488 /*
489 * Walk through the central directory, adding entries to the hash
490 * table and verifying values.
491 */
492 const uint8_t* ptr = cd_ptr;
493 for (uint16_t i = 0; i < num_entries; i++) {
494 if (get4LE(ptr) != kCDESignature) {
495 ALOGW("Zip: missed a central dir sig (at %d)", i);
496 goto bail;
497 }
498
499 if (ptr + kCDELen > cd_ptr + cd_length) {
500 ALOGW("Zip: ran off the end (at %d)", i);
501 goto bail;
502 }
503
504 const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset);
505 if (local_header_offset >= archive->directory_offset) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700506 ALOGW("Zip: bad LFH offset %" PRId64 " at entry %d", local_header_offset, i);
Narayan Kamath7462f022013-11-21 13:05:04 +0000507 goto bail;
508 }
509
510 const uint16_t file_name_length = get2LE(ptr + kCDENameLen);
511 const uint16_t extra_length = get2LE(ptr + kCDEExtraLen);
512 const uint16_t comment_length = get2LE(ptr + kCDECommentLen);
513
514 /* add the CDE filename to the hash table */
515 const int add_result = AddToHash(archive->hash_table,
516 archive->hash_table_size, (const char*) ptr + kCDELen, file_name_length);
517 if (add_result) {
518 ALOGW("Zip: Error adding entry to hash table %d", add_result);
519 result = add_result;
520 goto bail;
521 }
522
523 ptr += kCDELen + file_name_length + extra_length + comment_length;
524 if ((size_t)(ptr - cd_ptr) > cd_length) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700525 ALOGW("Zip: bad CD advance (%zu vs %zu) at entry %d",
526 (size_t) (ptr - cd_ptr), cd_length, i);
Narayan Kamath7462f022013-11-21 13:05:04 +0000527 goto bail;
528 }
529 }
530 ALOGV("+++ zip good scan %d entries", num_entries);
531
532 result = 0;
533
534bail:
535 return result;
536}
537
538static int32_t OpenArchiveInternal(ZipArchive* archive,
539 const char* debug_file_name) {
540 int32_t result = -1;
541 if ((result = MapCentralDirectory(archive->fd, debug_file_name, archive))) {
542 return result;
543 }
544
545 if ((result = ParseZipArchive(archive))) {
546 return result;
547 }
548
549 return 0;
550}
551
552int32_t OpenArchiveFd(int fd, const char* debug_file_name,
553 ZipArchiveHandle* handle) {
554 ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive));
555 memset(archive, 0, sizeof(*archive));
556 *handle = archive;
557
558 archive->fd = fd;
559
560 return OpenArchiveInternal(archive, debug_file_name);
561}
562
563int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle) {
564 ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive));
565 memset(archive, 0, sizeof(*archive));
566 *handle = archive;
567
568 const int fd = open(fileName, O_RDONLY | O_BINARY, 0);
569 if (fd < 0) {
570 ALOGW("Unable to open '%s': %s", fileName, strerror(errno));
571 return kIoError;
572 } else {
573 archive->fd = fd;
574 }
575
576 return OpenArchiveInternal(archive, fileName);
577}
578
579/*
580 * Close a ZipArchive, closing the file and freeing the contents.
581 */
582void CloseArchive(ZipArchiveHandle handle) {
583 ZipArchive* archive = (ZipArchive*) handle;
584 ALOGV("Closing archive %p", archive);
585
586 if (archive->fd >= 0) {
587 close(archive->fd);
588 }
589
Narayan Kamatheaf98852013-12-11 14:51:51 +0000590 if (archive->directory_map != NULL) {
591 archive->directory_map->release();
592 }
Narayan Kamath7462f022013-11-21 13:05:04 +0000593 free(archive->hash_table);
Mathieu Chartier5f98b122014-03-04 17:39:38 -0800594 free(archive);
Narayan Kamath7462f022013-11-21 13:05:04 +0000595}
596
597static int32_t UpdateEntryFromDataDescriptor(int fd,
598 ZipEntry *entry) {
599 uint8_t ddBuf[kDDMaxLen];
600 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, ddBuf, sizeof(ddBuf)));
601 if (actual != sizeof(ddBuf)) {
602 return kIoError;
603 }
604
605 const uint32_t ddSignature = get4LE(ddBuf);
606 uint16_t ddOffset = 0;
607 if (ddSignature == kDDOptSignature) {
608 ddOffset = 4;
609 }
610
611 entry->crc32 = get4LE(ddBuf + ddOffset + kDDCrc32);
612 entry->compressed_length = get4LE(ddBuf + ddOffset + kDDCompLen);
613 entry->uncompressed_length = get4LE(ddBuf + ddOffset + kDDUncompLen);
614
615 return 0;
616}
617
618// Attempts to read |len| bytes into |buf| at offset |off|.
619//
620// This method uses pread64 on platforms that support it and
621// lseek64 + read on platforms that don't. This implies that
622// callers should not rely on the |fd| offset being incremented
623// as a side effect of this call.
624static inline ssize_t ReadAtOffset(int fd, uint8_t* buf, size_t len,
625 off64_t off) {
626#ifdef HAVE_PREAD
627 return TEMP_FAILURE_RETRY(pread64(fd, buf, len, off));
628#else
629 // The only supported platform that doesn't support pread at the moment
630 // is Windows. Only recent versions of windows support unix like forks,
631 // and even there the semantics are quite different.
632 if (lseek64(fd, off, SEEK_SET) != off) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700633 ALOGW("Zip: failed seek to offset %" PRId64, off);
Narayan Kamath7462f022013-11-21 13:05:04 +0000634 return kIoError;
635 }
636
637 return TEMP_FAILURE_RETRY(read(fd, buf, len));
638#endif // HAVE_PREAD
639}
640
641static int32_t FindEntry(const ZipArchive* archive, const int ent,
642 ZipEntry* data) {
643 const uint16_t nameLen = archive->hash_table[ent].name_length;
644 const char* name = archive->hash_table[ent].name;
645
646 // Recover the start of the central directory entry from the filename
647 // pointer. The filename is the first entry past the fixed-size data,
648 // so we can just subtract back from that.
649 const unsigned char* ptr = (const unsigned char*) name;
650 ptr -= kCDELen;
651
652 // This is the base of our mmapped region, we have to sanity check that
653 // the name that's in the hash table is a pointer to a location within
654 // this mapped region.
655 const unsigned char* base_ptr = (const unsigned char*)
Narayan Kamatheaf98852013-12-11 14:51:51 +0000656 archive->directory_map->getDataPtr();
657 if (ptr < base_ptr || ptr > base_ptr + archive->directory_map->getDataLength()) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000658 ALOGW("Zip: Invalid entry pointer");
659 return kInvalidOffset;
660 }
661
662 // The offset of the start of the central directory in the zipfile.
663 // We keep this lying around so that we can sanity check all our lengths
664 // and our per-file structures.
665 const off64_t cd_offset = archive->directory_offset;
666
667 // Fill out the compression method, modification time, crc32
668 // and other interesting attributes from the central directory. These
669 // will later be compared against values from the local file header.
670 data->method = get2LE(ptr + kCDEMethod);
671 data->mod_time = get4LE(ptr + kCDEModWhen);
672 data->crc32 = get4LE(ptr + kCDECRC);
673 data->compressed_length = get4LE(ptr + kCDECompLen);
674 data->uncompressed_length = get4LE(ptr + kCDEUncompLen);
675
676 // Figure out the local header offset from the central directory. The
677 // actual file data will begin after the local header and the name /
678 // extra comments.
679 const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset);
680 if (local_header_offset + (off64_t) kLFHLen >= cd_offset) {
681 ALOGW("Zip: bad local hdr offset in zip");
682 return kInvalidOffset;
683 }
684
685 uint8_t lfh_buf[kLFHLen];
686 ssize_t actual = ReadAtOffset(archive->fd, lfh_buf, sizeof(lfh_buf),
687 local_header_offset);
688 if (actual != sizeof(lfh_buf)) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700689 ALOGW("Zip: failed reading lfh name from offset %" PRId64, local_header_offset);
Narayan Kamath7462f022013-11-21 13:05:04 +0000690 return kIoError;
691 }
692
693 if (get4LE(lfh_buf) != kLFHSignature) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700694 ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64,
Narayan Kamath7462f022013-11-21 13:05:04 +0000695 local_header_offset);
696 return kInvalidOffset;
697 }
698
699 // Paranoia: Match the values specified in the local file header
700 // to those specified in the central directory.
701 const uint16_t lfhGpbFlags = get2LE(lfh_buf + kLFHGPBFlags);
702 const uint16_t lfhNameLen = get2LE(lfh_buf + kLFHNameLen);
703 const uint16_t lfhExtraLen = get2LE(lfh_buf + kLFHExtraLen);
704
705 if ((lfhGpbFlags & kGPBDDFlagMask) == 0) {
706 const uint32_t lfhCrc = get4LE(lfh_buf + kLFHCRC);
707 const uint32_t lfhCompLen = get4LE(lfh_buf + kLFHCompLen);
708 const uint32_t lfhUncompLen = get4LE(lfh_buf + kLFHUncompLen);
709
710 data->has_data_descriptor = 0;
711 if (data->compressed_length != lfhCompLen || data->uncompressed_length != lfhUncompLen
712 || data->crc32 != lfhCrc) {
713 ALOGW("Zip: size/crc32 mismatch. expected {%d, %d, %x}, was {%d, %d, %x}",
714 data->compressed_length, data->uncompressed_length, data->crc32,
715 lfhCompLen, lfhUncompLen, lfhCrc);
716 return kInconsistentInformation;
717 }
718 } else {
719 data->has_data_descriptor = 1;
720 }
721
722 // Check that the local file header name matches the declared
723 // name in the central directory.
724 if (lfhNameLen == nameLen) {
725 const off64_t name_offset = local_header_offset + kLFHLen;
726 if (name_offset + lfhNameLen >= cd_offset) {
727 ALOGW("Zip: Invalid declared length");
728 return kInvalidOffset;
729 }
730
731 uint8_t* name_buf = (uint8_t*) malloc(nameLen);
732 ssize_t actual = ReadAtOffset(archive->fd, name_buf, nameLen,
733 name_offset);
734
735 if (actual != nameLen) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700736 ALOGW("Zip: failed reading lfh name from offset %" PRId64, name_offset);
Narayan Kamath7462f022013-11-21 13:05:04 +0000737 free(name_buf);
738 return kIoError;
739 }
740
741 if (memcmp(name, name_buf, nameLen)) {
742 free(name_buf);
743 return kInconsistentInformation;
744 }
745
746 free(name_buf);
747 } else {
748 ALOGW("Zip: lfh name did not match central directory.");
749 return kInconsistentInformation;
750 }
751
752 const off64_t data_offset = local_header_offset + kLFHLen + lfhNameLen + lfhExtraLen;
Narayan Kamath48953a12014-01-24 12:32:39 +0000753 if (data_offset > cd_offset) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700754 ALOGW("Zip: bad data offset %" PRId64 " in zip", data_offset);
Narayan Kamath7462f022013-11-21 13:05:04 +0000755 return kInvalidOffset;
756 }
757
758 if ((off64_t)(data_offset + data->compressed_length) > cd_offset) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700759 ALOGW("Zip: bad compressed length in zip (%" PRId64 " + %zd > %" PRId64 ")",
Narayan Kamath7462f022013-11-21 13:05:04 +0000760 data_offset, data->compressed_length, cd_offset);
761 return kInvalidOffset;
762 }
763
764 if (data->method == kCompressStored &&
765 (off64_t)(data_offset + data->uncompressed_length) > cd_offset) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700766 ALOGW("Zip: bad uncompressed length in zip (%" PRId64 " + %d > %" PRId64 ")",
Narayan Kamath7462f022013-11-21 13:05:04 +0000767 data_offset, data->uncompressed_length, cd_offset);
768 return kInvalidOffset;
769 }
770
771 data->offset = data_offset;
772 return 0;
773}
774
775struct IterationHandle {
776 uint32_t position;
777 const char* prefix;
778 uint16_t prefix_len;
779 ZipArchive* archive;
780};
781
782int32_t StartIteration(ZipArchiveHandle handle, void** cookie_ptr, const char* prefix) {
783 ZipArchive* archive = (ZipArchive *) handle;
784
785 if (archive == NULL || archive->hash_table == NULL) {
786 ALOGW("Zip: Invalid ZipArchiveHandle");
787 return kInvalidHandle;
788 }
789
790 IterationHandle* cookie = (IterationHandle*) malloc(sizeof(IterationHandle));
791 cookie->position = 0;
792 cookie->prefix = prefix;
793 cookie->archive = archive;
794 if (prefix != NULL) {
795 cookie->prefix_len = strlen(prefix);
796 }
797
798 *cookie_ptr = cookie ;
799 return 0;
800}
801
802int32_t FindEntry(const ZipArchiveHandle handle, const char* entryName,
803 ZipEntry* data) {
804 const ZipArchive* archive = (ZipArchive*) handle;
805 const int nameLen = strlen(entryName);
806 if (nameLen == 0 || nameLen > 65535) {
807 ALOGW("Zip: Invalid filename %s", entryName);
808 return kInvalidEntryName;
809 }
810
811 const int64_t ent = EntryToIndex(archive->hash_table,
812 archive->hash_table_size, entryName, nameLen);
813
814 if (ent < 0) {
Narayan Kamatha1ff8012013-12-31 10:27:59 +0000815 ALOGV("Zip: Could not find entry %.*s", nameLen, entryName);
Narayan Kamath7462f022013-11-21 13:05:04 +0000816 return ent;
817 }
818
819 return FindEntry(archive, ent, data);
820}
821
822int32_t Next(void* cookie, ZipEntry* data, ZipEntryName* name) {
823 IterationHandle* handle = (IterationHandle *) cookie;
824 if (handle == NULL) {
825 return kInvalidHandle;
826 }
827
828 ZipArchive* archive = handle->archive;
829 if (archive == NULL || archive->hash_table == NULL) {
830 ALOGW("Zip: Invalid ZipArchiveHandle");
831 return kInvalidHandle;
832 }
833
834 const uint32_t currentOffset = handle->position;
835 const uint32_t hash_table_length = archive->hash_table_size;
836 const ZipEntryName *hash_table = archive->hash_table;
837
838 for (uint32_t i = currentOffset; i < hash_table_length; ++i) {
839 if (hash_table[i].name != NULL &&
840 (handle->prefix == NULL ||
841 (memcmp(handle->prefix, hash_table[i].name, handle->prefix_len) == 0))) {
842 handle->position = (i + 1);
843 const int error = FindEntry(archive, i, data);
844 if (!error) {
845 name->name = hash_table[i].name;
846 name->name_length = hash_table[i].name_length;
847 }
848
849 return error;
850 }
851 }
852
853 handle->position = 0;
854 return kIterationEnd;
855}
856
857static int32_t InflateToFile(int fd, const ZipEntry* entry,
858 uint8_t* begin, uint32_t length,
859 uint64_t* crc_out) {
860 int32_t result = -1;
861 const uint32_t kBufSize = 32768;
862 uint8_t read_buf[kBufSize];
863 uint8_t write_buf[kBufSize];
864 z_stream zstream;
865 int zerr;
866
867 /*
868 * Initialize the zlib stream struct.
869 */
870 memset(&zstream, 0, sizeof(zstream));
871 zstream.zalloc = Z_NULL;
872 zstream.zfree = Z_NULL;
873 zstream.opaque = Z_NULL;
874 zstream.next_in = NULL;
875 zstream.avail_in = 0;
876 zstream.next_out = (Bytef*) write_buf;
877 zstream.avail_out = kBufSize;
878 zstream.data_type = Z_UNKNOWN;
879
880 /*
881 * Use the undocumented "negative window bits" feature to tell zlib
882 * that there's no zlib header waiting for it.
883 */
884 zerr = inflateInit2(&zstream, -MAX_WBITS);
885 if (zerr != Z_OK) {
886 if (zerr == Z_VERSION_ERROR) {
887 ALOGE("Installed zlib is not compatible with linked version (%s)",
888 ZLIB_VERSION);
889 } else {
890 ALOGW("Call to inflateInit2 failed (zerr=%d)", zerr);
891 }
892
893 return kZlibError;
894 }
895
896 const uint32_t uncompressed_length = entry->uncompressed_length;
897
898 uint32_t compressed_length = entry->compressed_length;
899 uint32_t write_count = 0;
900 do {
901 /* read as much as we can */
902 if (zstream.avail_in == 0) {
903 const ssize_t getSize = (compressed_length > kBufSize) ? kBufSize : compressed_length;
904 const ssize_t actual = TEMP_FAILURE_RETRY(read(fd, read_buf, getSize));
905 if (actual != getSize) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700906 ALOGW("Zip: inflate read failed (%zd vs %zd)", actual, getSize);
Narayan Kamath7462f022013-11-21 13:05:04 +0000907 result = kIoError;
908 goto z_bail;
909 }
910
911 compressed_length -= getSize;
912
913 zstream.next_in = read_buf;
914 zstream.avail_in = getSize;
915 }
916
917 /* uncompress the data */
918 zerr = inflate(&zstream, Z_NO_FLUSH);
919 if (zerr != Z_OK && zerr != Z_STREAM_END) {
920 ALOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)",
921 zerr, zstream.next_in, zstream.avail_in,
922 zstream.next_out, zstream.avail_out);
923 result = kZlibError;
924 goto z_bail;
925 }
926
927 /* write when we're full or when we're done */
928 if (zstream.avail_out == 0 ||
929 (zerr == Z_STREAM_END && zstream.avail_out != kBufSize)) {
930 const size_t write_size = zstream.next_out - write_buf;
931 // The file might have declared a bogus length.
932 if (write_size + write_count > length) {
933 goto z_bail;
934 }
935 memcpy(begin + write_count, write_buf, write_size);
936 write_count += write_size;
937
938 zstream.next_out = write_buf;
939 zstream.avail_out = kBufSize;
940 }
941 } while (zerr == Z_OK);
942
943 assert(zerr == Z_STREAM_END); /* other errors should've been caught */
944
945 // stream.adler holds the crc32 value for such streams.
946 *crc_out = zstream.adler;
947
948 if (zstream.total_out != uncompressed_length || compressed_length != 0) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700949 ALOGW("Zip: size mismatch on inflated file (%ld vs %u)",
Narayan Kamath7462f022013-11-21 13:05:04 +0000950 zstream.total_out, uncompressed_length);
951 result = kInconsistentInformation;
952 goto z_bail;
953 }
954
955 result = 0;
956
957z_bail:
958 inflateEnd(&zstream); /* free up any allocated structures */
959
960 return result;
961}
962
963int32_t ExtractToMemory(ZipArchiveHandle handle,
964 ZipEntry* entry, uint8_t* begin, uint32_t size) {
965 ZipArchive* archive = (ZipArchive*) handle;
966 const uint16_t method = entry->method;
967 off64_t data_offset = entry->offset;
968
969 if (lseek64(archive->fd, data_offset, SEEK_SET) != data_offset) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700970 ALOGW("Zip: lseek to data at %" PRId64 " failed", data_offset);
Narayan Kamath7462f022013-11-21 13:05:04 +0000971 return kIoError;
972 }
973
974 // this should default to kUnknownCompressionMethod.
975 int32_t return_value = -1;
976 uint64_t crc = 0;
977 if (method == kCompressStored) {
978 return_value = CopyFileToFile(archive->fd, begin, size, &crc);
979 } else if (method == kCompressDeflated) {
980 return_value = InflateToFile(archive->fd, entry, begin, size, &crc);
981 }
982
983 if (!return_value && entry->has_data_descriptor) {
984 return_value = UpdateEntryFromDataDescriptor(archive->fd, entry);
985 if (return_value) {
986 return return_value;
987 }
988 }
989
990 // TODO: Fix this check by passing the right flags to inflate2 so that
991 // it calculates the CRC for us.
992 if (entry->crc32 != crc && false) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700993 ALOGW("Zip: crc mismatch: expected %u, was %" PRIu64, entry->crc32, crc);
Narayan Kamath7462f022013-11-21 13:05:04 +0000994 return kInconsistentInformation;
995 }
996
997 return return_value;
998}
999
1000int32_t ExtractEntryToFile(ZipArchiveHandle handle,
1001 ZipEntry* entry, int fd) {
1002 const int32_t declared_length = entry->uncompressed_length;
1003
Narayan Kamath00a258c2013-12-13 16:06:19 +00001004 const off64_t current_offset = lseek64(fd, 0, SEEK_CUR);
1005 if (current_offset == -1) {
1006 ALOGW("Zip: unable to seek to current location on fd %d: %s", fd,
1007 strerror(errno));
Narayan Kamath7462f022013-11-21 13:05:04 +00001008 return kIoError;
1009 }
1010
Narayan Kamath00a258c2013-12-13 16:06:19 +00001011 int result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length + current_offset));
1012 if (result == -1) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -07001013 ALOGW("Zip: unable to truncate file to %" PRId64 ": %s",
1014 declared_length + current_offset, strerror(errno));
Narayan Kamath00a258c2013-12-13 16:06:19 +00001015 return kIoError;
1016 }
1017
Narayan Kamath48953a12014-01-24 12:32:39 +00001018 // Don't attempt to map a region of length 0. We still need the
1019 // ftruncate() though, since the API guarantees that we will truncate
1020 // the file to the end of the uncompressed output.
1021 if (declared_length == 0) {
1022 return 0;
1023 }
1024
Narayan Kamath00a258c2013-12-13 16:06:19 +00001025 android::FileMap* map = MapFileSegment(fd, current_offset, declared_length,
Narayan Kamatheaf98852013-12-11 14:51:51 +00001026 false, kTempMappingFileName);
1027 if (map == NULL) {
1028 return kMmapFailed;
Narayan Kamath7462f022013-11-21 13:05:04 +00001029 }
1030
Narayan Kamatheaf98852013-12-11 14:51:51 +00001031 const int32_t error = ExtractToMemory(handle, entry,
1032 reinterpret_cast<uint8_t*>(map->getDataPtr()),
1033 map->getDataLength());
1034 map->release();
Narayan Kamath7462f022013-11-21 13:05:04 +00001035 return error;
1036}
1037
1038const char* ErrorCodeString(int32_t error_code) {
1039 if (error_code > kErrorMessageLowerBound && error_code < kErrorMessageUpperBound) {
1040 return kErrorMessages[error_code * -1];
1041 }
1042
1043 return kErrorMessages[0];
1044}
1045
1046int GetFileDescriptor(const ZipArchiveHandle handle) {
1047 return ((ZipArchive*) handle)->fd;
1048}
1049