blob: e820f2a5f13b73d0bc324cb69cb6bb3307acf0c5 [file] [log] [blame]
Narayan Kamath7462f022013-11-21 13:05:04 +00001/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Read-only access to Zip archives, with minimal heap allocation.
19 */
Narayan Kamath7462f022013-11-21 13:05:04 +000020
Dmitriy Ivanov1f741e52015-03-06 14:26:37 -080021#include <memory>
Dmitriy Ivanovedbabfe2015-03-12 09:58:15 -070022#include <vector>
Dmitriy Ivanov1f741e52015-03-06 14:26:37 -080023
Narayan Kamath7462f022013-11-21 13:05:04 +000024#include <assert.h>
25#include <errno.h>
Mark Salyzyn99ef9912014-03-14 14:26:22 -070026#include <fcntl.h>
27#include <inttypes.h>
Narayan Kamath7462f022013-11-21 13:05:04 +000028#include <limits.h>
29#include <log/log.h>
Narayan Kamath7462f022013-11-21 13:05:04 +000030#include <stdlib.h>
31#include <string.h>
Narayan Kamath7462f022013-11-21 13:05:04 +000032#include <unistd.h>
Mark Salyzyn51d562d2014-05-05 14:38:05 -070033#include <utils/Compat.h>
Narayan Kamatheaf98852013-12-11 14:51:51 +000034#include <utils/FileMap.h>
Mark Salyzyn99ef9912014-03-14 14:26:22 -070035#include <zlib.h>
Narayan Kamath7462f022013-11-21 13:05:04 +000036
37#include <JNIHelp.h> // TEMP_FAILURE_RETRY may or may not be in unistd
38
Narayan Kamath044bc8e2014-12-03 18:22:53 +000039#include "entry_name_utils-inl.h"
Mark Salyzyn99ef9912014-03-14 14:26:22 -070040#include "ziparchive/zip_archive.h"
41
Narayan Kamath044bc8e2014-12-03 18:22:53 +000042
// O_BINARY only exists on platforms (such as Windows) that distinguish
// between text and binary mode when opening a file. Where it is missing,
// define it as a flag with no effect so that open() calls can always pass it.
#ifndef O_BINARY
#define O_BINARY 0
#endif

// Declares (without defining) the default constructor, copy constructor and
// copy assignment operator of |TypeName|. Meant to be placed in a private
// section so that the type can neither be constructed nor copied directly.
#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
  TypeName();                                    \
  TypeName(const TypeName&);                     \
  void operator=(const TypeName&)
Narayan Kamath7462f022013-11-21 13:05:04 +000053
Narayan Kamath926973e2014-06-09 14:18:14 +010054// The "end of central directory" (EOCD) record. Each archive
55// contains exactly once such record which appears at the end of
56// the archive. It contains archive wide information like the
57// number of entries in the archive and the offset to the central
58// directory of the offset.
59struct EocdRecord {
60 static const uint32_t kSignature = 0x06054b50;
Narayan Kamath7462f022013-11-21 13:05:04 +000061
Narayan Kamath926973e2014-06-09 14:18:14 +010062 // End of central directory signature, should always be
63 // |kSignature|.
64 uint32_t eocd_signature;
65 // The number of the current "disk", i.e, the "disk" that this
66 // central directory is on.
67 //
68 // This implementation assumes that each archive spans a single
69 // disk only. i.e, that disk_num == 1.
70 uint16_t disk_num;
71 // The disk where the central directory starts.
72 //
73 // This implementation assumes that each archive spans a single
74 // disk only. i.e, that cd_start_disk == 1.
75 uint16_t cd_start_disk;
76 // The number of central directory records on this disk.
77 //
78 // This implementation assumes that each archive spans a single
79 // disk only. i.e, that num_records_on_disk == num_records.
80 uint16_t num_records_on_disk;
81 // The total number of central directory records.
82 uint16_t num_records;
83 // The size of the central directory (in bytes).
84 uint32_t cd_size;
85 // The offset of the start of the central directory, relative
86 // to the start of the file.
87 uint32_t cd_start_offset;
88 // Length of the central directory comment.
89 uint16_t comment_length;
90 private:
91 DISALLOW_IMPLICIT_CONSTRUCTORS(EocdRecord);
92} __attribute__((packed));
Narayan Kamath7462f022013-11-21 13:05:04 +000093
Narayan Kamath926973e2014-06-09 14:18:14 +010094// A structure representing the fixed length fields for a single
95// record in the central directory of the archive. In addition to
96// the fixed length fields listed here, each central directory
97// record contains a variable length "file_name" and "extra_field"
98// whose lengths are given by |file_name_length| and |extra_field_length|
99// respectively.
100struct CentralDirectoryRecord {
101 static const uint32_t kSignature = 0x02014b50;
Narayan Kamath7462f022013-11-21 13:05:04 +0000102
Narayan Kamath926973e2014-06-09 14:18:14 +0100103 // The start of record signature. Must be |kSignature|.
104 uint32_t record_signature;
105 // Tool version. Ignored by this implementation.
106 uint16_t version_made_by;
107 // Tool version. Ignored by this implementation.
108 uint16_t version_needed;
109 // The "general purpose bit flags" for this entry. The only
110 // flag value that we currently check for is the "data descriptor"
111 // flag.
112 uint16_t gpb_flags;
113 // The compression method for this entry, one of |kCompressStored|
114 // and |kCompressDeflated|.
115 uint16_t compression_method;
116 // The file modification time and date for this entry.
117 uint16_t last_mod_time;
118 uint16_t last_mod_date;
119 // The CRC-32 checksum for this entry.
120 uint32_t crc32;
121 // The compressed size (in bytes) of this entry.
122 uint32_t compressed_size;
123 // The uncompressed size (in bytes) of this entry.
124 uint32_t uncompressed_size;
125 // The length of the entry file name in bytes. The file name
126 // will appear immediately after this record.
127 uint16_t file_name_length;
128 // The length of the extra field info (in bytes). This data
129 // will appear immediately after the entry file name.
130 uint16_t extra_field_length;
131 // The length of the entry comment (in bytes). This data will
132 // appear immediately after the extra field.
133 uint16_t comment_length;
134 // The start disk for this entry. Ignored by this implementation).
135 uint16_t file_start_disk;
136 // File attributes. Ignored by this implementation.
137 uint16_t internal_file_attributes;
138 // File attributes. Ignored by this implementation.
139 uint32_t external_file_attributes;
140 // The offset to the local file header for this entry, from the
141 // beginning of this archive.
142 uint32_t local_file_header_offset;
143 private:
144 DISALLOW_IMPLICIT_CONSTRUCTORS(CentralDirectoryRecord);
145} __attribute__((packed));
Narayan Kamath7462f022013-11-21 13:05:04 +0000146
Narayan Kamath926973e2014-06-09 14:18:14 +0100147// The local file header for a given entry. This duplicates information
148// present in the central directory of the archive. It is an error for
149// the information here to be different from the central directory
150// information for a given entry.
151struct LocalFileHeader {
152 static const uint32_t kSignature = 0x04034b50;
Narayan Kamath7462f022013-11-21 13:05:04 +0000153
Narayan Kamath926973e2014-06-09 14:18:14 +0100154 // The local file header signature, must be |kSignature|.
155 uint32_t lfh_signature;
156 // Tool version. Ignored by this implementation.
157 uint16_t version_needed;
158 // The "general purpose bit flags" for this entry. The only
159 // flag value that we currently check for is the "data descriptor"
160 // flag.
161 uint16_t gpb_flags;
162 // The compression method for this entry, one of |kCompressStored|
163 // and |kCompressDeflated|.
164 uint16_t compression_method;
165 // The file modification time and date for this entry.
166 uint16_t last_mod_time;
167 uint16_t last_mod_date;
168 // The CRC-32 checksum for this entry.
169 uint32_t crc32;
170 // The compressed size (in bytes) of this entry.
171 uint32_t compressed_size;
172 // The uncompressed size (in bytes) of this entry.
173 uint32_t uncompressed_size;
174 // The length of the entry file name in bytes. The file name
175 // will appear immediately after this record.
176 uint16_t file_name_length;
177 // The length of the extra field info (in bytes). This data
178 // will appear immediately after the entry file name.
179 uint16_t extra_field_length;
180 private:
181 DISALLOW_IMPLICIT_CONSTRUCTORS(LocalFileHeader);
182} __attribute__((packed));
183
184struct DataDescriptor {
185 // The *optional* data descriptor start signature.
186 static const uint32_t kOptSignature = 0x08074b50;
187
188 // CRC-32 checksum of the entry.
189 uint32_t crc32;
190 // Compressed size of the entry.
191 uint32_t compressed_size;
192 // Uncompressed size of the entry.
193 uint32_t uncompressed_size;
194 private:
195 DISALLOW_IMPLICIT_CONSTRUCTORS(DataDescriptor);
196} __attribute__((packed));
197
198#undef DISALLOW_IMPLICIT_CONSTRUCTORS
199
Piotr Jastrzebskibd0a7482014-08-13 09:49:25 +0000200static const uint32_t kGPBDDFlagMask = 0x0008; // mask value that signifies that the entry has a DD
Narayan Kamath7462f022013-11-21 13:05:04 +0000201
Narayan Kamath926973e2014-06-09 14:18:14 +0100202// The maximum size of a central directory or a file
203// comment in bytes.
204static const uint32_t kMaxCommentLen = 65535;
205
206// The maximum number of bytes to scan backwards for the EOCD start.
207static const uint32_t kMaxEOCDSearch = kMaxCommentLen + sizeof(EocdRecord);
208
// Error codes are non-positive. Valid codes lie strictly between
// |kErrorMessageLowerBound| and |kErrorMessageUpperBound|; the message of a
// code is stored at index -code of |kErrorMessages|.
static const int32_t kErrorMessageUpperBound = 0;

// Reported when an iteration has no further elements.
static const int32_t kIterationEnd = -1;

// Zlib reported an error while a stream from this file was being inflated.
// This usually points at a corrupt file.
static const int32_t kZlibError = -2;

// The input cannot be handled as a zip archive, typically because it is too
// small, too large or lacks a valid signature.
static const int32_t kInvalidFile = -3;

// An invalid iteration handle or archive handle was supplied as an argument.
static const int32_t kInvalidHandle = -4;

// Two (or more) entries of the archive share the same name.
static const int32_t kDuplicateEntry = -5;

// The archive has no entries.
static const int32_t kEmptyArchive = -6;

// The requested entry does not exist in the archive.
static const int32_t kEntryNotFound = -7;

// The archive holds an invalid pointer to a local file header.
static const int32_t kInvalidOffset = -8;

// The entry information of the archive is inconsistent: either the central
// directory and the local file header disagree, or the actual uncompressed
// length or crc32 differs from the declared value.
static const int32_t kInconsistentInformation = -9;

// An entry name is invalid.
static const int32_t kInvalidEntryName = -10;

// A system call doing I/O (read, lseek, ftruncate, map) failed.
static const int32_t kIoError = -11;

// The central directory or the contents of an entry could not be mmapped.
static const int32_t kMmapFailed = -12;

static const int32_t kErrorMessageLowerBound = -13;

// Human-readable messages, ordered so that the message for error code |e|
// is found at index -e.
static const char* kErrorMessages[] = {
  "Unknown return code.",
  "Iteration ended",
  "Zlib error",
  "Invalid file",
  "Invalid handle",
  "Duplicate entries in archive",
  "Empty archive",
  "Entry not found",
  "Invalid offset",
  "Inconsistent information",
  "Invalid entry name",
  "I/O Error",
  "File mapping failed"
};

// Name used for temporary file mappings (see its users for details).
static const char kTempMappingFileName[] = "zip: ExtractFileToFile";
Narayan Kamath7462f022013-11-21 13:05:04 +0000272
273/*
274 * A Read-only Zip archive.
275 *
276 * We want "open" and "find entry by name" to be fast operations, and
277 * we want to use as little memory as possible. We memory-map the zip
278 * central directory, and load a hash table with pointers to the filenames
279 * (which aren't null-terminated). The other fields are at a fixed offset
280 * from the filename, so we don't need to extract those (but we do need
281 * to byte-read and endian-swap them every time we want them).
282 *
283 * It's possible that somebody has handed us a massive (~1GB) zip archive,
284 * so we can't expect to mmap the entire file.
285 *
286 * To speed comparisons when doing a lookup by name, we could make the mapping
287 * "private" (copy-on-write) and null-terminate the filenames after verifying
288 * the record structure. However, this requires a private mapping of
289 * every page that the Central Directory touches. Easier to tuck a copy
290 * of the string length into the hash table entry.
291 */
292struct ZipArchive {
293 /* open Zip archive */
Neil Fullerb1a113f2014-07-25 14:43:04 +0100294 const int fd;
Dmitriy Ivanov40b52b22014-07-15 19:33:00 -0700295 const bool close_file;
Narayan Kamath7462f022013-11-21 13:05:04 +0000296
297 /* mapped central directory area */
298 off64_t directory_offset;
Dmitriy Ivanov4b67f832015-03-06 10:22:34 -0800299 android::FileMap directory_map;
Narayan Kamath7462f022013-11-21 13:05:04 +0000300
301 /* number of entries in the Zip archive */
302 uint16_t num_entries;
303
304 /*
305 * We know how many entries are in the Zip archive, so we can have a
306 * fixed-size hash table. We define a load factor of 0.75 and overallocat
307 * so the maximum number entries can never be higher than
308 * ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t.
309 */
310 uint32_t hash_table_size;
311 ZipEntryName* hash_table;
Neil Fullerb1a113f2014-07-25 14:43:04 +0100312
Dmitriy Ivanov40b52b22014-07-15 19:33:00 -0700313 ZipArchive(const int fd, bool assume_ownership) :
Neil Fullerb1a113f2014-07-25 14:43:04 +0100314 fd(fd),
Dmitriy Ivanov40b52b22014-07-15 19:33:00 -0700315 close_file(assume_ownership),
Neil Fullerb1a113f2014-07-25 14:43:04 +0100316 directory_offset(0),
Neil Fullerb1a113f2014-07-25 14:43:04 +0100317 num_entries(0),
318 hash_table_size(0),
319 hash_table(NULL) {}
320
321 ~ZipArchive() {
Dmitriy Ivanov40b52b22014-07-15 19:33:00 -0700322 if (close_file && fd >= 0) {
Neil Fullerb1a113f2014-07-25 14:43:04 +0100323 close(fd);
324 }
325
Neil Fullerb1a113f2014-07-25 14:43:04 +0100326 free(hash_table);
327 }
Narayan Kamath7462f022013-11-21 13:05:04 +0000328};
329
Narayan Kamath7462f022013-11-21 13:05:04 +0000330static int32_t CopyFileToFile(int fd, uint8_t* begin, const uint32_t length, uint64_t *crc_out) {
331 static const uint32_t kBufSize = 32768;
332 uint8_t buf[kBufSize];
333
334 uint32_t count = 0;
335 uint64_t crc = 0;
Narayan Kamath58aaf462013-12-10 16:47:14 +0000336 while (count < length) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000337 uint32_t remaining = length - count;
338
339 // Safe conversion because kBufSize is narrow enough for a 32 bit signed
340 // value.
341 ssize_t get_size = (remaining > kBufSize) ? kBufSize : remaining;
342 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, buf, get_size));
343
344 if (actual != get_size) {
Mark Salyzyn51d562d2014-05-05 14:38:05 -0700345 ALOGW("CopyFileToFile: copy read failed (" ZD " vs " ZD ")", actual, get_size);
Narayan Kamath7462f022013-11-21 13:05:04 +0000346 return kIoError;
347 }
348
349 memcpy(begin + count, buf, get_size);
350 crc = crc32(crc, buf, get_size);
351 count += get_size;
352 }
353
354 *crc_out = crc;
355
356 return 0;
357}
358
/*
 * Rounds |val| up to the nearest power of 2; a value that already is a power
 * of 2 is returned unchanged. The arithmetic is modulo 2^32, so 0 and values
 * above 2^31 yield 0.
 *
 * Based on http://graphics.stanford.edu/~seander/bithacks.html.
 */
static uint32_t RoundUpPower2(uint32_t val) {
  // Copy the highest set bit of (val - 1) into every lower bit position...
  uint32_t smeared = val - 1;
  for (unsigned int shift = 1; shift <= 16; shift <<= 1) {
    smeared |= smeared >> shift;
  }

  // ...so that adding one carries into the next power of 2.
  return smeared + 1;
}
375
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100376static uint32_t ComputeHash(const ZipEntryName& name) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000377 uint32_t hash = 0;
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100378 uint16_t len = name.name_length;
379 const uint8_t* str = name.name;
Narayan Kamath7462f022013-11-21 13:05:04 +0000380
381 while (len--) {
382 hash = hash * 31 + *str++;
383 }
384
385 return hash;
386}
387
388/*
389 * Convert a ZipEntry to a hash table index, verifying that it's in a
390 * valid range.
391 */
392static int64_t EntryToIndex(const ZipEntryName* hash_table,
393 const uint32_t hash_table_size,
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100394 const ZipEntryName& name) {
395 const uint32_t hash = ComputeHash(name);
Narayan Kamath7462f022013-11-21 13:05:04 +0000396
397 // NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
398 uint32_t ent = hash & (hash_table_size - 1);
399 while (hash_table[ent].name != NULL) {
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100400 if (hash_table[ent].name_length == name.name_length &&
401 memcmp(hash_table[ent].name, name.name, name.name_length) == 0) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000402 return ent;
403 }
404
405 ent = (ent + 1) & (hash_table_size - 1);
406 }
407
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100408 ALOGV("Zip: Unable to find entry %.*s", name.name_length, name.name);
Narayan Kamath7462f022013-11-21 13:05:04 +0000409 return kEntryNotFound;
410}
411
412/*
413 * Add a new entry to the hash table.
414 */
415static int32_t AddToHash(ZipEntryName *hash_table, const uint64_t hash_table_size,
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100416 const ZipEntryName& name) {
417 const uint64_t hash = ComputeHash(name);
Narayan Kamath7462f022013-11-21 13:05:04 +0000418 uint32_t ent = hash & (hash_table_size - 1);
419
420 /*
421 * We over-allocated the table, so we're guaranteed to find an empty slot.
422 * Further, we guarantee that the hashtable size is not 0.
423 */
424 while (hash_table[ent].name != NULL) {
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100425 if (hash_table[ent].name_length == name.name_length &&
426 memcmp(hash_table[ent].name, name.name, name.name_length) == 0) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000427 // We've found a duplicate entry. We don't accept it
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100428 ALOGW("Zip: Found duplicate entry %.*s", name.name_length, name.name);
Narayan Kamath7462f022013-11-21 13:05:04 +0000429 return kDuplicateEntry;
430 }
431 ent = (ent + 1) & (hash_table_size - 1);
432 }
433
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100434 hash_table[ent].name = name.name;
435 hash_table[ent].name_length = name.name_length;
Narayan Kamath7462f022013-11-21 13:05:04 +0000436 return 0;
437}
438
Narayan Kamath7462f022013-11-21 13:05:04 +0000439static int32_t MapCentralDirectory0(int fd, const char* debug_file_name,
440 ZipArchive* archive, off64_t file_length,
Narayan Kamath926973e2014-06-09 14:18:14 +0100441 off64_t read_amount, uint8_t* scan_buffer) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000442 const off64_t search_start = file_length - read_amount;
443
444 if (lseek64(fd, search_start, SEEK_SET) != search_start) {
Narayan Kamath926973e2014-06-09 14:18:14 +0100445 ALOGW("Zip: seek %" PRId64 " failed: %s", static_cast<int64_t>(search_start),
446 strerror(errno));
Narayan Kamath7462f022013-11-21 13:05:04 +0000447 return kIoError;
448 }
Narayan Kamath926973e2014-06-09 14:18:14 +0100449 ssize_t actual = TEMP_FAILURE_RETRY(
450 read(fd, scan_buffer, static_cast<size_t>(read_amount)));
451 if (actual != static_cast<ssize_t>(read_amount)) {
452 ALOGW("Zip: read %" PRId64 " failed: %s", static_cast<int64_t>(read_amount),
453 strerror(errno));
Narayan Kamath7462f022013-11-21 13:05:04 +0000454 return kIoError;
455 }
456
457 /*
458 * Scan backward for the EOCD magic. In an archive without a trailing
459 * comment, we'll find it on the first try. (We may want to consider
460 * doing an initial minimal read; if we don't find it, retry with a
461 * second read as above.)
462 */
Narayan Kamath926973e2014-06-09 14:18:14 +0100463 int i = read_amount - sizeof(EocdRecord);
464 for (; i >= 0; i--) {
465 if (scan_buffer[i] == 0x50 &&
466 ((*reinterpret_cast<uint32_t*>(&scan_buffer[i])) == EocdRecord::kSignature)) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000467 ALOGV("+++ Found EOCD at buf+%d", i);
468 break;
469 }
470 }
471 if (i < 0) {
472 ALOGD("Zip: EOCD not found, %s is not zip", debug_file_name);
473 return kInvalidFile;
474 }
475
476 const off64_t eocd_offset = search_start + i;
Narayan Kamath926973e2014-06-09 14:18:14 +0100477 const EocdRecord* eocd = reinterpret_cast<const EocdRecord*>(scan_buffer + i);
Narayan Kamath7462f022013-11-21 13:05:04 +0000478 /*
Narayan Kamath926973e2014-06-09 14:18:14 +0100479 * Verify that there's no trailing space at the end of the central directory
480 * and its comment.
Narayan Kamath7462f022013-11-21 13:05:04 +0000481 */
Narayan Kamath926973e2014-06-09 14:18:14 +0100482 const off64_t calculated_length = eocd_offset + sizeof(EocdRecord)
483 + eocd->comment_length;
484 if (calculated_length != file_length) {
Narayan Kamath4f6b4992014-06-03 13:59:23 +0100485 ALOGW("Zip: %" PRId64 " extraneous bytes at the end of the central directory",
Narayan Kamath926973e2014-06-09 14:18:14 +0100486 static_cast<int64_t>(file_length - calculated_length));
Narayan Kamath4f6b4992014-06-03 13:59:23 +0100487 return kInvalidFile;
488 }
Narayan Kamath7462f022013-11-21 13:05:04 +0000489
Narayan Kamath926973e2014-06-09 14:18:14 +0100490 /*
491 * Grab the CD offset and size, and the number of entries in the
492 * archive and verify that they look reasonable.
493 */
494 if (eocd->cd_start_offset + eocd->cd_size > eocd_offset) {
495 ALOGW("Zip: bad offsets (dir %" PRIu32 ", size %" PRIu32 ", eocd %" PRId64 ")",
496 eocd->cd_start_offset, eocd->cd_size, static_cast<int64_t>(eocd_offset));
Narayan Kamath7462f022013-11-21 13:05:04 +0000497 return kInvalidOffset;
498 }
Narayan Kamath926973e2014-06-09 14:18:14 +0100499 if (eocd->num_records == 0) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000500 ALOGW("Zip: empty archive?");
501 return kEmptyArchive;
502 }
503
Narayan Kamath926973e2014-06-09 14:18:14 +0100504 ALOGV("+++ num_entries=%" PRIu32 "dir_size=%" PRIu32 " dir_offset=%" PRIu32,
505 eocd->num_records, eocd->cd_size, eocd->cd_start_offset);
Narayan Kamath7462f022013-11-21 13:05:04 +0000506
507 /*
508 * It all looks good. Create a mapping for the CD, and set the fields
509 * in archive.
510 */
Dmitriy Ivanov4b67f832015-03-06 10:22:34 -0800511 if (!archive->directory_map.create(debug_file_name, fd,
512 static_cast<off64_t>(eocd->cd_start_offset),
513 static_cast<size_t>(eocd->cd_size), true /* read only */) ) {
Narayan Kamatheaf98852013-12-11 14:51:51 +0000514 return kMmapFailed;
Narayan Kamath7462f022013-11-21 13:05:04 +0000515 }
516
Narayan Kamath926973e2014-06-09 14:18:14 +0100517 archive->num_entries = eocd->num_records;
518 archive->directory_offset = eocd->cd_start_offset;
Narayan Kamath7462f022013-11-21 13:05:04 +0000519
520 return 0;
521}
522
523/*
524 * Find the zip Central Directory and memory-map it.
525 *
526 * On success, returns 0 after populating fields from the EOCD area:
527 * directory_offset
528 * directory_map
529 * num_entries
530 */
531static int32_t MapCentralDirectory(int fd, const char* debug_file_name,
532 ZipArchive* archive) {
533
534 // Test file length. We use lseek64 to make sure the file
535 // is small enough to be a zip file (Its size must be less than
536 // 0xffffffff bytes).
537 off64_t file_length = lseek64(fd, 0, SEEK_END);
538 if (file_length == -1) {
539 ALOGV("Zip: lseek on fd %d failed", fd);
540 return kInvalidFile;
541 }
542
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800543 if (file_length > static_cast<off64_t>(0xffffffff)) {
Narayan Kamath926973e2014-06-09 14:18:14 +0100544 ALOGV("Zip: zip file too long %" PRId64, static_cast<int64_t>(file_length));
Narayan Kamath7462f022013-11-21 13:05:04 +0000545 return kInvalidFile;
546 }
547
Narayan Kamath926973e2014-06-09 14:18:14 +0100548 if (file_length < static_cast<off64_t>(sizeof(EocdRecord))) {
549 ALOGV("Zip: length %" PRId64 " is too small to be zip", static_cast<int64_t>(file_length));
Narayan Kamath7462f022013-11-21 13:05:04 +0000550 return kInvalidFile;
551 }
552
553 /*
554 * Perform the traditional EOCD snipe hunt.
555 *
556 * We're searching for the End of Central Directory magic number,
557 * which appears at the start of the EOCD block. It's followed by
558 * 18 bytes of EOCD stuff and up to 64KB of archive comment. We
559 * need to read the last part of the file into a buffer, dig through
560 * it to find the magic number, parse some values out, and use those
561 * to determine the extent of the CD.
562 *
563 * We start by pulling in the last part of the file.
564 */
Narayan Kamath926973e2014-06-09 14:18:14 +0100565 off64_t read_amount = kMaxEOCDSearch;
566 if (file_length < read_amount) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000567 read_amount = file_length;
568 }
569
Narayan Kamath926973e2014-06-09 14:18:14 +0100570 uint8_t* scan_buffer = reinterpret_cast<uint8_t*>(malloc(read_amount));
Narayan Kamath7462f022013-11-21 13:05:04 +0000571 int32_t result = MapCentralDirectory0(fd, debug_file_name, archive,
572 file_length, read_amount, scan_buffer);
573
574 free(scan_buffer);
575 return result;
576}
577
578/*
579 * Parses the Zip archive's Central Directory. Allocates and populates the
580 * hash table.
581 *
582 * Returns 0 on success.
583 */
584static int32_t ParseZipArchive(ZipArchive* archive) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800585 const uint8_t* const cd_ptr =
586 reinterpret_cast<const uint8_t*>(archive->directory_map.getDataPtr());
Dmitriy Ivanov4b67f832015-03-06 10:22:34 -0800587 const size_t cd_length = archive->directory_map.getDataLength();
Narayan Kamath926973e2014-06-09 14:18:14 +0100588 const uint16_t num_entries = archive->num_entries;
Narayan Kamath7462f022013-11-21 13:05:04 +0000589
590 /*
591 * Create hash table. We have a minimum 75% load factor, possibly as
592 * low as 50% after we round off to a power of 2. There must be at
593 * least one unused entry to avoid an infinite loop during creation.
594 */
595 archive->hash_table_size = RoundUpPower2(1 + (num_entries * 4) / 3);
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800596 archive->hash_table = reinterpret_cast<ZipEntryName*>(calloc(archive->hash_table_size,
597 sizeof(ZipEntryName)));
Narayan Kamath7462f022013-11-21 13:05:04 +0000598
599 /*
600 * Walk through the central directory, adding entries to the hash
601 * table and verifying values.
602 */
Narayan Kamath926973e2014-06-09 14:18:14 +0100603 const uint8_t* const cd_end = cd_ptr + cd_length;
Narayan Kamath7462f022013-11-21 13:05:04 +0000604 const uint8_t* ptr = cd_ptr;
605 for (uint16_t i = 0; i < num_entries; i++) {
Narayan Kamath926973e2014-06-09 14:18:14 +0100606 const CentralDirectoryRecord* cdr =
607 reinterpret_cast<const CentralDirectoryRecord*>(ptr);
608 if (cdr->record_signature != CentralDirectoryRecord::kSignature) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700609 ALOGW("Zip: missed a central dir sig (at %" PRIu16 ")", i);
Dmitriy Ivanov3ea93da2015-03-06 11:48:47 -0800610 return -1;
Narayan Kamath7462f022013-11-21 13:05:04 +0000611 }
612
Narayan Kamath926973e2014-06-09 14:18:14 +0100613 if (ptr + sizeof(CentralDirectoryRecord) > cd_end) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700614 ALOGW("Zip: ran off the end (at %" PRIu16 ")", i);
Dmitriy Ivanov3ea93da2015-03-06 11:48:47 -0800615 return -1;
Narayan Kamath7462f022013-11-21 13:05:04 +0000616 }
617
Narayan Kamath926973e2014-06-09 14:18:14 +0100618 const off64_t local_header_offset = cdr->local_file_header_offset;
Narayan Kamath7462f022013-11-21 13:05:04 +0000619 if (local_header_offset >= archive->directory_offset) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800620 ALOGW("Zip: bad LFH offset %" PRId64 " at entry %" PRIu16,
621 static_cast<int64_t>(local_header_offset), i);
Dmitriy Ivanov3ea93da2015-03-06 11:48:47 -0800622 return -1;
Narayan Kamath7462f022013-11-21 13:05:04 +0000623 }
624
Narayan Kamath926973e2014-06-09 14:18:14 +0100625 const uint16_t file_name_length = cdr->file_name_length;
626 const uint16_t extra_length = cdr->extra_field_length;
627 const uint16_t comment_length = cdr->comment_length;
Piotr Jastrzebski78271ba2014-08-15 12:53:00 +0100628 const uint8_t* file_name = ptr + sizeof(CentralDirectoryRecord);
629
Narayan Kamath044bc8e2014-12-03 18:22:53 +0000630 /* check that file name is valid UTF-8 and doesn't contain NUL (U+0000) characters */
631 if (!IsValidEntryName(file_name, file_name_length)) {
Dmitriy Ivanov3ea93da2015-03-06 11:48:47 -0800632 return -1;
Piotr Jastrzebski78271ba2014-08-15 12:53:00 +0100633 }
Narayan Kamath7462f022013-11-21 13:05:04 +0000634
635 /* add the CDE filename to the hash table */
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100636 ZipEntryName entry_name;
637 entry_name.name = file_name;
638 entry_name.name_length = file_name_length;
Narayan Kamath7462f022013-11-21 13:05:04 +0000639 const int add_result = AddToHash(archive->hash_table,
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100640 archive->hash_table_size, entry_name);
Dmitriy Ivanov3ea93da2015-03-06 11:48:47 -0800641 if (add_result != 0) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000642 ALOGW("Zip: Error adding entry to hash table %d", add_result);
Dmitriy Ivanov3ea93da2015-03-06 11:48:47 -0800643 return add_result;
Narayan Kamath7462f022013-11-21 13:05:04 +0000644 }
645
Narayan Kamath926973e2014-06-09 14:18:14 +0100646 ptr += sizeof(CentralDirectoryRecord) + file_name_length + extra_length + comment_length;
647 if ((ptr - cd_ptr) > static_cast<int64_t>(cd_length)) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700648 ALOGW("Zip: bad CD advance (%tu vs %zu) at entry %" PRIu16,
649 ptr - cd_ptr, cd_length, i);
Dmitriy Ivanov3ea93da2015-03-06 11:48:47 -0800650 return -1;
Narayan Kamath7462f022013-11-21 13:05:04 +0000651 }
652 }
Mark Salyzyn088bf902014-05-08 16:02:20 -0700653 ALOGV("+++ zip good scan %" PRIu16 " entries", num_entries);
Narayan Kamath7462f022013-11-21 13:05:04 +0000654
Dmitriy Ivanov3ea93da2015-03-06 11:48:47 -0800655 return 0;
Narayan Kamath7462f022013-11-21 13:05:04 +0000656}
657
658static int32_t OpenArchiveInternal(ZipArchive* archive,
659 const char* debug_file_name) {
660 int32_t result = -1;
661 if ((result = MapCentralDirectory(archive->fd, debug_file_name, archive))) {
662 return result;
663 }
664
665 if ((result = ParseZipArchive(archive))) {
666 return result;
667 }
668
669 return 0;
670}
671
672int32_t OpenArchiveFd(int fd, const char* debug_file_name,
Dmitriy Ivanov40b52b22014-07-15 19:33:00 -0700673 ZipArchiveHandle* handle, bool assume_ownership) {
674 ZipArchive* archive = new ZipArchive(fd, assume_ownership);
Narayan Kamath7462f022013-11-21 13:05:04 +0000675 *handle = archive;
Narayan Kamath7462f022013-11-21 13:05:04 +0000676 return OpenArchiveInternal(archive, debug_file_name);
677}
678
679int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle) {
Neil Fullerb1a113f2014-07-25 14:43:04 +0100680 const int fd = open(fileName, O_RDONLY | O_BINARY, 0);
Dmitriy Ivanov40b52b22014-07-15 19:33:00 -0700681 ZipArchive* archive = new ZipArchive(fd, true);
Narayan Kamath7462f022013-11-21 13:05:04 +0000682 *handle = archive;
683
Narayan Kamath7462f022013-11-21 13:05:04 +0000684 if (fd < 0) {
685 ALOGW("Unable to open '%s': %s", fileName, strerror(errno));
686 return kIoError;
Narayan Kamath7462f022013-11-21 13:05:04 +0000687 }
Dmitriy Ivanov40b52b22014-07-15 19:33:00 -0700688
Narayan Kamath7462f022013-11-21 13:05:04 +0000689 return OpenArchiveInternal(archive, fileName);
690}
691
692/*
693 * Close a ZipArchive, closing the file and freeing the contents.
694 */
695void CloseArchive(ZipArchiveHandle handle) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800696 ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle);
Narayan Kamath7462f022013-11-21 13:05:04 +0000697 ALOGV("Closing archive %p", archive);
Neil Fullerb1a113f2014-07-25 14:43:04 +0100698 delete archive;
Narayan Kamath7462f022013-11-21 13:05:04 +0000699}
700
701static int32_t UpdateEntryFromDataDescriptor(int fd,
702 ZipEntry *entry) {
Narayan Kamath926973e2014-06-09 14:18:14 +0100703 uint8_t ddBuf[sizeof(DataDescriptor) + sizeof(DataDescriptor::kOptSignature)];
Narayan Kamath7462f022013-11-21 13:05:04 +0000704 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, ddBuf, sizeof(ddBuf)));
705 if (actual != sizeof(ddBuf)) {
706 return kIoError;
707 }
708
Narayan Kamath926973e2014-06-09 14:18:14 +0100709 const uint32_t ddSignature = *(reinterpret_cast<const uint32_t*>(ddBuf));
710 const uint16_t offset = (ddSignature == DataDescriptor::kOptSignature) ? 4 : 0;
711 const DataDescriptor* descriptor = reinterpret_cast<const DataDescriptor*>(ddBuf + offset);
Narayan Kamath7462f022013-11-21 13:05:04 +0000712
Narayan Kamath926973e2014-06-09 14:18:14 +0100713 entry->crc32 = descriptor->crc32;
714 entry->compressed_length = descriptor->compressed_size;
715 entry->uncompressed_length = descriptor->uncompressed_size;
Narayan Kamath7462f022013-11-21 13:05:04 +0000716
717 return 0;
718}
719
720// Attempts to read |len| bytes into |buf| at offset |off|.
721//
722// This method uses pread64 on platforms that support it and
723// lseek64 + read on platforms that don't. This implies that
724// callers should not rely on the |fd| offset being incremented
725// as a side effect of this call.
726static inline ssize_t ReadAtOffset(int fd, uint8_t* buf, size_t len,
727 off64_t off) {
Yabin Cui70160f42014-11-19 20:47:18 -0800728#if !defined(_WIN32)
Narayan Kamath7462f022013-11-21 13:05:04 +0000729 return TEMP_FAILURE_RETRY(pread64(fd, buf, len, off));
730#else
731 // The only supported platform that doesn't support pread at the moment
732 // is Windows. Only recent versions of windows support unix like forks,
733 // and even there the semantics are quite different.
734 if (lseek64(fd, off, SEEK_SET) != off) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700735 ALOGW("Zip: failed seek to offset %" PRId64, off);
Narayan Kamath7462f022013-11-21 13:05:04 +0000736 return kIoError;
737 }
738
739 return TEMP_FAILURE_RETRY(read(fd, buf, len));
Yabin Cui70160f42014-11-19 20:47:18 -0800740#endif
Narayan Kamath7462f022013-11-21 13:05:04 +0000741}
742
743static int32_t FindEntry(const ZipArchive* archive, const int ent,
744 ZipEntry* data) {
745 const uint16_t nameLen = archive->hash_table[ent].name_length;
Narayan Kamath7462f022013-11-21 13:05:04 +0000746
747 // Recover the start of the central directory entry from the filename
748 // pointer. The filename is the first entry past the fixed-size data,
749 // so we can just subtract back from that.
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100750 const uint8_t* ptr = archive->hash_table[ent].name;
Narayan Kamath926973e2014-06-09 14:18:14 +0100751 ptr -= sizeof(CentralDirectoryRecord);
Narayan Kamath7462f022013-11-21 13:05:04 +0000752
753 // This is the base of our mmapped region, we have to sanity check that
754 // the name that's in the hash table is a pointer to a location within
755 // this mapped region.
Narayan Kamath926973e2014-06-09 14:18:14 +0100756 const uint8_t* base_ptr = reinterpret_cast<const uint8_t*>(
Dmitriy Ivanov4b67f832015-03-06 10:22:34 -0800757 archive->directory_map.getDataPtr());
758 if (ptr < base_ptr || ptr > base_ptr + archive->directory_map.getDataLength()) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000759 ALOGW("Zip: Invalid entry pointer");
760 return kInvalidOffset;
761 }
762
Narayan Kamath926973e2014-06-09 14:18:14 +0100763 const CentralDirectoryRecord *cdr =
764 reinterpret_cast<const CentralDirectoryRecord*>(ptr);
765
Narayan Kamath7462f022013-11-21 13:05:04 +0000766 // The offset of the start of the central directory in the zipfile.
767 // We keep this lying around so that we can sanity check all our lengths
768 // and our per-file structures.
769 const off64_t cd_offset = archive->directory_offset;
770
771 // Fill out the compression method, modification time, crc32
772 // and other interesting attributes from the central directory. These
773 // will later be compared against values from the local file header.
Narayan Kamath926973e2014-06-09 14:18:14 +0100774 data->method = cdr->compression_method;
775 data->mod_time = cdr->last_mod_time;
776 data->crc32 = cdr->crc32;
777 data->compressed_length = cdr->compressed_size;
778 data->uncompressed_length = cdr->uncompressed_size;
Narayan Kamath7462f022013-11-21 13:05:04 +0000779
780 // Figure out the local header offset from the central directory. The
781 // actual file data will begin after the local header and the name /
782 // extra comments.
Narayan Kamath926973e2014-06-09 14:18:14 +0100783 const off64_t local_header_offset = cdr->local_file_header_offset;
784 if (local_header_offset + static_cast<off64_t>(sizeof(LocalFileHeader)) >= cd_offset) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000785 ALOGW("Zip: bad local hdr offset in zip");
786 return kInvalidOffset;
787 }
788
Narayan Kamath926973e2014-06-09 14:18:14 +0100789 uint8_t lfh_buf[sizeof(LocalFileHeader)];
Narayan Kamath7462f022013-11-21 13:05:04 +0000790 ssize_t actual = ReadAtOffset(archive->fd, lfh_buf, sizeof(lfh_buf),
791 local_header_offset);
792 if (actual != sizeof(lfh_buf)) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800793 ALOGW("Zip: failed reading lfh name from offset %" PRId64,
794 static_cast<int64_t>(local_header_offset));
Narayan Kamath7462f022013-11-21 13:05:04 +0000795 return kIoError;
796 }
797
Narayan Kamath926973e2014-06-09 14:18:14 +0100798 const LocalFileHeader *lfh = reinterpret_cast<const LocalFileHeader*>(lfh_buf);
799
800 if (lfh->lfh_signature != LocalFileHeader::kSignature) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700801 ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64,
Narayan Kamath926973e2014-06-09 14:18:14 +0100802 static_cast<int64_t>(local_header_offset));
Narayan Kamath7462f022013-11-21 13:05:04 +0000803 return kInvalidOffset;
804 }
805
806 // Paranoia: Match the values specified in the local file header
807 // to those specified in the central directory.
Narayan Kamath926973e2014-06-09 14:18:14 +0100808 if ((lfh->gpb_flags & kGPBDDFlagMask) == 0) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000809 data->has_data_descriptor = 0;
Narayan Kamath926973e2014-06-09 14:18:14 +0100810 if (data->compressed_length != lfh->compressed_size
811 || data->uncompressed_length != lfh->uncompressed_size
812 || data->crc32 != lfh->crc32) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700813 ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu32 ", %" PRIu32
814 ", %" PRIx32 "}, was {%" PRIu32 ", %" PRIu32 ", %" PRIx32 "}",
Narayan Kamath7462f022013-11-21 13:05:04 +0000815 data->compressed_length, data->uncompressed_length, data->crc32,
Narayan Kamath926973e2014-06-09 14:18:14 +0100816 lfh->compressed_size, lfh->uncompressed_size, lfh->crc32);
Narayan Kamath7462f022013-11-21 13:05:04 +0000817 return kInconsistentInformation;
818 }
819 } else {
820 data->has_data_descriptor = 1;
821 }
822
823 // Check that the local file header name matches the declared
824 // name in the central directory.
Narayan Kamath926973e2014-06-09 14:18:14 +0100825 if (lfh->file_name_length == nameLen) {
826 const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader);
827 if (name_offset + lfh->file_name_length >= cd_offset) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000828 ALOGW("Zip: Invalid declared length");
829 return kInvalidOffset;
830 }
831
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800832 uint8_t* name_buf = reinterpret_cast<uint8_t*>(malloc(nameLen));
Narayan Kamath7462f022013-11-21 13:05:04 +0000833 ssize_t actual = ReadAtOffset(archive->fd, name_buf, nameLen,
834 name_offset);
835
836 if (actual != nameLen) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800837 ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast<int64_t>(name_offset));
Narayan Kamath7462f022013-11-21 13:05:04 +0000838 free(name_buf);
839 return kIoError;
840 }
841
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100842 if (memcmp(archive->hash_table[ent].name, name_buf, nameLen)) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000843 free(name_buf);
844 return kInconsistentInformation;
845 }
846
847 free(name_buf);
848 } else {
849 ALOGW("Zip: lfh name did not match central directory.");
850 return kInconsistentInformation;
851 }
852
Narayan Kamath926973e2014-06-09 14:18:14 +0100853 const off64_t data_offset = local_header_offset + sizeof(LocalFileHeader)
854 + lfh->file_name_length + lfh->extra_field_length;
Narayan Kamath48953a12014-01-24 12:32:39 +0000855 if (data_offset > cd_offset) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800856 ALOGW("Zip: bad data offset %" PRId64 " in zip", static_cast<int64_t>(data_offset));
Narayan Kamath7462f022013-11-21 13:05:04 +0000857 return kInvalidOffset;
858 }
859
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800860 if (static_cast<off64_t>(data_offset + data->compressed_length) > cd_offset) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700861 ALOGW("Zip: bad compressed length in zip (%" PRId64 " + %" PRIu32 " > %" PRId64 ")",
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800862 static_cast<int64_t>(data_offset), data->compressed_length, static_cast<int64_t>(cd_offset));
Narayan Kamath7462f022013-11-21 13:05:04 +0000863 return kInvalidOffset;
864 }
865
866 if (data->method == kCompressStored &&
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800867 static_cast<off64_t>(data_offset + data->uncompressed_length) > cd_offset) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700868 ALOGW("Zip: bad uncompressed length in zip (%" PRId64 " + %" PRIu32 " > %" PRId64 ")",
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800869 static_cast<int64_t>(data_offset), data->uncompressed_length,
870 static_cast<int64_t>(cd_offset));
Narayan Kamath7462f022013-11-21 13:05:04 +0000871 return kInvalidOffset;
872 }
873
874 data->offset = data_offset;
875 return 0;
876}
877
878struct IterationHandle {
879 uint32_t position;
Piotr Jastrzebski10aa9a02014-08-19 09:01:20 +0100880 // We're not using vector here because this code is used in the Windows SDK
881 // where the STL is not available.
Piotr Jastrzebski8e085362014-08-18 11:37:45 +0100882 const uint8_t* prefix;
883 uint16_t prefix_len;
Narayan Kamath7462f022013-11-21 13:05:04 +0000884 ZipArchive* archive;
Piotr Jastrzebski8e085362014-08-18 11:37:45 +0100885
886 IterationHandle() : prefix(NULL), prefix_len(0) {}
887
888 IterationHandle(const ZipEntryName& prefix_name)
889 : prefix_len(prefix_name.name_length) {
890 uint8_t* prefix_copy = new uint8_t[prefix_len];
Piotr Jastrzebski10aa9a02014-08-19 09:01:20 +0100891 memcpy(prefix_copy, prefix_name.name, prefix_len);
Piotr Jastrzebski8e085362014-08-18 11:37:45 +0100892 prefix = prefix_copy;
893 }
894
895 ~IterationHandle() {
Piotr Jastrzebski10aa9a02014-08-19 09:01:20 +0100896 delete[] prefix;
Piotr Jastrzebski8e085362014-08-18 11:37:45 +0100897 }
Narayan Kamath7462f022013-11-21 13:05:04 +0000898};
899
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100900int32_t StartIteration(ZipArchiveHandle handle, void** cookie_ptr,
901 const ZipEntryName* optional_prefix) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800902 ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle);
Narayan Kamath7462f022013-11-21 13:05:04 +0000903
904 if (archive == NULL || archive->hash_table == NULL) {
905 ALOGW("Zip: Invalid ZipArchiveHandle");
906 return kInvalidHandle;
907 }
908
Piotr Jastrzebski8e085362014-08-18 11:37:45 +0100909 IterationHandle* cookie =
910 optional_prefix != NULL ? new IterationHandle(*optional_prefix) : new IterationHandle();
Narayan Kamath7462f022013-11-21 13:05:04 +0000911 cookie->position = 0;
Narayan Kamath7462f022013-11-21 13:05:04 +0000912 cookie->archive = archive;
Narayan Kamath7462f022013-11-21 13:05:04 +0000913
914 *cookie_ptr = cookie ;
915 return 0;
916}
917
Piotr Jastrzebski79c8b342014-08-08 14:02:17 +0100918void EndIteration(void* cookie) {
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100919 delete reinterpret_cast<IterationHandle*>(cookie);
Piotr Jastrzebski79c8b342014-08-08 14:02:17 +0100920}
921
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100922int32_t FindEntry(const ZipArchiveHandle handle, const ZipEntryName& entryName,
Narayan Kamath7462f022013-11-21 13:05:04 +0000923 ZipEntry* data) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800924 const ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle);
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100925 if (entryName.name_length == 0) {
926 ALOGW("Zip: Invalid filename %.*s", entryName.name_length, entryName.name);
Narayan Kamath7462f022013-11-21 13:05:04 +0000927 return kInvalidEntryName;
928 }
929
930 const int64_t ent = EntryToIndex(archive->hash_table,
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100931 archive->hash_table_size, entryName);
Narayan Kamath7462f022013-11-21 13:05:04 +0000932
933 if (ent < 0) {
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100934 ALOGV("Zip: Could not find entry %.*s", entryName.name_length, entryName.name);
Narayan Kamath7462f022013-11-21 13:05:04 +0000935 return ent;
936 }
937
938 return FindEntry(archive, ent, data);
939}
940
941int32_t Next(void* cookie, ZipEntry* data, ZipEntryName* name) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800942 IterationHandle* handle = reinterpret_cast<IterationHandle*>(cookie);
Narayan Kamath7462f022013-11-21 13:05:04 +0000943 if (handle == NULL) {
944 return kInvalidHandle;
945 }
946
947 ZipArchive* archive = handle->archive;
948 if (archive == NULL || archive->hash_table == NULL) {
949 ALOGW("Zip: Invalid ZipArchiveHandle");
950 return kInvalidHandle;
951 }
952
953 const uint32_t currentOffset = handle->position;
954 const uint32_t hash_table_length = archive->hash_table_size;
955 const ZipEntryName *hash_table = archive->hash_table;
956
957 for (uint32_t i = currentOffset; i < hash_table_length; ++i) {
958 if (hash_table[i].name != NULL &&
Piotr Jastrzebski8e085362014-08-18 11:37:45 +0100959 (handle->prefix_len == 0 ||
960 (memcmp(handle->prefix, hash_table[i].name, handle->prefix_len) == 0))) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000961 handle->position = (i + 1);
962 const int error = FindEntry(archive, i, data);
963 if (!error) {
964 name->name = hash_table[i].name;
965 name->name_length = hash_table[i].name_length;
966 }
967
968 return error;
969 }
970 }
971
972 handle->position = 0;
973 return kIterationEnd;
974}
975
Dmitriy Ivanovf94e1592015-03-06 13:27:59 -0800976// This method is using libz macros with old-style-casts
977#pragma GCC diagnostic push
978#pragma GCC diagnostic ignored "-Wold-style-cast"
979static inline int zlib_inflateInit2(z_stream* stream, int window_bits) {
980 return inflateInit2(stream, window_bits);
981}
982#pragma GCC diagnostic pop
983
Narayan Kamath7462f022013-11-21 13:05:04 +0000984static int32_t InflateToFile(int fd, const ZipEntry* entry,
985 uint8_t* begin, uint32_t length,
986 uint64_t* crc_out) {
Dmitriy Ivanovedbabfe2015-03-12 09:58:15 -0700987 const size_t kBufSize = 32768;
988 std::vector<uint8_t> read_buf(kBufSize);
989 std::vector<uint8_t> write_buf(kBufSize);
Narayan Kamath7462f022013-11-21 13:05:04 +0000990 z_stream zstream;
991 int zerr;
992
993 /*
994 * Initialize the zlib stream struct.
995 */
996 memset(&zstream, 0, sizeof(zstream));
997 zstream.zalloc = Z_NULL;
998 zstream.zfree = Z_NULL;
999 zstream.opaque = Z_NULL;
1000 zstream.next_in = NULL;
1001 zstream.avail_in = 0;
Dmitriy Ivanovedbabfe2015-03-12 09:58:15 -07001002 zstream.next_out = &write_buf[0];
Narayan Kamath7462f022013-11-21 13:05:04 +00001003 zstream.avail_out = kBufSize;
1004 zstream.data_type = Z_UNKNOWN;
1005
1006 /*
1007 * Use the undocumented "negative window bits" feature to tell zlib
1008 * that there's no zlib header waiting for it.
1009 */
Dmitriy Ivanovf94e1592015-03-06 13:27:59 -08001010 zerr = zlib_inflateInit2(&zstream, -MAX_WBITS);
Narayan Kamath7462f022013-11-21 13:05:04 +00001011 if (zerr != Z_OK) {
1012 if (zerr == Z_VERSION_ERROR) {
1013 ALOGE("Installed zlib is not compatible with linked version (%s)",
1014 ZLIB_VERSION);
1015 } else {
1016 ALOGW("Call to inflateInit2 failed (zerr=%d)", zerr);
1017 }
1018
1019 return kZlibError;
1020 }
1021
Dmitriy Ivanov1f741e52015-03-06 14:26:37 -08001022 auto zstream_deleter = [](z_stream* stream) {
1023 inflateEnd(stream); /* free up any allocated structures */
1024 };
1025
1026 std::unique_ptr<z_stream, decltype(zstream_deleter)> zstream_guard(&zstream, zstream_deleter);
1027
Narayan Kamath7462f022013-11-21 13:05:04 +00001028 const uint32_t uncompressed_length = entry->uncompressed_length;
1029
1030 uint32_t compressed_length = entry->compressed_length;
1031 uint32_t write_count = 0;
1032 do {
1033 /* read as much as we can */
1034 if (zstream.avail_in == 0) {
Mark Salyzyn51d562d2014-05-05 14:38:05 -07001035 const ZD_TYPE getSize = (compressed_length > kBufSize) ? kBufSize : compressed_length;
Dmitriy Ivanovedbabfe2015-03-12 09:58:15 -07001036 const ZD_TYPE actual = TEMP_FAILURE_RETRY(read(fd, &read_buf[0], getSize));
Narayan Kamath7462f022013-11-21 13:05:04 +00001037 if (actual != getSize) {
Mark Salyzyn51d562d2014-05-05 14:38:05 -07001038 ALOGW("Zip: inflate read failed (" ZD " vs " ZD ")", actual, getSize);
Dmitriy Ivanov1f741e52015-03-06 14:26:37 -08001039 return kIoError;
Narayan Kamath7462f022013-11-21 13:05:04 +00001040 }
1041
1042 compressed_length -= getSize;
1043
Dmitriy Ivanovedbabfe2015-03-12 09:58:15 -07001044 zstream.next_in = &read_buf[0];
Narayan Kamath7462f022013-11-21 13:05:04 +00001045 zstream.avail_in = getSize;
1046 }
1047
1048 /* uncompress the data */
1049 zerr = inflate(&zstream, Z_NO_FLUSH);
1050 if (zerr != Z_OK && zerr != Z_STREAM_END) {
1051 ALOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)",
1052 zerr, zstream.next_in, zstream.avail_in,
1053 zstream.next_out, zstream.avail_out);
Dmitriy Ivanov1f741e52015-03-06 14:26:37 -08001054 return kZlibError;
Narayan Kamath7462f022013-11-21 13:05:04 +00001055 }
1056
1057 /* write when we're full or when we're done */
1058 if (zstream.avail_out == 0 ||
1059 (zerr == Z_STREAM_END && zstream.avail_out != kBufSize)) {
Dmitriy Ivanovedbabfe2015-03-12 09:58:15 -07001060 const size_t write_size = zstream.next_out - &write_buf[0];
Narayan Kamath7462f022013-11-21 13:05:04 +00001061 // The file might have declared a bogus length.
1062 if (write_size + write_count > length) {
Dmitriy Ivanov1f741e52015-03-06 14:26:37 -08001063 return -1;
Narayan Kamath7462f022013-11-21 13:05:04 +00001064 }
Dmitriy Ivanovedbabfe2015-03-12 09:58:15 -07001065 memcpy(begin + write_count, &write_buf[0], write_size);
Narayan Kamath7462f022013-11-21 13:05:04 +00001066 write_count += write_size;
1067
Dmitriy Ivanovedbabfe2015-03-12 09:58:15 -07001068 zstream.next_out = &write_buf[0];
Narayan Kamath7462f022013-11-21 13:05:04 +00001069 zstream.avail_out = kBufSize;
1070 }
1071 } while (zerr == Z_OK);
1072
1073 assert(zerr == Z_STREAM_END); /* other errors should've been caught */
1074
1075 // stream.adler holds the crc32 value for such streams.
1076 *crc_out = zstream.adler;
1077
1078 if (zstream.total_out != uncompressed_length || compressed_length != 0) {
Mark Salyzyn088bf902014-05-08 16:02:20 -07001079 ALOGW("Zip: size mismatch on inflated file (%lu vs %" PRIu32 ")",
Narayan Kamath7462f022013-11-21 13:05:04 +00001080 zstream.total_out, uncompressed_length);
Dmitriy Ivanov1f741e52015-03-06 14:26:37 -08001081 return kInconsistentInformation;
Narayan Kamath7462f022013-11-21 13:05:04 +00001082 }
1083
Dmitriy Ivanov1f741e52015-03-06 14:26:37 -08001084 return 0;
Narayan Kamath7462f022013-11-21 13:05:04 +00001085}
1086
1087int32_t ExtractToMemory(ZipArchiveHandle handle,
1088 ZipEntry* entry, uint8_t* begin, uint32_t size) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -08001089 ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle);
Narayan Kamath7462f022013-11-21 13:05:04 +00001090 const uint16_t method = entry->method;
1091 off64_t data_offset = entry->offset;
1092
1093 if (lseek64(archive->fd, data_offset, SEEK_SET) != data_offset) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -08001094 ALOGW("Zip: lseek to data at %" PRId64 " failed", static_cast<int64_t>(data_offset));
Narayan Kamath7462f022013-11-21 13:05:04 +00001095 return kIoError;
1096 }
1097
1098 // this should default to kUnknownCompressionMethod.
1099 int32_t return_value = -1;
1100 uint64_t crc = 0;
1101 if (method == kCompressStored) {
1102 return_value = CopyFileToFile(archive->fd, begin, size, &crc);
1103 } else if (method == kCompressDeflated) {
1104 return_value = InflateToFile(archive->fd, entry, begin, size, &crc);
1105 }
1106
1107 if (!return_value && entry->has_data_descriptor) {
1108 return_value = UpdateEntryFromDataDescriptor(archive->fd, entry);
1109 if (return_value) {
1110 return return_value;
1111 }
1112 }
1113
1114 // TODO: Fix this check by passing the right flags to inflate2 so that
1115 // it calculates the CRC for us.
1116 if (entry->crc32 != crc && false) {
Mark Salyzyn088bf902014-05-08 16:02:20 -07001117 ALOGW("Zip: crc mismatch: expected %" PRIu32 ", was %" PRIu64, entry->crc32, crc);
Narayan Kamath7462f022013-11-21 13:05:04 +00001118 return kInconsistentInformation;
1119 }
1120
1121 return return_value;
1122}
1123
1124int32_t ExtractEntryToFile(ZipArchiveHandle handle,
1125 ZipEntry* entry, int fd) {
1126 const int32_t declared_length = entry->uncompressed_length;
1127
Narayan Kamath00a258c2013-12-13 16:06:19 +00001128 const off64_t current_offset = lseek64(fd, 0, SEEK_CUR);
1129 if (current_offset == -1) {
1130 ALOGW("Zip: unable to seek to current location on fd %d: %s", fd,
1131 strerror(errno));
Narayan Kamath7462f022013-11-21 13:05:04 +00001132 return kIoError;
1133 }
1134
Narayan Kamath00a258c2013-12-13 16:06:19 +00001135 int result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length + current_offset));
1136 if (result == -1) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -07001137 ALOGW("Zip: unable to truncate file to %" PRId64 ": %s",
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -08001138 static_cast<int64_t>(declared_length + current_offset), strerror(errno));
Narayan Kamath00a258c2013-12-13 16:06:19 +00001139 return kIoError;
1140 }
1141
Narayan Kamath48953a12014-01-24 12:32:39 +00001142 // Don't attempt to map a region of length 0. We still need the
1143 // ftruncate() though, since the API guarantees that we will truncate
1144 // the file to the end of the uncompressed output.
1145 if (declared_length == 0) {
1146 return 0;
1147 }
1148
Dmitriy Ivanov4b67f832015-03-06 10:22:34 -08001149 android::FileMap map;
1150 if (!map.create(kTempMappingFileName, fd, current_offset, declared_length, false)) {
Narayan Kamatheaf98852013-12-11 14:51:51 +00001151 return kMmapFailed;
Narayan Kamath7462f022013-11-21 13:05:04 +00001152 }
1153
Narayan Kamatheaf98852013-12-11 14:51:51 +00001154 const int32_t error = ExtractToMemory(handle, entry,
Dmitriy Ivanov4b67f832015-03-06 10:22:34 -08001155 reinterpret_cast<uint8_t*>(map.getDataPtr()),
1156 map.getDataLength());
Narayan Kamath7462f022013-11-21 13:05:04 +00001157 return error;
1158}
1159
1160const char* ErrorCodeString(int32_t error_code) {
1161 if (error_code > kErrorMessageLowerBound && error_code < kErrorMessageUpperBound) {
1162 return kErrorMessages[error_code * -1];
1163 }
1164
1165 return kErrorMessages[0];
1166}
1167
1168int GetFileDescriptor(const ZipArchiveHandle handle) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -08001169 return reinterpret_cast<ZipArchive*>(handle)->fd;
Narayan Kamath7462f022013-11-21 13:05:04 +00001170}
1171