blob: 3716343aeeb3c4052236a45747a473e3887a8dfb [file] [log] [blame]
Narayan Kamath7462f022013-11-21 13:05:04 +00001/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Read-only access to Zip archives, with minimal heap allocation.
19 */
Narayan Kamath7462f022013-11-21 13:05:04 +000020
21#include <assert.h>
22#include <errno.h>
Mark Salyzyn99ef9912014-03-14 14:26:22 -070023#include <fcntl.h>
24#include <inttypes.h>
Narayan Kamath7462f022013-11-21 13:05:04 +000025#include <limits.h>
Narayan Kamath7462f022013-11-21 13:05:04 +000026#include <stdlib.h>
27#include <string.h>
Narayan Kamath7462f022013-11-21 13:05:04 +000028#include <unistd.h>
29
Dan Albert1ae07642015-04-09 14:11:18 -070030#include <memory>
31#include <vector>
32
Narayan Kamathe97e66e2015-04-27 16:25:53 +010033#include "base/file.h"
Dan Albert1ae07642015-04-09 14:11:18 -070034#include "base/macros.h" // TEMP_FAILURE_RETRY may or may not be in unistd
35#include "base/memory.h"
36#include "log/log.h"
37#include "utils/Compat.h"
38#include "utils/FileMap.h"
39#include "zlib.h"
Narayan Kamath7462f022013-11-21 13:05:04 +000040
Narayan Kamath044bc8e2014-12-03 18:22:53 +000041#include "entry_name_utils-inl.h"
Mark Salyzyn99ef9912014-03-14 14:26:22 -070042#include "ziparchive/zip_archive.h"
43
Dan Albert1ae07642015-04-09 14:11:18 -070044using android::base::get_unaligned;
Narayan Kamath044bc8e2014-12-03 18:22:53 +000045
Narayan Kamath926973e2014-06-09 14:18:14 +010046// This is for windows. If we don't open a file in binary mode, weird
Narayan Kamath7462f022013-11-21 13:05:04 +000047// things will happen.
48#ifndef O_BINARY
49#define O_BINARY 0
50#endif
51
// The "end of central directory" (EOCD) record. Each archive contains
// exactly one such record, which appears at the end of the archive. It
// holds archive-wide information such as the number of entries in the
// archive and the offset of the central directory.
//
// This struct is packed and is overlaid directly on bytes read from the
// file, so the order and width of its fields must not change.
struct EocdRecord {
  static const uint32_t kSignature = 0x06054b50;

  // End of central directory signature, should always be |kSignature|.
  uint32_t eocd_signature;
  // The number of the current "disk", i.e. the "disk" that this central
  // directory is on.
  //
  // This implementation assumes that each archive spans a single disk only.
  uint16_t disk_num;
  // The disk where the central directory starts.
  //
  // This implementation assumes that each archive spans a single disk only.
  uint16_t cd_start_disk;
  // The number of central directory records on this disk.
  //
  // This implementation assumes that each archive spans a single disk
  // only, i.e. that num_records_on_disk == num_records.
  uint16_t num_records_on_disk;
  // The total number of central directory records.
  uint16_t num_records;
  // The size of the central directory (in bytes).
  uint32_t cd_size;
  // The offset of the start of the central directory, relative to the
  // start of the file.
  uint32_t cd_start_offset;
  // Length (in bytes) of the central directory comment.
  uint16_t comment_length;

 private:
  // Instances are never constructed or copied; the struct is only used
  // as a typed view over a byte buffer.
  EocdRecord() = default;
  EocdRecord(const EocdRecord&) = delete;
  EocdRecord& operator=(const EocdRecord&) = delete;
} __attribute__((packed));

// Catch accidental field changes that would break the overlay on file bytes.
static_assert(sizeof(EocdRecord) == 22,
              "EocdRecord must match the 22 byte on-disk EOCD layout");
Narayan Kamath7462f022013-11-21 13:05:04 +000092
// The fixed length fields of a single record in the central directory of
// the archive. In addition to the fields listed here, each central
// directory record is followed by a variable length "file_name",
// "extra_field" and comment, whose lengths are given by
// |file_name_length|, |extra_field_length| and |comment_length|
// respectively.
//
// This struct is packed and is overlaid directly on the mapped central
// directory, so the order and width of its fields must not change.
struct CentralDirectoryRecord {
  static const uint32_t kSignature = 0x02014b50;

  // The start of record signature. Must be |kSignature|.
  uint32_t record_signature;
  // Tool version. Ignored by this implementation.
  uint16_t version_made_by;
  // Tool version. Ignored by this implementation.
  uint16_t version_needed;
  // The "general purpose bit flags" for this entry. The only flag value
  // that we currently check for is the "data descriptor" flag.
  uint16_t gpb_flags;
  // The compression method for this entry, one of |kCompressStored|
  // and |kCompressDeflated|.
  uint16_t compression_method;
  // The file modification time and date for this entry.
  uint16_t last_mod_time;
  uint16_t last_mod_date;
  // The CRC-32 checksum for this entry.
  uint32_t crc32;
  // The compressed size (in bytes) of this entry.
  uint32_t compressed_size;
  // The uncompressed size (in bytes) of this entry.
  uint32_t uncompressed_size;
  // The length of the entry file name in bytes. The file name appears
  // immediately after this record.
  uint16_t file_name_length;
  // The length of the extra field info (in bytes). This data appears
  // immediately after the entry file name.
  uint16_t extra_field_length;
  // The length of the entry comment (in bytes). This data appears
  // immediately after the extra field.
  uint16_t comment_length;
  // The start disk for this entry. Ignored by this implementation.
  uint16_t file_start_disk;
  // File attributes. Ignored by this implementation.
  uint16_t internal_file_attributes;
  // File attributes. Ignored by this implementation.
  uint32_t external_file_attributes;
  // The offset to the local file header for this entry, from the
  // beginning of this archive.
  uint32_t local_file_header_offset;

 private:
  // Instances are never constructed or copied; the struct is only used
  // as a typed view over mapped bytes.
  CentralDirectoryRecord() = default;
  CentralDirectoryRecord(const CentralDirectoryRecord&) = delete;
  CentralDirectoryRecord& operator=(const CentralDirectoryRecord&) = delete;
} __attribute__((packed));

// Catch accidental field changes that would break the overlay on file bytes.
static_assert(sizeof(CentralDirectoryRecord) == 46,
              "CentralDirectoryRecord must match the 46 byte on-disk layout");
Narayan Kamath7462f022013-11-21 13:05:04 +0000146
// The local file header for a given entry. This duplicates information
// present in the central directory of the archive. It is an error for
// the information here to be different from the central directory
// information for a given entry.
//
// This struct is packed and is overlaid directly on bytes read from the
// file, so the order and width of its fields must not change.
struct LocalFileHeader {
  static const uint32_t kSignature = 0x04034b50;

  // The local file header signature, must be |kSignature|.
  uint32_t lfh_signature;
  // Tool version. Ignored by this implementation.
  uint16_t version_needed;
  // The "general purpose bit flags" for this entry. The only flag value
  // that we currently check for is the "data descriptor" flag.
  uint16_t gpb_flags;
  // The compression method for this entry, one of |kCompressStored|
  // and |kCompressDeflated|.
  uint16_t compression_method;
  // The file modification time and date for this entry.
  uint16_t last_mod_time;
  uint16_t last_mod_date;
  // The CRC-32 checksum for this entry.
  uint32_t crc32;
  // The compressed size (in bytes) of this entry.
  uint32_t compressed_size;
  // The uncompressed size (in bytes) of this entry.
  uint32_t uncompressed_size;
  // The length of the entry file name in bytes. The file name appears
  // immediately after this record.
  uint16_t file_name_length;
  // The length of the extra field info (in bytes). This data appears
  // immediately after the entry file name.
  uint16_t extra_field_length;

 private:
  // Instances are never constructed or copied; the struct is only used
  // as a typed view over a byte buffer.
  LocalFileHeader() = default;
  LocalFileHeader(const LocalFileHeader&) = delete;
  LocalFileHeader& operator=(const LocalFileHeader&) = delete;
} __attribute__((packed));

// Catch accidental field changes that would break the overlay on file bytes.
static_assert(sizeof(LocalFileHeader) == 30,
              "LocalFileHeader must match the 30 byte on-disk layout");
184
// The data descriptor of an entry: its CRC-32 and sizes. On disk the
// three fields may be preceded by the optional |kOptSignature| word,
// which is not part of this struct.
//
// This struct is packed and is overlaid directly on bytes read from the
// file, so the order and width of its fields must not change.
struct DataDescriptor {
  // The *optional* data descriptor start signature.
  static const uint32_t kOptSignature = 0x08074b50;

  // CRC-32 checksum of the entry.
  uint32_t crc32;
  // Compressed size of the entry.
  uint32_t compressed_size;
  // Uncompressed size of the entry.
  uint32_t uncompressed_size;

 private:
  // Instances are never constructed or copied; the struct is only used
  // as a typed view over a byte buffer.
  DataDescriptor() = default;
  DataDescriptor(const DataDescriptor&) = delete;
  DataDescriptor& operator=(const DataDescriptor&) = delete;
} __attribute__((packed));

// Catch accidental field changes that would break the overlay on file bytes.
static_assert(sizeof(DataDescriptor) == 12,
              "DataDescriptor must match the 12 byte on-disk layout");
199
Narayan Kamath926973e2014-06-09 14:18:14 +0100200
Piotr Jastrzebskibd0a7482014-08-13 09:49:25 +0000201static const uint32_t kGPBDDFlagMask = 0x0008; // mask value that signifies that the entry has a DD
Narayan Kamath7462f022013-11-21 13:05:04 +0000202
Narayan Kamath926973e2014-06-09 14:18:14 +0100203// The maximum size of a central directory or a file
204// comment in bytes.
205static const uint32_t kMaxCommentLen = 65535;
206
207// The maximum number of bytes to scan backwards for the EOCD start.
208static const uint32_t kMaxEOCDSearch = kMaxCommentLen + sizeof(EocdRecord);
209
// Human readable descriptions of the error codes declared below. The
// entries are ordered so that the message for error code |e| sits at
// index |-e|; index 0 is the catch-all for unknown codes.
static const char* const kErrorMessages[] = {
  "Unknown return code.",
  "Iteration ended",
  "Zlib error",
  "Invalid file",
  "Invalid handle",
  "Duplicate entries in archive",
  "Empty archive",
  "Entry not found",
  "Invalid offset",
  "Inconsistent information",
  "Invalid entry name",
  "I/O Error",
  "File mapping failed"
};

// Exclusive upper bound of the error code range.
static const int32_t kErrorMessageUpperBound = 0;

// Iteration over the archive has finished.
static const int32_t kIterationEnd = -1;

// We encountered a Zlib error when inflating a stream from this file.
// Usually indicates file corruption.
static const int32_t kZlibError = -2;

// The input file cannot be processed as a zip archive. Usually because
// it's too small, too large or does not have a valid signature.
static const int32_t kInvalidFile = -3;

// An invalid iteration / ziparchive handle was passed in as an input
// argument.
static const int32_t kInvalidHandle = -4;

// The zip archive contained two (or possibly more) entries with the same
// name.
static const int32_t kDuplicateEntry = -5;

// The zip archive contains no entries.
static const int32_t kEmptyArchive = -6;

// The specified entry was not found in the archive.
static const int32_t kEntryNotFound = -7;

// The zip archive contained an invalid local file header pointer.
static const int32_t kInvalidOffset = -8;

// The zip archive contained inconsistent entry information. This could
// be because the central directory & local file header did not agree, or
// if the actual uncompressed length or crc32 do not match their declared
// values.
static const int32_t kInconsistentInformation = -9;

// An invalid entry name was encountered.
static const int32_t kInvalidEntryName = -10;

// An I/O related system call (read, lseek, ftruncate, map) failed.
static const int32_t kIoError = -11;

// We were not able to mmap the central directory or entry contents.
static const int32_t kMmapFailed = -12;

// Exclusive lower bound of the error code range.
static const int32_t kErrorMessageLowerBound = -13;

// Adding an error code without a matching message (or vice versa) must
// fail at compile time rather than index past the end of the table.
static_assert(sizeof(kErrorMessages) / sizeof(kErrorMessages[0]) ==
                  static_cast<size_t>(-kErrorMessageLowerBound),
              "kErrorMessages must have one entry per error code");
Narayan Kamath7462f022013-11-21 13:05:04 +0000271
Narayan Kamath7462f022013-11-21 13:05:04 +0000272/*
273 * A Read-only Zip archive.
274 *
275 * We want "open" and "find entry by name" to be fast operations, and
276 * we want to use as little memory as possible. We memory-map the zip
277 * central directory, and load a hash table with pointers to the filenames
278 * (which aren't null-terminated). The other fields are at a fixed offset
279 * from the filename, so we don't need to extract those (but we do need
280 * to byte-read and endian-swap them every time we want them).
281 *
282 * It's possible that somebody has handed us a massive (~1GB) zip archive,
283 * so we can't expect to mmap the entire file.
284 *
285 * To speed comparisons when doing a lookup by name, we could make the mapping
286 * "private" (copy-on-write) and null-terminate the filenames after verifying
287 * the record structure. However, this requires a private mapping of
288 * every page that the Central Directory touches. Easier to tuck a copy
289 * of the string length into the hash table entry.
290 */
291struct ZipArchive {
292 /* open Zip archive */
Neil Fullerb1a113f2014-07-25 14:43:04 +0100293 const int fd;
Dmitriy Ivanov40b52b22014-07-15 19:33:00 -0700294 const bool close_file;
Narayan Kamath7462f022013-11-21 13:05:04 +0000295
296 /* mapped central directory area */
297 off64_t directory_offset;
Dmitriy Ivanov4b67f832015-03-06 10:22:34 -0800298 android::FileMap directory_map;
Narayan Kamath7462f022013-11-21 13:05:04 +0000299
300 /* number of entries in the Zip archive */
301 uint16_t num_entries;
302
303 /*
304 * We know how many entries are in the Zip archive, so we can have a
305 * fixed-size hash table. We define a load factor of 0.75 and overallocat
306 * so the maximum number entries can never be higher than
307 * ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t.
308 */
309 uint32_t hash_table_size;
Yusuke Sato07447542015-06-25 14:39:19 -0700310 ZipString* hash_table;
Neil Fullerb1a113f2014-07-25 14:43:04 +0100311
Dmitriy Ivanov40b52b22014-07-15 19:33:00 -0700312 ZipArchive(const int fd, bool assume_ownership) :
Neil Fullerb1a113f2014-07-25 14:43:04 +0100313 fd(fd),
Dmitriy Ivanov40b52b22014-07-15 19:33:00 -0700314 close_file(assume_ownership),
Neil Fullerb1a113f2014-07-25 14:43:04 +0100315 directory_offset(0),
Neil Fullerb1a113f2014-07-25 14:43:04 +0100316 num_entries(0),
317 hash_table_size(0),
318 hash_table(NULL) {}
319
320 ~ZipArchive() {
Dmitriy Ivanov40b52b22014-07-15 19:33:00 -0700321 if (close_file && fd >= 0) {
Neil Fullerb1a113f2014-07-25 14:43:04 +0100322 close(fd);
323 }
324
Neil Fullerb1a113f2014-07-25 14:43:04 +0100325 free(hash_table);
326 }
Narayan Kamath7462f022013-11-21 13:05:04 +0000327};
328
/*
 * Round up to the next highest power of 2.
 *
 * Works by copying the highest set bit of (val - 1) into every lower bit
 * position and then adding one. A value that is already a power of two
 * is returned unchanged. An input of 0, or one above 2^31, wraps around
 * and yields 0.
 *
 * Found on http://graphics.stanford.edu/~seander/bithacks.html.
 */
static uint32_t RoundUpPower2(uint32_t val) {
  uint32_t bits = val - 1;
  // Shifts of 1, 2, 4, 8 and 16 cover all 32 bit positions.
  for (uint32_t shift = 1; shift <= 16; shift *= 2) {
    bits |= bits >> shift;
  }
  return bits + 1;
}
345
Yusuke Sato07447542015-06-25 14:39:19 -0700346static uint32_t ComputeHash(const ZipString& name) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000347 uint32_t hash = 0;
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100348 uint16_t len = name.name_length;
349 const uint8_t* str = name.name;
Narayan Kamath7462f022013-11-21 13:05:04 +0000350
351 while (len--) {
352 hash = hash * 31 + *str++;
353 }
354
355 return hash;
356}
357
358/*
359 * Convert a ZipEntry to a hash table index, verifying that it's in a
360 * valid range.
361 */
Yusuke Sato07447542015-06-25 14:39:19 -0700362static int64_t EntryToIndex(const ZipString* hash_table,
Narayan Kamath7462f022013-11-21 13:05:04 +0000363 const uint32_t hash_table_size,
Yusuke Sato07447542015-06-25 14:39:19 -0700364 const ZipString& name) {
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100365 const uint32_t hash = ComputeHash(name);
Narayan Kamath7462f022013-11-21 13:05:04 +0000366
367 // NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
368 uint32_t ent = hash & (hash_table_size - 1);
369 while (hash_table[ent].name != NULL) {
Yusuke Sato07447542015-06-25 14:39:19 -0700370 if (hash_table[ent] == name) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000371 return ent;
372 }
373
374 ent = (ent + 1) & (hash_table_size - 1);
375 }
376
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100377 ALOGV("Zip: Unable to find entry %.*s", name.name_length, name.name);
Narayan Kamath7462f022013-11-21 13:05:04 +0000378 return kEntryNotFound;
379}
380
381/*
382 * Add a new entry to the hash table.
383 */
Yusuke Sato07447542015-06-25 14:39:19 -0700384static int32_t AddToHash(ZipString *hash_table, const uint64_t hash_table_size,
385 const ZipString& name) {
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100386 const uint64_t hash = ComputeHash(name);
Narayan Kamath7462f022013-11-21 13:05:04 +0000387 uint32_t ent = hash & (hash_table_size - 1);
388
389 /*
390 * We over-allocated the table, so we're guaranteed to find an empty slot.
391 * Further, we guarantee that the hashtable size is not 0.
392 */
393 while (hash_table[ent].name != NULL) {
Yusuke Sato07447542015-06-25 14:39:19 -0700394 if (hash_table[ent] == name) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000395 // We've found a duplicate entry. We don't accept it
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100396 ALOGW("Zip: Found duplicate entry %.*s", name.name_length, name.name);
Narayan Kamath7462f022013-11-21 13:05:04 +0000397 return kDuplicateEntry;
398 }
399 ent = (ent + 1) & (hash_table_size - 1);
400 }
401
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100402 hash_table[ent].name = name.name;
403 hash_table[ent].name_length = name.name_length;
Narayan Kamath7462f022013-11-21 13:05:04 +0000404 return 0;
405}
406
Narayan Kamath7462f022013-11-21 13:05:04 +0000407static int32_t MapCentralDirectory0(int fd, const char* debug_file_name,
408 ZipArchive* archive, off64_t file_length,
Narayan Kamath926973e2014-06-09 14:18:14 +0100409 off64_t read_amount, uint8_t* scan_buffer) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000410 const off64_t search_start = file_length - read_amount;
411
412 if (lseek64(fd, search_start, SEEK_SET) != search_start) {
Narayan Kamath926973e2014-06-09 14:18:14 +0100413 ALOGW("Zip: seek %" PRId64 " failed: %s", static_cast<int64_t>(search_start),
414 strerror(errno));
Narayan Kamath7462f022013-11-21 13:05:04 +0000415 return kIoError;
416 }
Narayan Kamath926973e2014-06-09 14:18:14 +0100417 ssize_t actual = TEMP_FAILURE_RETRY(
418 read(fd, scan_buffer, static_cast<size_t>(read_amount)));
419 if (actual != static_cast<ssize_t>(read_amount)) {
420 ALOGW("Zip: read %" PRId64 " failed: %s", static_cast<int64_t>(read_amount),
421 strerror(errno));
Narayan Kamath7462f022013-11-21 13:05:04 +0000422 return kIoError;
423 }
424
425 /*
426 * Scan backward for the EOCD magic. In an archive without a trailing
427 * comment, we'll find it on the first try. (We may want to consider
428 * doing an initial minimal read; if we don't find it, retry with a
429 * second read as above.)
430 */
Narayan Kamath926973e2014-06-09 14:18:14 +0100431 int i = read_amount - sizeof(EocdRecord);
432 for (; i >= 0; i--) {
Dan Albert1ae07642015-04-09 14:11:18 -0700433 if (scan_buffer[i] == 0x50) {
434 uint32_t* sig_addr = reinterpret_cast<uint32_t*>(&scan_buffer[i]);
435 if (get_unaligned<uint32_t>(sig_addr) == EocdRecord::kSignature) {
436 ALOGV("+++ Found EOCD at buf+%d", i);
437 break;
438 }
Narayan Kamath7462f022013-11-21 13:05:04 +0000439 }
440 }
441 if (i < 0) {
442 ALOGD("Zip: EOCD not found, %s is not zip", debug_file_name);
443 return kInvalidFile;
444 }
445
446 const off64_t eocd_offset = search_start + i;
Narayan Kamath926973e2014-06-09 14:18:14 +0100447 const EocdRecord* eocd = reinterpret_cast<const EocdRecord*>(scan_buffer + i);
Narayan Kamath7462f022013-11-21 13:05:04 +0000448 /*
Narayan Kamath926973e2014-06-09 14:18:14 +0100449 * Verify that there's no trailing space at the end of the central directory
450 * and its comment.
Narayan Kamath7462f022013-11-21 13:05:04 +0000451 */
Narayan Kamath926973e2014-06-09 14:18:14 +0100452 const off64_t calculated_length = eocd_offset + sizeof(EocdRecord)
453 + eocd->comment_length;
454 if (calculated_length != file_length) {
Narayan Kamath4f6b4992014-06-03 13:59:23 +0100455 ALOGW("Zip: %" PRId64 " extraneous bytes at the end of the central directory",
Narayan Kamath926973e2014-06-09 14:18:14 +0100456 static_cast<int64_t>(file_length - calculated_length));
Narayan Kamath4f6b4992014-06-03 13:59:23 +0100457 return kInvalidFile;
458 }
Narayan Kamath7462f022013-11-21 13:05:04 +0000459
Narayan Kamath926973e2014-06-09 14:18:14 +0100460 /*
461 * Grab the CD offset and size, and the number of entries in the
462 * archive and verify that they look reasonable.
463 */
464 if (eocd->cd_start_offset + eocd->cd_size > eocd_offset) {
465 ALOGW("Zip: bad offsets (dir %" PRIu32 ", size %" PRIu32 ", eocd %" PRId64 ")",
466 eocd->cd_start_offset, eocd->cd_size, static_cast<int64_t>(eocd_offset));
Narayan Kamath7462f022013-11-21 13:05:04 +0000467 return kInvalidOffset;
468 }
Narayan Kamath926973e2014-06-09 14:18:14 +0100469 if (eocd->num_records == 0) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000470 ALOGW("Zip: empty archive?");
471 return kEmptyArchive;
472 }
473
Elliott Hughese49236b2015-06-04 15:21:59 -0700474 ALOGV("+++ num_entries=%" PRIu32 " dir_size=%" PRIu32 " dir_offset=%" PRIu32,
Narayan Kamath926973e2014-06-09 14:18:14 +0100475 eocd->num_records, eocd->cd_size, eocd->cd_start_offset);
Narayan Kamath7462f022013-11-21 13:05:04 +0000476
477 /*
478 * It all looks good. Create a mapping for the CD, and set the fields
479 * in archive.
480 */
Dmitriy Ivanov4b67f832015-03-06 10:22:34 -0800481 if (!archive->directory_map.create(debug_file_name, fd,
482 static_cast<off64_t>(eocd->cd_start_offset),
483 static_cast<size_t>(eocd->cd_size), true /* read only */) ) {
Narayan Kamatheaf98852013-12-11 14:51:51 +0000484 return kMmapFailed;
Narayan Kamath7462f022013-11-21 13:05:04 +0000485 }
486
Narayan Kamath926973e2014-06-09 14:18:14 +0100487 archive->num_entries = eocd->num_records;
488 archive->directory_offset = eocd->cd_start_offset;
Narayan Kamath7462f022013-11-21 13:05:04 +0000489
490 return 0;
491}
492
493/*
494 * Find the zip Central Directory and memory-map it.
495 *
496 * On success, returns 0 after populating fields from the EOCD area:
497 * directory_offset
498 * directory_map
499 * num_entries
500 */
501static int32_t MapCentralDirectory(int fd, const char* debug_file_name,
502 ZipArchive* archive) {
503
504 // Test file length. We use lseek64 to make sure the file
505 // is small enough to be a zip file (Its size must be less than
506 // 0xffffffff bytes).
507 off64_t file_length = lseek64(fd, 0, SEEK_END);
508 if (file_length == -1) {
509 ALOGV("Zip: lseek on fd %d failed", fd);
510 return kInvalidFile;
511 }
512
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800513 if (file_length > static_cast<off64_t>(0xffffffff)) {
Narayan Kamath926973e2014-06-09 14:18:14 +0100514 ALOGV("Zip: zip file too long %" PRId64, static_cast<int64_t>(file_length));
Narayan Kamath7462f022013-11-21 13:05:04 +0000515 return kInvalidFile;
516 }
517
Narayan Kamath926973e2014-06-09 14:18:14 +0100518 if (file_length < static_cast<off64_t>(sizeof(EocdRecord))) {
519 ALOGV("Zip: length %" PRId64 " is too small to be zip", static_cast<int64_t>(file_length));
Narayan Kamath7462f022013-11-21 13:05:04 +0000520 return kInvalidFile;
521 }
522
523 /*
524 * Perform the traditional EOCD snipe hunt.
525 *
526 * We're searching for the End of Central Directory magic number,
527 * which appears at the start of the EOCD block. It's followed by
528 * 18 bytes of EOCD stuff and up to 64KB of archive comment. We
529 * need to read the last part of the file into a buffer, dig through
530 * it to find the magic number, parse some values out, and use those
531 * to determine the extent of the CD.
532 *
533 * We start by pulling in the last part of the file.
534 */
Narayan Kamath926973e2014-06-09 14:18:14 +0100535 off64_t read_amount = kMaxEOCDSearch;
536 if (file_length < read_amount) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000537 read_amount = file_length;
538 }
539
Narayan Kamath926973e2014-06-09 14:18:14 +0100540 uint8_t* scan_buffer = reinterpret_cast<uint8_t*>(malloc(read_amount));
Narayan Kamath7462f022013-11-21 13:05:04 +0000541 int32_t result = MapCentralDirectory0(fd, debug_file_name, archive,
542 file_length, read_amount, scan_buffer);
543
544 free(scan_buffer);
545 return result;
546}
547
548/*
549 * Parses the Zip archive's Central Directory. Allocates and populates the
550 * hash table.
551 *
552 * Returns 0 on success.
553 */
554static int32_t ParseZipArchive(ZipArchive* archive) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800555 const uint8_t* const cd_ptr =
556 reinterpret_cast<const uint8_t*>(archive->directory_map.getDataPtr());
Dmitriy Ivanov4b67f832015-03-06 10:22:34 -0800557 const size_t cd_length = archive->directory_map.getDataLength();
Narayan Kamath926973e2014-06-09 14:18:14 +0100558 const uint16_t num_entries = archive->num_entries;
Narayan Kamath7462f022013-11-21 13:05:04 +0000559
560 /*
561 * Create hash table. We have a minimum 75% load factor, possibly as
562 * low as 50% after we round off to a power of 2. There must be at
563 * least one unused entry to avoid an infinite loop during creation.
564 */
565 archive->hash_table_size = RoundUpPower2(1 + (num_entries * 4) / 3);
Yusuke Sato07447542015-06-25 14:39:19 -0700566 archive->hash_table = reinterpret_cast<ZipString*>(calloc(archive->hash_table_size,
567 sizeof(ZipString)));
Narayan Kamath7462f022013-11-21 13:05:04 +0000568
569 /*
570 * Walk through the central directory, adding entries to the hash
571 * table and verifying values.
572 */
Narayan Kamath926973e2014-06-09 14:18:14 +0100573 const uint8_t* const cd_end = cd_ptr + cd_length;
Narayan Kamath7462f022013-11-21 13:05:04 +0000574 const uint8_t* ptr = cd_ptr;
575 for (uint16_t i = 0; i < num_entries; i++) {
Narayan Kamath926973e2014-06-09 14:18:14 +0100576 const CentralDirectoryRecord* cdr =
577 reinterpret_cast<const CentralDirectoryRecord*>(ptr);
578 if (cdr->record_signature != CentralDirectoryRecord::kSignature) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700579 ALOGW("Zip: missed a central dir sig (at %" PRIu16 ")", i);
Dmitriy Ivanov3ea93da2015-03-06 11:48:47 -0800580 return -1;
Narayan Kamath7462f022013-11-21 13:05:04 +0000581 }
582
Narayan Kamath926973e2014-06-09 14:18:14 +0100583 if (ptr + sizeof(CentralDirectoryRecord) > cd_end) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700584 ALOGW("Zip: ran off the end (at %" PRIu16 ")", i);
Dmitriy Ivanov3ea93da2015-03-06 11:48:47 -0800585 return -1;
Narayan Kamath7462f022013-11-21 13:05:04 +0000586 }
587
Narayan Kamath926973e2014-06-09 14:18:14 +0100588 const off64_t local_header_offset = cdr->local_file_header_offset;
Narayan Kamath7462f022013-11-21 13:05:04 +0000589 if (local_header_offset >= archive->directory_offset) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800590 ALOGW("Zip: bad LFH offset %" PRId64 " at entry %" PRIu16,
591 static_cast<int64_t>(local_header_offset), i);
Dmitriy Ivanov3ea93da2015-03-06 11:48:47 -0800592 return -1;
Narayan Kamath7462f022013-11-21 13:05:04 +0000593 }
594
Narayan Kamath926973e2014-06-09 14:18:14 +0100595 const uint16_t file_name_length = cdr->file_name_length;
596 const uint16_t extra_length = cdr->extra_field_length;
597 const uint16_t comment_length = cdr->comment_length;
Piotr Jastrzebski78271ba2014-08-15 12:53:00 +0100598 const uint8_t* file_name = ptr + sizeof(CentralDirectoryRecord);
599
Narayan Kamath044bc8e2014-12-03 18:22:53 +0000600 /* check that file name is valid UTF-8 and doesn't contain NUL (U+0000) characters */
601 if (!IsValidEntryName(file_name, file_name_length)) {
Dmitriy Ivanov3ea93da2015-03-06 11:48:47 -0800602 return -1;
Piotr Jastrzebski78271ba2014-08-15 12:53:00 +0100603 }
Narayan Kamath7462f022013-11-21 13:05:04 +0000604
605 /* add the CDE filename to the hash table */
Yusuke Sato07447542015-06-25 14:39:19 -0700606 ZipString entry_name;
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100607 entry_name.name = file_name;
608 entry_name.name_length = file_name_length;
Narayan Kamath7462f022013-11-21 13:05:04 +0000609 const int add_result = AddToHash(archive->hash_table,
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100610 archive->hash_table_size, entry_name);
Dmitriy Ivanov3ea93da2015-03-06 11:48:47 -0800611 if (add_result != 0) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000612 ALOGW("Zip: Error adding entry to hash table %d", add_result);
Dmitriy Ivanov3ea93da2015-03-06 11:48:47 -0800613 return add_result;
Narayan Kamath7462f022013-11-21 13:05:04 +0000614 }
615
Narayan Kamath926973e2014-06-09 14:18:14 +0100616 ptr += sizeof(CentralDirectoryRecord) + file_name_length + extra_length + comment_length;
617 if ((ptr - cd_ptr) > static_cast<int64_t>(cd_length)) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700618 ALOGW("Zip: bad CD advance (%tu vs %zu) at entry %" PRIu16,
619 ptr - cd_ptr, cd_length, i);
Dmitriy Ivanov3ea93da2015-03-06 11:48:47 -0800620 return -1;
Narayan Kamath7462f022013-11-21 13:05:04 +0000621 }
622 }
Mark Salyzyn088bf902014-05-08 16:02:20 -0700623 ALOGV("+++ zip good scan %" PRIu16 " entries", num_entries);
Narayan Kamath7462f022013-11-21 13:05:04 +0000624
Dmitriy Ivanov3ea93da2015-03-06 11:48:47 -0800625 return 0;
Narayan Kamath7462f022013-11-21 13:05:04 +0000626}
627
628static int32_t OpenArchiveInternal(ZipArchive* archive,
629 const char* debug_file_name) {
630 int32_t result = -1;
631 if ((result = MapCentralDirectory(archive->fd, debug_file_name, archive))) {
632 return result;
633 }
634
635 if ((result = ParseZipArchive(archive))) {
636 return result;
637 }
638
639 return 0;
640}
641
642int32_t OpenArchiveFd(int fd, const char* debug_file_name,
Dmitriy Ivanov40b52b22014-07-15 19:33:00 -0700643 ZipArchiveHandle* handle, bool assume_ownership) {
644 ZipArchive* archive = new ZipArchive(fd, assume_ownership);
Narayan Kamath7462f022013-11-21 13:05:04 +0000645 *handle = archive;
Narayan Kamath7462f022013-11-21 13:05:04 +0000646 return OpenArchiveInternal(archive, debug_file_name);
647}
648
649int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle) {
Neil Fullerb1a113f2014-07-25 14:43:04 +0100650 const int fd = open(fileName, O_RDONLY | O_BINARY, 0);
Dmitriy Ivanov40b52b22014-07-15 19:33:00 -0700651 ZipArchive* archive = new ZipArchive(fd, true);
Narayan Kamath7462f022013-11-21 13:05:04 +0000652 *handle = archive;
653
Narayan Kamath7462f022013-11-21 13:05:04 +0000654 if (fd < 0) {
655 ALOGW("Unable to open '%s': %s", fileName, strerror(errno));
656 return kIoError;
Narayan Kamath7462f022013-11-21 13:05:04 +0000657 }
Dmitriy Ivanov40b52b22014-07-15 19:33:00 -0700658
Narayan Kamath7462f022013-11-21 13:05:04 +0000659 return OpenArchiveInternal(archive, fileName);
660}
661
662/*
663 * Close a ZipArchive, closing the file and freeing the contents.
664 */
665void CloseArchive(ZipArchiveHandle handle) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800666 ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle);
Narayan Kamath7462f022013-11-21 13:05:04 +0000667 ALOGV("Closing archive %p", archive);
Neil Fullerb1a113f2014-07-25 14:43:04 +0100668 delete archive;
Narayan Kamath7462f022013-11-21 13:05:04 +0000669}
670
671static int32_t UpdateEntryFromDataDescriptor(int fd,
672 ZipEntry *entry) {
Narayan Kamath926973e2014-06-09 14:18:14 +0100673 uint8_t ddBuf[sizeof(DataDescriptor) + sizeof(DataDescriptor::kOptSignature)];
Narayan Kamath7462f022013-11-21 13:05:04 +0000674 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, ddBuf, sizeof(ddBuf)));
675 if (actual != sizeof(ddBuf)) {
676 return kIoError;
677 }
678
Narayan Kamath926973e2014-06-09 14:18:14 +0100679 const uint32_t ddSignature = *(reinterpret_cast<const uint32_t*>(ddBuf));
680 const uint16_t offset = (ddSignature == DataDescriptor::kOptSignature) ? 4 : 0;
681 const DataDescriptor* descriptor = reinterpret_cast<const DataDescriptor*>(ddBuf + offset);
Narayan Kamath7462f022013-11-21 13:05:04 +0000682
Narayan Kamath926973e2014-06-09 14:18:14 +0100683 entry->crc32 = descriptor->crc32;
684 entry->compressed_length = descriptor->compressed_size;
685 entry->uncompressed_length = descriptor->uncompressed_size;
Narayan Kamath7462f022013-11-21 13:05:04 +0000686
687 return 0;
688}
689
690// Attempts to read |len| bytes into |buf| at offset |off|.
691//
692// This method uses pread64 on platforms that support it and
693// lseek64 + read on platforms that don't. This implies that
694// callers should not rely on the |fd| offset being incremented
695// as a side effect of this call.
696static inline ssize_t ReadAtOffset(int fd, uint8_t* buf, size_t len,
697 off64_t off) {
Yabin Cui70160f42014-11-19 20:47:18 -0800698#if !defined(_WIN32)
Narayan Kamath7462f022013-11-21 13:05:04 +0000699 return TEMP_FAILURE_RETRY(pread64(fd, buf, len, off));
700#else
701 // The only supported platform that doesn't support pread at the moment
702 // is Windows. Only recent versions of windows support unix like forks,
703 // and even there the semantics are quite different.
704 if (lseek64(fd, off, SEEK_SET) != off) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700705 ALOGW("Zip: failed seek to offset %" PRId64, off);
Narayan Kamath7462f022013-11-21 13:05:04 +0000706 return kIoError;
707 }
708
709 return TEMP_FAILURE_RETRY(read(fd, buf, len));
Yabin Cui70160f42014-11-19 20:47:18 -0800710#endif
Narayan Kamath7462f022013-11-21 13:05:04 +0000711}
712
713static int32_t FindEntry(const ZipArchive* archive, const int ent,
714 ZipEntry* data) {
715 const uint16_t nameLen = archive->hash_table[ent].name_length;
Narayan Kamath7462f022013-11-21 13:05:04 +0000716
717 // Recover the start of the central directory entry from the filename
718 // pointer. The filename is the first entry past the fixed-size data,
719 // so we can just subtract back from that.
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100720 const uint8_t* ptr = archive->hash_table[ent].name;
Narayan Kamath926973e2014-06-09 14:18:14 +0100721 ptr -= sizeof(CentralDirectoryRecord);
Narayan Kamath7462f022013-11-21 13:05:04 +0000722
723 // This is the base of our mmapped region, we have to sanity check that
724 // the name that's in the hash table is a pointer to a location within
725 // this mapped region.
Narayan Kamath926973e2014-06-09 14:18:14 +0100726 const uint8_t* base_ptr = reinterpret_cast<const uint8_t*>(
Dmitriy Ivanov4b67f832015-03-06 10:22:34 -0800727 archive->directory_map.getDataPtr());
728 if (ptr < base_ptr || ptr > base_ptr + archive->directory_map.getDataLength()) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000729 ALOGW("Zip: Invalid entry pointer");
730 return kInvalidOffset;
731 }
732
Narayan Kamath926973e2014-06-09 14:18:14 +0100733 const CentralDirectoryRecord *cdr =
734 reinterpret_cast<const CentralDirectoryRecord*>(ptr);
735
Narayan Kamath7462f022013-11-21 13:05:04 +0000736 // The offset of the start of the central directory in the zipfile.
737 // We keep this lying around so that we can sanity check all our lengths
738 // and our per-file structures.
739 const off64_t cd_offset = archive->directory_offset;
740
741 // Fill out the compression method, modification time, crc32
742 // and other interesting attributes from the central directory. These
743 // will later be compared against values from the local file header.
Narayan Kamath926973e2014-06-09 14:18:14 +0100744 data->method = cdr->compression_method;
745 data->mod_time = cdr->last_mod_time;
746 data->crc32 = cdr->crc32;
747 data->compressed_length = cdr->compressed_size;
748 data->uncompressed_length = cdr->uncompressed_size;
Narayan Kamath7462f022013-11-21 13:05:04 +0000749
750 // Figure out the local header offset from the central directory. The
751 // actual file data will begin after the local header and the name /
752 // extra comments.
Narayan Kamath926973e2014-06-09 14:18:14 +0100753 const off64_t local_header_offset = cdr->local_file_header_offset;
754 if (local_header_offset + static_cast<off64_t>(sizeof(LocalFileHeader)) >= cd_offset) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000755 ALOGW("Zip: bad local hdr offset in zip");
756 return kInvalidOffset;
757 }
758
Narayan Kamath926973e2014-06-09 14:18:14 +0100759 uint8_t lfh_buf[sizeof(LocalFileHeader)];
Narayan Kamath7462f022013-11-21 13:05:04 +0000760 ssize_t actual = ReadAtOffset(archive->fd, lfh_buf, sizeof(lfh_buf),
761 local_header_offset);
762 if (actual != sizeof(lfh_buf)) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800763 ALOGW("Zip: failed reading lfh name from offset %" PRId64,
764 static_cast<int64_t>(local_header_offset));
Narayan Kamath7462f022013-11-21 13:05:04 +0000765 return kIoError;
766 }
767
Narayan Kamath926973e2014-06-09 14:18:14 +0100768 const LocalFileHeader *lfh = reinterpret_cast<const LocalFileHeader*>(lfh_buf);
769
770 if (lfh->lfh_signature != LocalFileHeader::kSignature) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700771 ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64,
Narayan Kamath926973e2014-06-09 14:18:14 +0100772 static_cast<int64_t>(local_header_offset));
Narayan Kamath7462f022013-11-21 13:05:04 +0000773 return kInvalidOffset;
774 }
775
776 // Paranoia: Match the values specified in the local file header
777 // to those specified in the central directory.
Narayan Kamath926973e2014-06-09 14:18:14 +0100778 if ((lfh->gpb_flags & kGPBDDFlagMask) == 0) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000779 data->has_data_descriptor = 0;
Narayan Kamath926973e2014-06-09 14:18:14 +0100780 if (data->compressed_length != lfh->compressed_size
781 || data->uncompressed_length != lfh->uncompressed_size
782 || data->crc32 != lfh->crc32) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700783 ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu32 ", %" PRIu32
784 ", %" PRIx32 "}, was {%" PRIu32 ", %" PRIu32 ", %" PRIx32 "}",
Narayan Kamath7462f022013-11-21 13:05:04 +0000785 data->compressed_length, data->uncompressed_length, data->crc32,
Narayan Kamath926973e2014-06-09 14:18:14 +0100786 lfh->compressed_size, lfh->uncompressed_size, lfh->crc32);
Narayan Kamath7462f022013-11-21 13:05:04 +0000787 return kInconsistentInformation;
788 }
789 } else {
790 data->has_data_descriptor = 1;
791 }
792
793 // Check that the local file header name matches the declared
794 // name in the central directory.
Narayan Kamath926973e2014-06-09 14:18:14 +0100795 if (lfh->file_name_length == nameLen) {
796 const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader);
Mykola Kondratenko50afc152014-09-08 12:46:37 +0200797 if (name_offset + lfh->file_name_length > cd_offset) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000798 ALOGW("Zip: Invalid declared length");
799 return kInvalidOffset;
800 }
801
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800802 uint8_t* name_buf = reinterpret_cast<uint8_t*>(malloc(nameLen));
Narayan Kamath7462f022013-11-21 13:05:04 +0000803 ssize_t actual = ReadAtOffset(archive->fd, name_buf, nameLen,
804 name_offset);
805
806 if (actual != nameLen) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800807 ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast<int64_t>(name_offset));
Narayan Kamath7462f022013-11-21 13:05:04 +0000808 free(name_buf);
809 return kIoError;
810 }
811
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100812 if (memcmp(archive->hash_table[ent].name, name_buf, nameLen)) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000813 free(name_buf);
814 return kInconsistentInformation;
815 }
816
817 free(name_buf);
818 } else {
819 ALOGW("Zip: lfh name did not match central directory.");
820 return kInconsistentInformation;
821 }
822
Narayan Kamath926973e2014-06-09 14:18:14 +0100823 const off64_t data_offset = local_header_offset + sizeof(LocalFileHeader)
824 + lfh->file_name_length + lfh->extra_field_length;
Narayan Kamath48953a12014-01-24 12:32:39 +0000825 if (data_offset > cd_offset) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800826 ALOGW("Zip: bad data offset %" PRId64 " in zip", static_cast<int64_t>(data_offset));
Narayan Kamath7462f022013-11-21 13:05:04 +0000827 return kInvalidOffset;
828 }
829
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800830 if (static_cast<off64_t>(data_offset + data->compressed_length) > cd_offset) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700831 ALOGW("Zip: bad compressed length in zip (%" PRId64 " + %" PRIu32 " > %" PRId64 ")",
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800832 static_cast<int64_t>(data_offset), data->compressed_length, static_cast<int64_t>(cd_offset));
Narayan Kamath7462f022013-11-21 13:05:04 +0000833 return kInvalidOffset;
834 }
835
836 if (data->method == kCompressStored &&
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800837 static_cast<off64_t>(data_offset + data->uncompressed_length) > cd_offset) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700838 ALOGW("Zip: bad uncompressed length in zip (%" PRId64 " + %" PRIu32 " > %" PRId64 ")",
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800839 static_cast<int64_t>(data_offset), data->uncompressed_length,
840 static_cast<int64_t>(cd_offset));
Narayan Kamath7462f022013-11-21 13:05:04 +0000841 return kInvalidOffset;
842 }
843
844 data->offset = data_offset;
845 return 0;
846}
847
848struct IterationHandle {
849 uint32_t position;
Piotr Jastrzebski10aa9a02014-08-19 09:01:20 +0100850 // We're not using vector here because this code is used in the Windows SDK
851 // where the STL is not available.
Yusuke Sato07447542015-06-25 14:39:19 -0700852 ZipString prefix;
853 ZipString suffix;
Narayan Kamath7462f022013-11-21 13:05:04 +0000854 ZipArchive* archive;
Piotr Jastrzebski8e085362014-08-18 11:37:45 +0100855
Yusuke Sato07447542015-06-25 14:39:19 -0700856 IterationHandle(const ZipString* in_prefix,
857 const ZipString* in_suffix) {
858 if (in_prefix) {
859 uint8_t* name_copy = new uint8_t[in_prefix->name_length];
860 memcpy(name_copy, in_prefix->name, in_prefix->name_length);
861 prefix.name = name_copy;
862 prefix.name_length = in_prefix->name_length;
863 } else {
864 prefix.name = NULL;
865 prefix.name_length = 0;
Yusuke Satof1d3d3b2015-06-25 14:09:00 -0700866 }
Yusuke Sato07447542015-06-25 14:39:19 -0700867 if (in_suffix) {
868 uint8_t* name_copy = new uint8_t[in_suffix->name_length];
869 memcpy(name_copy, in_suffix->name, in_suffix->name_length);
870 suffix.name = name_copy;
871 suffix.name_length = in_suffix->name_length;
872 } else {
873 suffix.name = NULL;
874 suffix.name_length = 0;
Yusuke Satof1d3d3b2015-06-25 14:09:00 -0700875 }
Piotr Jastrzebski8e085362014-08-18 11:37:45 +0100876 }
877
878 ~IterationHandle() {
Yusuke Sato07447542015-06-25 14:39:19 -0700879 delete[] prefix.name;
880 delete[] suffix.name;
Piotr Jastrzebski8e085362014-08-18 11:37:45 +0100881 }
Narayan Kamath7462f022013-11-21 13:05:04 +0000882};
883
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100884int32_t StartIteration(ZipArchiveHandle handle, void** cookie_ptr,
Yusuke Sato07447542015-06-25 14:39:19 -0700885 const ZipString* optional_prefix,
886 const ZipString* optional_suffix) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800887 ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle);
Narayan Kamath7462f022013-11-21 13:05:04 +0000888
889 if (archive == NULL || archive->hash_table == NULL) {
890 ALOGW("Zip: Invalid ZipArchiveHandle");
891 return kInvalidHandle;
892 }
893
Yusuke Satof1d3d3b2015-06-25 14:09:00 -0700894 IterationHandle* cookie = new IterationHandle(optional_prefix, optional_suffix);
Narayan Kamath7462f022013-11-21 13:05:04 +0000895 cookie->position = 0;
Narayan Kamath7462f022013-11-21 13:05:04 +0000896 cookie->archive = archive;
Narayan Kamath7462f022013-11-21 13:05:04 +0000897
898 *cookie_ptr = cookie ;
899 return 0;
900}
901
Piotr Jastrzebski79c8b342014-08-08 14:02:17 +0100902void EndIteration(void* cookie) {
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100903 delete reinterpret_cast<IterationHandle*>(cookie);
Piotr Jastrzebski79c8b342014-08-08 14:02:17 +0100904}
905
Yusuke Sato07447542015-06-25 14:39:19 -0700906int32_t FindEntry(const ZipArchiveHandle handle, const ZipString& entryName,
Narayan Kamath7462f022013-11-21 13:05:04 +0000907 ZipEntry* data) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800908 const ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle);
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100909 if (entryName.name_length == 0) {
910 ALOGW("Zip: Invalid filename %.*s", entryName.name_length, entryName.name);
Narayan Kamath7462f022013-11-21 13:05:04 +0000911 return kInvalidEntryName;
912 }
913
914 const int64_t ent = EntryToIndex(archive->hash_table,
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100915 archive->hash_table_size, entryName);
Narayan Kamath7462f022013-11-21 13:05:04 +0000916
917 if (ent < 0) {
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100918 ALOGV("Zip: Could not find entry %.*s", entryName.name_length, entryName.name);
Narayan Kamath7462f022013-11-21 13:05:04 +0000919 return ent;
920 }
921
922 return FindEntry(archive, ent, data);
923}
924
Yusuke Sato07447542015-06-25 14:39:19 -0700925int32_t Next(void* cookie, ZipEntry* data, ZipString* name) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800926 IterationHandle* handle = reinterpret_cast<IterationHandle*>(cookie);
Narayan Kamath7462f022013-11-21 13:05:04 +0000927 if (handle == NULL) {
928 return kInvalidHandle;
929 }
930
931 ZipArchive* archive = handle->archive;
932 if (archive == NULL || archive->hash_table == NULL) {
933 ALOGW("Zip: Invalid ZipArchiveHandle");
934 return kInvalidHandle;
935 }
936
937 const uint32_t currentOffset = handle->position;
938 const uint32_t hash_table_length = archive->hash_table_size;
Yusuke Sato07447542015-06-25 14:39:19 -0700939 const ZipString* hash_table = archive->hash_table;
Narayan Kamath7462f022013-11-21 13:05:04 +0000940
941 for (uint32_t i = currentOffset; i < hash_table_length; ++i) {
942 if (hash_table[i].name != NULL &&
Yusuke Sato07447542015-06-25 14:39:19 -0700943 (handle->prefix.name_length == 0 ||
944 hash_table[i].StartsWith(handle->prefix)) &&
945 (handle->suffix.name_length == 0 ||
946 hash_table[i].EndsWith(handle->suffix))) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000947 handle->position = (i + 1);
948 const int error = FindEntry(archive, i, data);
949 if (!error) {
950 name->name = hash_table[i].name;
951 name->name_length = hash_table[i].name_length;
952 }
953
954 return error;
955 }
956 }
957
958 handle->position = 0;
959 return kIterationEnd;
960}
961
Narayan Kamathf899bd52015-04-17 11:53:14 +0100962class Writer {
963 public:
964 virtual bool Append(uint8_t* buf, size_t buf_size) = 0;
965 virtual ~Writer() {}
966 protected:
967 Writer() = default;
968 private:
969 DISALLOW_COPY_AND_ASSIGN(Writer);
970};
971
972// A Writer that writes data to a fixed size memory region.
973// The size of the memory region must be equal to the total size of
974// the data appended to it.
975class MemoryWriter : public Writer {
976 public:
977 MemoryWriter(uint8_t* buf, size_t size) : Writer(),
978 buf_(buf), size_(size), bytes_written_(0) {
979 }
980
981 virtual bool Append(uint8_t* buf, size_t buf_size) override {
982 if (bytes_written_ + buf_size > size_) {
983 ALOGW("Zip: Unexpected size " ZD " (declared) vs " ZD " (actual)",
984 size_, bytes_written_ + buf_size);
985 return false;
986 }
987
988 memcpy(buf_ + bytes_written_, buf, buf_size);
989 bytes_written_ += buf_size;
990 return true;
991 }
992
993 private:
994 uint8_t* const buf_;
995 const size_t size_;
996 size_t bytes_written_;
997};
998
999// A Writer that appends data to a file |fd| at its current position.
1000// The file will be truncated to the end of the written data.
1001class FileWriter : public Writer {
1002 public:
1003
1004 // Creates a FileWriter for |fd| and prepare to write |entry| to it,
1005 // guaranteeing that the file descriptor is valid and that there's enough
1006 // space on the volume to write out the entry completely and that the file
1007 // is truncated to the correct length.
1008 //
1009 // Returns a valid FileWriter on success, |nullptr| if an error occurred.
1010 static std::unique_ptr<FileWriter> Create(int fd, const ZipEntry* entry) {
1011 const uint32_t declared_length = entry->uncompressed_length;
1012 const off64_t current_offset = lseek64(fd, 0, SEEK_CUR);
1013 if (current_offset == -1) {
1014 ALOGW("Zip: unable to seek to current location on fd %d: %s", fd, strerror(errno));
1015 return nullptr;
1016 }
1017
1018 int result = 0;
1019#if defined(__linux__)
1020 if (declared_length > 0) {
1021 // Make sure we have enough space on the volume to extract the compressed
1022 // entry. Note that the call to ftruncate below will change the file size but
1023 // will not allocate space on disk and this call to fallocate will not
1024 // change the file size.
Badhri Jagan Sridharana68d0d12015-06-02 14:47:57 -07001025 // Note: fallocate is only supported by the following filesystems -
1026 // btrfs, ext4, ocfs2, and xfs. Therefore fallocate might fail with
1027 // EOPNOTSUPP error when issued in other filesystems.
1028 // Hence, check for the return error code before concluding that the
1029 // disk does not have enough space.
Narayan Kamathf899bd52015-04-17 11:53:14 +01001030 result = TEMP_FAILURE_RETRY(fallocate(fd, 0, current_offset, declared_length));
Badhri Jagan Sridharana68d0d12015-06-02 14:47:57 -07001031 if (result == -1 && errno == ENOSPC) {
Narayan Kamathf899bd52015-04-17 11:53:14 +01001032 ALOGW("Zip: unable to allocate space for file to %" PRId64 ": %s",
1033 static_cast<int64_t>(declared_length + current_offset), strerror(errno));
1034 return std::unique_ptr<FileWriter>(nullptr);
1035 }
1036 }
1037#endif // __linux__
1038
1039 result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length + current_offset));
1040 if (result == -1) {
1041 ALOGW("Zip: unable to truncate file to %" PRId64 ": %s",
1042 static_cast<int64_t>(declared_length + current_offset), strerror(errno));
1043 return std::unique_ptr<FileWriter>(nullptr);
1044 }
1045
1046 return std::unique_ptr<FileWriter>(new FileWriter(fd, declared_length));
1047 }
1048
1049 virtual bool Append(uint8_t* buf, size_t buf_size) override {
1050 if (total_bytes_written_ + buf_size > declared_length_) {
1051 ALOGW("Zip: Unexpected size " ZD " (declared) vs " ZD " (actual)",
1052 declared_length_, total_bytes_written_ + buf_size);
1053 return false;
1054 }
1055
Narayan Kamathe97e66e2015-04-27 16:25:53 +01001056 const bool result = android::base::WriteFully(fd_, buf, buf_size);
1057 if (result) {
1058 total_bytes_written_ += buf_size;
1059 } else {
1060 ALOGW("Zip: unable to write " ZD " bytes to file; %s", buf_size, strerror(errno));
Narayan Kamathf899bd52015-04-17 11:53:14 +01001061 }
1062
Narayan Kamathe97e66e2015-04-27 16:25:53 +01001063 return result;
Narayan Kamathf899bd52015-04-17 11:53:14 +01001064 }
1065 private:
1066 FileWriter(const int fd, const size_t declared_length) :
1067 Writer(),
1068 fd_(fd),
1069 declared_length_(declared_length),
1070 total_bytes_written_(0) {
1071 }
1072
1073 const int fd_;
1074 const size_t declared_length_;
1075 size_t total_bytes_written_;
1076};
1077
Dmitriy Ivanovf94e1592015-03-06 13:27:59 -08001078// This method is using libz macros with old-style-casts
1079#pragma GCC diagnostic push
1080#pragma GCC diagnostic ignored "-Wold-style-cast"
1081static inline int zlib_inflateInit2(z_stream* stream, int window_bits) {
1082 return inflateInit2(stream, window_bits);
1083}
1084#pragma GCC diagnostic pop
1085
Narayan Kamathf899bd52015-04-17 11:53:14 +01001086static int32_t InflateEntryToWriter(int fd, const ZipEntry* entry,
1087 Writer* writer, uint64_t* crc_out) {
Dmitriy Ivanovedbabfe2015-03-12 09:58:15 -07001088 const size_t kBufSize = 32768;
1089 std::vector<uint8_t> read_buf(kBufSize);
1090 std::vector<uint8_t> write_buf(kBufSize);
Narayan Kamath7462f022013-11-21 13:05:04 +00001091 z_stream zstream;
1092 int zerr;
1093
1094 /*
1095 * Initialize the zlib stream struct.
1096 */
1097 memset(&zstream, 0, sizeof(zstream));
1098 zstream.zalloc = Z_NULL;
1099 zstream.zfree = Z_NULL;
1100 zstream.opaque = Z_NULL;
1101 zstream.next_in = NULL;
1102 zstream.avail_in = 0;
Dmitriy Ivanovedbabfe2015-03-12 09:58:15 -07001103 zstream.next_out = &write_buf[0];
Narayan Kamath7462f022013-11-21 13:05:04 +00001104 zstream.avail_out = kBufSize;
1105 zstream.data_type = Z_UNKNOWN;
1106
1107 /*
1108 * Use the undocumented "negative window bits" feature to tell zlib
1109 * that there's no zlib header waiting for it.
1110 */
Dmitriy Ivanovf94e1592015-03-06 13:27:59 -08001111 zerr = zlib_inflateInit2(&zstream, -MAX_WBITS);
Narayan Kamath7462f022013-11-21 13:05:04 +00001112 if (zerr != Z_OK) {
1113 if (zerr == Z_VERSION_ERROR) {
1114 ALOGE("Installed zlib is not compatible with linked version (%s)",
1115 ZLIB_VERSION);
1116 } else {
1117 ALOGW("Call to inflateInit2 failed (zerr=%d)", zerr);
1118 }
1119
1120 return kZlibError;
1121 }
1122
Dmitriy Ivanov1f741e52015-03-06 14:26:37 -08001123 auto zstream_deleter = [](z_stream* stream) {
1124 inflateEnd(stream); /* free up any allocated structures */
1125 };
1126
1127 std::unique_ptr<z_stream, decltype(zstream_deleter)> zstream_guard(&zstream, zstream_deleter);
1128
Narayan Kamath7462f022013-11-21 13:05:04 +00001129 const uint32_t uncompressed_length = entry->uncompressed_length;
1130
1131 uint32_t compressed_length = entry->compressed_length;
Narayan Kamath7462f022013-11-21 13:05:04 +00001132 do {
1133 /* read as much as we can */
1134 if (zstream.avail_in == 0) {
Mark Salyzyn51d562d2014-05-05 14:38:05 -07001135 const ZD_TYPE getSize = (compressed_length > kBufSize) ? kBufSize : compressed_length;
Dmitriy Ivanovedbabfe2015-03-12 09:58:15 -07001136 const ZD_TYPE actual = TEMP_FAILURE_RETRY(read(fd, &read_buf[0], getSize));
Narayan Kamath7462f022013-11-21 13:05:04 +00001137 if (actual != getSize) {
Mark Salyzyn51d562d2014-05-05 14:38:05 -07001138 ALOGW("Zip: inflate read failed (" ZD " vs " ZD ")", actual, getSize);
Dmitriy Ivanov1f741e52015-03-06 14:26:37 -08001139 return kIoError;
Narayan Kamath7462f022013-11-21 13:05:04 +00001140 }
1141
1142 compressed_length -= getSize;
1143
Dmitriy Ivanovedbabfe2015-03-12 09:58:15 -07001144 zstream.next_in = &read_buf[0];
Narayan Kamath7462f022013-11-21 13:05:04 +00001145 zstream.avail_in = getSize;
1146 }
1147
1148 /* uncompress the data */
1149 zerr = inflate(&zstream, Z_NO_FLUSH);
1150 if (zerr != Z_OK && zerr != Z_STREAM_END) {
1151 ALOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)",
1152 zerr, zstream.next_in, zstream.avail_in,
1153 zstream.next_out, zstream.avail_out);
Dmitriy Ivanov1f741e52015-03-06 14:26:37 -08001154 return kZlibError;
Narayan Kamath7462f022013-11-21 13:05:04 +00001155 }
1156
1157 /* write when we're full or when we're done */
1158 if (zstream.avail_out == 0 ||
1159 (zerr == Z_STREAM_END && zstream.avail_out != kBufSize)) {
Dmitriy Ivanovedbabfe2015-03-12 09:58:15 -07001160 const size_t write_size = zstream.next_out - &write_buf[0];
Narayan Kamathf899bd52015-04-17 11:53:14 +01001161 if (!writer->Append(&write_buf[0], write_size)) {
1162 // The file might have declared a bogus length.
1163 return kInconsistentInformation;
Narayan Kamath7462f022013-11-21 13:05:04 +00001164 }
Narayan Kamath7462f022013-11-21 13:05:04 +00001165
Dmitriy Ivanovedbabfe2015-03-12 09:58:15 -07001166 zstream.next_out = &write_buf[0];
Narayan Kamath7462f022013-11-21 13:05:04 +00001167 zstream.avail_out = kBufSize;
1168 }
1169 } while (zerr == Z_OK);
1170
1171 assert(zerr == Z_STREAM_END); /* other errors should've been caught */
1172
1173 // stream.adler holds the crc32 value for such streams.
1174 *crc_out = zstream.adler;
1175
1176 if (zstream.total_out != uncompressed_length || compressed_length != 0) {
Mark Salyzyn088bf902014-05-08 16:02:20 -07001177 ALOGW("Zip: size mismatch on inflated file (%lu vs %" PRIu32 ")",
Narayan Kamath7462f022013-11-21 13:05:04 +00001178 zstream.total_out, uncompressed_length);
Dmitriy Ivanov1f741e52015-03-06 14:26:37 -08001179 return kInconsistentInformation;
Narayan Kamath7462f022013-11-21 13:05:04 +00001180 }
1181
Dmitriy Ivanov1f741e52015-03-06 14:26:37 -08001182 return 0;
Narayan Kamath7462f022013-11-21 13:05:04 +00001183}
1184
Narayan Kamathf899bd52015-04-17 11:53:14 +01001185static int32_t CopyEntryToWriter(int fd, const ZipEntry* entry, Writer* writer,
1186 uint64_t *crc_out) {
1187 static const uint32_t kBufSize = 32768;
1188 std::vector<uint8_t> buf(kBufSize);
1189
1190 const uint32_t length = entry->uncompressed_length;
1191 uint32_t count = 0;
1192 uint64_t crc = 0;
1193 while (count < length) {
1194 uint32_t remaining = length - count;
1195
1196 // Safe conversion because kBufSize is narrow enough for a 32 bit signed
1197 // value.
1198 const ssize_t block_size = (remaining > kBufSize) ? kBufSize : remaining;
1199 const ssize_t actual = TEMP_FAILURE_RETRY(read(fd, &buf[0], block_size));
1200
1201 if (actual != block_size) {
1202 ALOGW("CopyFileToFile: copy read failed (" ZD " vs " ZD ")", actual, block_size);
1203 return kIoError;
1204 }
1205
1206 if (!writer->Append(&buf[0], block_size)) {
1207 return kIoError;
1208 }
1209 crc = crc32(crc, &buf[0], block_size);
1210 count += block_size;
1211 }
1212
1213 *crc_out = crc;
1214
1215 return 0;
1216}
1217
1218int32_t ExtractToWriter(ZipArchiveHandle handle,
1219 ZipEntry* entry, Writer* writer) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -08001220 ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle);
Narayan Kamath7462f022013-11-21 13:05:04 +00001221 const uint16_t method = entry->method;
1222 off64_t data_offset = entry->offset;
1223
1224 if (lseek64(archive->fd, data_offset, SEEK_SET) != data_offset) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -08001225 ALOGW("Zip: lseek to data at %" PRId64 " failed", static_cast<int64_t>(data_offset));
Narayan Kamath7462f022013-11-21 13:05:04 +00001226 return kIoError;
1227 }
1228
1229 // this should default to kUnknownCompressionMethod.
1230 int32_t return_value = -1;
1231 uint64_t crc = 0;
1232 if (method == kCompressStored) {
Narayan Kamathf899bd52015-04-17 11:53:14 +01001233 return_value = CopyEntryToWriter(archive->fd, entry, writer, &crc);
Narayan Kamath7462f022013-11-21 13:05:04 +00001234 } else if (method == kCompressDeflated) {
Narayan Kamathf899bd52015-04-17 11:53:14 +01001235 return_value = InflateEntryToWriter(archive->fd, entry, writer, &crc);
Narayan Kamath7462f022013-11-21 13:05:04 +00001236 }
1237
1238 if (!return_value && entry->has_data_descriptor) {
1239 return_value = UpdateEntryFromDataDescriptor(archive->fd, entry);
1240 if (return_value) {
1241 return return_value;
1242 }
1243 }
1244
1245 // TODO: Fix this check by passing the right flags to inflate2 so that
1246 // it calculates the CRC for us.
1247 if (entry->crc32 != crc && false) {
Mark Salyzyn088bf902014-05-08 16:02:20 -07001248 ALOGW("Zip: crc mismatch: expected %" PRIu32 ", was %" PRIu64, entry->crc32, crc);
Narayan Kamath7462f022013-11-21 13:05:04 +00001249 return kInconsistentInformation;
1250 }
1251
1252 return return_value;
1253}
1254
Narayan Kamathf899bd52015-04-17 11:53:14 +01001255int32_t ExtractToMemory(ZipArchiveHandle handle, ZipEntry* entry,
1256 uint8_t* begin, uint32_t size) {
1257 std::unique_ptr<Writer> writer(new MemoryWriter(begin, size));
1258 return ExtractToWriter(handle, entry, writer.get());
1259}
1260
Narayan Kamath7462f022013-11-21 13:05:04 +00001261int32_t ExtractEntryToFile(ZipArchiveHandle handle,
1262 ZipEntry* entry, int fd) {
Narayan Kamathf899bd52015-04-17 11:53:14 +01001263 std::unique_ptr<Writer> writer(FileWriter::Create(fd, entry));
1264 if (writer.get() == nullptr) {
Narayan Kamath7462f022013-11-21 13:05:04 +00001265 return kIoError;
1266 }
1267
Narayan Kamathf899bd52015-04-17 11:53:14 +01001268 return ExtractToWriter(handle, entry, writer.get());
Narayan Kamath7462f022013-11-21 13:05:04 +00001269}
1270
1271const char* ErrorCodeString(int32_t error_code) {
1272 if (error_code > kErrorMessageLowerBound && error_code < kErrorMessageUpperBound) {
1273 return kErrorMessages[error_code * -1];
1274 }
1275
1276 return kErrorMessages[0];
1277}
1278
1279int GetFileDescriptor(const ZipArchiveHandle handle) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -08001280 return reinterpret_cast<ZipArchive*>(handle)->fd;
Narayan Kamath7462f022013-11-21 13:05:04 +00001281}