blob: 4ba91dfb92c4d198c43cd1f8f505eaf5dc2af895 [file] [log] [blame]
Narayan Kamath7462f022013-11-21 13:05:04 +00001/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Read-only access to Zip archives, with minimal heap allocation.
19 */
Narayan Kamath7462f022013-11-21 13:05:04 +000020
21#include <assert.h>
22#include <errno.h>
Mark Salyzyn99ef9912014-03-14 14:26:22 -070023#include <fcntl.h>
24#include <inttypes.h>
Narayan Kamath7462f022013-11-21 13:05:04 +000025#include <limits.h>
Narayan Kamath7462f022013-11-21 13:05:04 +000026#include <stdlib.h>
27#include <string.h>
Narayan Kamath7462f022013-11-21 13:05:04 +000028#include <unistd.h>
29
Dan Albert1ae07642015-04-09 14:11:18 -070030#include <memory>
31#include <vector>
32
33#include "base/macros.h" // TEMP_FAILURE_RETRY may or may not be in unistd
34#include "base/memory.h"
35#include "log/log.h"
36#include "utils/Compat.h"
37#include "utils/FileMap.h"
38#include "zlib.h"
Narayan Kamath7462f022013-11-21 13:05:04 +000039
Narayan Kamath044bc8e2014-12-03 18:22:53 +000040#include "entry_name_utils-inl.h"
Mark Salyzyn99ef9912014-03-14 14:26:22 -070041#include "ziparchive/zip_archive.h"
42
Dan Albert1ae07642015-04-09 14:11:18 -070043using android::base::get_unaligned;
Narayan Kamath044bc8e2014-12-03 18:22:53 +000044
// Needed for Windows, where a file that is not opened in binary mode
// misbehaves. Platforms that do not define O_BINARY get a value of 0 so
// that OR-ing it into the open() flags has no effect.
#if !defined(O_BINARY)
#define O_BINARY 0
#endif
50
Narayan Kamath926973e2014-06-09 14:18:14 +010051// The "end of central directory" (EOCD) record. Each archive
52// contains exactly once such record which appears at the end of
53// the archive. It contains archive wide information like the
54// number of entries in the archive and the offset to the central
55// directory of the offset.
56struct EocdRecord {
57 static const uint32_t kSignature = 0x06054b50;
Narayan Kamath7462f022013-11-21 13:05:04 +000058
Narayan Kamath926973e2014-06-09 14:18:14 +010059 // End of central directory signature, should always be
60 // |kSignature|.
61 uint32_t eocd_signature;
62 // The number of the current "disk", i.e, the "disk" that this
63 // central directory is on.
64 //
65 // This implementation assumes that each archive spans a single
66 // disk only. i.e, that disk_num == 1.
67 uint16_t disk_num;
68 // The disk where the central directory starts.
69 //
70 // This implementation assumes that each archive spans a single
71 // disk only. i.e, that cd_start_disk == 1.
72 uint16_t cd_start_disk;
73 // The number of central directory records on this disk.
74 //
75 // This implementation assumes that each archive spans a single
76 // disk only. i.e, that num_records_on_disk == num_records.
77 uint16_t num_records_on_disk;
78 // The total number of central directory records.
79 uint16_t num_records;
80 // The size of the central directory (in bytes).
81 uint32_t cd_size;
82 // The offset of the start of the central directory, relative
83 // to the start of the file.
84 uint32_t cd_start_offset;
85 // Length of the central directory comment.
86 uint16_t comment_length;
87 private:
Narayan Kamathf899bd52015-04-17 11:53:14 +010088 EocdRecord() = default;
89 DISALLOW_COPY_AND_ASSIGN(EocdRecord);
Narayan Kamath926973e2014-06-09 14:18:14 +010090} __attribute__((packed));
Narayan Kamath7462f022013-11-21 13:05:04 +000091
Narayan Kamath926973e2014-06-09 14:18:14 +010092// A structure representing the fixed length fields for a single
93// record in the central directory of the archive. In addition to
94// the fixed length fields listed here, each central directory
95// record contains a variable length "file_name" and "extra_field"
96// whose lengths are given by |file_name_length| and |extra_field_length|
97// respectively.
98struct CentralDirectoryRecord {
99 static const uint32_t kSignature = 0x02014b50;
Narayan Kamath7462f022013-11-21 13:05:04 +0000100
Narayan Kamath926973e2014-06-09 14:18:14 +0100101 // The start of record signature. Must be |kSignature|.
102 uint32_t record_signature;
103 // Tool version. Ignored by this implementation.
104 uint16_t version_made_by;
105 // Tool version. Ignored by this implementation.
106 uint16_t version_needed;
107 // The "general purpose bit flags" for this entry. The only
108 // flag value that we currently check for is the "data descriptor"
109 // flag.
110 uint16_t gpb_flags;
111 // The compression method for this entry, one of |kCompressStored|
112 // and |kCompressDeflated|.
113 uint16_t compression_method;
114 // The file modification time and date for this entry.
115 uint16_t last_mod_time;
116 uint16_t last_mod_date;
117 // The CRC-32 checksum for this entry.
118 uint32_t crc32;
119 // The compressed size (in bytes) of this entry.
120 uint32_t compressed_size;
121 // The uncompressed size (in bytes) of this entry.
122 uint32_t uncompressed_size;
123 // The length of the entry file name in bytes. The file name
124 // will appear immediately after this record.
125 uint16_t file_name_length;
126 // The length of the extra field info (in bytes). This data
127 // will appear immediately after the entry file name.
128 uint16_t extra_field_length;
129 // The length of the entry comment (in bytes). This data will
130 // appear immediately after the extra field.
131 uint16_t comment_length;
132 // The start disk for this entry. Ignored by this implementation).
133 uint16_t file_start_disk;
134 // File attributes. Ignored by this implementation.
135 uint16_t internal_file_attributes;
136 // File attributes. Ignored by this implementation.
137 uint32_t external_file_attributes;
138 // The offset to the local file header for this entry, from the
139 // beginning of this archive.
140 uint32_t local_file_header_offset;
141 private:
Narayan Kamathf899bd52015-04-17 11:53:14 +0100142 CentralDirectoryRecord() = default;
143 DISALLOW_COPY_AND_ASSIGN(CentralDirectoryRecord);
Narayan Kamath926973e2014-06-09 14:18:14 +0100144} __attribute__((packed));
Narayan Kamath7462f022013-11-21 13:05:04 +0000145
Narayan Kamath926973e2014-06-09 14:18:14 +0100146// The local file header for a given entry. This duplicates information
147// present in the central directory of the archive. It is an error for
148// the information here to be different from the central directory
149// information for a given entry.
150struct LocalFileHeader {
151 static const uint32_t kSignature = 0x04034b50;
Narayan Kamath7462f022013-11-21 13:05:04 +0000152
Narayan Kamath926973e2014-06-09 14:18:14 +0100153 // The local file header signature, must be |kSignature|.
154 uint32_t lfh_signature;
155 // Tool version. Ignored by this implementation.
156 uint16_t version_needed;
157 // The "general purpose bit flags" for this entry. The only
158 // flag value that we currently check for is the "data descriptor"
159 // flag.
160 uint16_t gpb_flags;
161 // The compression method for this entry, one of |kCompressStored|
162 // and |kCompressDeflated|.
163 uint16_t compression_method;
164 // The file modification time and date for this entry.
165 uint16_t last_mod_time;
166 uint16_t last_mod_date;
167 // The CRC-32 checksum for this entry.
168 uint32_t crc32;
169 // The compressed size (in bytes) of this entry.
170 uint32_t compressed_size;
171 // The uncompressed size (in bytes) of this entry.
172 uint32_t uncompressed_size;
173 // The length of the entry file name in bytes. The file name
174 // will appear immediately after this record.
175 uint16_t file_name_length;
176 // The length of the extra field info (in bytes). This data
177 // will appear immediately after the entry file name.
178 uint16_t extra_field_length;
179 private:
Narayan Kamathf899bd52015-04-17 11:53:14 +0100180 LocalFileHeader() = default;
181 DISALLOW_COPY_AND_ASSIGN(LocalFileHeader);
Narayan Kamath926973e2014-06-09 14:18:14 +0100182} __attribute__((packed));
183
184struct DataDescriptor {
185 // The *optional* data descriptor start signature.
186 static const uint32_t kOptSignature = 0x08074b50;
187
188 // CRC-32 checksum of the entry.
189 uint32_t crc32;
190 // Compressed size of the entry.
191 uint32_t compressed_size;
192 // Uncompressed size of the entry.
193 uint32_t uncompressed_size;
194 private:
Narayan Kamathf899bd52015-04-17 11:53:14 +0100195 DataDescriptor() = default;
196 DISALLOW_COPY_AND_ASSIGN(DataDescriptor);
Narayan Kamath926973e2014-06-09 14:18:14 +0100197} __attribute__((packed));
198
Narayan Kamath926973e2014-06-09 14:18:14 +0100199
Piotr Jastrzebskibd0a7482014-08-13 09:49:25 +0000200static const uint32_t kGPBDDFlagMask = 0x0008; // mask value that signifies that the entry has a DD
Narayan Kamath7462f022013-11-21 13:05:04 +0000201
Narayan Kamath926973e2014-06-09 14:18:14 +0100202// The maximum size of a central directory or a file
203// comment in bytes.
204static const uint32_t kMaxCommentLen = 65535;
205
206// The maximum number of bytes to scan backwards for the EOCD start.
207static const uint32_t kMaxEOCDSearch = kMaxCommentLen + sizeof(EocdRecord);
208
// Error codes are negative; valid codes lie strictly between
// |kErrorMessageLowerBound| and |kErrorMessageUpperBound|.
static const int32_t kErrorMessageUpperBound = 0;

// Iteration over the archive has ended.
static const int32_t kIterationEnd = -1;

// Zlib reported an error while inflating a stream from this file. This
// usually indicates file corruption.
static const int32_t kZlibError = -2;

// The input file cannot be processed as a zip archive, usually because it
// is too small, too large or lacks a valid signature.
static const int32_t kInvalidFile = -3;

// An invalid iteration / ziparchive handle was passed in as an input
// argument.
static const int32_t kInvalidHandle = -4;

// The zip archive contained two (or possibly more) entries with the same
// name.
static const int32_t kDuplicateEntry = -5;

// The zip archive contains no entries.
static const int32_t kEmptyArchive = -6;

// The specified entry was not found in the archive.
static const int32_t kEntryNotFound = -7;

// The zip archive contained an invalid local file header pointer.
static const int32_t kInvalidOffset = -8;

// The zip archive contained inconsistent entry information: either the
// central directory and the local file header did not agree, or the
// actual uncompressed length or crc32 did not match the declared values.
static const int32_t kInconsistentInformation = -9;

// An invalid entry name was encountered.
static const int32_t kInvalidEntryName = -10;

// An I/O related system call (read, lseek, ftruncate, map) failed.
static const int32_t kIoError = -11;

// The central directory or the entry contents could not be mmapped.
static const int32_t kMmapFailed = -12;

static const int32_t kErrorMessageLowerBound = -13;

// Human readable descriptions of the codes above. The message for an
// error code |e| is stored at index |-e|; index 0 is the catch-all for
// unrecognised codes.
static const char* kErrorMessages[] = {
  "Unknown return code.",          // 0
  "Iteration ended",               // kIterationEnd
  "Zlib error",                    // kZlibError
  "Invalid file",                  // kInvalidFile
  "Invalid handle",                // kInvalidHandle
  "Duplicate entries in archive",  // kDuplicateEntry
  "Empty archive",                 // kEmptyArchive
  "Entry not found",               // kEntryNotFound
  "Invalid offset",                // kInvalidOffset
  "Inconsistent information",      // kInconsistentInformation
  "Invalid entry name",            // kInvalidEntryName
  "I/O Error",                     // kIoError
  "File mapping failed"            // kMmapFailed
};

// Debug name given to temporary file mappings.
static const char kTempMappingFileName[] = "zip: ExtractFileToFile";
Narayan Kamath7462f022013-11-21 13:05:04 +0000272
273/*
274 * A Read-only Zip archive.
275 *
276 * We want "open" and "find entry by name" to be fast operations, and
277 * we want to use as little memory as possible. We memory-map the zip
278 * central directory, and load a hash table with pointers to the filenames
279 * (which aren't null-terminated). The other fields are at a fixed offset
280 * from the filename, so we don't need to extract those (but we do need
281 * to byte-read and endian-swap them every time we want them).
282 *
283 * It's possible that somebody has handed us a massive (~1GB) zip archive,
284 * so we can't expect to mmap the entire file.
285 *
286 * To speed comparisons when doing a lookup by name, we could make the mapping
287 * "private" (copy-on-write) and null-terminate the filenames after verifying
288 * the record structure. However, this requires a private mapping of
289 * every page that the Central Directory touches. Easier to tuck a copy
290 * of the string length into the hash table entry.
291 */
292struct ZipArchive {
293 /* open Zip archive */
Neil Fullerb1a113f2014-07-25 14:43:04 +0100294 const int fd;
Dmitriy Ivanov40b52b22014-07-15 19:33:00 -0700295 const bool close_file;
Narayan Kamath7462f022013-11-21 13:05:04 +0000296
297 /* mapped central directory area */
298 off64_t directory_offset;
Dmitriy Ivanov4b67f832015-03-06 10:22:34 -0800299 android::FileMap directory_map;
Narayan Kamath7462f022013-11-21 13:05:04 +0000300
301 /* number of entries in the Zip archive */
302 uint16_t num_entries;
303
304 /*
305 * We know how many entries are in the Zip archive, so we can have a
306 * fixed-size hash table. We define a load factor of 0.75 and overallocat
307 * so the maximum number entries can never be higher than
308 * ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t.
309 */
310 uint32_t hash_table_size;
311 ZipEntryName* hash_table;
Neil Fullerb1a113f2014-07-25 14:43:04 +0100312
Dmitriy Ivanov40b52b22014-07-15 19:33:00 -0700313 ZipArchive(const int fd, bool assume_ownership) :
Neil Fullerb1a113f2014-07-25 14:43:04 +0100314 fd(fd),
Dmitriy Ivanov40b52b22014-07-15 19:33:00 -0700315 close_file(assume_ownership),
Neil Fullerb1a113f2014-07-25 14:43:04 +0100316 directory_offset(0),
Neil Fullerb1a113f2014-07-25 14:43:04 +0100317 num_entries(0),
318 hash_table_size(0),
319 hash_table(NULL) {}
320
321 ~ZipArchive() {
Dmitriy Ivanov40b52b22014-07-15 19:33:00 -0700322 if (close_file && fd >= 0) {
Neil Fullerb1a113f2014-07-25 14:43:04 +0100323 close(fd);
324 }
325
Neil Fullerb1a113f2014-07-25 14:43:04 +0100326 free(hash_table);
327 }
Narayan Kamath7462f022013-11-21 13:05:04 +0000328};
329
/*
 * Rounds |val| up to the nearest power of 2; a value that already is a
 * power of 2 is returned unchanged.
 *
 * An input of 0, or one above 2^31, wraps around and yields 0.
 *
 * Technique from http://graphics.stanford.edu/~seander/bithacks.html.
 */
static uint32_t RoundUpPower2(uint32_t val) {
  // Copy the highest set bit of (val - 1) into every lower bit position,
  // then add one to land on the next power of 2.
  uint32_t bits = val - 1;
  for (unsigned shift = 1; shift <= 16; shift <<= 1) {
    bits |= bits >> shift;
  }
  return bits + 1;
}
346
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100347static uint32_t ComputeHash(const ZipEntryName& name) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000348 uint32_t hash = 0;
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100349 uint16_t len = name.name_length;
350 const uint8_t* str = name.name;
Narayan Kamath7462f022013-11-21 13:05:04 +0000351
352 while (len--) {
353 hash = hash * 31 + *str++;
354 }
355
356 return hash;
357}
358
359/*
360 * Convert a ZipEntry to a hash table index, verifying that it's in a
361 * valid range.
362 */
363static int64_t EntryToIndex(const ZipEntryName* hash_table,
364 const uint32_t hash_table_size,
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100365 const ZipEntryName& name) {
366 const uint32_t hash = ComputeHash(name);
Narayan Kamath7462f022013-11-21 13:05:04 +0000367
368 // NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
369 uint32_t ent = hash & (hash_table_size - 1);
370 while (hash_table[ent].name != NULL) {
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100371 if (hash_table[ent].name_length == name.name_length &&
372 memcmp(hash_table[ent].name, name.name, name.name_length) == 0) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000373 return ent;
374 }
375
376 ent = (ent + 1) & (hash_table_size - 1);
377 }
378
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100379 ALOGV("Zip: Unable to find entry %.*s", name.name_length, name.name);
Narayan Kamath7462f022013-11-21 13:05:04 +0000380 return kEntryNotFound;
381}
382
383/*
384 * Add a new entry to the hash table.
385 */
386static int32_t AddToHash(ZipEntryName *hash_table, const uint64_t hash_table_size,
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100387 const ZipEntryName& name) {
388 const uint64_t hash = ComputeHash(name);
Narayan Kamath7462f022013-11-21 13:05:04 +0000389 uint32_t ent = hash & (hash_table_size - 1);
390
391 /*
392 * We over-allocated the table, so we're guaranteed to find an empty slot.
393 * Further, we guarantee that the hashtable size is not 0.
394 */
395 while (hash_table[ent].name != NULL) {
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100396 if (hash_table[ent].name_length == name.name_length &&
397 memcmp(hash_table[ent].name, name.name, name.name_length) == 0) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000398 // We've found a duplicate entry. We don't accept it
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100399 ALOGW("Zip: Found duplicate entry %.*s", name.name_length, name.name);
Narayan Kamath7462f022013-11-21 13:05:04 +0000400 return kDuplicateEntry;
401 }
402 ent = (ent + 1) & (hash_table_size - 1);
403 }
404
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100405 hash_table[ent].name = name.name;
406 hash_table[ent].name_length = name.name_length;
Narayan Kamath7462f022013-11-21 13:05:04 +0000407 return 0;
408}
409
Narayan Kamath7462f022013-11-21 13:05:04 +0000410static int32_t MapCentralDirectory0(int fd, const char* debug_file_name,
411 ZipArchive* archive, off64_t file_length,
Narayan Kamath926973e2014-06-09 14:18:14 +0100412 off64_t read_amount, uint8_t* scan_buffer) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000413 const off64_t search_start = file_length - read_amount;
414
415 if (lseek64(fd, search_start, SEEK_SET) != search_start) {
Narayan Kamath926973e2014-06-09 14:18:14 +0100416 ALOGW("Zip: seek %" PRId64 " failed: %s", static_cast<int64_t>(search_start),
417 strerror(errno));
Narayan Kamath7462f022013-11-21 13:05:04 +0000418 return kIoError;
419 }
Narayan Kamath926973e2014-06-09 14:18:14 +0100420 ssize_t actual = TEMP_FAILURE_RETRY(
421 read(fd, scan_buffer, static_cast<size_t>(read_amount)));
422 if (actual != static_cast<ssize_t>(read_amount)) {
423 ALOGW("Zip: read %" PRId64 " failed: %s", static_cast<int64_t>(read_amount),
424 strerror(errno));
Narayan Kamath7462f022013-11-21 13:05:04 +0000425 return kIoError;
426 }
427
428 /*
429 * Scan backward for the EOCD magic. In an archive without a trailing
430 * comment, we'll find it on the first try. (We may want to consider
431 * doing an initial minimal read; if we don't find it, retry with a
432 * second read as above.)
433 */
Narayan Kamath926973e2014-06-09 14:18:14 +0100434 int i = read_amount - sizeof(EocdRecord);
435 for (; i >= 0; i--) {
Dan Albert1ae07642015-04-09 14:11:18 -0700436 if (scan_buffer[i] == 0x50) {
437 uint32_t* sig_addr = reinterpret_cast<uint32_t*>(&scan_buffer[i]);
438 if (get_unaligned<uint32_t>(sig_addr) == EocdRecord::kSignature) {
439 ALOGV("+++ Found EOCD at buf+%d", i);
440 break;
441 }
Narayan Kamath7462f022013-11-21 13:05:04 +0000442 }
443 }
444 if (i < 0) {
445 ALOGD("Zip: EOCD not found, %s is not zip", debug_file_name);
446 return kInvalidFile;
447 }
448
449 const off64_t eocd_offset = search_start + i;
Narayan Kamath926973e2014-06-09 14:18:14 +0100450 const EocdRecord* eocd = reinterpret_cast<const EocdRecord*>(scan_buffer + i);
Narayan Kamath7462f022013-11-21 13:05:04 +0000451 /*
Narayan Kamath926973e2014-06-09 14:18:14 +0100452 * Verify that there's no trailing space at the end of the central directory
453 * and its comment.
Narayan Kamath7462f022013-11-21 13:05:04 +0000454 */
Narayan Kamath926973e2014-06-09 14:18:14 +0100455 const off64_t calculated_length = eocd_offset + sizeof(EocdRecord)
456 + eocd->comment_length;
457 if (calculated_length != file_length) {
Narayan Kamath4f6b4992014-06-03 13:59:23 +0100458 ALOGW("Zip: %" PRId64 " extraneous bytes at the end of the central directory",
Narayan Kamath926973e2014-06-09 14:18:14 +0100459 static_cast<int64_t>(file_length - calculated_length));
Narayan Kamath4f6b4992014-06-03 13:59:23 +0100460 return kInvalidFile;
461 }
Narayan Kamath7462f022013-11-21 13:05:04 +0000462
Narayan Kamath926973e2014-06-09 14:18:14 +0100463 /*
464 * Grab the CD offset and size, and the number of entries in the
465 * archive and verify that they look reasonable.
466 */
467 if (eocd->cd_start_offset + eocd->cd_size > eocd_offset) {
468 ALOGW("Zip: bad offsets (dir %" PRIu32 ", size %" PRIu32 ", eocd %" PRId64 ")",
469 eocd->cd_start_offset, eocd->cd_size, static_cast<int64_t>(eocd_offset));
Narayan Kamath7462f022013-11-21 13:05:04 +0000470 return kInvalidOffset;
471 }
Narayan Kamath926973e2014-06-09 14:18:14 +0100472 if (eocd->num_records == 0) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000473 ALOGW("Zip: empty archive?");
474 return kEmptyArchive;
475 }
476
Narayan Kamath926973e2014-06-09 14:18:14 +0100477 ALOGV("+++ num_entries=%" PRIu32 "dir_size=%" PRIu32 " dir_offset=%" PRIu32,
478 eocd->num_records, eocd->cd_size, eocd->cd_start_offset);
Narayan Kamath7462f022013-11-21 13:05:04 +0000479
480 /*
481 * It all looks good. Create a mapping for the CD, and set the fields
482 * in archive.
483 */
Dmitriy Ivanov4b67f832015-03-06 10:22:34 -0800484 if (!archive->directory_map.create(debug_file_name, fd,
485 static_cast<off64_t>(eocd->cd_start_offset),
486 static_cast<size_t>(eocd->cd_size), true /* read only */) ) {
Narayan Kamatheaf98852013-12-11 14:51:51 +0000487 return kMmapFailed;
Narayan Kamath7462f022013-11-21 13:05:04 +0000488 }
489
Narayan Kamath926973e2014-06-09 14:18:14 +0100490 archive->num_entries = eocd->num_records;
491 archive->directory_offset = eocd->cd_start_offset;
Narayan Kamath7462f022013-11-21 13:05:04 +0000492
493 return 0;
494}
495
496/*
497 * Find the zip Central Directory and memory-map it.
498 *
499 * On success, returns 0 after populating fields from the EOCD area:
500 * directory_offset
501 * directory_map
502 * num_entries
503 */
504static int32_t MapCentralDirectory(int fd, const char* debug_file_name,
505 ZipArchive* archive) {
506
507 // Test file length. We use lseek64 to make sure the file
508 // is small enough to be a zip file (Its size must be less than
509 // 0xffffffff bytes).
510 off64_t file_length = lseek64(fd, 0, SEEK_END);
511 if (file_length == -1) {
512 ALOGV("Zip: lseek on fd %d failed", fd);
513 return kInvalidFile;
514 }
515
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800516 if (file_length > static_cast<off64_t>(0xffffffff)) {
Narayan Kamath926973e2014-06-09 14:18:14 +0100517 ALOGV("Zip: zip file too long %" PRId64, static_cast<int64_t>(file_length));
Narayan Kamath7462f022013-11-21 13:05:04 +0000518 return kInvalidFile;
519 }
520
Narayan Kamath926973e2014-06-09 14:18:14 +0100521 if (file_length < static_cast<off64_t>(sizeof(EocdRecord))) {
522 ALOGV("Zip: length %" PRId64 " is too small to be zip", static_cast<int64_t>(file_length));
Narayan Kamath7462f022013-11-21 13:05:04 +0000523 return kInvalidFile;
524 }
525
526 /*
527 * Perform the traditional EOCD snipe hunt.
528 *
529 * We're searching for the End of Central Directory magic number,
530 * which appears at the start of the EOCD block. It's followed by
531 * 18 bytes of EOCD stuff and up to 64KB of archive comment. We
532 * need to read the last part of the file into a buffer, dig through
533 * it to find the magic number, parse some values out, and use those
534 * to determine the extent of the CD.
535 *
536 * We start by pulling in the last part of the file.
537 */
Narayan Kamath926973e2014-06-09 14:18:14 +0100538 off64_t read_amount = kMaxEOCDSearch;
539 if (file_length < read_amount) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000540 read_amount = file_length;
541 }
542
Narayan Kamath926973e2014-06-09 14:18:14 +0100543 uint8_t* scan_buffer = reinterpret_cast<uint8_t*>(malloc(read_amount));
Narayan Kamath7462f022013-11-21 13:05:04 +0000544 int32_t result = MapCentralDirectory0(fd, debug_file_name, archive,
545 file_length, read_amount, scan_buffer);
546
547 free(scan_buffer);
548 return result;
549}
550
551/*
552 * Parses the Zip archive's Central Directory. Allocates and populates the
553 * hash table.
554 *
555 * Returns 0 on success.
556 */
557static int32_t ParseZipArchive(ZipArchive* archive) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800558 const uint8_t* const cd_ptr =
559 reinterpret_cast<const uint8_t*>(archive->directory_map.getDataPtr());
Dmitriy Ivanov4b67f832015-03-06 10:22:34 -0800560 const size_t cd_length = archive->directory_map.getDataLength();
Narayan Kamath926973e2014-06-09 14:18:14 +0100561 const uint16_t num_entries = archive->num_entries;
Narayan Kamath7462f022013-11-21 13:05:04 +0000562
563 /*
564 * Create hash table. We have a minimum 75% load factor, possibly as
565 * low as 50% after we round off to a power of 2. There must be at
566 * least one unused entry to avoid an infinite loop during creation.
567 */
568 archive->hash_table_size = RoundUpPower2(1 + (num_entries * 4) / 3);
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800569 archive->hash_table = reinterpret_cast<ZipEntryName*>(calloc(archive->hash_table_size,
570 sizeof(ZipEntryName)));
Narayan Kamath7462f022013-11-21 13:05:04 +0000571
572 /*
573 * Walk through the central directory, adding entries to the hash
574 * table and verifying values.
575 */
Narayan Kamath926973e2014-06-09 14:18:14 +0100576 const uint8_t* const cd_end = cd_ptr + cd_length;
Narayan Kamath7462f022013-11-21 13:05:04 +0000577 const uint8_t* ptr = cd_ptr;
578 for (uint16_t i = 0; i < num_entries; i++) {
Narayan Kamath926973e2014-06-09 14:18:14 +0100579 const CentralDirectoryRecord* cdr =
580 reinterpret_cast<const CentralDirectoryRecord*>(ptr);
581 if (cdr->record_signature != CentralDirectoryRecord::kSignature) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700582 ALOGW("Zip: missed a central dir sig (at %" PRIu16 ")", i);
Dmitriy Ivanov3ea93da2015-03-06 11:48:47 -0800583 return -1;
Narayan Kamath7462f022013-11-21 13:05:04 +0000584 }
585
Narayan Kamath926973e2014-06-09 14:18:14 +0100586 if (ptr + sizeof(CentralDirectoryRecord) > cd_end) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700587 ALOGW("Zip: ran off the end (at %" PRIu16 ")", i);
Dmitriy Ivanov3ea93da2015-03-06 11:48:47 -0800588 return -1;
Narayan Kamath7462f022013-11-21 13:05:04 +0000589 }
590
Narayan Kamath926973e2014-06-09 14:18:14 +0100591 const off64_t local_header_offset = cdr->local_file_header_offset;
Narayan Kamath7462f022013-11-21 13:05:04 +0000592 if (local_header_offset >= archive->directory_offset) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800593 ALOGW("Zip: bad LFH offset %" PRId64 " at entry %" PRIu16,
594 static_cast<int64_t>(local_header_offset), i);
Dmitriy Ivanov3ea93da2015-03-06 11:48:47 -0800595 return -1;
Narayan Kamath7462f022013-11-21 13:05:04 +0000596 }
597
Narayan Kamath926973e2014-06-09 14:18:14 +0100598 const uint16_t file_name_length = cdr->file_name_length;
599 const uint16_t extra_length = cdr->extra_field_length;
600 const uint16_t comment_length = cdr->comment_length;
Piotr Jastrzebski78271ba2014-08-15 12:53:00 +0100601 const uint8_t* file_name = ptr + sizeof(CentralDirectoryRecord);
602
Narayan Kamath044bc8e2014-12-03 18:22:53 +0000603 /* check that file name is valid UTF-8 and doesn't contain NUL (U+0000) characters */
604 if (!IsValidEntryName(file_name, file_name_length)) {
Dmitriy Ivanov3ea93da2015-03-06 11:48:47 -0800605 return -1;
Piotr Jastrzebski78271ba2014-08-15 12:53:00 +0100606 }
Narayan Kamath7462f022013-11-21 13:05:04 +0000607
608 /* add the CDE filename to the hash table */
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100609 ZipEntryName entry_name;
610 entry_name.name = file_name;
611 entry_name.name_length = file_name_length;
Narayan Kamath7462f022013-11-21 13:05:04 +0000612 const int add_result = AddToHash(archive->hash_table,
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100613 archive->hash_table_size, entry_name);
Dmitriy Ivanov3ea93da2015-03-06 11:48:47 -0800614 if (add_result != 0) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000615 ALOGW("Zip: Error adding entry to hash table %d", add_result);
Dmitriy Ivanov3ea93da2015-03-06 11:48:47 -0800616 return add_result;
Narayan Kamath7462f022013-11-21 13:05:04 +0000617 }
618
Narayan Kamath926973e2014-06-09 14:18:14 +0100619 ptr += sizeof(CentralDirectoryRecord) + file_name_length + extra_length + comment_length;
620 if ((ptr - cd_ptr) > static_cast<int64_t>(cd_length)) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700621 ALOGW("Zip: bad CD advance (%tu vs %zu) at entry %" PRIu16,
622 ptr - cd_ptr, cd_length, i);
Dmitriy Ivanov3ea93da2015-03-06 11:48:47 -0800623 return -1;
Narayan Kamath7462f022013-11-21 13:05:04 +0000624 }
625 }
Mark Salyzyn088bf902014-05-08 16:02:20 -0700626 ALOGV("+++ zip good scan %" PRIu16 " entries", num_entries);
Narayan Kamath7462f022013-11-21 13:05:04 +0000627
Dmitriy Ivanov3ea93da2015-03-06 11:48:47 -0800628 return 0;
Narayan Kamath7462f022013-11-21 13:05:04 +0000629}
630
631static int32_t OpenArchiveInternal(ZipArchive* archive,
632 const char* debug_file_name) {
633 int32_t result = -1;
634 if ((result = MapCentralDirectory(archive->fd, debug_file_name, archive))) {
635 return result;
636 }
637
638 if ((result = ParseZipArchive(archive))) {
639 return result;
640 }
641
642 return 0;
643}
644
645int32_t OpenArchiveFd(int fd, const char* debug_file_name,
Dmitriy Ivanov40b52b22014-07-15 19:33:00 -0700646 ZipArchiveHandle* handle, bool assume_ownership) {
647 ZipArchive* archive = new ZipArchive(fd, assume_ownership);
Narayan Kamath7462f022013-11-21 13:05:04 +0000648 *handle = archive;
Narayan Kamath7462f022013-11-21 13:05:04 +0000649 return OpenArchiveInternal(archive, debug_file_name);
650}
651
652int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle) {
Neil Fullerb1a113f2014-07-25 14:43:04 +0100653 const int fd = open(fileName, O_RDONLY | O_BINARY, 0);
Dmitriy Ivanov40b52b22014-07-15 19:33:00 -0700654 ZipArchive* archive = new ZipArchive(fd, true);
Narayan Kamath7462f022013-11-21 13:05:04 +0000655 *handle = archive;
656
Narayan Kamath7462f022013-11-21 13:05:04 +0000657 if (fd < 0) {
658 ALOGW("Unable to open '%s': %s", fileName, strerror(errno));
659 return kIoError;
Narayan Kamath7462f022013-11-21 13:05:04 +0000660 }
Dmitriy Ivanov40b52b22014-07-15 19:33:00 -0700661
Narayan Kamath7462f022013-11-21 13:05:04 +0000662 return OpenArchiveInternal(archive, fileName);
663}
664
665/*
666 * Close a ZipArchive, closing the file and freeing the contents.
667 */
668void CloseArchive(ZipArchiveHandle handle) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800669 ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle);
Narayan Kamath7462f022013-11-21 13:05:04 +0000670 ALOGV("Closing archive %p", archive);
Neil Fullerb1a113f2014-07-25 14:43:04 +0100671 delete archive;
Narayan Kamath7462f022013-11-21 13:05:04 +0000672}
673
674static int32_t UpdateEntryFromDataDescriptor(int fd,
675 ZipEntry *entry) {
Narayan Kamath926973e2014-06-09 14:18:14 +0100676 uint8_t ddBuf[sizeof(DataDescriptor) + sizeof(DataDescriptor::kOptSignature)];
Narayan Kamath7462f022013-11-21 13:05:04 +0000677 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, ddBuf, sizeof(ddBuf)));
678 if (actual != sizeof(ddBuf)) {
679 return kIoError;
680 }
681
Narayan Kamath926973e2014-06-09 14:18:14 +0100682 const uint32_t ddSignature = *(reinterpret_cast<const uint32_t*>(ddBuf));
683 const uint16_t offset = (ddSignature == DataDescriptor::kOptSignature) ? 4 : 0;
684 const DataDescriptor* descriptor = reinterpret_cast<const DataDescriptor*>(ddBuf + offset);
Narayan Kamath7462f022013-11-21 13:05:04 +0000685
Narayan Kamath926973e2014-06-09 14:18:14 +0100686 entry->crc32 = descriptor->crc32;
687 entry->compressed_length = descriptor->compressed_size;
688 entry->uncompressed_length = descriptor->uncompressed_size;
Narayan Kamath7462f022013-11-21 13:05:04 +0000689
690 return 0;
691}
692
693// Attempts to read |len| bytes into |buf| at offset |off|.
694//
695// This method uses pread64 on platforms that support it and
696// lseek64 + read on platforms that don't. This implies that
697// callers should not rely on the |fd| offset being incremented
698// as a side effect of this call.
699static inline ssize_t ReadAtOffset(int fd, uint8_t* buf, size_t len,
700 off64_t off) {
Yabin Cui70160f42014-11-19 20:47:18 -0800701#if !defined(_WIN32)
Narayan Kamath7462f022013-11-21 13:05:04 +0000702 return TEMP_FAILURE_RETRY(pread64(fd, buf, len, off));
703#else
704 // The only supported platform that doesn't support pread at the moment
705 // is Windows. Only recent versions of windows support unix like forks,
706 // and even there the semantics are quite different.
707 if (lseek64(fd, off, SEEK_SET) != off) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700708 ALOGW("Zip: failed seek to offset %" PRId64, off);
Narayan Kamath7462f022013-11-21 13:05:04 +0000709 return kIoError;
710 }
711
712 return TEMP_FAILURE_RETRY(read(fd, buf, len));
Yabin Cui70160f42014-11-19 20:47:18 -0800713#endif
Narayan Kamath7462f022013-11-21 13:05:04 +0000714}
715
716static int32_t FindEntry(const ZipArchive* archive, const int ent,
717 ZipEntry* data) {
718 const uint16_t nameLen = archive->hash_table[ent].name_length;
Narayan Kamath7462f022013-11-21 13:05:04 +0000719
720 // Recover the start of the central directory entry from the filename
721 // pointer. The filename is the first entry past the fixed-size data,
722 // so we can just subtract back from that.
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100723 const uint8_t* ptr = archive->hash_table[ent].name;
Narayan Kamath926973e2014-06-09 14:18:14 +0100724 ptr -= sizeof(CentralDirectoryRecord);
Narayan Kamath7462f022013-11-21 13:05:04 +0000725
726 // This is the base of our mmapped region, we have to sanity check that
727 // the name that's in the hash table is a pointer to a location within
728 // this mapped region.
Narayan Kamath926973e2014-06-09 14:18:14 +0100729 const uint8_t* base_ptr = reinterpret_cast<const uint8_t*>(
Dmitriy Ivanov4b67f832015-03-06 10:22:34 -0800730 archive->directory_map.getDataPtr());
731 if (ptr < base_ptr || ptr > base_ptr + archive->directory_map.getDataLength()) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000732 ALOGW("Zip: Invalid entry pointer");
733 return kInvalidOffset;
734 }
735
Narayan Kamath926973e2014-06-09 14:18:14 +0100736 const CentralDirectoryRecord *cdr =
737 reinterpret_cast<const CentralDirectoryRecord*>(ptr);
738
Narayan Kamath7462f022013-11-21 13:05:04 +0000739 // The offset of the start of the central directory in the zipfile.
740 // We keep this lying around so that we can sanity check all our lengths
741 // and our per-file structures.
742 const off64_t cd_offset = archive->directory_offset;
743
744 // Fill out the compression method, modification time, crc32
745 // and other interesting attributes from the central directory. These
746 // will later be compared against values from the local file header.
Narayan Kamath926973e2014-06-09 14:18:14 +0100747 data->method = cdr->compression_method;
748 data->mod_time = cdr->last_mod_time;
749 data->crc32 = cdr->crc32;
750 data->compressed_length = cdr->compressed_size;
751 data->uncompressed_length = cdr->uncompressed_size;
Narayan Kamath7462f022013-11-21 13:05:04 +0000752
753 // Figure out the local header offset from the central directory. The
754 // actual file data will begin after the local header and the name /
755 // extra comments.
Narayan Kamath926973e2014-06-09 14:18:14 +0100756 const off64_t local_header_offset = cdr->local_file_header_offset;
757 if (local_header_offset + static_cast<off64_t>(sizeof(LocalFileHeader)) >= cd_offset) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000758 ALOGW("Zip: bad local hdr offset in zip");
759 return kInvalidOffset;
760 }
761
Narayan Kamath926973e2014-06-09 14:18:14 +0100762 uint8_t lfh_buf[sizeof(LocalFileHeader)];
Narayan Kamath7462f022013-11-21 13:05:04 +0000763 ssize_t actual = ReadAtOffset(archive->fd, lfh_buf, sizeof(lfh_buf),
764 local_header_offset);
765 if (actual != sizeof(lfh_buf)) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800766 ALOGW("Zip: failed reading lfh name from offset %" PRId64,
767 static_cast<int64_t>(local_header_offset));
Narayan Kamath7462f022013-11-21 13:05:04 +0000768 return kIoError;
769 }
770
Narayan Kamath926973e2014-06-09 14:18:14 +0100771 const LocalFileHeader *lfh = reinterpret_cast<const LocalFileHeader*>(lfh_buf);
772
773 if (lfh->lfh_signature != LocalFileHeader::kSignature) {
Mark Salyzyn99ef9912014-03-14 14:26:22 -0700774 ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64,
Narayan Kamath926973e2014-06-09 14:18:14 +0100775 static_cast<int64_t>(local_header_offset));
Narayan Kamath7462f022013-11-21 13:05:04 +0000776 return kInvalidOffset;
777 }
778
779 // Paranoia: Match the values specified in the local file header
780 // to those specified in the central directory.
Narayan Kamath926973e2014-06-09 14:18:14 +0100781 if ((lfh->gpb_flags & kGPBDDFlagMask) == 0) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000782 data->has_data_descriptor = 0;
Narayan Kamath926973e2014-06-09 14:18:14 +0100783 if (data->compressed_length != lfh->compressed_size
784 || data->uncompressed_length != lfh->uncompressed_size
785 || data->crc32 != lfh->crc32) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700786 ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu32 ", %" PRIu32
787 ", %" PRIx32 "}, was {%" PRIu32 ", %" PRIu32 ", %" PRIx32 "}",
Narayan Kamath7462f022013-11-21 13:05:04 +0000788 data->compressed_length, data->uncompressed_length, data->crc32,
Narayan Kamath926973e2014-06-09 14:18:14 +0100789 lfh->compressed_size, lfh->uncompressed_size, lfh->crc32);
Narayan Kamath7462f022013-11-21 13:05:04 +0000790 return kInconsistentInformation;
791 }
792 } else {
793 data->has_data_descriptor = 1;
794 }
795
796 // Check that the local file header name matches the declared
797 // name in the central directory.
Narayan Kamath926973e2014-06-09 14:18:14 +0100798 if (lfh->file_name_length == nameLen) {
799 const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader);
Mykola Kondratenko50afc152014-09-08 12:46:37 +0200800 if (name_offset + lfh->file_name_length > cd_offset) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000801 ALOGW("Zip: Invalid declared length");
802 return kInvalidOffset;
803 }
804
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800805 uint8_t* name_buf = reinterpret_cast<uint8_t*>(malloc(nameLen));
Narayan Kamath7462f022013-11-21 13:05:04 +0000806 ssize_t actual = ReadAtOffset(archive->fd, name_buf, nameLen,
807 name_offset);
808
809 if (actual != nameLen) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800810 ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast<int64_t>(name_offset));
Narayan Kamath7462f022013-11-21 13:05:04 +0000811 free(name_buf);
812 return kIoError;
813 }
814
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100815 if (memcmp(archive->hash_table[ent].name, name_buf, nameLen)) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000816 free(name_buf);
817 return kInconsistentInformation;
818 }
819
820 free(name_buf);
821 } else {
822 ALOGW("Zip: lfh name did not match central directory.");
823 return kInconsistentInformation;
824 }
825
Narayan Kamath926973e2014-06-09 14:18:14 +0100826 const off64_t data_offset = local_header_offset + sizeof(LocalFileHeader)
827 + lfh->file_name_length + lfh->extra_field_length;
Narayan Kamath48953a12014-01-24 12:32:39 +0000828 if (data_offset > cd_offset) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800829 ALOGW("Zip: bad data offset %" PRId64 " in zip", static_cast<int64_t>(data_offset));
Narayan Kamath7462f022013-11-21 13:05:04 +0000830 return kInvalidOffset;
831 }
832
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800833 if (static_cast<off64_t>(data_offset + data->compressed_length) > cd_offset) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700834 ALOGW("Zip: bad compressed length in zip (%" PRId64 " + %" PRIu32 " > %" PRId64 ")",
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800835 static_cast<int64_t>(data_offset), data->compressed_length, static_cast<int64_t>(cd_offset));
Narayan Kamath7462f022013-11-21 13:05:04 +0000836 return kInvalidOffset;
837 }
838
839 if (data->method == kCompressStored &&
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800840 static_cast<off64_t>(data_offset + data->uncompressed_length) > cd_offset) {
Mark Salyzyn088bf902014-05-08 16:02:20 -0700841 ALOGW("Zip: bad uncompressed length in zip (%" PRId64 " + %" PRIu32 " > %" PRId64 ")",
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800842 static_cast<int64_t>(data_offset), data->uncompressed_length,
843 static_cast<int64_t>(cd_offset));
Narayan Kamath7462f022013-11-21 13:05:04 +0000844 return kInvalidOffset;
845 }
846
847 data->offset = data_offset;
848 return 0;
849}
850
851struct IterationHandle {
852 uint32_t position;
Piotr Jastrzebski10aa9a02014-08-19 09:01:20 +0100853 // We're not using vector here because this code is used in the Windows SDK
854 // where the STL is not available.
Piotr Jastrzebski8e085362014-08-18 11:37:45 +0100855 const uint8_t* prefix;
856 uint16_t prefix_len;
Narayan Kamath7462f022013-11-21 13:05:04 +0000857 ZipArchive* archive;
Piotr Jastrzebski8e085362014-08-18 11:37:45 +0100858
859 IterationHandle() : prefix(NULL), prefix_len(0) {}
860
861 IterationHandle(const ZipEntryName& prefix_name)
862 : prefix_len(prefix_name.name_length) {
863 uint8_t* prefix_copy = new uint8_t[prefix_len];
Piotr Jastrzebski10aa9a02014-08-19 09:01:20 +0100864 memcpy(prefix_copy, prefix_name.name, prefix_len);
Piotr Jastrzebski8e085362014-08-18 11:37:45 +0100865 prefix = prefix_copy;
866 }
867
868 ~IterationHandle() {
Piotr Jastrzebski10aa9a02014-08-19 09:01:20 +0100869 delete[] prefix;
Piotr Jastrzebski8e085362014-08-18 11:37:45 +0100870 }
Narayan Kamath7462f022013-11-21 13:05:04 +0000871};
872
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100873int32_t StartIteration(ZipArchiveHandle handle, void** cookie_ptr,
874 const ZipEntryName* optional_prefix) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800875 ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle);
Narayan Kamath7462f022013-11-21 13:05:04 +0000876
877 if (archive == NULL || archive->hash_table == NULL) {
878 ALOGW("Zip: Invalid ZipArchiveHandle");
879 return kInvalidHandle;
880 }
881
Piotr Jastrzebski8e085362014-08-18 11:37:45 +0100882 IterationHandle* cookie =
883 optional_prefix != NULL ? new IterationHandle(*optional_prefix) : new IterationHandle();
Narayan Kamath7462f022013-11-21 13:05:04 +0000884 cookie->position = 0;
Narayan Kamath7462f022013-11-21 13:05:04 +0000885 cookie->archive = archive;
Narayan Kamath7462f022013-11-21 13:05:04 +0000886
887 *cookie_ptr = cookie ;
888 return 0;
889}
890
Piotr Jastrzebski79c8b342014-08-08 14:02:17 +0100891void EndIteration(void* cookie) {
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100892 delete reinterpret_cast<IterationHandle*>(cookie);
Piotr Jastrzebski79c8b342014-08-08 14:02:17 +0100893}
894
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100895int32_t FindEntry(const ZipArchiveHandle handle, const ZipEntryName& entryName,
Narayan Kamath7462f022013-11-21 13:05:04 +0000896 ZipEntry* data) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800897 const ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle);
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100898 if (entryName.name_length == 0) {
899 ALOGW("Zip: Invalid filename %.*s", entryName.name_length, entryName.name);
Narayan Kamath7462f022013-11-21 13:05:04 +0000900 return kInvalidEntryName;
901 }
902
903 const int64_t ent = EntryToIndex(archive->hash_table,
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100904 archive->hash_table_size, entryName);
Narayan Kamath7462f022013-11-21 13:05:04 +0000905
906 if (ent < 0) {
Piotr Jastrzebskiecccc5a2014-08-11 16:35:11 +0100907 ALOGV("Zip: Could not find entry %.*s", entryName.name_length, entryName.name);
Narayan Kamath7462f022013-11-21 13:05:04 +0000908 return ent;
909 }
910
911 return FindEntry(archive, ent, data);
912}
913
914int32_t Next(void* cookie, ZipEntry* data, ZipEntryName* name) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -0800915 IterationHandle* handle = reinterpret_cast<IterationHandle*>(cookie);
Narayan Kamath7462f022013-11-21 13:05:04 +0000916 if (handle == NULL) {
917 return kInvalidHandle;
918 }
919
920 ZipArchive* archive = handle->archive;
921 if (archive == NULL || archive->hash_table == NULL) {
922 ALOGW("Zip: Invalid ZipArchiveHandle");
923 return kInvalidHandle;
924 }
925
926 const uint32_t currentOffset = handle->position;
927 const uint32_t hash_table_length = archive->hash_table_size;
928 const ZipEntryName *hash_table = archive->hash_table;
929
930 for (uint32_t i = currentOffset; i < hash_table_length; ++i) {
931 if (hash_table[i].name != NULL &&
Piotr Jastrzebski8e085362014-08-18 11:37:45 +0100932 (handle->prefix_len == 0 ||
933 (memcmp(handle->prefix, hash_table[i].name, handle->prefix_len) == 0))) {
Narayan Kamath7462f022013-11-21 13:05:04 +0000934 handle->position = (i + 1);
935 const int error = FindEntry(archive, i, data);
936 if (!error) {
937 name->name = hash_table[i].name;
938 name->name_length = hash_table[i].name_length;
939 }
940
941 return error;
942 }
943 }
944
945 handle->position = 0;
946 return kIterationEnd;
947}
948
Narayan Kamathf899bd52015-04-17 11:53:14 +0100949class Writer {
950 public:
951 virtual bool Append(uint8_t* buf, size_t buf_size) = 0;
952 virtual ~Writer() {}
953 protected:
954 Writer() = default;
955 private:
956 DISALLOW_COPY_AND_ASSIGN(Writer);
957};
958
959// A Writer that writes data to a fixed size memory region.
960// The size of the memory region must be equal to the total size of
961// the data appended to it.
962class MemoryWriter : public Writer {
963 public:
964 MemoryWriter(uint8_t* buf, size_t size) : Writer(),
965 buf_(buf), size_(size), bytes_written_(0) {
966 }
967
968 virtual bool Append(uint8_t* buf, size_t buf_size) override {
969 if (bytes_written_ + buf_size > size_) {
970 ALOGW("Zip: Unexpected size " ZD " (declared) vs " ZD " (actual)",
971 size_, bytes_written_ + buf_size);
972 return false;
973 }
974
975 memcpy(buf_ + bytes_written_, buf, buf_size);
976 bytes_written_ += buf_size;
977 return true;
978 }
979
980 private:
981 uint8_t* const buf_;
982 const size_t size_;
983 size_t bytes_written_;
984};
985
986// A Writer that appends data to a file |fd| at its current position.
987// The file will be truncated to the end of the written data.
988class FileWriter : public Writer {
989 public:
990
991 // Creates a FileWriter for |fd| and prepare to write |entry| to it,
992 // guaranteeing that the file descriptor is valid and that there's enough
993 // space on the volume to write out the entry completely and that the file
994 // is truncated to the correct length.
995 //
996 // Returns a valid FileWriter on success, |nullptr| if an error occurred.
997 static std::unique_ptr<FileWriter> Create(int fd, const ZipEntry* entry) {
998 const uint32_t declared_length = entry->uncompressed_length;
999 const off64_t current_offset = lseek64(fd, 0, SEEK_CUR);
1000 if (current_offset == -1) {
1001 ALOGW("Zip: unable to seek to current location on fd %d: %s", fd, strerror(errno));
1002 return nullptr;
1003 }
1004
1005 int result = 0;
1006#if defined(__linux__)
1007 if (declared_length > 0) {
1008 // Make sure we have enough space on the volume to extract the compressed
1009 // entry. Note that the call to ftruncate below will change the file size but
1010 // will not allocate space on disk and this call to fallocate will not
1011 // change the file size.
1012 result = TEMP_FAILURE_RETRY(fallocate(fd, 0, current_offset, declared_length));
1013 if (result == -1) {
1014 ALOGW("Zip: unable to allocate space for file to %" PRId64 ": %s",
1015 static_cast<int64_t>(declared_length + current_offset), strerror(errno));
1016 return std::unique_ptr<FileWriter>(nullptr);
1017 }
1018 }
1019#endif // __linux__
1020
1021 result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length + current_offset));
1022 if (result == -1) {
1023 ALOGW("Zip: unable to truncate file to %" PRId64 ": %s",
1024 static_cast<int64_t>(declared_length + current_offset), strerror(errno));
1025 return std::unique_ptr<FileWriter>(nullptr);
1026 }
1027
1028 return std::unique_ptr<FileWriter>(new FileWriter(fd, declared_length));
1029 }
1030
1031 virtual bool Append(uint8_t* buf, size_t buf_size) override {
1032 if (total_bytes_written_ + buf_size > declared_length_) {
1033 ALOGW("Zip: Unexpected size " ZD " (declared) vs " ZD " (actual)",
1034 declared_length_, total_bytes_written_ + buf_size);
1035 return false;
1036 }
1037
1038 // Keep track of the start position so we can calculate the
1039 // total number of bytes written.
1040 const uint8_t* const start = buf;
1041 size_t bytes_written = 0;
1042 while (buf_size > 0) {
1043 ssize_t bytes_written = TEMP_FAILURE_RETRY(write(fd_, buf, buf_size));
1044 if (bytes_written == -1) {
1045 ALOGW("Zip: unable to write " ZD " bytes to file; %s", buf_size, strerror(errno));
1046 return false;
1047 }
1048
1049 buf_size -= bytes_written;
1050 buf += bytes_written;
1051 }
1052
1053 total_bytes_written_ += static_cast<size_t>(
1054 reinterpret_cast<uintptr_t>(buf) - reinterpret_cast<uintptr_t>(start));
1055
1056 return true;
1057 }
1058 private:
1059 FileWriter(const int fd, const size_t declared_length) :
1060 Writer(),
1061 fd_(fd),
1062 declared_length_(declared_length),
1063 total_bytes_written_(0) {
1064 }
1065
1066 const int fd_;
1067 const size_t declared_length_;
1068 size_t total_bytes_written_;
1069};
1070
Dmitriy Ivanovf94e1592015-03-06 13:27:59 -08001071// This method is using libz macros with old-style-casts
1072#pragma GCC diagnostic push
1073#pragma GCC diagnostic ignored "-Wold-style-cast"
1074static inline int zlib_inflateInit2(z_stream* stream, int window_bits) {
1075 return inflateInit2(stream, window_bits);
1076}
1077#pragma GCC diagnostic pop
1078
Narayan Kamathf899bd52015-04-17 11:53:14 +01001079static int32_t InflateEntryToWriter(int fd, const ZipEntry* entry,
1080 Writer* writer, uint64_t* crc_out) {
Dmitriy Ivanovedbabfe2015-03-12 09:58:15 -07001081 const size_t kBufSize = 32768;
1082 std::vector<uint8_t> read_buf(kBufSize);
1083 std::vector<uint8_t> write_buf(kBufSize);
Narayan Kamath7462f022013-11-21 13:05:04 +00001084 z_stream zstream;
1085 int zerr;
1086
1087 /*
1088 * Initialize the zlib stream struct.
1089 */
1090 memset(&zstream, 0, sizeof(zstream));
1091 zstream.zalloc = Z_NULL;
1092 zstream.zfree = Z_NULL;
1093 zstream.opaque = Z_NULL;
1094 zstream.next_in = NULL;
1095 zstream.avail_in = 0;
Dmitriy Ivanovedbabfe2015-03-12 09:58:15 -07001096 zstream.next_out = &write_buf[0];
Narayan Kamath7462f022013-11-21 13:05:04 +00001097 zstream.avail_out = kBufSize;
1098 zstream.data_type = Z_UNKNOWN;
1099
1100 /*
1101 * Use the undocumented "negative window bits" feature to tell zlib
1102 * that there's no zlib header waiting for it.
1103 */
Dmitriy Ivanovf94e1592015-03-06 13:27:59 -08001104 zerr = zlib_inflateInit2(&zstream, -MAX_WBITS);
Narayan Kamath7462f022013-11-21 13:05:04 +00001105 if (zerr != Z_OK) {
1106 if (zerr == Z_VERSION_ERROR) {
1107 ALOGE("Installed zlib is not compatible with linked version (%s)",
1108 ZLIB_VERSION);
1109 } else {
1110 ALOGW("Call to inflateInit2 failed (zerr=%d)", zerr);
1111 }
1112
1113 return kZlibError;
1114 }
1115
Dmitriy Ivanov1f741e52015-03-06 14:26:37 -08001116 auto zstream_deleter = [](z_stream* stream) {
1117 inflateEnd(stream); /* free up any allocated structures */
1118 };
1119
1120 std::unique_ptr<z_stream, decltype(zstream_deleter)> zstream_guard(&zstream, zstream_deleter);
1121
Narayan Kamath7462f022013-11-21 13:05:04 +00001122 const uint32_t uncompressed_length = entry->uncompressed_length;
1123
1124 uint32_t compressed_length = entry->compressed_length;
1125 uint32_t write_count = 0;
1126 do {
1127 /* read as much as we can */
1128 if (zstream.avail_in == 0) {
Mark Salyzyn51d562d2014-05-05 14:38:05 -07001129 const ZD_TYPE getSize = (compressed_length > kBufSize) ? kBufSize : compressed_length;
Dmitriy Ivanovedbabfe2015-03-12 09:58:15 -07001130 const ZD_TYPE actual = TEMP_FAILURE_RETRY(read(fd, &read_buf[0], getSize));
Narayan Kamath7462f022013-11-21 13:05:04 +00001131 if (actual != getSize) {
Mark Salyzyn51d562d2014-05-05 14:38:05 -07001132 ALOGW("Zip: inflate read failed (" ZD " vs " ZD ")", actual, getSize);
Dmitriy Ivanov1f741e52015-03-06 14:26:37 -08001133 return kIoError;
Narayan Kamath7462f022013-11-21 13:05:04 +00001134 }
1135
1136 compressed_length -= getSize;
1137
Dmitriy Ivanovedbabfe2015-03-12 09:58:15 -07001138 zstream.next_in = &read_buf[0];
Narayan Kamath7462f022013-11-21 13:05:04 +00001139 zstream.avail_in = getSize;
1140 }
1141
1142 /* uncompress the data */
1143 zerr = inflate(&zstream, Z_NO_FLUSH);
1144 if (zerr != Z_OK && zerr != Z_STREAM_END) {
1145 ALOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)",
1146 zerr, zstream.next_in, zstream.avail_in,
1147 zstream.next_out, zstream.avail_out);
Dmitriy Ivanov1f741e52015-03-06 14:26:37 -08001148 return kZlibError;
Narayan Kamath7462f022013-11-21 13:05:04 +00001149 }
1150
1151 /* write when we're full or when we're done */
1152 if (zstream.avail_out == 0 ||
1153 (zerr == Z_STREAM_END && zstream.avail_out != kBufSize)) {
Dmitriy Ivanovedbabfe2015-03-12 09:58:15 -07001154 const size_t write_size = zstream.next_out - &write_buf[0];
Narayan Kamathf899bd52015-04-17 11:53:14 +01001155 if (!writer->Append(&write_buf[0], write_size)) {
1156 // The file might have declared a bogus length.
1157 return kInconsistentInformation;
Narayan Kamath7462f022013-11-21 13:05:04 +00001158 }
Narayan Kamath7462f022013-11-21 13:05:04 +00001159
Dmitriy Ivanovedbabfe2015-03-12 09:58:15 -07001160 zstream.next_out = &write_buf[0];
Narayan Kamath7462f022013-11-21 13:05:04 +00001161 zstream.avail_out = kBufSize;
1162 }
1163 } while (zerr == Z_OK);
1164
1165 assert(zerr == Z_STREAM_END); /* other errors should've been caught */
1166
1167 // stream.adler holds the crc32 value for such streams.
1168 *crc_out = zstream.adler;
1169
1170 if (zstream.total_out != uncompressed_length || compressed_length != 0) {
Mark Salyzyn088bf902014-05-08 16:02:20 -07001171 ALOGW("Zip: size mismatch on inflated file (%lu vs %" PRIu32 ")",
Narayan Kamath7462f022013-11-21 13:05:04 +00001172 zstream.total_out, uncompressed_length);
Dmitriy Ivanov1f741e52015-03-06 14:26:37 -08001173 return kInconsistentInformation;
Narayan Kamath7462f022013-11-21 13:05:04 +00001174 }
1175
Dmitriy Ivanov1f741e52015-03-06 14:26:37 -08001176 return 0;
Narayan Kamath7462f022013-11-21 13:05:04 +00001177}
1178
Narayan Kamathf899bd52015-04-17 11:53:14 +01001179static int32_t CopyEntryToWriter(int fd, const ZipEntry* entry, Writer* writer,
1180 uint64_t *crc_out) {
1181 static const uint32_t kBufSize = 32768;
1182 std::vector<uint8_t> buf(kBufSize);
1183
1184 const uint32_t length = entry->uncompressed_length;
1185 uint32_t count = 0;
1186 uint64_t crc = 0;
1187 while (count < length) {
1188 uint32_t remaining = length - count;
1189
1190 // Safe conversion because kBufSize is narrow enough for a 32 bit signed
1191 // value.
1192 const ssize_t block_size = (remaining > kBufSize) ? kBufSize : remaining;
1193 const ssize_t actual = TEMP_FAILURE_RETRY(read(fd, &buf[0], block_size));
1194
1195 if (actual != block_size) {
1196 ALOGW("CopyFileToFile: copy read failed (" ZD " vs " ZD ")", actual, block_size);
1197 return kIoError;
1198 }
1199
1200 if (!writer->Append(&buf[0], block_size)) {
1201 return kIoError;
1202 }
1203 crc = crc32(crc, &buf[0], block_size);
1204 count += block_size;
1205 }
1206
1207 *crc_out = crc;
1208
1209 return 0;
1210}
1211
1212int32_t ExtractToWriter(ZipArchiveHandle handle,
1213 ZipEntry* entry, Writer* writer) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -08001214 ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle);
Narayan Kamath7462f022013-11-21 13:05:04 +00001215 const uint16_t method = entry->method;
1216 off64_t data_offset = entry->offset;
1217
1218 if (lseek64(archive->fd, data_offset, SEEK_SET) != data_offset) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -08001219 ALOGW("Zip: lseek to data at %" PRId64 " failed", static_cast<int64_t>(data_offset));
Narayan Kamath7462f022013-11-21 13:05:04 +00001220 return kIoError;
1221 }
1222
1223 // this should default to kUnknownCompressionMethod.
1224 int32_t return_value = -1;
1225 uint64_t crc = 0;
1226 if (method == kCompressStored) {
Narayan Kamathf899bd52015-04-17 11:53:14 +01001227 return_value = CopyEntryToWriter(archive->fd, entry, writer, &crc);
Narayan Kamath7462f022013-11-21 13:05:04 +00001228 } else if (method == kCompressDeflated) {
Narayan Kamathf899bd52015-04-17 11:53:14 +01001229 return_value = InflateEntryToWriter(archive->fd, entry, writer, &crc);
Narayan Kamath7462f022013-11-21 13:05:04 +00001230 }
1231
1232 if (!return_value && entry->has_data_descriptor) {
1233 return_value = UpdateEntryFromDataDescriptor(archive->fd, entry);
1234 if (return_value) {
1235 return return_value;
1236 }
1237 }
1238
1239 // TODO: Fix this check by passing the right flags to inflate2 so that
1240 // it calculates the CRC for us.
1241 if (entry->crc32 != crc && false) {
Mark Salyzyn088bf902014-05-08 16:02:20 -07001242 ALOGW("Zip: crc mismatch: expected %" PRIu32 ", was %" PRIu64, entry->crc32, crc);
Narayan Kamath7462f022013-11-21 13:05:04 +00001243 return kInconsistentInformation;
1244 }
1245
1246 return return_value;
1247}
1248
Narayan Kamathf899bd52015-04-17 11:53:14 +01001249int32_t ExtractToMemory(ZipArchiveHandle handle, ZipEntry* entry,
1250 uint8_t* begin, uint32_t size) {
1251 std::unique_ptr<Writer> writer(new MemoryWriter(begin, size));
1252 return ExtractToWriter(handle, entry, writer.get());
1253}
1254
Narayan Kamath7462f022013-11-21 13:05:04 +00001255int32_t ExtractEntryToFile(ZipArchiveHandle handle,
1256 ZipEntry* entry, int fd) {
Narayan Kamathf899bd52015-04-17 11:53:14 +01001257 std::unique_ptr<Writer> writer(FileWriter::Create(fd, entry));
1258 if (writer.get() == nullptr) {
Narayan Kamath7462f022013-11-21 13:05:04 +00001259 return kIoError;
1260 }
1261
Narayan Kamathf899bd52015-04-17 11:53:14 +01001262 return ExtractToWriter(handle, entry, writer.get());
Narayan Kamath7462f022013-11-21 13:05:04 +00001263}
1264
1265const char* ErrorCodeString(int32_t error_code) {
1266 if (error_code > kErrorMessageLowerBound && error_code < kErrorMessageUpperBound) {
1267 return kErrorMessages[error_code * -1];
1268 }
1269
1270 return kErrorMessages[0];
1271}
1272
1273int GetFileDescriptor(const ZipArchiveHandle handle) {
Dmitriy Ivanovf4cb8e22015-03-06 10:50:56 -08001274 return reinterpret_cast<ZipArchive*>(handle)->fd;
Narayan Kamath7462f022013-11-21 13:05:04 +00001275}
1276