blob: 547e36a098648179085f8533035e6e4e4711242e [file] [log] [blame]
Mathias Agopian1f5762e2013-05-06 20:20:34 -07001/*
2 * Copyright (C) 2007 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Read-only access to Zip archives, with minimal heap allocation.
19 *
20 * This is similar to the more-complete ZipFile class, but no attempt
21 * has been made to make them interchangeable. This class operates under
22 * a very different set of assumptions and constraints.
23 *
 * One such assumption is that if you're getting file descriptors for
 * use with this class as a child of a fork() operation, you must be on
 * a platform that implements pread() to guarantee correct operation. This
 * is because pread() can atomically read at a file offset without worrying
 * about a lock around an lseek() + read() pair.
29 */
30#ifndef __LIBS_ZIPFILERO_H
31#define __LIBS_ZIPFILERO_H
32
33#include <utils/Compat.h>
34#include <utils/Errors.h>
35#include <utils/FileMap.h>
36#include <utils/threads.h>
37
38#include <stdio.h>
39#include <stdlib.h>
40#include <unistd.h>
41#include <time.h>
42
43namespace android {
44
/*
 * Identifier for one entry in a Zip archive, as returned by the entry
 * lookup functions.  It is declared as a pointer type so that it is not
 * treated as a simple integer.  NULL indicates an invalid value.
 */
typedef void* ZipEntryRO;
50
51/*
52 * Open a Zip archive for reading.
53 *
54 * We want "open" and "find entry by name" to be fast operations, and we
55 * want to use as little memory as possible. We memory-map the file,
56 * and load a hash table with pointers to the filenames (which aren't
57 * null-terminated). The other fields are at a fixed offset from the
58 * filename, so we don't need to extract those (but we do need to byte-read
59 * and endian-swap them every time we want them).
60 *
61 * To speed comparisons when doing a lookup by name, we could make the mapping
62 * "private" (copy-on-write) and null-terminate the filenames after verifying
63 * the record structure. However, this requires a private mapping of
64 * every page that the Central Directory touches. Easier to tuck a copy
65 * of the string length into the hash table entry.
66 *
67 * NOTE: If this is used on file descriptors inherited from a fork() operation,
68 * you must be on a platform that implements pread() to guarantee correctness
69 * on the shared file descriptors.
70 */
71class ZipFileRO {
72public:
73 ZipFileRO()
74 : mFd(-1), mFileName(NULL), mFileLength(-1),
75 mDirectoryMap(NULL),
76 mNumEntries(-1), mDirectoryOffset(-1),
77 mHashTableSize(-1), mHashTable(NULL)
78 {}
79
80 ~ZipFileRO();
81
82 /*
83 * Open an archive.
84 */
85 status_t open(const char* zipFileName);
86
87 /*
88 * Find an entry, by name. Returns the entry identifier, or NULL if
89 * not found.
90 *
91 * If two entries have the same name, one will be chosen at semi-random.
92 */
93 ZipEntryRO findEntryByName(const char* fileName) const;
94
95 /*
96 * Return the #of entries in the Zip archive.
97 */
98 int getNumEntries(void) const {
99 return mNumEntries;
100 }
101
102 /*
103 * Return the Nth entry. Zip file entries are not stored in sorted
104 * order, and updated entries may appear at the end, so anyone walking
105 * the archive needs to avoid making ordering assumptions. We take
106 * that further by returning the Nth non-empty entry in the hash table
107 * rather than the Nth entry in the archive.
108 *
109 * Valid values are [0..numEntries).
110 *
111 * [This is currently O(n). If it needs to be fast we can allocate an
112 * additional data structure or provide an iterator interface.]
113 */
114 ZipEntryRO findEntryByIndex(int idx) const;
115
116 /*
117 * Copy the filename into the supplied buffer. Returns 0 on success,
118 * -1 if "entry" is invalid, or the filename length if it didn't fit. The
119 * length, and the returned string, include the null-termination.
120 */
121 int getEntryFileName(ZipEntryRO entry, char* buffer, int bufLen) const;
122
123 /*
124 * Get the vital stats for an entry. Pass in NULL pointers for anything
125 * you don't need.
126 *
127 * "*pOffset" holds the Zip file offset of the entry's data.
128 *
129 * Returns "false" if "entry" is bogus or if the data in the Zip file
130 * appears to be bad.
131 */
132 bool getEntryInfo(ZipEntryRO entry, int* pMethod, size_t* pUncompLen,
133 size_t* pCompLen, off64_t* pOffset, long* pModWhen, long* pCrc32) const;
134
135 /*
136 * Create a new FileMap object that maps a subset of the archive. For
137 * an uncompressed entry this effectively provides a pointer to the
138 * actual data, for a compressed entry this provides the input buffer
139 * for inflate().
140 */
141 FileMap* createEntryFileMap(ZipEntryRO entry) const;
142
143 /*
144 * Uncompress the data into a buffer. Depending on the compression
145 * format, this is either an "inflate" operation or a memcpy.
146 *
147 * Use "uncompLen" from getEntryInfo() to determine the required
148 * buffer size.
149 *
150 * Returns "true" on success.
151 */
152 bool uncompressEntry(ZipEntryRO entry, void* buffer) const;
153
154 /*
155 * Uncompress the data to an open file descriptor.
156 */
157 bool uncompressEntry(ZipEntryRO entry, int fd) const;
158
159 /* Zip compression methods we support */
160 enum {
161 kCompressStored = 0, // no compression
162 kCompressDeflated = 8, // standard deflate
163 };
164
165 /*
166 * Utility function: uncompress deflated data, buffer to buffer.
167 */
168 static bool inflateBuffer(void* outBuf, const void* inBuf,
169 size_t uncompLen, size_t compLen);
170
171 /*
172 * Utility function: uncompress deflated data, buffer to fd.
173 */
174 static bool inflateBuffer(int fd, const void* inBuf,
175 size_t uncompLen, size_t compLen);
176
177 /*
178 * Utility function to convert ZIP's time format to a timespec struct.
179 */
180 static inline void zipTimeToTimespec(long when, struct tm* timespec) {
181 const long date = when >> 16;
182 timespec->tm_year = ((date >> 9) & 0x7F) + 80; // Zip is years since 1980
183 timespec->tm_mon = (date >> 5) & 0x0F;
184 timespec->tm_mday = date & 0x1F;
185
186 timespec->tm_hour = (when >> 11) & 0x1F;
187 timespec->tm_min = (when >> 5) & 0x3F;
188 timespec->tm_sec = (when & 0x1F) << 1;
189 }
190
191 /*
192 * Some basic functions for raw data manipulation. "LE" means
193 * Little Endian.
194 */
195 static inline unsigned short get2LE(const unsigned char* buf) {
196 return buf[0] | (buf[1] << 8);
197 }
198 static inline unsigned long get4LE(const unsigned char* buf) {
199 return buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
200 }
201
202private:
203 /* these are private and not defined */
204 ZipFileRO(const ZipFileRO& src);
205 ZipFileRO& operator=(const ZipFileRO& src);
206
207 /* locate and parse the central directory */
208 bool mapCentralDirectory(void);
209
210 /* parse the archive, prepping internal structures */
211 bool parseZipArchive(void);
212
213 /* add a new entry to the hash table */
214 void addToHash(const char* str, int strLen, unsigned int hash);
215
216 /* compute string hash code */
217 static unsigned int computeHash(const char* str, int len);
218
219 /* convert a ZipEntryRO back to a hash table index */
220 int entryToIndex(const ZipEntryRO entry) const;
221
222 /*
223 * One entry in the hash table.
224 */
225 typedef struct HashEntry {
226 const char* name;
227 unsigned short nameLen;
228 //unsigned int hash;
229 } HashEntry;
230
231 /* open Zip archive */
232 int mFd;
233
234 /* Lock for handling the file descriptor (seeks, etc) */
235 mutable Mutex mFdLock;
236
237 /* zip file name */
238 char* mFileName;
239
240 /* length of file */
241 size_t mFileLength;
242
243 /* mapped file */
244 FileMap* mDirectoryMap;
245
246 /* number of entries in the Zip archive */
247 int mNumEntries;
248
249 /* CD directory offset in the Zip archive */
250 off64_t mDirectoryOffset;
251
252 /*
253 * We know how many entries are in the Zip archive, so we have a
254 * fixed-size hash table. We probe for an empty slot.
255 */
256 int mHashTableSize;
257 HashEntry* mHashTable;
258};
259
260}; // namespace android
261
262#endif /*__LIBS_ZIPFILERO_H*/