Stefan Bodewig | 4e2380b | 2009-02-18 14:51:10 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Licensed to the Apache Software Foundation (ASF) under one or more |
| 3 | * contributor license agreements. See the NOTICE file distributed with |
| 4 | * this work for additional information regarding copyright ownership. |
| 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 |
| 6 | * (the "License"); you may not use this file except in compliance with |
| 7 | * the License. You may obtain a copy of the License at |
| 8 | * |
| 9 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | * |
| 11 | * Unless required by applicable law or agreed to in writing, software |
| 12 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | * See the License for the specific language governing permissions and |
| 15 | * limitations under the License. |
| 16 | * |
| 17 | */ |
| 18 | |
| 19 | package org.apache.commons.compress.archivers.zip; |
| 20 | |
| 21 | import java.nio.ByteBuffer; |
Stefan Bodewig | 4e2380b | 2009-02-18 14:51:10 +0000 | [diff] [blame] | 22 | import java.nio.charset.Charset; |
Stefan Bodewig | 853176f | 2009-03-02 16:09:20 +0000 | [diff] [blame] | 23 | import java.nio.charset.UnsupportedCharsetException; |
| 24 | import java.util.HashMap; |
| 25 | import java.util.Map; |
Stefan Bodewig | 4e2380b | 2009-02-18 14:51:10 +0000 | [diff] [blame] | 26 | |
| 27 | /** |
| 28 | * Static helper functions for robustly encoding filenames in zip files. |
| 29 | */ |
| 30 | abstract class ZipEncodingHelper { |
| 31 | |
| 32 | /** |
Stefan Bodewig | 853176f | 2009-03-02 16:09:20 +0000 | [diff] [blame] | 33 | * A class, which holds the high characters of a simple encoding |
Stefan Bodewig | 2ab601b | 2009-03-02 16:21:30 +0000 | [diff] [blame] | 34 | * and lazily instantiates a Simple8BitZipEncoding instance in a |
Stefan Bodewig | 853176f | 2009-03-02 16:09:20 +0000 | [diff] [blame] | 35 | * thread-safe manner. |
| 36 | */ |
| 37 | private static class SimpleEncodingHolder { |
| 38 | |
| 39 | private final char [] highChars; |
Stefan Bodewig | 2ab601b | 2009-03-02 16:21:30 +0000 | [diff] [blame] | 40 | private Simple8BitZipEncoding encoding; |
Stefan Bodewig | 853176f | 2009-03-02 16:09:20 +0000 | [diff] [blame] | 41 | |
| 42 | /** |
| 43 | * Instantiate a simple encoding holder. |
| 44 | * |
| 45 | * @param highChars The characters for byte codes 128 to 255. |
| 46 | * |
Stefan Bodewig | 2ab601b | 2009-03-02 16:21:30 +0000 | [diff] [blame] | 47 | * @see Simple8BitZipEncoding#Simple8BitZipEncoding(char[]) |
Stefan Bodewig | 853176f | 2009-03-02 16:09:20 +0000 | [diff] [blame] | 48 | */ |
Sebastian Bazley | d6f3633 | 2009-03-28 00:07:58 +0000 | [diff] [blame] | 49 | SimpleEncodingHolder(char [] highChars) { |
Stefan Bodewig | 853176f | 2009-03-02 16:09:20 +0000 | [diff] [blame] | 50 | this.highChars = highChars; |
| 51 | } |
| 52 | |
| 53 | /** |
Sebastian Bazley | 8ebac24 | 2009-03-04 00:18:09 +0000 | [diff] [blame] | 54 | * @return The associated {@link Simple8BitZipEncoding}, which |
Stefan Bodewig | 853176f | 2009-03-02 16:09:20 +0000 | [diff] [blame] | 55 | * is instantiated if not done so far. |
| 56 | */ |
Stefan Bodewig | 2ab601b | 2009-03-02 16:21:30 +0000 | [diff] [blame] | 57 | public synchronized Simple8BitZipEncoding getEncoding() { |
Stefan Bodewig | 853176f | 2009-03-02 16:09:20 +0000 | [diff] [blame] | 58 | if (this.encoding == null) { |
Stefan Bodewig | 2ab601b | 2009-03-02 16:21:30 +0000 | [diff] [blame] | 59 | this.encoding = new Simple8BitZipEncoding(this.highChars); |
Stefan Bodewig | 853176f | 2009-03-02 16:09:20 +0000 | [diff] [blame] | 60 | } |
| 61 | return this.encoding; |
| 62 | } |
| 63 | } |
| 64 | |
| 65 | private static final Map simpleEncodings; |
| 66 | |
| 67 | static { |
| 68 | simpleEncodings = new HashMap(); |
| 69 | |
| 70 | char[] cp437_high_chars = |
| 71 | new char[] { 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, |
| 72 | 0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef, |
| 73 | 0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6, |
| 74 | 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9, |
| 75 | 0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5, |
| 76 | 0x20a7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa, |
| 77 | 0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x2310, |
| 78 | 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb, |
| 79 | 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, |
| 80 | 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, |
| 81 | 0x255d, 0x255c, 0x255b, 0x2510, 0x2514, 0x2534, |
| 82 | 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, |
| 83 | 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, |
| 84 | 0x256c, 0x2567, 0x2568, 0x2564, 0x2565, 0x2559, |
| 85 | 0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518, |
| 86 | 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, |
| 87 | 0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, |
| 88 | 0x00b5, 0x03c4, 0x03a6, 0x0398, 0x03a9, 0x03b4, |
| 89 | 0x221e, 0x03c6, 0x03b5, 0x2229, 0x2261, 0x00b1, |
| 90 | 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248, |
| 91 | 0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2, |
| 92 | 0x25a0, 0x00a0 }; |
| 93 | |
| 94 | SimpleEncodingHolder cp437 = new SimpleEncodingHolder(cp437_high_chars); |
| 95 | |
| 96 | simpleEncodings.put("CP437",cp437); |
| 97 | simpleEncodings.put("Cp437",cp437); |
| 98 | simpleEncodings.put("cp437",cp437); |
| 99 | simpleEncodings.put("IBM437",cp437); |
| 100 | simpleEncodings.put("ibm437",cp437); |
| 101 | |
| 102 | char[] cp850_high_chars = |
| 103 | new char[] { 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, |
| 104 | 0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef, |
| 105 | 0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6, |
| 106 | 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9, |
| 107 | 0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8, |
| 108 | 0x00d7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa, |
| 109 | 0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x00ae, |
| 110 | 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb, |
| 111 | 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1, |
| 112 | 0x00c2, 0x00c0, 0x00a9, 0x2563, 0x2551, 0x2557, |
| 113 | 0x255d, 0x00a2, 0x00a5, 0x2510, 0x2514, 0x2534, |
| 114 | 0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3, |
| 115 | 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, |
| 116 | 0x256c, 0x00a4, 0x00f0, 0x00d0, 0x00ca, 0x00cb, |
| 117 | 0x00c8, 0x0131, 0x00cd, 0x00ce, 0x00cf, 0x2518, |
| 118 | 0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580, |
| 119 | 0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5, |
| 120 | 0x00b5, 0x00fe, 0x00de, 0x00da, 0x00db, 0x00d9, |
| 121 | 0x00fd, 0x00dd, 0x00af, 0x00b4, 0x00ad, 0x00b1, |
| 122 | 0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8, |
| 123 | 0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2, |
| 124 | 0x25a0, 0x00a0 }; |
| 125 | |
| 126 | SimpleEncodingHolder cp850 = new SimpleEncodingHolder(cp850_high_chars); |
| 127 | |
| 128 | simpleEncodings.put("CP850",cp850); |
| 129 | simpleEncodings.put("Cp850",cp850); |
| 130 | simpleEncodings.put("cp850",cp850); |
| 131 | simpleEncodings.put("IBM850",cp850); |
| 132 | simpleEncodings.put("ibm850",cp850); |
| 133 | } |
| 134 | |
| 135 | /** |
Stefan Bodewig | 4e2380b | 2009-02-18 14:51:10 +0000 | [diff] [blame] | 136 | * Grow a byte buffer, so it has a minimal capacity or at least |
| 137 | * the double capacity of the original buffer |
| 138 | * |
| 139 | * @param b The original buffer. |
| 140 | * @param newCapacity The minimal requested new capacity. |
| 141 | * @return A byte buffer <code>r</code> with |
| 142 | * <code>r.capacity() = max(b.capacity()*2,newCapacity)</code> and |
| 143 | * all the data contained in <code>b</code> copied to the beginning |
| 144 | * of <code>r</code>. |
| 145 | * |
| 146 | */ |
| 147 | static ByteBuffer growBuffer(ByteBuffer b, int newCapacity) { |
| 148 | b.limit(b.position()); |
| 149 | b.rewind(); |
| 150 | |
| 151 | int c2 = b.capacity() * 2; |
| 152 | ByteBuffer on = ByteBuffer.allocate(c2 < newCapacity ? newCapacity : c2); |
| 153 | |
| 154 | on.put(b); |
| 155 | return on; |
| 156 | } |
| 157 | |
Stefan Bodewig | 853176f | 2009-03-02 16:09:20 +0000 | [diff] [blame] | 158 | |
Stefan Bodewig | 4e2380b | 2009-02-18 14:51:10 +0000 | [diff] [blame] | 159 | /** |
| 160 | * The hexadecimal digits <code>0,...,9,A,...,F</code> encoded as |
| 161 | * ASCII bytes. |
| 162 | */ |
| 163 | private static final byte[] HEX_DIGITS = |
| 164 | new byte [] { |
| 165 | 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x41, |
| 166 | 0x42, 0x43, 0x44, 0x45, 0x46 |
| 167 | }; |
| 168 | |
| 169 | /** |
Stefan Bodewig | 853176f | 2009-03-02 16:09:20 +0000 | [diff] [blame] | 170 | * Append <code>%Uxxxx</code> to the given byte buffer. |
| 171 | * The caller must assure, that <code>bb.remaining()>=6</code>. |
Stefan Bodewig | 4e2380b | 2009-02-18 14:51:10 +0000 | [diff] [blame] | 172 | * |
Stefan Bodewig | 853176f | 2009-03-02 16:09:20 +0000 | [diff] [blame] | 173 | * @param bb The byte buffer to write to. |
| 174 | * @param c The character to write. |
Stefan Bodewig | 4e2380b | 2009-02-18 14:51:10 +0000 | [diff] [blame] | 175 | */ |
Stefan Bodewig | 853176f | 2009-03-02 16:09:20 +0000 | [diff] [blame] | 176 | static void appendSurrogate(ByteBuffer bb, char c) { |
| 177 | |
| 178 | bb.put((byte) '%'); |
| 179 | bb.put((byte) 'U'); |
| 180 | |
| 181 | bb.put(HEX_DIGITS[(c >> 12)&0x0f]); |
| 182 | bb.put(HEX_DIGITS[(c >> 8)&0x0f]); |
| 183 | bb.put(HEX_DIGITS[(c >> 4)&0x0f]); |
| 184 | bb.put(HEX_DIGITS[c & 0x0f]); |
| 185 | } |
| 186 | |
| 187 | |
| 188 | /** |
| 189 | * name of the encoding UTF-8 |
| 190 | */ |
| 191 | static final String UTF8 = "UTF8"; |
| 192 | |
| 193 | /** |
Stefan Bodewig | ae0f984 | 2010-02-12 15:22:02 +0000 | [diff] [blame] | 194 | * variant name of the encoding UTF-8 used for comparisions. |
| 195 | */ |
Stefan Bodewig | bed564b | 2010-02-18 12:34:02 +0000 | [diff] [blame^] | 196 | private static final String UTF_DASH_8 = "utf-8"; |
Stefan Bodewig | ae0f984 | 2010-02-12 15:22:02 +0000 | [diff] [blame] | 197 | |
| 198 | /** |
Stefan Bodewig | 853176f | 2009-03-02 16:09:20 +0000 | [diff] [blame] | 199 | * name of the encoding UTF-8 |
| 200 | */ |
| 201 | static final ZipEncoding UTF8_ZIP_ENCODING = new FallbackZipEncoding(UTF8); |
| 202 | |
| 203 | /** |
| 204 | * Instantiates a zip encoding. |
| 205 | * |
| 206 | * @param name The name of the zip encoding. Specify <code>null</code> for |
| 207 | * the platform's default encoding. |
| 208 | * @return A zip encoding for the given encoding name. |
| 209 | */ |
| 210 | static ZipEncoding getZipEncoding(String name) { |
| 211 | |
| 212 | // fallback encoding is good enough for utf-8. |
| 213 | if (isUTF8(name)) { |
| 214 | return UTF8_ZIP_ENCODING; |
| 215 | } |
| 216 | |
| 217 | if (name == null) { |
| 218 | return new FallbackZipEncoding(); |
| 219 | } |
| 220 | |
| 221 | SimpleEncodingHolder h = |
| 222 | (SimpleEncodingHolder) simpleEncodings.get(name); |
| 223 | |
| 224 | if (h!=null) { |
| 225 | return h.getEncoding(); |
| 226 | } |
| 227 | |
| 228 | try { |
| 229 | |
| 230 | Charset cs = Charset.forName(name); |
| 231 | return new NioZipEncoding(cs); |
| 232 | |
| 233 | } catch (UnsupportedCharsetException e) { |
| 234 | return new FallbackZipEncoding(name); |
| 235 | } |
| 236 | } |
| 237 | |
| 238 | /** |
| 239 | * Whether a given encoding - or the platform's default encoding |
| 240 | * if the parameter is null - is UTF-8. |
| 241 | */ |
| 242 | static boolean isUTF8(String encoding) { |
Stefan Bodewig | 2d44864 | 2009-02-25 16:09:12 +0000 | [diff] [blame] | 243 | if (encoding == null) { |
Stefan Bodewig | 853176f | 2009-03-02 16:09:20 +0000 | [diff] [blame] | 244 | // check platform's default encoding |
| 245 | encoding = System.getProperty("file.encoding"); |
Stefan Bodewig | 2d44864 | 2009-02-25 16:09:12 +0000 | [diff] [blame] | 246 | } |
Stefan Bodewig | bed564b | 2010-02-18 12:34:02 +0000 | [diff] [blame^] | 247 | return UTF8.equalsIgnoreCase(encoding) |
| 248 | || UTF_DASH_8.equalsIgnoreCase(encoding); |
Stefan Bodewig | a96f1f2 | 2009-02-23 09:01:59 +0000 | [diff] [blame] | 249 | } |
Stefan Bodewig | 4e2380b | 2009-02-18 14:51:10 +0000 | [diff] [blame] | 250 | } |