blob: b234142dcc6b4c0a907af2fccc8475e4f16b92a8 [file] [log] [blame]
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
18
19package org.apache.commons.compress.archivers.zip;
20
21import java.nio.ByteBuffer;
Stefan Bodewig4e2380b2009-02-18 14:51:10 +000022import java.nio.charset.Charset;
Stefan Bodewig853176f2009-03-02 16:09:20 +000023import java.nio.charset.UnsupportedCharsetException;
24import java.util.HashMap;
25import java.util.Map;
Stefan Bodewig4e2380b2009-02-18 14:51:10 +000026
27/**
28 * Static helper functions for robustly encoding filenames in zip files.
29 */
30abstract class ZipEncodingHelper {
31
32 /**
Stefan Bodewig853176f2009-03-02 16:09:20 +000033 * A class, which holds the high characters of a simple encoding
Stefan Bodewig2ab601b2009-03-02 16:21:30 +000034 * and lazily instantiates a Simple8BitZipEncoding instance in a
Stefan Bodewig853176f2009-03-02 16:09:20 +000035 * thread-safe manner.
36 */
37 private static class SimpleEncodingHolder {
38
39 private final char [] highChars;
Stefan Bodewig2ab601b2009-03-02 16:21:30 +000040 private Simple8BitZipEncoding encoding;
Stefan Bodewig853176f2009-03-02 16:09:20 +000041
42 /**
43 * Instantiate a simple encoding holder.
44 *
45 * @param highChars The characters for byte codes 128 to 255.
46 *
Stefan Bodewig2ab601b2009-03-02 16:21:30 +000047 * @see Simple8BitZipEncoding#Simple8BitZipEncoding(char[])
Stefan Bodewig853176f2009-03-02 16:09:20 +000048 */
Sebastian Bazleyd6f36332009-03-28 00:07:58 +000049 SimpleEncodingHolder(char [] highChars) {
Stefan Bodewig853176f2009-03-02 16:09:20 +000050 this.highChars = highChars;
51 }
52
53 /**
Sebastian Bazley8ebac242009-03-04 00:18:09 +000054 * @return The associated {@link Simple8BitZipEncoding}, which
Stefan Bodewig853176f2009-03-02 16:09:20 +000055 * is instantiated if not done so far.
56 */
Stefan Bodewig2ab601b2009-03-02 16:21:30 +000057 public synchronized Simple8BitZipEncoding getEncoding() {
Stefan Bodewig853176f2009-03-02 16:09:20 +000058 if (this.encoding == null) {
Stefan Bodewig2ab601b2009-03-02 16:21:30 +000059 this.encoding = new Simple8BitZipEncoding(this.highChars);
Stefan Bodewig853176f2009-03-02 16:09:20 +000060 }
61 return this.encoding;
62 }
63 }
64
65 private static final Map simpleEncodings;
66
67 static {
68 simpleEncodings = new HashMap();
69
70 char[] cp437_high_chars =
71 new char[] { 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0,
72 0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef,
73 0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6,
74 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
75 0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5,
76 0x20a7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa,
77 0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x2310,
78 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
79 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561,
80 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557,
81 0x255d, 0x255c, 0x255b, 0x2510, 0x2514, 0x2534,
82 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f,
83 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550,
84 0x256c, 0x2567, 0x2568, 0x2564, 0x2565, 0x2559,
85 0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518,
86 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580,
87 0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3,
88 0x00b5, 0x03c4, 0x03a6, 0x0398, 0x03a9, 0x03b4,
89 0x221e, 0x03c6, 0x03b5, 0x2229, 0x2261, 0x00b1,
90 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248,
91 0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2,
92 0x25a0, 0x00a0 };
93
94 SimpleEncodingHolder cp437 = new SimpleEncodingHolder(cp437_high_chars);
95
96 simpleEncodings.put("CP437",cp437);
97 simpleEncodings.put("Cp437",cp437);
98 simpleEncodings.put("cp437",cp437);
99 simpleEncodings.put("IBM437",cp437);
100 simpleEncodings.put("ibm437",cp437);
101
102 char[] cp850_high_chars =
103 new char[] { 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0,
104 0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef,
105 0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6,
106 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
107 0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8,
108 0x00d7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa,
109 0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x00ae,
110 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
111 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1,
112 0x00c2, 0x00c0, 0x00a9, 0x2563, 0x2551, 0x2557,
113 0x255d, 0x00a2, 0x00a5, 0x2510, 0x2514, 0x2534,
114 0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3,
115 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550,
116 0x256c, 0x00a4, 0x00f0, 0x00d0, 0x00ca, 0x00cb,
117 0x00c8, 0x0131, 0x00cd, 0x00ce, 0x00cf, 0x2518,
118 0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580,
119 0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5,
120 0x00b5, 0x00fe, 0x00de, 0x00da, 0x00db, 0x00d9,
121 0x00fd, 0x00dd, 0x00af, 0x00b4, 0x00ad, 0x00b1,
122 0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8,
123 0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2,
124 0x25a0, 0x00a0 };
125
126 SimpleEncodingHolder cp850 = new SimpleEncodingHolder(cp850_high_chars);
127
128 simpleEncodings.put("CP850",cp850);
129 simpleEncodings.put("Cp850",cp850);
130 simpleEncodings.put("cp850",cp850);
131 simpleEncodings.put("IBM850",cp850);
132 simpleEncodings.put("ibm850",cp850);
133 }
134
135 /**
Stefan Bodewig4e2380b2009-02-18 14:51:10 +0000136 * Grow a byte buffer, so it has a minimal capacity or at least
137 * the double capacity of the original buffer
138 *
139 * @param b The original buffer.
140 * @param newCapacity The minimal requested new capacity.
141 * @return A byte buffer <code>r</code> with
142 * <code>r.capacity() = max(b.capacity()*2,newCapacity)</code> and
143 * all the data contained in <code>b</code> copied to the beginning
144 * of <code>r</code>.
145 *
146 */
147 static ByteBuffer growBuffer(ByteBuffer b, int newCapacity) {
148 b.limit(b.position());
149 b.rewind();
150
151 int c2 = b.capacity() * 2;
152 ByteBuffer on = ByteBuffer.allocate(c2 < newCapacity ? newCapacity : c2);
153
154 on.put(b);
155 return on;
156 }
157
Stefan Bodewig853176f2009-03-02 16:09:20 +0000158
Stefan Bodewig4e2380b2009-02-18 14:51:10 +0000159 /**
160 * The hexadecimal digits <code>0,...,9,A,...,F</code> encoded as
161 * ASCII bytes.
162 */
163 private static final byte[] HEX_DIGITS =
164 new byte [] {
165 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x41,
166 0x42, 0x43, 0x44, 0x45, 0x46
167 };
168
169 /**
Stefan Bodewig853176f2009-03-02 16:09:20 +0000170 * Append <code>%Uxxxx</code> to the given byte buffer.
171 * The caller must assure, that <code>bb.remaining()&gt;=6</code>.
Stefan Bodewig4e2380b2009-02-18 14:51:10 +0000172 *
Stefan Bodewig853176f2009-03-02 16:09:20 +0000173 * @param bb The byte buffer to write to.
174 * @param c The character to write.
Stefan Bodewig4e2380b2009-02-18 14:51:10 +0000175 */
Stefan Bodewig853176f2009-03-02 16:09:20 +0000176 static void appendSurrogate(ByteBuffer bb, char c) {
177
178 bb.put((byte) '%');
179 bb.put((byte) 'U');
180
181 bb.put(HEX_DIGITS[(c >> 12)&0x0f]);
182 bb.put(HEX_DIGITS[(c >> 8)&0x0f]);
183 bb.put(HEX_DIGITS[(c >> 4)&0x0f]);
184 bb.put(HEX_DIGITS[c & 0x0f]);
185 }
186
187
188 /**
189 * name of the encoding UTF-8
190 */
191 static final String UTF8 = "UTF8";
192
193 /**
Stefan Bodewigae0f9842010-02-12 15:22:02 +0000194 * variant name of the encoding UTF-8 used for comparisions.
195 */
Stefan Bodewigbed564b2010-02-18 12:34:02 +0000196 private static final String UTF_DASH_8 = "utf-8";
Stefan Bodewigae0f9842010-02-12 15:22:02 +0000197
198 /**
Stefan Bodewig853176f2009-03-02 16:09:20 +0000199 * name of the encoding UTF-8
200 */
201 static final ZipEncoding UTF8_ZIP_ENCODING = new FallbackZipEncoding(UTF8);
202
203 /**
204 * Instantiates a zip encoding.
205 *
206 * @param name The name of the zip encoding. Specify <code>null</code> for
207 * the platform's default encoding.
208 * @return A zip encoding for the given encoding name.
209 */
210 static ZipEncoding getZipEncoding(String name) {
211
212 // fallback encoding is good enough for utf-8.
213 if (isUTF8(name)) {
214 return UTF8_ZIP_ENCODING;
215 }
216
217 if (name == null) {
218 return new FallbackZipEncoding();
219 }
220
221 SimpleEncodingHolder h =
222 (SimpleEncodingHolder) simpleEncodings.get(name);
223
224 if (h!=null) {
225 return h.getEncoding();
226 }
227
228 try {
229
230 Charset cs = Charset.forName(name);
231 return new NioZipEncoding(cs);
232
233 } catch (UnsupportedCharsetException e) {
234 return new FallbackZipEncoding(name);
235 }
236 }
237
238 /**
239 * Whether a given encoding - or the platform's default encoding
240 * if the parameter is null - is UTF-8.
241 */
242 static boolean isUTF8(String encoding) {
Stefan Bodewig2d448642009-02-25 16:09:12 +0000243 if (encoding == null) {
Stefan Bodewig853176f2009-03-02 16:09:20 +0000244 // check platform's default encoding
245 encoding = System.getProperty("file.encoding");
Stefan Bodewig2d448642009-02-25 16:09:12 +0000246 }
Stefan Bodewigbed564b2010-02-18 12:34:02 +0000247 return UTF8.equalsIgnoreCase(encoding)
248 || UTF_DASH_8.equalsIgnoreCase(encoding);
Stefan Bodewiga96f1f22009-02-23 09:01:59 +0000249 }
Stefan Bodewig4e2380b2009-02-18 14:51:10 +0000250}