Kelly O'Hair | 13c4ad8 | 2012-03-04 11:55:34 -0800 | [diff] [blame] | 1 | /* |
| 2 | * reserved comment block |
| 3 | * DO NOT REMOVE OR ALTER! |
| 4 | */ |
| 5 | /* |
| 6 | * Copyright 1999-2002,2004 The Apache Software Foundation. |
| 7 | * |
| 8 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 9 | * you may not use this file except in compliance with the License. |
| 10 | * You may obtain a copy of the License at |
| 11 | * |
| 12 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 13 | * |
| 14 | * Unless required by applicable law or agreed to in writing, software |
| 15 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 17 | * See the License for the specific language governing permissions and |
| 18 | * limitations under the License. |
| 19 | */ |
| 20 | |
| 21 | |
| 22 | package com.sun.org.apache.xml.internal.serialize; |
| 23 | |
| 24 | |
| 25 | import java.io.UnsupportedEncodingException; |
| 26 | import java.util.Hashtable; |
| 27 | import java.util.Locale; |
| 28 | |
| 29 | import com.sun.org.apache.xerces.internal.util.EncodingMap; |
| 30 | |
| 31 | |
| 32 | /** |
| 33 | * Provides information about encodings. Depends on the Java runtime |
| 34 | * to provides writers for the different encodings, but can be used |
| 35 | * to override encoding names and provide the last printable character |
| 36 | * for each encoding. |
| 37 | * |
| 38 | * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a> |
| 39 | */ |
| 40 | public class Encodings |
| 41 | { |
| 42 | |
| 43 | |
| 44 | /** |
| 45 | * The last printable character for unknown encodings. |
| 46 | */ |
| 47 | static final int DEFAULT_LAST_PRINTABLE = 0x7F; |
| 48 | |
| 49 | // last printable character for Unicode-compatible encodings |
| 50 | static final int LAST_PRINTABLE_UNICODE = 0xffff; |
| 51 | // unicode-compliant encodings; can express plane 0 |
| 52 | static final String[] UNICODE_ENCODINGS = { |
| 53 | "Unicode", "UnicodeBig", "UnicodeLittle", "GB2312", "UTF8", "UTF-16", |
| 54 | }; |
| 55 | // default (Java) encoding if none supplied: |
| 56 | static final String DEFAULT_ENCODING = "UTF8"; |
| 57 | |
| 58 | // note that the size of this Hashtable |
| 59 | // is bounded by the number of encodings recognized by EncodingMap; |
| 60 | // therefore it poses no static mutability risk. |
| 61 | static Hashtable _encodings = new Hashtable(); |
| 62 | |
| 63 | /** |
| 64 | * @param encoding a MIME charset name, or null. |
| 65 | */ |
| 66 | static EncodingInfo getEncodingInfo(String encoding, boolean allowJavaNames) throws UnsupportedEncodingException { |
| 67 | EncodingInfo eInfo = null; |
| 68 | if (encoding == null) { |
| 69 | if((eInfo = (EncodingInfo)_encodings.get(DEFAULT_ENCODING)) != null) |
| 70 | return eInfo; |
| 71 | eInfo = new EncodingInfo(EncodingMap.getJava2IANAMapping(DEFAULT_ENCODING), DEFAULT_ENCODING, LAST_PRINTABLE_UNICODE); |
| 72 | _encodings.put(DEFAULT_ENCODING, eInfo); |
| 73 | return eInfo; |
| 74 | } |
| 75 | // need to convert it to upper case: |
| 76 | encoding = encoding.toUpperCase(Locale.ENGLISH); |
| 77 | String jName = EncodingMap.getIANA2JavaMapping(encoding); |
| 78 | if(jName == null) { |
| 79 | // see if the encoding passed in is a Java encoding name. |
| 80 | if(allowJavaNames ) { |
| 81 | EncodingInfo.testJavaEncodingName(encoding); |
| 82 | if((eInfo = (EncodingInfo)_encodings.get(encoding)) != null) |
| 83 | return eInfo; |
| 84 | // is it known to be unicode-compliant? |
| 85 | int i=0; |
| 86 | for(; i<UNICODE_ENCODINGS.length; i++) { |
| 87 | if(UNICODE_ENCODINGS[i].equalsIgnoreCase(encoding)) { |
| 88 | eInfo = new EncodingInfo(EncodingMap.getJava2IANAMapping(encoding), encoding, LAST_PRINTABLE_UNICODE); |
| 89 | break; |
| 90 | } |
| 91 | } |
| 92 | if(i == UNICODE_ENCODINGS.length) { |
| 93 | eInfo = new EncodingInfo(EncodingMap.getJava2IANAMapping(encoding), encoding, DEFAULT_LAST_PRINTABLE); |
| 94 | } |
| 95 | _encodings.put(encoding, eInfo); |
| 96 | return eInfo; |
| 97 | } else { |
| 98 | throw new UnsupportedEncodingException(encoding); |
| 99 | } |
| 100 | } |
| 101 | if ((eInfo = (EncodingInfo)_encodings.get(jName)) != null) |
| 102 | return eInfo; |
| 103 | // have to create one... |
| 104 | // is it known to be unicode-compliant? |
| 105 | int i=0; |
| 106 | for(; i<UNICODE_ENCODINGS.length; i++) { |
| 107 | if(UNICODE_ENCODINGS[i].equalsIgnoreCase(jName)) { |
| 108 | eInfo = new EncodingInfo(encoding, jName, LAST_PRINTABLE_UNICODE); |
| 109 | break; |
| 110 | } |
| 111 | } |
| 112 | if(i == UNICODE_ENCODINGS.length) { |
| 113 | eInfo = new EncodingInfo(encoding, jName, DEFAULT_LAST_PRINTABLE); |
| 114 | } |
| 115 | _encodings.put(jName, eInfo); |
| 116 | return eInfo; |
| 117 | } |
| 118 | |
| 119 | static final String JIS_DANGER_CHARS |
| 120 | = "\\\u007e\u007f\u00a2\u00a3\u00a5\u00ac" |
| 121 | +"\u2014\u2015\u2016\u2026\u203e\u203e\u2225\u222f\u301c" |
| 122 | +"\uff3c\uff5e\uffe0\uffe1\uffe2\uffe3"; |
| 123 | |
| 124 | } |