J. Duke | 319a3b9 | 2007-12-01 00:00:00 +0000 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright 1997-2000 Sun Microsystems, Inc. All Rights Reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. Sun designates this |
| 8 | * particular file as subject to the "Classpath" exception as provided |
| 9 | * by Sun in the LICENSE file that accompanied this code. |
| 10 | * |
| 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 14 | * version 2 for more details (a copy is included in the LICENSE file that |
| 15 | * accompanied this code). |
| 16 | * |
| 17 | * You should have received a copy of the GNU General Public License version |
| 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 20 | * |
| 21 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
| 22 | * CA 95054 USA or visit www.sun.com if you need additional information or |
| 23 | * have any questions. |
| 24 | */ |
| 25 | package javax.swing.text.rtf; |
| 26 | |
| 27 | import java.io.*; |
| 28 | import java.lang.*; |
| 29 | |
| 30 | /** |
| 31 | * <b>RTFParser</b> is a subclass of <b>AbstractFilter</b> which understands basic RTF syntax |
| 32 | * and passes a stream of control words, text, and begin/end group |
| 33 | * indications to its subclass. |
| 34 | * |
| 35 | * Normally programmers will only use <b>RTFFilter</b>, a subclass of this class that knows what to |
| 36 | * do with the tokens this class parses. |
| 37 | * |
| 38 | * @see AbstractFilter |
| 39 | * @see RTFFilter |
| 40 | */ |
| 41 | abstract class RTFParser extends AbstractFilter |
| 42 | { |
| 43 | /** The current RTF group nesting level. */ |
| 44 | public int level; |
| 45 | |
| 46 | private int state; |
| 47 | private StringBuffer currentCharacters; |
| 48 | private String pendingKeyword; // where keywords go while we |
| 49 | // read their parameters |
| 50 | private int pendingCharacter; // for the \'xx construct |
| 51 | |
| 52 | private long binaryBytesLeft; // in a \bin blob? |
| 53 | ByteArrayOutputStream binaryBuf; |
| 54 | private boolean[] savedSpecials; |
| 55 | |
| 56 | /** A stream to which to write warnings and debugging information |
| 57 | * while parsing. This is set to <code>System.out</code> to log |
| 58 | * any anomalous information to stdout. */ |
| 59 | protected PrintStream warnings; |
| 60 | |
| 61 | // value for the 'state' variable |
| 62 | private final int S_text = 0; // reading random text |
| 63 | private final int S_backslashed = 1; // read a backslash, waiting for next |
| 64 | private final int S_token = 2; // reading a multicharacter token |
| 65 | private final int S_parameter = 3; // reading a token's parameter |
| 66 | |
| 67 | private final int S_aftertick = 4; // after reading \' |
| 68 | private final int S_aftertickc = 5; // after reading \'x |
| 69 | |
| 70 | private final int S_inblob = 6; // in a \bin blob |
| 71 | |
| 72 | /** Implemented by subclasses to interpret a parameter-less RTF keyword. |
| 73 | * The keyword is passed without the leading '/' or any delimiting |
| 74 | * whitespace. */ |
| 75 | public abstract boolean handleKeyword(String keyword); |
| 76 | /** Implemented by subclasses to interpret a keyword with a parameter. |
| 77 | * @param keyword The keyword, as with <code>handleKeyword(String)</code>. |
| 78 | * @param parameter The parameter following the keyword. */ |
| 79 | public abstract boolean handleKeyword(String keyword, int parameter); |
| 80 | /** Implemented by subclasses to interpret text from the RTF stream. */ |
| 81 | public abstract void handleText(String text); |
| 82 | public void handleText(char ch) |
| 83 | { handleText(String.valueOf(ch)); } |
| 84 | /** Implemented by subclasses to handle the contents of the \bin keyword. */ |
| 85 | public abstract void handleBinaryBlob(byte[] data); |
| 86 | /** Implemented by subclasses to react to an increase |
| 87 | * in the nesting level. */ |
| 88 | public abstract void begingroup(); |
| 89 | /** Implemented by subclasses to react to the end of a group. */ |
| 90 | public abstract void endgroup(); |
| 91 | |
| 92 | // table of non-text characters in rtf |
| 93 | static final boolean rtfSpecialsTable[]; |
| 94 | static { |
| 95 | rtfSpecialsTable = (boolean[])noSpecialsTable.clone(); |
| 96 | rtfSpecialsTable['\n'] = true; |
| 97 | rtfSpecialsTable['\r'] = true; |
| 98 | rtfSpecialsTable['{'] = true; |
| 99 | rtfSpecialsTable['}'] = true; |
| 100 | rtfSpecialsTable['\\'] = true; |
| 101 | } |
| 102 | |
| 103 | public RTFParser() |
| 104 | { |
| 105 | currentCharacters = new StringBuffer(); |
| 106 | state = S_text; |
| 107 | pendingKeyword = null; |
| 108 | level = 0; |
| 109 | //warnings = System.out; |
| 110 | |
| 111 | specialsTable = rtfSpecialsTable; |
| 112 | } |
| 113 | |
| 114 | // TODO: Handle wrapup at end of file correctly. |
| 115 | |
| 116 | public void writeSpecial(int b) |
| 117 | throws IOException |
| 118 | { |
| 119 | write((char)b); |
| 120 | } |
| 121 | |
| 122 | protected void warning(String s) { |
| 123 | if (warnings != null) { |
| 124 | warnings.println(s); |
| 125 | } |
| 126 | } |
| 127 | |
| 128 | public void write(String s) |
| 129 | throws IOException |
| 130 | { |
| 131 | if (state != S_text) { |
| 132 | int index = 0; |
| 133 | int length = s.length(); |
| 134 | while(index < length && state != S_text) { |
| 135 | write(s.charAt(index)); |
| 136 | index ++; |
| 137 | } |
| 138 | |
| 139 | if(index >= length) |
| 140 | return; |
| 141 | |
| 142 | s = s.substring(index); |
| 143 | } |
| 144 | |
| 145 | if (currentCharacters.length() > 0) |
| 146 | currentCharacters.append(s); |
| 147 | else |
| 148 | handleText(s); |
| 149 | } |
| 150 | |
| 151 | public void write(char ch) |
| 152 | throws IOException |
| 153 | { |
| 154 | boolean ok; |
| 155 | |
| 156 | switch (state) |
| 157 | { |
| 158 | case S_text: |
| 159 | if (ch == '\n' || ch == '\r') { |
| 160 | break; // unadorned newlines are ignored |
| 161 | } else if (ch == '{') { |
| 162 | if (currentCharacters.length() > 0) { |
| 163 | handleText(currentCharacters.toString()); |
| 164 | currentCharacters = new StringBuffer(); |
| 165 | } |
| 166 | level ++; |
| 167 | begingroup(); |
| 168 | } else if(ch == '}') { |
| 169 | if (currentCharacters.length() > 0) { |
| 170 | handleText(currentCharacters.toString()); |
| 171 | currentCharacters = new StringBuffer(); |
| 172 | } |
| 173 | if (level == 0) |
| 174 | throw new IOException("Too many close-groups in RTF text"); |
| 175 | endgroup(); |
| 176 | level --; |
| 177 | } else if(ch == '\\') { |
| 178 | if (currentCharacters.length() > 0) { |
| 179 | handleText(currentCharacters.toString()); |
| 180 | currentCharacters = new StringBuffer(); |
| 181 | } |
| 182 | state = S_backslashed; |
| 183 | } else { |
| 184 | currentCharacters.append(ch); |
| 185 | } |
| 186 | break; |
| 187 | case S_backslashed: |
| 188 | if (ch == '\'') { |
| 189 | state = S_aftertick; |
| 190 | break; |
| 191 | } |
| 192 | if (!Character.isLetter(ch)) { |
| 193 | char newstring[] = new char[1]; |
| 194 | newstring[0] = ch; |
| 195 | if (!handleKeyword(new String(newstring))) { |
| 196 | warning("Unknown keyword: " + newstring + " (" + (byte)ch + ")"); |
| 197 | } |
| 198 | state = S_text; |
| 199 | pendingKeyword = null; |
| 200 | /* currentCharacters is already an empty stringBuffer */ |
| 201 | break; |
| 202 | } |
| 203 | |
| 204 | state = S_token; |
| 205 | /* FALL THROUGH */ |
| 206 | case S_token: |
| 207 | if (Character.isLetter(ch)) { |
| 208 | currentCharacters.append(ch); |
| 209 | } else { |
| 210 | pendingKeyword = currentCharacters.toString(); |
| 211 | currentCharacters = new StringBuffer(); |
| 212 | |
| 213 | // Parameter following? |
| 214 | if (Character.isDigit(ch) || (ch == '-')) { |
| 215 | state = S_parameter; |
| 216 | currentCharacters.append(ch); |
| 217 | } else { |
| 218 | ok = handleKeyword(pendingKeyword); |
| 219 | if (!ok) |
| 220 | warning("Unknown keyword: " + pendingKeyword); |
| 221 | pendingKeyword = null; |
| 222 | state = S_text; |
| 223 | |
| 224 | // Non-space delimiters get included in the text |
| 225 | if (!Character.isWhitespace(ch)) |
| 226 | write(ch); |
| 227 | } |
| 228 | } |
| 229 | break; |
| 230 | case S_parameter: |
| 231 | if (Character.isDigit(ch)) { |
| 232 | currentCharacters.append(ch); |
| 233 | } else { |
| 234 | /* TODO: Test correct behavior of \bin keyword */ |
| 235 | if (pendingKeyword.equals("bin")) { /* magic layer-breaking kwd */ |
| 236 | long parameter = Long.parseLong(currentCharacters.toString()); |
| 237 | pendingKeyword = null; |
| 238 | state = S_inblob; |
| 239 | binaryBytesLeft = parameter; |
| 240 | if (binaryBytesLeft > Integer.MAX_VALUE) |
| 241 | binaryBuf = new ByteArrayOutputStream(Integer.MAX_VALUE); |
| 242 | else |
| 243 | binaryBuf = new ByteArrayOutputStream((int)binaryBytesLeft); |
| 244 | savedSpecials = specialsTable; |
| 245 | specialsTable = allSpecialsTable; |
| 246 | break; |
| 247 | } |
| 248 | |
| 249 | int parameter = Integer.parseInt(currentCharacters.toString()); |
| 250 | ok = handleKeyword(pendingKeyword, parameter); |
| 251 | if (!ok) |
| 252 | warning("Unknown keyword: " + pendingKeyword + |
| 253 | " (param " + currentCharacters + ")"); |
| 254 | pendingKeyword = null; |
| 255 | currentCharacters = new StringBuffer(); |
| 256 | state = S_text; |
| 257 | |
| 258 | // Delimiters here are interpreted as text too |
| 259 | if (!Character.isWhitespace(ch)) |
| 260 | write(ch); |
| 261 | } |
| 262 | break; |
| 263 | case S_aftertick: |
| 264 | if (Character.digit(ch, 16) == -1) |
| 265 | state = S_text; |
| 266 | else { |
| 267 | pendingCharacter = Character.digit(ch, 16); |
| 268 | state = S_aftertickc; |
| 269 | } |
| 270 | break; |
| 271 | case S_aftertickc: |
| 272 | state = S_text; |
| 273 | if (Character.digit(ch, 16) != -1) |
| 274 | { |
| 275 | pendingCharacter = pendingCharacter * 16 + Character.digit(ch, 16); |
| 276 | ch = translationTable[pendingCharacter]; |
| 277 | if (ch != 0) |
| 278 | handleText(ch); |
| 279 | } |
| 280 | break; |
| 281 | case S_inblob: |
| 282 | binaryBuf.write(ch); |
| 283 | binaryBytesLeft --; |
| 284 | if (binaryBytesLeft == 0) { |
| 285 | state = S_text; |
| 286 | specialsTable = savedSpecials; |
| 287 | savedSpecials = null; |
| 288 | handleBinaryBlob(binaryBuf.toByteArray()); |
| 289 | binaryBuf = null; |
| 290 | } |
| 291 | } |
| 292 | } |
| 293 | |
| 294 | /** Flushes any buffered but not yet written characters. |
| 295 | * Subclasses which override this method should call this |
| 296 | * method <em>before</em> flushing |
| 297 | * any of their own buffers. */ |
| 298 | public void flush() |
| 299 | throws IOException |
| 300 | { |
| 301 | super.flush(); |
| 302 | |
| 303 | if (state == S_text && currentCharacters.length() > 0) { |
| 304 | handleText(currentCharacters.toString()); |
| 305 | currentCharacters = new StringBuffer(); |
| 306 | } |
| 307 | } |
| 308 | |
| 309 | /** Closes the parser. Currently, this simply does a <code>flush()</code>, |
| 310 | * followed by some minimal consistency checks. */ |
| 311 | public void close() |
| 312 | throws IOException |
| 313 | { |
| 314 | flush(); |
| 315 | |
| 316 | if (state != S_text || level > 0) { |
| 317 | warning("Truncated RTF file."); |
| 318 | |
| 319 | /* TODO: any sane way to handle termination in a non-S_text state? */ |
| 320 | /* probably not */ |
| 321 | |
| 322 | /* this will cause subclasses to behave more reasonably |
| 323 | some of the time */ |
| 324 | while (level > 0) { |
| 325 | endgroup(); |
| 326 | level --; |
| 327 | } |
| 328 | } |
| 329 | |
| 330 | super.close(); |
| 331 | } |
| 332 | |
| 333 | } |