// Copyright (c) 2003-2004 Brian Wellington (bwelling@xbill.org)
//
// Copyright (C) 2003-2004 Nominum, Inc.
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND NOMINUM DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NOMINUM BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
// OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
17
18package org.xbill.DNS;
19
20import java.io.*;
21import java.net.*;
22
23import org.xbill.DNS.utils.*;
24
25/**
26 * Tokenizer is used to parse DNS records and zones from text format,
27 *
28 * @author Brian Wellington
29 * @author Bob Halley
30 */
31
32public class Tokenizer {
33
34private static String delim = " \t\n;()\"";
35private static String quotes = "\"";
36
37/** End of file */
38public static final int EOF = 0;
39
40/** End of line */
41public static final int EOL = 1;
42
43/** Whitespace; only returned when wantWhitespace is set */
44public static final int WHITESPACE = 2;
45
46/** An identifier (unquoted string) */
47public static final int IDENTIFIER = 3;
48
49/** A quoted string */
50public static final int QUOTED_STRING = 4;
51
52/** A comment; only returned when wantComment is set */
53public static final int COMMENT = 5;
54
55private PushbackInputStream is;
56private boolean ungottenToken;
57private int multiline;
58private boolean quoting;
59private String delimiters;
60private Token current;
61private StringBuffer sb;
62private boolean wantClose;
63
64private String filename;
65private int line;
66
67public static class Token {
68 /** The type of token. */
69 public int type;
70
71 /** The value of the token, or null for tokens without values. */
72 public String value;
73
74 private
75 Token() {
76 type = -1;
77 value = null;
78 }
79
80 private Token
81 set(int type, StringBuffer value) {
82 if (type < 0)
83 throw new IllegalArgumentException();
84 this.type = type;
85 this.value = value == null ? null : value.toString();
86 return this;
87 }
88
89 /**
90 * Converts the token to a string containing a representation useful
91 * for debugging.
92 */
93 public String
94 toString() {
95 switch (type) {
96 case EOF:
97 return "<eof>";
98 case EOL:
99 return "<eol>";
100 case WHITESPACE:
101 return "<whitespace>";
102 case IDENTIFIER:
103 return "<identifier: " + value + ">";
104 case QUOTED_STRING:
105 return "<quoted_string: " + value + ">";
106 case COMMENT:
107 return "<comment: " + value + ">";
108 default:
109 return "<unknown>";
110 }
111 }
112
113 /** Indicates whether this token contains a string. */
114 public boolean
115 isString() {
116 return (type == IDENTIFIER || type == QUOTED_STRING);
117 }
118
119 /** Indicates whether this token contains an EOL or EOF. */
120 public boolean
121 isEOL() {
122 return (type == EOL || type == EOF);
123 }
124}
125
126static class TokenizerException extends TextParseException {
127 String message;
128
129 public
130 TokenizerException(String filename, int line, String message) {
131 super(filename + ":" + line + ": " + message);
132 this.message = message;
133 }
134
135 public String
136 getBaseMessage() {
137 return message;
138 }
139}
140
141/**
142 * Creates a Tokenizer from an arbitrary input stream.
143 * @param is The InputStream to tokenize.
144 */
145public
146Tokenizer(InputStream is) {
147 if (!(is instanceof BufferedInputStream))
148 is = new BufferedInputStream(is);
149 this.is = new PushbackInputStream(is, 2);
150 ungottenToken = false;
151 multiline = 0;
152 quoting = false;
153 delimiters = delim;
154 current = new Token();
155 sb = new StringBuffer();
156 filename = "<none>";
157 line = 1;
158}
159
160/**
161 * Creates a Tokenizer from a string.
162 * @param s The String to tokenize.
163 */
164public
165Tokenizer(String s) {
166 this(new ByteArrayInputStream(s.getBytes()));
167}
168
169/**
170 * Creates a Tokenizer from a file.
171 * @param f The File to tokenize.
172 */
173public
174Tokenizer(File f) throws FileNotFoundException {
175 this(new FileInputStream(f));
176 wantClose = true;
177 filename = f.getName();
178}
179
180private int
181getChar() throws IOException {
182 int c = is.read();
183 if (c == '\r') {
184 int next = is.read();
185 if (next != '\n')
186 is.unread(next);
187 c = '\n';
188 }
189 if (c == '\n')
190 line++;
191 return c;
192}
193
194private void
195ungetChar(int c) throws IOException {
196 if (c == -1)
197 return;
198 is.unread(c);
199 if (c == '\n')
200 line--;
201}
202
203private int
204skipWhitespace() throws IOException {
205 int skipped = 0;
206 while (true) {
207 int c = getChar();
208 if (c != ' ' && c != '\t') {
209 if (!(c == '\n' && multiline > 0)) {
210 ungetChar(c);
211 return skipped;
212 }
213 }
214 skipped++;
215 }
216}
217
218private void
219checkUnbalancedParens() throws TextParseException {
220 if (multiline > 0)
221 throw exception("unbalanced parentheses");
222}
223
224/**
225 * Gets the next token from a tokenizer.
226 * @param wantWhitespace If true, leading whitespace will be returned as a
227 * token.
228 * @param wantComment If true, comments are returned as tokens.
229 * @return The next token in the stream.
230 * @throws TextParseException The input was invalid.
231 * @throws IOException An I/O error occurred.
232 */
233public Token
234get(boolean wantWhitespace, boolean wantComment) throws IOException {
235 int type;
236 int c;
237
238 if (ungottenToken) {
239 ungottenToken = false;
240 if (current.type == WHITESPACE) {
241 if (wantWhitespace)
242 return current;
243 } else if (current.type == COMMENT) {
244 if (wantComment)
245 return current;
246 } else {
247 if (current.type == EOL)
248 line++;
249 return current;
250 }
251 }
252 int skipped = skipWhitespace();
253 if (skipped > 0 && wantWhitespace)
254 return current.set(WHITESPACE, null);
255 type = IDENTIFIER;
256 sb.setLength(0);
257 while (true) {
258 c = getChar();
259 if (c == -1 || delimiters.indexOf(c) != -1) {
260 if (c == -1) {
261 if (quoting)
262 throw exception("EOF in " +
263 "quoted string");
264 else if (sb.length() == 0)
265 return current.set(EOF, null);
266 else
267 return current.set(type, sb);
268 }
269 if (sb.length() == 0 && type != QUOTED_STRING) {
270 if (c == '(') {
271 multiline++;
272 skipWhitespace();
273 continue;
274 } else if (c == ')') {
275 if (multiline <= 0)
276 throw exception("invalid " +
277 "close " +
278 "parenthesis");
279 multiline--;
280 skipWhitespace();
281 continue;
282 } else if (c == '"') {
283 if (!quoting) {
284 quoting = true;
285 delimiters = quotes;
286 type = QUOTED_STRING;
287 } else {
288 quoting = false;
289 delimiters = delim;
290 skipWhitespace();
291 }
292 continue;
293 } else if (c == '\n') {
294 return current.set(EOL, null);
295 } else if (c == ';') {
296 while (true) {
297 c = getChar();
298 if (c == '\n' || c == -1)
299 break;
300 sb.append((char)c);
301 }
302 if (wantComment) {
303 ungetChar(c);
304 return current.set(COMMENT, sb);
305 } else if (c == -1 &&
306 type != QUOTED_STRING)
307 {
308 checkUnbalancedParens();
309 return current.set(EOF, null);
310 } else if (multiline > 0) {
311 skipWhitespace();
312 sb.setLength(0);
313 continue;
314 } else
315 return current.set(EOL, null);
316 } else
317 throw new IllegalStateException();
318 } else
319 ungetChar(c);
320 break;
321 } else if (c == '\\') {
322 c = getChar();
323 if (c == -1)
324 throw exception("unterminated escape sequence");
325 sb.append('\\');
326 } else if (quoting && c == '\n') {
327 throw exception("newline in quoted string");
328 }
329 sb.append((char)c);
330 }
331 if (sb.length() == 0 && type != QUOTED_STRING) {
332 checkUnbalancedParens();
333 return current.set(EOF, null);
334 }
335 return current.set(type, sb);
336}
337
338/**
339 * Gets the next token from a tokenizer, ignoring whitespace and comments.
340 * @return The next token in the stream.
341 * @throws TextParseException The input was invalid.
342 * @throws IOException An I/O error occurred.
343 */
344public Token
345get() throws IOException {
346 return get(false, false);
347}
348
349/**
350 * Returns a token to the stream, so that it will be returned by the next call
351 * to get().
352 * @throws IllegalStateException There are already ungotten tokens.
353 */
354public void
355unget() {
356 if (ungottenToken)
357 throw new IllegalStateException
358 ("Cannot unget multiple tokens");
359 if (current.type == EOL)
360 line--;
361 ungottenToken = true;
362}
363
364/**
365 * Gets the next token from a tokenizer and converts it to a string.
366 * @return The next token in the stream, as a string.
367 * @throws TextParseException The input was invalid or not a string.
368 * @throws IOException An I/O error occurred.
369 */
370public String
371getString() throws IOException {
372 Token next = get();
373 if (!next.isString()) {
374 throw exception("expected a string");
375 }
376 return next.value;
377}
378
379private String
380_getIdentifier(String expected) throws IOException {
381 Token next = get();
382 if (next.type != IDENTIFIER)
383 throw exception("expected " + expected);
384 return next.value;
385}
386
387/**
388 * Gets the next token from a tokenizer, ensures it is an unquoted string,
389 * and converts it to a string.
390 * @return The next token in the stream, as a string.
391 * @throws TextParseException The input was invalid or not an unquoted string.
392 * @throws IOException An I/O error occurred.
393 */
394public String
395getIdentifier() throws IOException {
396 return _getIdentifier("an identifier");
397}
398
399/**
400 * Gets the next token from a tokenizer and converts it to a long.
401 * @return The next token in the stream, as a long.
402 * @throws TextParseException The input was invalid or not a long.
403 * @throws IOException An I/O error occurred.
404 */
405public long
406getLong() throws IOException {
407 String next = _getIdentifier("an integer");
408 if (!Character.isDigit(next.charAt(0)))
409 throw exception("expected an integer");
410 try {
411 return Long.parseLong(next);
412 } catch (NumberFormatException e) {
413 throw exception("expected an integer");
414 }
415}
416
417/**
418 * Gets the next token from a tokenizer and converts it to an unsigned 32 bit
419 * integer.
420 * @return The next token in the stream, as an unsigned 32 bit integer.
421 * @throws TextParseException The input was invalid or not an unsigned 32
422 * bit integer.
423 * @throws IOException An I/O error occurred.
424 */
425public long
426getUInt32() throws IOException {
427 long l = getLong();
428 if (l < 0 || l > 0xFFFFFFFFL)
429 throw exception("expected an 32 bit unsigned integer");
430 return l;
431}
432
433/**
434 * Gets the next token from a tokenizer and converts it to an unsigned 16 bit
435 * integer.
436 * @return The next token in the stream, as an unsigned 16 bit integer.
437 * @throws TextParseException The input was invalid or not an unsigned 16
438 * bit integer.
439 * @throws IOException An I/O error occurred.
440 */
441public int
442getUInt16() throws IOException {
443 long l = getLong();
444 if (l < 0 || l > 0xFFFFL)
445 throw exception("expected an 16 bit unsigned integer");
446 return (int) l;
447}
448
449/**
450 * Gets the next token from a tokenizer and converts it to an unsigned 8 bit
451 * integer.
452 * @return The next token in the stream, as an unsigned 8 bit integer.
453 * @throws TextParseException The input was invalid or not an unsigned 8
454 * bit integer.
455 * @throws IOException An I/O error occurred.
456 */
457public int
458getUInt8() throws IOException {
459 long l = getLong();
460 if (l < 0 || l > 0xFFL)
461 throw exception("expected an 8 bit unsigned integer");
462 return (int) l;
463}
464
465/**
466 * Gets the next token from a tokenizer and parses it as a TTL.
467 * @return The next token in the stream, as an unsigned 32 bit integer.
468 * @throws TextParseException The input was not valid.
469 * @throws IOException An I/O error occurred.
470 * @see TTL
471 */
472public long
473getTTL() throws IOException {
474 String next = _getIdentifier("a TTL value");
475 try {
476 return TTL.parseTTL(next);
477 }
478 catch (NumberFormatException e) {
479 throw exception("expected a TTL value");
480 }
481}
482
483/**
484 * Gets the next token from a tokenizer and parses it as if it were a TTL.
485 * @return The next token in the stream, as an unsigned 32 bit integer.
486 * @throws TextParseException The input was not valid.
487 * @throws IOException An I/O error occurred.
488 * @see TTL
489 */
490public long
491getTTLLike() throws IOException {
492 String next = _getIdentifier("a TTL-like value");
493 try {
494 return TTL.parse(next, false);
495 }
496 catch (NumberFormatException e) {
497 throw exception("expected a TTL-like value");
498 }
499}
500
501/**
502 * Gets the next token from a tokenizer and converts it to a name.
503 * @param origin The origin to append to relative names.
504 * @return The next token in the stream, as a name.
505 * @throws TextParseException The input was invalid or not a valid name.
506 * @throws IOException An I/O error occurred.
507 * @throws RelativeNameException The parsed name was relative, even with the
508 * origin.
509 * @see Name
510 */
511public Name
512getName(Name origin) throws IOException {
513 String next = _getIdentifier("a name");
514 try {
515 Name name = Name.fromString(next, origin);
516 if (!name.isAbsolute())
517 throw new RelativeNameException(name);
518 return name;
519 }
520 catch (TextParseException e) {
521 throw exception(e.getMessage());
522 }
523}
524
525/**
526 * Gets the next token from a tokenizer and converts it to an IP Address.
527 * @param family The address family.
528 * @return The next token in the stream, as an InetAddress
529 * @throws TextParseException The input was invalid or not a valid address.
530 * @throws IOException An I/O error occurred.
531 * @see Address
532 */
533public InetAddress
534getAddress(int family) throws IOException {
535 String next = _getIdentifier("an address");
536 try {
537 return Address.getByAddress(next, family);
538 }
539 catch (UnknownHostException e) {
540 throw exception(e.getMessage());
541 }
542}
543
544/**
545 * Gets the next token from a tokenizer, which must be an EOL or EOF.
546 * @throws TextParseException The input was invalid or not an EOL or EOF token.
547 * @throws IOException An I/O error occurred.
548 */
549public void
550getEOL() throws IOException {
551 Token next = get();
552 if (next.type != EOL && next.type != EOF) {
553 throw exception("expected EOL or EOF");
554 }
555}
556
557/**
558 * Returns a concatenation of the remaining strings from a Tokenizer.
559 */
560private String
561remainingStrings() throws IOException {
562 StringBuffer buffer = null;
563 while (true) {
564 Tokenizer.Token t = get();
565 if (!t.isString())
566 break;
567 if (buffer == null)
568 buffer = new StringBuffer();
569 buffer.append(t.value);
570 }
571 unget();
572 if (buffer == null)
573 return null;
574 return buffer.toString();
575}
576
577/**
578 * Gets the remaining string tokens until an EOL/EOF is seen, concatenates
579 * them together, and converts the base64 encoded data to a byte array.
580 * @param required If true, an exception will be thrown if no strings remain;
581 * otherwise null be be returned.
582 * @return The byte array containing the decoded strings, or null if there
583 * were no strings to decode.
584 * @throws TextParseException The input was invalid.
585 * @throws IOException An I/O error occurred.
586 */
587public byte []
588getBase64(boolean required) throws IOException {
589 String s = remainingStrings();
590 if (s == null) {
591 if (required)
592 throw exception("expected base64 encoded string");
593 else
594 return null;
595 }
596 byte [] array = base64.fromString(s);
597 if (array == null)
598 throw exception("invalid base64 encoding");
599 return array;
600}
601
602/**
603 * Gets the remaining string tokens until an EOL/EOF is seen, concatenates
604 * them together, and converts the base64 encoded data to a byte array.
605 * @return The byte array containing the decoded strings, or null if there
606 * were no strings to decode.
607 * @throws TextParseException The input was invalid.
608 * @throws IOException An I/O error occurred.
609 */
610public byte []
611getBase64() throws IOException {
612 return getBase64(false);
613}
614
615/**
616 * Gets the remaining string tokens until an EOL/EOF is seen, concatenates
617 * them together, and converts the hex encoded data to a byte array.
618 * @param required If true, an exception will be thrown if no strings remain;
619 * otherwise null be be returned.
620 * @return The byte array containing the decoded strings, or null if there
621 * were no strings to decode.
622 * @throws TextParseException The input was invalid.
623 * @throws IOException An I/O error occurred.
624 */
625public byte []
626getHex(boolean required) throws IOException {
627 String s = remainingStrings();
628 if (s == null) {
629 if (required)
630 throw exception("expected hex encoded string");
631 else
632 return null;
633 }
634 byte [] array = base16.fromString(s);
635 if (array == null)
636 throw exception("invalid hex encoding");
637 return array;
638}
639
640/**
641 * Gets the remaining string tokens until an EOL/EOF is seen, concatenates
642 * them together, and converts the hex encoded data to a byte array.
643 * @return The byte array containing the decoded strings, or null if there
644 * were no strings to decode.
645 * @throws TextParseException The input was invalid.
646 * @throws IOException An I/O error occurred.
647 */
648public byte []
649getHex() throws IOException {
650 return getHex(false);
651}
652
653/**
654 * Gets the next token from a tokenizer and decodes it as hex.
655 * @return The byte array containing the decoded string.
656 * @throws TextParseException The input was invalid.
657 * @throws IOException An I/O error occurred.
658 */
659public byte []
660getHexString() throws IOException {
661 String next = _getIdentifier("a hex string");
662 byte [] array = base16.fromString(next);
663 if (array == null)
664 throw exception("invalid hex encoding");
665 return array;
666}
667
668/**
669 * Gets the next token from a tokenizer and decodes it as base32.
670 * @param b32 The base32 context to decode with.
671 * @return The byte array containing the decoded string.
672 * @throws TextParseException The input was invalid.
673 * @throws IOException An I/O error occurred.
674 */
675public byte []
676getBase32String(base32 b32) throws IOException {
677 String next = _getIdentifier("a base32 string");
678 byte [] array = b32.fromString(next);
679 if (array == null)
680 throw exception("invalid base32 encoding");
681 return array;
682}
683
684/**
685 * Creates an exception which includes the current state in the error message
686 * @param s The error message to include.
687 * @return The exception to be thrown
688 */
689public TextParseException
690exception(String s) {
691 return new TokenizerException(filename, line, s);
692}
693
694/**
695 * Closes any files opened by this tokenizer.
696 */
697public void
698close() {
699 if (wantClose) {
700 try {
701 is.close();
702 }
703 catch (IOException e) {
704 }
705 }
706}
707
708protected void
709finalize() {
710 close();
711}
712
713}