blob: 05d6432630cfafdf2a302f084d187e907346e20a [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 1994-2004 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26package sun.tools.java;
27
28import java.io.IOException;
29import java.io.InputStream;
30import java.util.Hashtable;
31
32/**
33 * A Scanner for Java tokens. Errors are reported
34 * to the environment object.<p>
35 *
36 * The scanner keeps track of the current token,
37 * the value of the current token (if any), and the start
38 * position of the current token.<p>
39 *
40 * The scan() method advances the scanner to the next
41 * token in the input.<p>
42 *
43 * The match() method is used to quickly match opening
44 * brackets (ie: '(', '{', or '[') with their closing
45 * counter part. This is useful during error recovery.<p>
46 *
47 * A position consists of: ((linenr << WHEREOFFSETBITS) | offset)
48 * this means that both the line number and the exact offset into
49 * the file are encoded in each position value.<p>
50 *
51 * The compiler treats either "\n", "\r" or "\r\n" as the
52 * end of a line.<p>
53 *
54 * WARNING: The contents of this source file are not part of any
55 * supported API. Code that depends on them does so at its own risk:
56 * they are subject to change or removal without notice.
57 *
58 * @author Arthur van Hoff
59 */
60
61public
62class Scanner implements Constants {
63 /**
64 * The increment for each character.
65 */
66 public static final long OFFSETINC = 1;
67
68 /**
69 * The increment for each line.
70 */
71 public static final long LINEINC = 1L << WHEREOFFSETBITS;
72
73 /**
74 * End of input
75 */
76 public static final int EOF = -1;
77
78 /**
79 * Where errors are reported
80 */
81 public Environment env;
82
83 /**
84 * Input reader
85 */
86 protected ScannerInputReader in;
87
88 /**
89 * If true, present all comments as tokens.
90 * Contents are not saved, but positions are recorded accurately,
91 * so the comment can be recovered from the text.
92 * Line terminations are also returned as comment tokens,
93 * and may be distinguished by their start and end positions,
94 * which are equal (meaning, these tokens contain no chars).
95 */
96 public boolean scanComments = false;
97
98 /**
99 * Current token
100 */
101 public int token;
102
103 /**
104 * The position of the current token
105 */
106 public long pos;
107
108 /**
109 * The position of the previous token
110 */
111 public long prevPos;
112
113 /**
114 * The current character
115 */
116 protected int ch;
117
118 /*
119 * Token values.
120 */
121 public char charValue;
122 public int intValue;
123 public long longValue;
124 public float floatValue;
125 public double doubleValue;
126 public String stringValue;
127 public Identifier idValue;
128 public int radix; // Radix, when reading int or long
129
130 /*
131 * A doc comment preceding the most recent token
132 */
133 public String docComment;
134
135 /*
136 * A growable character buffer.
137 */
138 private int count;
139 private char buffer[] = new char[1024];
140 private void growBuffer() {
141 char newBuffer[] = new char[buffer.length * 2];
142 System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
143 buffer = newBuffer;
144 }
145
146 // The following two methods have been hand-inlined in
147 // scanDocComment. If you make changes here, you should
148 // check to see if scanDocComment also needs modification.
149 private void putc(int ch) {
150 if (count == buffer.length) {
151 growBuffer();
152 }
153 buffer[count++] = (char)ch;
154 }
155
156 private String bufferString() {
157 return new String(buffer, 0, count);
158 }
159
/**
 * Create a scanner that reads from the given input stream.
 * The first token is scanned as part of construction
 * (see useInputStream).
 *
 * @param env where errors are reported
 * @param in  the stream to scan
 */
public Scanner(Environment env, InputStream in) throws IOException {
    this(env);
    useInputStream(in);
}
167
168 /**
169 * Setup input from the given input stream,
170 * and scan the first token from it.
171 */
172 protected void useInputStream(InputStream in) throws IOException {
173 try {
174 this.in = new ScannerInputReader(env, in);
175 } catch (Exception e) {
176 env.setCharacterEncoding(null);
177 this.in = new ScannerInputReader(env, in);
178 }
179
180 ch = this.in.read();
181 prevPos = this.in.pos;
182
183 scan();
184 }
185
/**
 * Create a scanner that has no input yet. Only the environment is
 * recorded; the subclass is expected to call useInputStream at the
 * right time.
 *
 * @param env where errors are reported
 */
protected Scanner(Environment env) {
    // No input is attached here on purpose.
    this.env = env;
}
193
194 /**
195 * Define a keyword.
196 */
197 private static void defineKeyword(int val) {
198 Identifier.lookup(opNames[val]).setType(val);
199 }
200
201 /**
202 * Initialized keyword and token Hashtables
203 */
204 static {
205 // Statement keywords
206 defineKeyword(FOR);
207 defineKeyword(IF);
208 defineKeyword(ELSE);
209 defineKeyword(WHILE);
210 defineKeyword(DO);
211 defineKeyword(SWITCH);
212 defineKeyword(CASE);
213 defineKeyword(DEFAULT);
214 defineKeyword(BREAK);
215 defineKeyword(CONTINUE);
216 defineKeyword(RETURN);
217 defineKeyword(TRY);
218 defineKeyword(CATCH);
219 defineKeyword(FINALLY);
220 defineKeyword(THROW);
221
222 // Type defineKeywords
223 defineKeyword(BYTE);
224 defineKeyword(CHAR);
225 defineKeyword(SHORT);
226 defineKeyword(INT);
227 defineKeyword(LONG);
228 defineKeyword(FLOAT);
229 defineKeyword(DOUBLE);
230 defineKeyword(VOID);
231 defineKeyword(BOOLEAN);
232
233 // Expression keywords
234 defineKeyword(INSTANCEOF);
235 defineKeyword(TRUE);
236 defineKeyword(FALSE);
237 defineKeyword(NEW);
238 defineKeyword(THIS);
239 defineKeyword(SUPER);
240 defineKeyword(NULL);
241
242 // Declaration keywords
243 defineKeyword(IMPORT);
244 defineKeyword(CLASS);
245 defineKeyword(EXTENDS);
246 defineKeyword(IMPLEMENTS);
247 defineKeyword(INTERFACE);
248 defineKeyword(PACKAGE);
249 defineKeyword(THROWS);
250
251 // Modifier keywords
252 defineKeyword(PRIVATE);
253 defineKeyword(PUBLIC);
254 defineKeyword(PROTECTED);
255 defineKeyword(STATIC);
256 defineKeyword(TRANSIENT);
257 defineKeyword(SYNCHRONIZED);
258 defineKeyword(NATIVE);
259 defineKeyword(ABSTRACT);
260 defineKeyword(VOLATILE);
261 defineKeyword(FINAL);
262 defineKeyword(STRICTFP);
263
264 // reserved keywords
265 defineKeyword(CONST);
266 defineKeyword(GOTO);
267 }
268
269 /**
270 * Scan a comment. This method should be
271 * called once the initial /, * and the next
272 * character have been read.
273 */
274 private void skipComment() throws IOException {
275 while (true) {
276 switch (ch) {
277 case EOF:
278 env.error(pos, "eof.in.comment");
279 return;
280
281 case '*':
282 if ((ch = in.read()) == '/') {
283 ch = in.read();
284 return;
285 }
286 break;
287
288 default:
289 ch = in.read();
290 break;
291 }
292 }
293 }
294
295 /**
296 * Scan a doc comment. This method should be called
297 * once the initial /, * and * have been read. It gathers
298 * the content of the comment (witout leading spaces and '*'s)
299 * in the string buffer.
300 */
301 private String scanDocComment() throws IOException {
302 // Note: this method has been hand-optimized to yield
303 // better performance. This was done after it was noted
304 // that javadoc spent a great deal of its time here.
305 // This should also help the performance of the compiler
306 // as well -- it scans the doc comments to find
307 // @deprecated tags.
308 //
309 // The logic of the method has been completely rewritten
310 // to avoid the use of flags that need to be looked at
311 // for every character read. Members that are accessed
312 // more than once have been stored in local variables.
313 // The methods putc() and bufferString() have been
314 // inlined by hand. Extra cases have been added to
315 // switch statements to trick the compiler into generating
316 // a tableswitch instead of a lookupswitch.
317 //
318 // This implementation aims to preserve the previous
319 // behavior of this method.
320
321 int c;
322
323 // Put `in' in a local variable.
324 final ScannerInputReader in = this.in;
325
326 // We maintain the buffer locally rather than calling putc().
327 char[] buffer = this.buffer;
328 int count = 0;
329
330 // We are called pointing at the second star of the doc
331 // comment:
332 //
333 // Input: /** the rest of the comment ... */
334 // ^
335 //
336 // We rely on this in the code below.
337
338 // Consume any number of stars.
339 while ((c = in.read()) == '*')
340 ;
341
342 // Is the comment of the form /**/, /***/, /****/, etc.?
343 if (c == '/') {
344 // Set ch and return
345 ch = in.read();
346 return "";
347 }
348
349 // Skip a newline on the first line of the comment.
350 if (c == '\n') {
351 c = in.read();
352 }
353
354 outerLoop:
355 // The outerLoop processes the doc comment, looping once
356 // for each line. For each line, it first strips off
357 // whitespace, then it consumes any stars, then it
358 // puts the rest of the line into our buffer.
359 while (true) {
360
361 // The wsLoop consumes whitespace from the beginning
362 // of each line.
363 wsLoop:
364 while (true) {
365 switch (c) {
366 case ' ':
367 case '\t':
368 // We could check for other forms of whitespace
369 // as well, but this is left as is for minimum
370 // disturbance of functionality.
371 //
372 // Just skip whitespace.
373 c = in.read();
374 break;
375
376 // We have added extra cases here to trick the
377 // compiler into using a tableswitch instead of
378 // a lookupswitch. They can be removed without
379 // a change in meaning.
380 case 10: case 11: case 12: case 13: case 14: case 15:
381 case 16: case 17: case 18: case 19: case 20: case 21:
382 case 22: case 23: case 24: case 25: case 26: case 27:
383 case 28: case 29: case 30: case 31:
384 default:
385 // We've seen something that isn't whitespace,
386 // jump out.
387 break wsLoop;
388 }
389 } // end wsLoop.
390
391 // Are there stars here? If so, consume them all
392 // and check for the end of comment.
393 if (c == '*') {
394 // Skip all of the stars...
395 do {
396 c = in.read();
397 } while (c == '*');
398
399 // ...then check for the closing slash.
400 if (c == '/') {
401 // We're done with the doc comment.
402 // Set ch and break out.
403 ch = in.read();
404 break outerLoop;
405 }
406 }
407
408 // The textLoop processes the rest of the characters
409 // on the line, adding them to our buffer.
410 textLoop:
411 while (true) {
412 switch (c) {
413 case EOF:
414 // We've seen a premature EOF. Break out
415 // of the loop.
416 env.error(pos, "eof.in.comment");
417 ch = EOF;
418 break outerLoop;
419
420 case '*':
421 // Is this just a star? Or is this the
422 // end of a comment?
423 c = in.read();
424 if (c == '/') {
425 // This is the end of the comment,
426 // set ch and return our buffer.
427 ch = in.read();
428 break outerLoop;
429 }
430 // This is just an ordinary star. Add it to
431 // the buffer.
432 if (count == buffer.length) {
433 growBuffer();
434 buffer = this.buffer;
435 }
436 buffer[count++] = '*';
437 break;
438
439 case '\n':
440 // We've seen a newline. Add it to our
441 // buffer and break out of this loop,
442 // starting fresh on a new line.
443 if (count == buffer.length) {
444 growBuffer();
445 buffer = this.buffer;
446 }
447 buffer[count++] = '\n';
448 c = in.read();
449 break textLoop;
450
451 // Again, the extra cases here are a trick
452 // to get the compiler to generate a tableswitch.
453 case 0: case 1: case 2: case 3: case 4: case 5:
454 case 6: case 7: case 8: case 11: case 12: case 13:
455 case 14: case 15: case 16: case 17: case 18: case 19:
456 case 20: case 21: case 22: case 23: case 24: case 25:
457 case 26: case 27: case 28: case 29: case 30: case 31:
458 case 32: case 33: case 34: case 35: case 36: case 37:
459 case 38: case 39: case 40:
460 default:
461 // Add the character to our buffer.
462 if (count == buffer.length) {
463 growBuffer();
464 buffer = this.buffer;
465 }
466 buffer[count++] = (char)c;
467 c = in.read();
468 break;
469 }
470 } // end textLoop
471 } // end outerLoop
472
473 // We have scanned our doc comment. It is stored in
474 // buffer. The previous implementation of scanDocComment
475 // stripped off all trailing spaces and stars from the comment.
476 // We will do this as well, so as to cause a minimum of
477 // disturbance. Is this what we want?
478 if (count > 0) {
479 int i = count - 1;
480 trailLoop:
481 while (i > -1) {
482 switch (buffer[i]) {
483 case ' ':
484 case '\t':
485 case '*':
486 i--;
487 break;
488 // And again, the extra cases here are a trick
489 // to get the compiler to generate a tableswitch.
490 case 0: case 1: case 2: case 3: case 4: case 5:
491 case 6: case 7: case 8: case 10: case 11: case 12:
492 case 13: case 14: case 15: case 16: case 17: case 18:
493 case 19: case 20: case 21: case 22: case 23: case 24:
494 case 25: case 26: case 27: case 28: case 29: case 30:
495 case 31: case 33: case 34: case 35: case 36: case 37:
496 case 38: case 39: case 40:
497 default:
498 break trailLoop;
499 }
500 }
501 count = i + 1;
502
503 // Return the text of the doc comment.
504 return new String(buffer, 0, count);
505 } else {
506 return "";
507 }
508 }
509
510 /**
511 * Scan a number. The first digit of the number should be the current
512 * character. We may be scanning hex, decimal, or octal at this point
513 */
514 private void scanNumber() throws IOException {
515 boolean seenNonOctal = false;
516 boolean overflow = false;
517 boolean seenDigit = false; // used to detect invalid hex number 0xL
518 radix = (ch == '0' ? 8 : 10);
519 long value = ch - '0';
520 count = 0;
521 putc(ch); // save character in buffer
522 numberLoop:
523 for (;;) {
524 switch (ch = in.read()) {
525 case '.':
526 if (radix == 16)
527 break numberLoop; // an illegal character
528 scanReal();
529 return;
530
531 case '8': case '9':
532 // We can't yet throw an error if reading an octal. We might
533 // discover we're really reading a real.
534 seenNonOctal = true;
535 case '0': case '1': case '2': case '3':
536 case '4': case '5': case '6': case '7':
537 seenDigit = true;
538 putc(ch);
539 if (radix == 10) {
540 overflow = overflow || (value * 10)/10 != value;
541 value = (value * 10) + (ch - '0');
542 overflow = overflow || (value - 1 < -1);
543 } else if (radix == 8) {
544 overflow = overflow || (value >>> 61) != 0;
545 value = (value << 3) + (ch - '0');
546 } else {
547 overflow = overflow || (value >>> 60) != 0;
548 value = (value << 4) + (ch - '0');
549 }
550 break;
551
552 case 'd': case 'D': case 'e': case 'E': case 'f': case 'F':
553 if (radix != 16) {
554 scanReal();
555 return;
556 }
557 // fall through
558 case 'a': case 'A': case 'b': case 'B': case 'c': case 'C':
559 seenDigit = true;
560 putc(ch);
561 if (radix != 16)
562 break numberLoop; // an illegal character
563 overflow = overflow || (value >>> 60) != 0;
564 value = (value << 4) + 10 +
565 Character.toLowerCase((char)ch) - 'a';
566 break;
567
568 case 'l': case 'L':
569 ch = in.read(); // skip over 'l'
570 longValue = value;
571 token = LONGVAL;
572 break numberLoop;
573
574 case 'x': case 'X':
575 // if the first character is a '0' and this is the second
576 // letter, then read in a hexadecimal number. Otherwise, error.
577 if (count == 1 && radix == 8) {
578 radix = 16;
579 seenDigit = false;
580 break;
581 } else {
582 // we'll get an illegal character error
583 break numberLoop;
584 }
585
586 default:
587 intValue = (int)value;
588 token = INTVAL;
589 break numberLoop;
590 }
591 } // while true
592
593 // We have just finished reading the number. The next thing better
594 // not be a letter or digit.
595 // Note: There will be deprecation warnings against these uses
596 // of Character.isJavaLetterOrDigit and Character.isJavaLetter.
597 // Do not fix them yet; allow the compiler to run on pre-JDK1.1 VMs.
598 if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') {
599 env.error(in.pos, "invalid.number");
600 do { ch = in.read(); }
601 while (Character.isJavaLetterOrDigit((char)ch) || ch == '.');
602 intValue = 0;
603 token = INTVAL;
604 } else if (radix == 8 && seenNonOctal) {
605 // A bogus octal literal.
606 intValue = 0;
607 token = INTVAL;
608 env.error(pos, "invalid.octal.number");
609 } else if (radix == 16 && seenDigit == false) {
610 // A hex literal with no digits, 0xL, for example.
611 intValue = 0;
612 token = INTVAL;
613 env.error(pos, "invalid.hex.number");
614 } else {
615 if (token == INTVAL) {
616 // Check for overflow. Note that base 10 literals
617 // have different rules than base 8 and 16.
618 overflow = overflow ||
619 (value & 0xFFFFFFFF00000000L) != 0 ||
620 (radix == 10 && value > 2147483648L);
621
622 if (overflow) {
623 intValue = 0;
624
625 // Give a specific error message which tells
626 // the user the range.
627 switch (radix) {
628 case 8:
629 env.error(pos, "overflow.int.oct");
630 break;
631 case 10:
632 env.error(pos, "overflow.int.dec");
633 break;
634 case 16:
635 env.error(pos, "overflow.int.hex");
636 break;
637 default:
638 throw new CompilerError("invalid radix");
639 }
640 }
641 } else {
642 if (overflow) {
643 longValue = 0;
644
645 // Give a specific error message which tells
646 // the user the range.
647 switch (radix) {
648 case 8:
649 env.error(pos, "overflow.long.oct");
650 break;
651 case 10:
652 env.error(pos, "overflow.long.dec");
653 break;
654 case 16:
655 env.error(pos, "overflow.long.hex");
656 break;
657 default:
658 throw new CompilerError("invalid radix");
659 }
660 }
661 }
662 }
663 }
664
665 /**
666 * Scan a float. We are either looking at the decimal, or we have already
667 * seen it and put it into the buffer. We haven't seen an exponent.
668 * Scan a float. Should be called with the current character is either
669 * the 'e', 'E' or '.'
670 */
671 private void scanReal() throws IOException {
672 boolean seenExponent = false;
673 boolean isSingleFloat = false;
674 char lastChar;
675 if (ch == '.') {
676 putc(ch);
677 ch = in.read();
678 }
679
680 numberLoop:
681 for ( ; ; ch = in.read()) {
682 switch (ch) {
683 case '0': case '1': case '2': case '3': case '4':
684 case '5': case '6': case '7': case '8': case '9':
685 putc(ch);
686 break;
687
688 case 'e': case 'E':
689 if (seenExponent)
690 break numberLoop; // we'll get a format error
691 putc(ch);
692 seenExponent = true;
693 break;
694
695 case '+': case '-':
696 lastChar = buffer[count - 1];
697 if (lastChar != 'e' && lastChar != 'E')
698 break numberLoop; // this isn't an error, though!
699 putc(ch);
700 break;
701
702 case 'f': case 'F':
703 ch = in.read(); // skip over 'f'
704 isSingleFloat = true;
705 break numberLoop;
706
707 case 'd': case 'D':
708 ch = in.read(); // skip over 'd'
709 // fall through
710 default:
711 break numberLoop;
712 } // sswitch
713 } // loop
714
715 // we have just finished reading the number. The next thing better
716 // not be a letter or digit.
717 if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') {
718 env.error(in.pos, "invalid.number");
719 do { ch = in.read(); }
720 while (Character.isJavaLetterOrDigit((char)ch) || ch == '.');
721 doubleValue = 0;
722 token = DOUBLEVAL;
723 } else {
724 token = isSingleFloat ? FLOATVAL : DOUBLEVAL;
725 try {
726 lastChar = buffer[count - 1];
727 if (lastChar == 'e' || lastChar == 'E'
728 || lastChar == '+' || lastChar == '-') {
729 env.error(in.pos -1, "float.format");
730 } else if (isSingleFloat) {
731 String string = bufferString();
732 floatValue = Float.valueOf(string).floatValue();
733 if (Float.isInfinite(floatValue)) {
734 env.error(pos, "overflow.float");
735 } else if (floatValue == 0 && !looksLikeZero(string)) {
736 env.error(pos, "underflow.float");
737 }
738 } else {
739 String string = bufferString();
740 doubleValue = Double.valueOf(string).doubleValue();
741 if (Double.isInfinite(doubleValue)) {
742 env.error(pos, "overflow.double");
743 } else if (doubleValue == 0 && !looksLikeZero(string)) {
744 env.error(pos, "underflow.double");
745 }
746 }
747 } catch (NumberFormatException ee) {
748 env.error(pos, "float.format");
749 doubleValue = 0;
750 floatValue = 0;
751 }
752 }
753 return;
754 }
755
756 // We have a token that parses as a number. Is this token possibly zero?
757 // i.e. does it have a non-zero value in the mantissa?
758 private static boolean looksLikeZero(String token) {
759 int length = token.length();
760 for (int i = 0; i < length; i++) {
761 switch (token.charAt(i)) {
762 case 0: case '.':
763 continue;
764 case '1': case '2': case '3': case '4': case '5':
765 case '6': case '7': case '8': case '9':
766 return false;
767 case 'e': case 'E': case 'f': case 'F':
768 return true;
769 }
770 }
771 return true;
772 }
773
774 /**
775 * Scan an escape character.
776 * @return the character or -1 if it escaped an
777 * end-of-line.
778 */
779 private int scanEscapeChar() throws IOException {
780 long p = in.pos;
781
782 switch (ch = in.read()) {
783 case '0': case '1': case '2': case '3':
784 case '4': case '5': case '6': case '7': {
785 int n = ch - '0';
786 for (int i = 2 ; i > 0 ; i--) {
787 switch (ch = in.read()) {
788 case '0': case '1': case '2': case '3':
789 case '4': case '5': case '6': case '7':
790 n = (n << 3) + ch - '0';
791 break;
792
793 default:
794 if (n > 0xFF) {
795 env.error(p, "invalid.escape.char");
796 }
797 return n;
798 }
799 }
800 ch = in.read();
801 if (n > 0xFF) {
802 env.error(p, "invalid.escape.char");
803 }
804 return n;
805 }
806
807 case 'r': ch = in.read(); return '\r';
808 case 'n': ch = in.read(); return '\n';
809 case 'f': ch = in.read(); return '\f';
810 case 'b': ch = in.read(); return '\b';
811 case 't': ch = in.read(); return '\t';
812 case '\\': ch = in.read(); return '\\';
813 case '\"': ch = in.read(); return '\"';
814 case '\'': ch = in.read(); return '\'';
815 }
816
817 env.error(p, "invalid.escape.char");
818 ch = in.read();
819 return -1;
820 }
821
822 /**
823 * Scan a string. The current character
824 * should be the opening " of the string.
825 */
826 private void scanString() throws IOException {
827 token = STRINGVAL;
828 count = 0;
829 ch = in.read();
830
831 // Scan a String
832 while (true) {
833 switch (ch) {
834 case EOF:
835 env.error(pos, "eof.in.string");
836 stringValue = bufferString();
837 return;
838
839 case '\r':
840 case '\n':
841 ch = in.read();
842 env.error(pos, "newline.in.string");
843 stringValue = bufferString();
844 return;
845
846 case '"':
847 ch = in.read();
848 stringValue = bufferString();
849 return;
850
851 case '\\': {
852 int c = scanEscapeChar();
853 if (c >= 0) {
854 putc((char)c);
855 }
856 break;
857 }
858
859 default:
860 putc(ch);
861 ch = in.read();
862 break;
863 }
864 }
865 }
866
867 /**
868 * Scan a character. The current character should be
869 * the opening ' of the character constant.
870 */
871 private void scanCharacter() throws IOException {
872 token = CHARVAL;
873
874 switch (ch = in.read()) {
875 case '\\':
876 int c = scanEscapeChar();
877 charValue = (char)((c >= 0) ? c : 0);
878 break;
879
880 case '\'':
881 // There are two standard problems this case deals with. One
882 // is the malformed single quote constant (i.e. the programmer
883 // uses ''' instead of '\'') and the other is the empty
884 // character constant (i.e. ''). Just consume any number of
885 // single quotes and emit an error message.
886 charValue = 0;
887 env.error(pos, "invalid.char.constant");
888 ch = in.read();
889 while (ch == '\'') {
890 ch = in.read();
891 }
892 return;
893
894 case '\r':
895 case '\n':
896 charValue = 0;
897 env.error(pos, "invalid.char.constant");
898 return;
899
900 default:
901 charValue = (char)ch;
902 ch = in.read();
903 break;
904 }
905
906 if (ch == '\'') {
907 ch = in.read();
908 } else {
909 env.error(pos, "invalid.char.constant");
910 while (true) {
911 switch (ch) {
912 case '\'':
913 ch = in.read();
914 return;
915 case ';':
916 case '\n':
917 case EOF:
918 return;
919 default:
920 ch = in.read();
921 }
922 }
923 }
924 }
925
926 /**
927 * Scan an Identifier. The current character should
928 * be the first character of the identifier.
929 */
930 private void scanIdentifier() throws IOException {
931 count = 0;
932
933 while (true) {
934 putc(ch);
935 switch (ch = in.read()) {
936 case 'a': case 'b': case 'c': case 'd': case 'e':
937 case 'f': case 'g': case 'h': case 'i': case 'j':
938 case 'k': case 'l': case 'm': case 'n': case 'o':
939 case 'p': case 'q': case 'r': case 's': case 't':
940 case 'u': case 'v': case 'w': case 'x': case 'y':
941 case 'z':
942 case 'A': case 'B': case 'C': case 'D': case 'E':
943 case 'F': case 'G': case 'H': case 'I': case 'J':
944 case 'K': case 'L': case 'M': case 'N': case 'O':
945 case 'P': case 'Q': case 'R': case 'S': case 'T':
946 case 'U': case 'V': case 'W': case 'X': case 'Y':
947 case 'Z':
948 case '0': case '1': case '2': case '3': case '4':
949 case '5': case '6': case '7': case '8': case '9':
950 case '$': case '_':
951 break;
952
953 default:
954 if (!Character.isJavaLetterOrDigit((char)ch)) {
955 idValue = Identifier.lookup(bufferString());
956 token = idValue.getType();
957 return;
958 }
959 }
960 }
961 }
962
963 /**
964 * The ending position of the current token
965 */
966 // Note: This should be part of the pos itself.
967 public long getEndPos() {
968 return in.pos;
969 }
970
971 /**
972 * If the current token is IDENT, return the identifier occurrence.
973 * It will be freshly allocated.
974 */
975 public IdentifierToken getIdToken() {
976 return (token != IDENT) ? null : new IdentifierToken(pos, idValue);
977 }
978
979 /**
980 * Scan the next token.
981 * @return the position of the previous token.
982 */
983 public long scan() throws IOException {
984 return xscan();
985 }
986
987 protected long xscan() throws IOException {
988 final ScannerInputReader in = this.in;
989 long retPos = pos;
990 prevPos = in.pos;
991 docComment = null;
992 while (true) {
993 pos = in.pos;
994
995 switch (ch) {
996 case EOF:
997 token = EOF;
998 return retPos;
999
1000 case '\n':
1001 if (scanComments) {
1002 ch = ' ';
1003 // Avoid this path the next time around.
1004 // Do not just call in.read; we want to present
1005 // a null token (and also avoid read-ahead).
1006 token = COMMENT;
1007 return retPos;
1008 }
1009 case ' ':
1010 case '\t':
1011 case '\f':
1012 ch = in.read();
1013 break;
1014
1015 case '/':
1016 switch (ch = in.read()) {
1017 case '/':
1018 // Parse a // comment
1019 while (((ch = in.read()) != EOF) && (ch != '\n'));
1020 if (scanComments) {
1021 token = COMMENT;
1022 return retPos;
1023 }
1024 break;
1025
1026 case '*':
1027 ch = in.read();
1028 if (ch == '*') {
1029 docComment = scanDocComment();
1030 } else {
1031 skipComment();
1032 }
1033 if (scanComments) {
1034 return retPos;
1035 }
1036 break;
1037
1038 case '=':
1039 ch = in.read();
1040 token = ASGDIV;
1041 return retPos;
1042
1043 default:
1044 token = DIV;
1045 return retPos;
1046 }
1047 break;
1048
1049 case '"':
1050 scanString();
1051 return retPos;
1052
1053 case '\'':
1054 scanCharacter();
1055 return retPos;
1056
1057 case '0': case '1': case '2': case '3': case '4':
1058 case '5': case '6': case '7': case '8': case '9':
1059 scanNumber();
1060 return retPos;
1061
1062 case '.':
1063 switch (ch = in.read()) {
1064 case '0': case '1': case '2': case '3': case '4':
1065 case '5': case '6': case '7': case '8': case '9':
1066 count = 0;
1067 putc('.');
1068 scanReal();
1069 break;
1070 default:
1071 token = FIELD;
1072 }
1073 return retPos;
1074
1075 case '{':
1076 ch = in.read();
1077 token = LBRACE;
1078 return retPos;
1079
1080 case '}':
1081 ch = in.read();
1082 token = RBRACE;
1083 return retPos;
1084
1085 case '(':
1086 ch = in.read();
1087 token = LPAREN;
1088 return retPos;
1089
1090 case ')':
1091 ch = in.read();
1092 token = RPAREN;
1093 return retPos;
1094
1095 case '[':
1096 ch = in.read();
1097 token = LSQBRACKET;
1098 return retPos;
1099
1100 case ']':
1101 ch = in.read();
1102 token = RSQBRACKET;
1103 return retPos;
1104
1105 case ',':
1106 ch = in.read();
1107 token = COMMA;
1108 return retPos;
1109
1110 case ';':
1111 ch = in.read();
1112 token = SEMICOLON;
1113 return retPos;
1114
1115 case '?':
1116 ch = in.read();
1117 token = QUESTIONMARK;
1118 return retPos;
1119
1120 case '~':
1121 ch = in.read();
1122 token = BITNOT;
1123 return retPos;
1124
1125 case ':':
1126 ch = in.read();
1127 token = COLON;
1128 return retPos;
1129
1130 case '-':
1131 switch (ch = in.read()) {
1132 case '-':
1133 ch = in.read();
1134 token = DEC;
1135 return retPos;
1136
1137 case '=':
1138 ch = in.read();
1139 token = ASGSUB;
1140 return retPos;
1141 }
1142 token = SUB;
1143 return retPos;
1144
1145 case '+':
1146 switch (ch = in.read()) {
1147 case '+':
1148 ch = in.read();
1149 token = INC;
1150 return retPos;
1151
1152 case '=':
1153 ch = in.read();
1154 token = ASGADD;
1155 return retPos;
1156 }
1157 token = ADD;
1158 return retPos;
1159
1160 case '<':
1161 switch (ch = in.read()) {
1162 case '<':
1163 if ((ch = in.read()) == '=') {
1164 ch = in.read();
1165 token = ASGLSHIFT;
1166 return retPos;
1167 }
1168 token = LSHIFT;
1169 return retPos;
1170
1171 case '=':
1172 ch = in.read();
1173 token = LE;
1174 return retPos;
1175 }
1176 token = LT;
1177 return retPos;
1178
1179 case '>':
1180 switch (ch = in.read()) {
1181 case '>':
1182 switch (ch = in.read()) {
1183 case '=':
1184 ch = in.read();
1185 token = ASGRSHIFT;
1186 return retPos;
1187
1188 case '>':
1189 if ((ch = in.read()) == '=') {
1190 ch = in.read();
1191 token = ASGURSHIFT;
1192 return retPos;
1193 }
1194 token = URSHIFT;
1195 return retPos;
1196 }
1197 token = RSHIFT;
1198 return retPos;
1199
1200 case '=':
1201 ch = in.read();
1202 token = GE;
1203 return retPos;
1204 }
1205 token = GT;
1206 return retPos;
1207
1208 case '|':
1209 switch (ch = in.read()) {
1210 case '|':
1211 ch = in.read();
1212 token = OR;
1213 return retPos;
1214
1215 case '=':
1216 ch = in.read();
1217 token = ASGBITOR;
1218 return retPos;
1219 }
1220 token = BITOR;
1221 return retPos;
1222
1223 case '&':
1224 switch (ch = in.read()) {
1225 case '&':
1226 ch = in.read();
1227 token = AND;
1228 return retPos;
1229
1230 case '=':
1231 ch = in.read();
1232 token = ASGBITAND;
1233 return retPos;
1234 }
1235 token = BITAND;
1236 return retPos;
1237
1238 case '=':
1239 if ((ch = in.read()) == '=') {
1240 ch = in.read();
1241 token = EQ;
1242 return retPos;
1243 }
1244 token = ASSIGN;
1245 return retPos;
1246
1247 case '%':
1248 if ((ch = in.read()) == '=') {
1249 ch = in.read();
1250 token = ASGREM;
1251 return retPos;
1252 }
1253 token = REM;
1254 return retPos;
1255
1256 case '^':
1257 if ((ch = in.read()) == '=') {
1258 ch = in.read();
1259 token = ASGBITXOR;
1260 return retPos;
1261 }
1262 token = BITXOR;
1263 return retPos;
1264
1265 case '!':
1266 if ((ch = in.read()) == '=') {
1267 ch = in.read();
1268 token = NE;
1269 return retPos;
1270 }
1271 token = NOT;
1272 return retPos;
1273
1274 case '*':
1275 if ((ch = in.read()) == '=') {
1276 ch = in.read();
1277 token = ASGMUL;
1278 return retPos;
1279 }
1280 token = MUL;
1281 return retPos;
1282
1283 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1284 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1285 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1286 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1287 case 'y': case 'z':
1288 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1289 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1290 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1291 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1292 case 'Y': case 'Z':
1293 case '$': case '_':
1294 scanIdentifier();
1295 return retPos;
1296
1297 case '\u001a':
1298 // Our one concession to DOS.
1299 if ((ch = in.read()) == EOF) {
1300 token = EOF;
1301 return retPos;
1302 }
1303 env.error(pos, "funny.char");
1304 ch = in.read();
1305 break;
1306
1307
1308 default:
1309 if (Character.isJavaLetter((char)ch)) {
1310 scanIdentifier();
1311 return retPos;
1312 }
1313 env.error(pos, "funny.char");
1314 ch = in.read();
1315 break;
1316 }
1317 }
1318 }
1319
1320 /**
1321 * Scan to a matching '}', ']' or ')'. The current token must be
1322 * a '{', '[' or '(';
1323 */
1324 public void match(int open, int close) throws IOException {
1325 int depth = 1;
1326
1327 while (true) {
1328 scan();
1329 if (token == open) {
1330 depth++;
1331 } else if (token == close) {
1332 if (--depth == 0) {
1333 return;
1334 }
1335 } else if (token == EOF) {
1336 env.error(pos, "unbalanced.paren");
1337 return;
1338 }
1339 }
1340 }
1341}