blob: 6504e0cec87b0519b7cb4aa1f3917455e51e8d7a [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossum9bfef441993-03-29 10:43:31 +00002Copyright 1991, 1992, 1993 by Stichting Mathematisch Centrum,
3Amsterdam, The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
7Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
9provided that the above copyright notice appear in all copies and that
10both that copyright notice and this permission notice appear in
11supporting documentation, and that the names of Stichting Mathematisch
12Centrum or CWI not be used in advertising or publicity pertaining to
13distribution of the software without specific, written prior permission.
14
15STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
16THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
18FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
20ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
21OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22
23******************************************************************/
24
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000025/* Tokenizer implementation */
26
27/* XXX This is rather old, should be restructured perhaps */
28/* XXX Need a better interface to report errors than writing to stderr */
Guido van Rossum3f5da241990-12-20 15:06:42 +000029/* XXX Should use editor resource to fetch true tab size on Macintosh */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000030
Guido van Rossum3f5da241990-12-20 15:06:42 +000031#include "pgenheaders.h"
32
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000033#include <ctype.h>
34#include "string.h"
35
Guido van Rossum3f5da241990-12-20 15:06:42 +000036#include "fgetsintr.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000037#include "tokenizer.h"
38#include "errcode.h"
39
/* Default number of columns between tab stops; tok_new() copies it into
   tok->tabsize, which tok_get() uses to turn leading tabs into a column.
   Don't ever change this -- it would break the portability of Python code */
#define TABSIZE 8

/* Forward declarations of the file-local helpers.
   (PROTO is defined elsewhere; presumably it drops the argument list on
   compilers without prototype support -- confirm in pgenheaders.h.) */
static struct tok_state *tok_new PROTO((void));
static int tok_nextc PROTO((struct tok_state *tok));
static void tok_backup PROTO((struct tok_state *tok, int c));
47
/* Printable token names, indexed by token type (see tok_dump).
   This table must match the #defines in token.h! */

char *tok_name[] = {
	/* Structural tokens */
	"ENDMARKER", "NAME", "NUMBER", "STRING",
	"NEWLINE", "INDENT", "DEDENT",

	/* Brackets and separators */
	"LPAR", "RPAR", "LSQB", "RSQB",
	"COLON", "COMMA", "SEMI",

	/* Single-character operators */
	"PLUS", "MINUS", "STAR", "SLASH",
	"VBAR", "AMPER", "LESS", "GREATER",
	"EQUAL", "DOT", "PERCENT", "BACKQUOTE",
	"LBRACE", "RBRACE",

	/* Two-character operators, plus the later single-character ones */
	"EQEQUAL", "NOTEQUAL", "LESSEQUAL", "GREATEREQUAL",
	"TILDE", "CIRCUMFLEX", "LEFTSHIFT", "RIGHTSHIFT",

	/* Catch-all and pseudo entries */
	"OP",
	"<ERRORTOKEN>",
	"<N_TOKENS>"
};
92
93
94/* Create and initialize a new tok_state structure */
95
96static struct tok_state *
97tok_new()
98{
99 struct tok_state *tok = NEW(struct tok_state, 1);
100 if (tok == NULL)
101 return NULL;
102 tok->buf = tok->cur = tok->end = tok->inp = NULL;
103 tok->done = E_OK;
104 tok->fp = NULL;
105 tok->tabsize = TABSIZE;
106 tok->indent = 0;
107 tok->indstack[0] = 0;
108 tok->atbol = 1;
109 tok->pendin = 0;
110 tok->prompt = tok->nextprompt = NULL;
111 tok->lineno = 0;
Guido van Rossuma849b831993-05-12 11:35:44 +0000112 tok->level = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000113 return tok;
114}
115
116
117/* Set up tokenizer for string */
118
119struct tok_state *
120tok_setups(str)
121 char *str;
122{
123 struct tok_state *tok = tok_new();
124 if (tok == NULL)
125 return NULL;
126 tok->buf = tok->cur = str;
127 tok->end = tok->inp = strchr(str, '\0');
128 return tok;
129}
130
131
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000132/* Set up tokenizer for file */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000133
134struct tok_state *
135tok_setupf(fp, ps1, ps2)
136 FILE *fp;
137 char *ps1, *ps2;
138{
139 struct tok_state *tok = tok_new();
140 if (tok == NULL)
141 return NULL;
142 if ((tok->buf = NEW(char, BUFSIZ)) == NULL) {
143 DEL(tok);
144 return NULL;
145 }
146 tok->cur = tok->inp = tok->buf;
147 tok->end = tok->buf + BUFSIZ;
148 tok->fp = fp;
149 tok->prompt = ps1;
150 tok->nextprompt = ps2;
151 return tok;
152}
153
154
155/* Free a tok_state structure */
156
157void
158tok_free(tok)
159 struct tok_state *tok;
160{
161 /* XXX really need a separate flag to say 'my buffer' */
162 if (tok->fp != NULL && tok->buf != NULL)
163 DEL(tok->buf);
164 DEL(tok);
165}
166
167
168/* Get next char, updating state; error code goes into tok->done */
169
170static int
171tok_nextc(tok)
172 register struct tok_state *tok;
173{
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000174 for (;;) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000175 if (tok->cur != tok->inp)
176 return *tok->cur++; /* Fast path */
177 if (tok->done != E_OK)
178 return EOF;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000179 if (tok->fp == NULL) {
180 tok->done = E_EOF;
181 return EOF;
182 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000183#ifdef USE_READLINE
184 if (tok->prompt != NULL) {
185 extern char *readline PROTO((char *prompt));
186 static int been_here;
187 if (!been_here) {
188 /* Force rebind of TAB to insert-tab */
189 extern int rl_insert();
190 rl_bind_key('\t', rl_insert);
191 been_here++;
192 }
193 if (tok->buf != NULL)
194 free(tok->buf);
195 tok->buf = readline(tok->prompt);
196 (void) intrcheck(); /* Clear pending interrupt */
197 if (tok->nextprompt != NULL)
198 tok->prompt = tok->nextprompt;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000199 if (tok->buf == NULL) {
200 tok->done = E_EOF;
201 }
202 else {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000203 tok->end = strchr(tok->buf, '\0');
204 if (tok->end > tok->buf)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000205 add_history(tok->buf);
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000206 /* Replace trailing '\n' by '\0'
207 (we don't need a '\0', but the
208 tokenizer wants a '\n'...) */
209 *tok->end++ = '\n';
210 tok->inp = tok->end;
211 tok->cur = tok->buf;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000212 }
213 }
214 else
215#endif
216 {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000217 if (tok->prompt != NULL) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000218 fprintf(stderr, "%s", tok->prompt);
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000219 if (tok->nextprompt != NULL)
220 tok->prompt = tok->nextprompt;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000221 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000222 if (tok->buf == NULL) {
223 tok->buf = NEW(char, BUFSIZ);
224 if (tok->buf == NULL) {
225 tok->done = E_NOMEM;
226 return EOF;
227 }
228 tok->end = tok->buf + BUFSIZ;
229 }
230 tok->done = fgets_intr(tok->buf,
231 (int)(tok->end - tok->buf), tok->fp);
232 tok->inp = strchr(tok->buf, '\0');
233 /* Read until '\n' or EOF */
234 while (tok->inp+1==tok->end && tok->inp[-1]!='\n') {
235 int curvalid = tok->inp - tok->buf;
236 int cursize = tok->end - tok->buf;
237 int newsize = cursize + BUFSIZ;
238 char *newbuf = tok->buf;
239 RESIZE(newbuf, char, newsize);
240 if (newbuf == NULL) {
241 tok->done = E_NOMEM;
242 tok->cur = tok->inp;
243 return EOF;
244 }
245 tok->buf = newbuf;
246 tok->inp = tok->buf + curvalid;
247 tok->end = tok->buf + newsize;
248 if (fgets_intr(tok->inp,
249 (int)(tok->end - tok->inp),
250 tok->fp) != E_OK)
251 break;
252 tok->inp = strchr(tok->inp, '\0');
253 }
254 tok->cur = tok->buf;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000255 }
256 if (tok->done != E_OK) {
257 if (tok->prompt != NULL)
258 fprintf(stderr, "\n");
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000259 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000260 return EOF;
261 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000262 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000263 /*NOTREACHED*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000264}
265
266
267/* Back-up one character */
268
269static void
270tok_backup(tok, c)
271 register struct tok_state *tok;
272 register int c;
273{
274 if (c != EOF) {
275 if (--tok->cur < tok->buf) {
276 fprintf(stderr, "tok_backup: begin of buffer\n");
277 abort();
278 }
279 if (*tok->cur != c)
280 *tok->cur = c;
281 }
282}
283
284
285/* Return the token corresponding to a single character */
286
287int
288tok_1char(c)
289 int c;
290{
291 switch (c) {
292 case '(': return LPAR;
293 case ')': return RPAR;
294 case '[': return LSQB;
295 case ']': return RSQB;
296 case ':': return COLON;
297 case ',': return COMMA;
298 case ';': return SEMI;
299 case '+': return PLUS;
300 case '-': return MINUS;
301 case '*': return STAR;
302 case '/': return SLASH;
303 case '|': return VBAR;
304 case '&': return AMPER;
305 case '<': return LESS;
306 case '>': return GREATER;
307 case '=': return EQUAL;
308 case '.': return DOT;
309 case '%': return PERCENT;
310 case '`': return BACKQUOTE;
311 case '{': return LBRACE;
312 case '}': return RBRACE;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000313 case '^': return CIRCUMFLEX;
314 case '~': return TILDE;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000315 default: return OP;
316 }
317}
318
319
Guido van Rossumfbab9051991-10-20 20:25:03 +0000320int
321tok_2char(c1, c2)
322 int c1, c2;
323{
324 switch (c1) {
325 case '=':
326 switch (c2) {
327 case '=': return EQEQUAL;
328 }
329 break;
330 case '!':
331 switch (c2) {
332 case '=': return NOTEQUAL;
333 }
334 break;
335 case '<':
336 switch (c2) {
337 case '>': return NOTEQUAL;
338 case '=': return LESSEQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000339 case '<': return LEFTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000340 }
341 break;
342 case '>':
343 switch (c2) {
344 case '=': return GREATEREQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000345 case '>': return RIGHTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000346 }
347 break;
348 }
349 return OP;
350}
351
352
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000353/* Get next token, after space stripping etc. */
354
355int
356tok_get(tok, p_start, p_end)
357 register struct tok_state *tok; /* In/out: tokenizer state */
358 char **p_start, **p_end; /* Out: point to start/end of token */
359{
360 register int c;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000361 int blankline;
362
363 nextline:
364 blankline = 0;
365
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000366 /* Get indentation level */
367 if (tok->atbol) {
368 register int col = 0;
369 tok->atbol = 0;
370 tok->lineno++;
371 for (;;) {
372 c = tok_nextc(tok);
373 if (c == ' ')
374 col++;
375 else if (c == '\t')
376 col = (col/tok->tabsize + 1) * tok->tabsize;
377 else
378 break;
379 }
380 tok_backup(tok, c);
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000381 if (c == '#' || c == '\n') {
382 /* Lines with only whitespace and/or comments
383 shouldn't affect the indentation and are
384 not passed to the parser as NEWLINE tokens,
385 except *totally* empty lines in interactive
386 mode, which signal the end of a command group. */
387 if (col == 0 && c == '\n' && tok->prompt != NULL)
388 blankline = 0; /* Let it through */
389 else
390 blankline = 1; /* Ignore completely */
391 /* We can't jump back right here since we still
392 may need to skip to the end of a comment */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000393 }
Guido van Rossuma849b831993-05-12 11:35:44 +0000394 if (!blankline && tok->level == 0) {
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000395 if (col == tok->indstack[tok->indent]) {
396 /* No change */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000397 }
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000398 else if (col > tok->indstack[tok->indent]) {
399 /* Indent -- always one */
400 if (tok->indent+1 >= MAXINDENT) {
401 fprintf(stderr, "excessive indent\n");
402 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000403 tok->cur = tok->inp;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000404 return ERRORTOKEN;
405 }
406 tok->pendin++;
407 tok->indstack[++tok->indent] = col;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000408 }
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000409 else /* col < tok->indstack[tok->indent] */ {
410 /* Dedent -- any number, must be consistent */
411 while (tok->indent > 0 &&
412 col < tok->indstack[tok->indent]) {
413 tok->indent--;
414 tok->pendin--;
415 }
416 if (col != tok->indstack[tok->indent]) {
417 fprintf(stderr, "inconsistent dedent\n");
418 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000419 tok->cur = tok->inp;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000420 return ERRORTOKEN;
421 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000422 }
423 }
424 }
425
426 *p_start = *p_end = tok->cur;
427
428 /* Return pending indents/dedents */
429 if (tok->pendin != 0) {
430 if (tok->pendin < 0) {
431 tok->pendin++;
432 return DEDENT;
433 }
434 else {
435 tok->pendin--;
436 return INDENT;
437 }
438 }
439
440 again:
441 /* Skip spaces */
442 do {
443 c = tok_nextc(tok);
444 } while (c == ' ' || c == '\t');
445
446 /* Set start of current token */
447 *p_start = tok->cur - 1;
448
449 /* Skip comment */
450 if (c == '#') {
451 /* Hack to allow overriding the tabsize in the file.
452 This is also recognized by vi, when it occurs near the
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000453 beginning or end of the file. (Will vi never die...?)
454 For Python it must be at the beginning of the file! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000455 int x;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000456 /* XXX The cast to (unsigned char *) is needed by THINK C 3.0 */
Guido van Rossumb156d721990-12-20 23:13:00 +0000457 if (sscanf(/*(unsigned char *)*/tok->cur,
Guido van Rossum3f5da241990-12-20 15:06:42 +0000458 " vi:set tabsize=%d:", &x) == 1 &&
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000459 x >= 1 && x <= 40) {
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000460 /* fprintf(stderr, "# vi:set tabsize=%d:\n", x); */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000461 tok->tabsize = x;
462 }
463 do {
464 c = tok_nextc(tok);
465 } while (c != EOF && c != '\n');
466 }
467
468 /* Check for EOF and errors now */
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000469 if (c == EOF) {
470 *p_start = *p_end = tok->cur;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000471 return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000472 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000473
474 /* Identifier (most frequent token!) */
475 if (isalpha(c) || c == '_') {
476 do {
477 c = tok_nextc(tok);
478 } while (isalnum(c) || c == '_');
479 tok_backup(tok, c);
480 *p_end = tok->cur;
481 return NAME;
482 }
483
484 /* Newline */
485 if (c == '\n') {
486 tok->atbol = 1;
Guido van Rossuma849b831993-05-12 11:35:44 +0000487 if (blankline || tok->level > 0)
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000488 goto nextline;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000489 *p_end = tok->cur - 1; /* Leave '\n' out of the string */
490 return NEWLINE;
491 }
492
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000493 /* Period or number starting with period? */
494 if (c == '.') {
495 c = tok_nextc(tok);
496 if (isdigit(c)) {
497 goto fraction;
498 }
499 else {
500 tok_backup(tok, c);
501 *p_end = tok->cur;
502 return DOT;
503 }
504 }
505
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000506 /* Number */
507 if (isdigit(c)) {
508 if (c == '0') {
509 /* Hex or octal */
510 c = tok_nextc(tok);
511 if (c == '.')
512 goto fraction;
513 if (c == 'x' || c == 'X') {
514 /* Hex */
515 do {
516 c = tok_nextc(tok);
517 } while (isxdigit(c));
518 }
519 else {
Guido van Rossum94309451991-12-10 14:01:05 +0000520 /* XXX This is broken! E.g.,
521 09.9 should be accepted as float! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000522 /* Octal; c is first char of it */
523 /* There's no 'isoctdigit' macro, sigh */
524 while ('0' <= c && c < '8') {
525 c = tok_nextc(tok);
526 }
527 }
Guido van Rossumf023c461991-05-05 20:16:20 +0000528 if (c == 'l' || c == 'L')
529 c = tok_nextc(tok);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000530 }
531 else {
532 /* Decimal */
533 do {
534 c = tok_nextc(tok);
535 } while (isdigit(c));
Guido van Rossumf023c461991-05-05 20:16:20 +0000536 if (c == 'l' || c == 'L')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000537 c = tok_nextc(tok);
Guido van Rossumf023c461991-05-05 20:16:20 +0000538 else {
539 /* Accept floating point numbers.
540 XXX This accepts incomplete things like
541 XXX 12e or 1e+; worry run-time.
542 XXX Doesn't accept numbers
543 XXX starting with a dot */
544 if (c == '.') {
545 fraction:
546 /* Fraction */
547 do {
548 c = tok_nextc(tok);
549 } while (isdigit(c));
550 }
551 if (c == 'e' || c == 'E') {
552 /* Exponent part */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000553 c = tok_nextc(tok);
Guido van Rossumf023c461991-05-05 20:16:20 +0000554 if (c == '+' || c == '-')
555 c = tok_nextc(tok);
556 while (isdigit(c)) {
557 c = tok_nextc(tok);
558 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000559 }
560 }
561 }
562 tok_backup(tok, c);
563 *p_end = tok->cur;
564 return NUMBER;
565 }
566
Guido van Rossum8054fad1993-10-26 15:19:44 +0000567 /* String (single quotes) */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000568 if (c == '\'') {
569 for (;;) {
570 c = tok_nextc(tok);
571 if (c == '\n' || c == EOF) {
572 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000573 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000574 return ERRORTOKEN;
575 }
576 if (c == '\\') {
577 c = tok_nextc(tok);
578 *p_end = tok->cur;
579 if (c == '\n' || c == EOF) {
580 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000581 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000582 return ERRORTOKEN;
583 }
584 continue;
585 }
586 if (c == '\'')
587 break;
588 }
589 *p_end = tok->cur;
590 return STRING;
591 }
592
Guido van Rossum8054fad1993-10-26 15:19:44 +0000593 /* String (double quotes) */
594 if (c == '\"') {
595 for (;;) {
596 c = tok_nextc(tok);
597 if (c == '\n' || c == EOF) {
598 tok->done = E_TOKEN;
599 tok->cur = tok->inp;
600 return ERRORTOKEN;
601 }
602 if (c == '\\') {
603 c = tok_nextc(tok);
604 *p_end = tok->cur;
605 if (c == '\n' || c == EOF) {
606 tok->done = E_TOKEN;
607 tok->cur = tok->inp;
608 return ERRORTOKEN;
609 }
610 continue;
611 }
612 if (c == '\"')
613 break;
614 }
615 *p_end = tok->cur;
616 return STRING;
617 }
618
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000619 /* Line continuation */
620 if (c == '\\') {
621 c = tok_nextc(tok);
622 if (c != '\n') {
623 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000624 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000625 return ERRORTOKEN;
626 }
Guido van Rossuma7691721990-11-09 15:08:39 +0000627 tok->lineno++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000628 goto again; /* Read next line */
629 }
630
Guido van Rossumfbab9051991-10-20 20:25:03 +0000631 /* Check for two-character token */
632 {
633 int c2 = tok_nextc(tok);
634 int token = tok_2char(c, c2);
635 if (token != OP) {
636 *p_end = tok->cur;
637 return token;
638 }
639 tok_backup(tok, c2);
640 }
641
Guido van Rossuma849b831993-05-12 11:35:44 +0000642 /* Keep track of parenteses nesting level */
643 switch (c) {
644 case '(':
645 case '[':
646 case '{':
647 tok->level++;
648 break;
649 case ')':
650 case ']':
651 case '}':
652 tok->level--;
653 break;
654 }
655
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000656 /* Punctuation character */
657 *p_end = tok->cur;
658 return tok_1char(c);
659}
660
661
#ifdef DEBUG

/* Debugging aid: print the name of a token type on stdout, followed by
   the token text in parentheses for NAME, NUMBER, STRING and OP.
   start and end delimit the text, as set by tok_get(). */

void
tok_dump(type, start, end)
	int type;
	char *start, *end;
{
	printf("%s", tok_name[type]);
	switch (type) {
	case NAME:
	case NUMBER:
	case STRING:
	case OP:
		printf("(%.*s)", (int)(end - start), start);
		break;
	default:
		break;
	}
}

#endif