blob: 75b63fbcb1b6916ea471af6af0b6e35690e3fa70 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossumb9f8d6e1995-01-04 19:08:09 +00002Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
7Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
9provided that the above copyright notice appear in all copies and that
10both that copyright notice and this permission notice appear in
11supporting documentation, and that the names of Stichting Mathematisch
12Centrum or CWI not be used in advertising or publicity pertaining to
13distribution of the software without specific, written prior permission.
14
15STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
16THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
18FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
20ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
21OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22
23******************************************************************/
24
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000025/* Tokenizer implementation */
26
Guido van Rossum3f5da241990-12-20 15:06:42 +000027#include "pgenheaders.h"
28
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000029#include <ctype.h>
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000030
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000031#include "tokenizer.h"
32#include "errcode.h"
33
/* Line reader used for interactive input; it is called with the prompt
   string to display (see tok_nextc).  Contract:
   - returns a malloc'ed string including the trailing \n;
   - returns an empty malloc'ed string for EOF;
   - returns NULL if interrupted.
   The tokenizer takes ownership of the returned string. */
extern char *my_readline PROTO((char *));

/* Default tab stop used when converting leading whitespace to an
   indentation column (copied into tok->tabsize by tok_new).
   Don't ever change this -- it would break the portability of Python code */
#define TABSIZE 8

/* Forward declarations of the file-local helpers defined below */
static struct tok_state *tok_new PROTO((void));
static int tok_nextc PROTO((struct tok_state *tok));
static void tok_backup PROTO((struct tok_state *tok, int c));
46
/* Token names.

   tok_name[type] is the printable name of the token with numeric type
   'type' (tok_dump below indexes it that way).  The position of every
   entry is therefore significant: this table must match the #defines
   in token.h, so a new token has to be added in both places and at the
   same index. */

char *tok_name[] = {
	"ENDMARKER",
	"NAME",
	"NUMBER",
	"STRING",
	"NEWLINE",
	"INDENT",
	"DEDENT",
	"LPAR",
	"RPAR",
	"LSQB",
	"RSQB",
	"COLON",
	"COMMA",
	"SEMI",
	"PLUS",
	"MINUS",
	"STAR",
	"SLASH",
	"VBAR",
	"AMPER",
	"LESS",
	"GREATER",
	"EQUAL",
	"DOT",
	"PERCENT",
	"BACKQUOTE",
	"LBRACE",
	"RBRACE",
	"EQEQUAL",
	"NOTEQUAL",
	"LESSEQUAL",
	"GREATEREQUAL",
	"TILDE",
	"CIRCUMFLEX",
	"LEFTSHIFT",
	"RIGHTSHIFT",
	/* This table must match the #defines in token.h! */
	"OP",
	"<ERRORTOKEN>",
	"<N_TOKENS>"
};
91
92
93/* Create and initialize a new tok_state structure */
94
95static struct tok_state *
96tok_new()
97{
98 struct tok_state *tok = NEW(struct tok_state, 1);
99 if (tok == NULL)
100 return NULL;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000101 tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102 tok->done = E_OK;
103 tok->fp = NULL;
104 tok->tabsize = TABSIZE;
105 tok->indent = 0;
106 tok->indstack[0] = 0;
107 tok->atbol = 1;
108 tok->pendin = 0;
109 tok->prompt = tok->nextprompt = NULL;
110 tok->lineno = 0;
Guido van Rossuma849b831993-05-12 11:35:44 +0000111 tok->level = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000112 return tok;
113}
114
115
116/* Set up tokenizer for string */
117
118struct tok_state *
119tok_setups(str)
120 char *str;
121{
122 struct tok_state *tok = tok_new();
123 if (tok == NULL)
124 return NULL;
Guido van Rossum1a817c01994-09-19 08:06:25 +0000125 tok->buf = tok->cur = tok->end = tok->inp = str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000126 return tok;
127}
128
129
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000130/* Set up tokenizer for file */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000131
132struct tok_state *
133tok_setupf(fp, ps1, ps2)
134 FILE *fp;
135 char *ps1, *ps2;
136{
137 struct tok_state *tok = tok_new();
138 if (tok == NULL)
139 return NULL;
140 if ((tok->buf = NEW(char, BUFSIZ)) == NULL) {
141 DEL(tok);
142 return NULL;
143 }
144 tok->cur = tok->inp = tok->buf;
145 tok->end = tok->buf + BUFSIZ;
146 tok->fp = fp;
147 tok->prompt = ps1;
148 tok->nextprompt = ps2;
149 return tok;
150}
151
152
153/* Free a tok_state structure */
154
155void
156tok_free(tok)
157 struct tok_state *tok;
158{
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000159 if (tok->fp != NULL && tok->buf != NULL)
160 DEL(tok->buf);
161 DEL(tok);
162}
163
164
165/* Get next char, updating state; error code goes into tok->done */
166
167static int
168tok_nextc(tok)
169 register struct tok_state *tok;
170{
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000171 for (;;) {
Guido van Rossum1a817c01994-09-19 08:06:25 +0000172 if (tok->cur != tok->inp) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000173 return *tok->cur++; /* Fast path */
Guido van Rossum1a817c01994-09-19 08:06:25 +0000174 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000175 if (tok->done != E_OK)
176 return EOF;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000177 if (tok->fp == NULL) {
Guido van Rossum1a817c01994-09-19 08:06:25 +0000178 char *end = strchr(tok->inp, '\n');
179 if (end != NULL)
180 end++;
181 else {
182 end = strchr(tok->inp, '\0');
183 if (end == tok->inp) {
184 tok->done = E_EOF;
185 return EOF;
186 }
187 }
188 if (tok->start == NULL)
189 tok->buf = tok->cur;
190 tok->lineno++;
191 tok->inp = end;
192 return *tok->cur++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000193 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000194 if (tok->prompt != NULL) {
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000195 char *new = my_readline(tok->prompt);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000196 if (tok->nextprompt != NULL)
197 tok->prompt = tok->nextprompt;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000198 if (new == NULL)
199 tok->done = E_INTR;
200 else if (*new == '\0') {
201 free(new);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000202 tok->done = E_EOF;
203 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000204 else if (tok->start != NULL) {
205 int start = tok->start - tok->buf;
206 int oldlen = tok->cur - tok->buf;
207 int newlen = oldlen + strlen(new);
208 char *buf = realloc(tok->buf, newlen+1);
209 tok->lineno++;
210 if (buf == NULL) {
211 free(tok->buf);
Guido van Rossum588633d1994-12-30 15:46:02 +0000212 tok->buf = NULL;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000213 free(new);
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000214 tok->done = E_NOMEM;
215 return EOF;
216 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000217 tok->buf = buf;
218 tok->cur = tok->buf + oldlen;
219 strcpy(tok->buf + oldlen, new);
220 free(new);
221 tok->inp = tok->buf + newlen;
222 tok->end = tok->inp + 1;
223 tok->start = tok->buf + start;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000224 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000225 else {
226 tok->lineno++;
227 if (tok->buf != NULL)
228 free(tok->buf);
229 tok->buf = new;
230 tok->cur = tok->buf;
231 tok->inp = strchr(tok->buf, '\0');
232 tok->end = tok->inp + 1;
233 }
234 }
235 else {
236 int done = 0;
237 int cur = 0;
238 if (tok->start == NULL) {
239 if (tok->buf == NULL) {
240 tok->buf = NEW(char, BUFSIZ);
241 if (tok->buf == NULL) {
242 tok->done = E_NOMEM;
243 return EOF;
244 }
245 tok->end = tok->buf + BUFSIZ;
246 }
247 if (fgets(tok->buf, (int)(tok->end - tok->buf),
248 tok->fp) == NULL) {
249 tok->done = E_EOF;
250 done = 1;
251 }
252 else {
253 tok->done = E_OK;
254 tok->inp = strchr(tok->buf, '\0');
255 done = tok->inp[-1] == '\n';
256 }
257 }
258 else {
259 cur = tok->cur - tok->buf;
260 tok->done = E_OK;
261 }
262 tok->lineno++;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000263 /* Read until '\n' or EOF */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000264 while (!done) {
265 int curstart = tok->start == NULL ? -1 :
266 tok->start - tok->buf;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000267 int curvalid = tok->inp - tok->buf;
268 int cursize = tok->end - tok->buf;
269 int newsize = cursize + BUFSIZ;
270 char *newbuf = tok->buf;
271 RESIZE(newbuf, char, newsize);
272 if (newbuf == NULL) {
273 tok->done = E_NOMEM;
274 tok->cur = tok->inp;
275 return EOF;
276 }
277 tok->buf = newbuf;
278 tok->inp = tok->buf + curvalid;
279 tok->end = tok->buf + newsize;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000280 tok->start = curstart < 0 ? NULL :
281 tok->buf + curstart;
282 if (fgets(tok->inp,
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000283 (int)(tok->end - tok->inp),
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000284 tok->fp) == NULL) {
285 /* Last line does not end in \n,
286 fake one */
287 strcpy(tok->inp, "\n");
288 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000289 tok->inp = strchr(tok->inp, '\0');
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000290 done = tok->inp[-1] == '\n';
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000291 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000292 tok->cur = tok->buf + cur;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000293 }
294 if (tok->done != E_OK) {
295 if (tok->prompt != NULL)
296 fprintf(stderr, "\n");
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000297 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000298 return EOF;
299 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000300 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000301 /*NOTREACHED*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000302}
303
304
305/* Back-up one character */
306
307static void
308tok_backup(tok, c)
309 register struct tok_state *tok;
310 register int c;
311{
312 if (c != EOF) {
Guido van Rossum588633d1994-12-30 15:46:02 +0000313 if (--tok->cur < tok->buf)
314 fatal("tok_backup: begin of buffer");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000315 if (*tok->cur != c)
316 *tok->cur = c;
317 }
318}
319
320
321/* Return the token corresponding to a single character */
322
323int
324tok_1char(c)
325 int c;
326{
327 switch (c) {
328 case '(': return LPAR;
329 case ')': return RPAR;
330 case '[': return LSQB;
331 case ']': return RSQB;
332 case ':': return COLON;
333 case ',': return COMMA;
334 case ';': return SEMI;
335 case '+': return PLUS;
336 case '-': return MINUS;
337 case '*': return STAR;
338 case '/': return SLASH;
339 case '|': return VBAR;
340 case '&': return AMPER;
341 case '<': return LESS;
342 case '>': return GREATER;
343 case '=': return EQUAL;
344 case '.': return DOT;
345 case '%': return PERCENT;
346 case '`': return BACKQUOTE;
347 case '{': return LBRACE;
348 case '}': return RBRACE;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000349 case '^': return CIRCUMFLEX;
350 case '~': return TILDE;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000351 default: return OP;
352 }
353}
354
355
Guido van Rossumfbab9051991-10-20 20:25:03 +0000356int
357tok_2char(c1, c2)
358 int c1, c2;
359{
360 switch (c1) {
361 case '=':
362 switch (c2) {
363 case '=': return EQEQUAL;
364 }
365 break;
366 case '!':
367 switch (c2) {
368 case '=': return NOTEQUAL;
369 }
370 break;
371 case '<':
372 switch (c2) {
373 case '>': return NOTEQUAL;
374 case '=': return LESSEQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000375 case '<': return LEFTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000376 }
377 break;
378 case '>':
379 switch (c2) {
380 case '=': return GREATEREQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000381 case '>': return RIGHTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000382 }
383 break;
384 }
385 return OP;
386}
387
388
/* Get next token, after space stripping etc.

   Returns the type of the next token.  For NAME, NUMBER, STRING,
   NEWLINE and operator/punctuation tokens, *p_start and *p_end are set
   to delimit the token text inside the tokenizer's buffer (*p_end
   points just past the last character).  They are left NULL when
   INDENT, DEDENT, ENDMARKER or ERRORTOKEN is returned.

   ENDMARKER is returned when input ends with tok->done == E_EOF.
   ERRORTOKEN is returned for any other end-of-input condition and for
   lexical errors; in the latter case tok->done is set to E_TOKEN.

   State used across calls:
   - tok->atbol:  set after a newline; makes the next call measure the
                  indentation of the new line.
   - tok->pendin: number of INDENT (positive) or DEDENT (negative)
                  tokens still to be returned, one per call.
   - tok->level:  nesting depth of (), [] and {}; while it is positive,
                  newlines and indentation are not reported.
   - tok->start:  start of the token being scanned, or NULL when no
                  token is in progress; tok_nextc keeps the buffered
                  text when it has to read another line while this is
                  non-NULL. */

int
tok_get(tok, p_start, p_end)
	register struct tok_state *tok; /* In/out: tokenizer state */
	char **p_start, **p_end; /* Out: point to start/end of token */
{
	register int c;
	int blankline;

	*p_start = *p_end = NULL;
  nextline:
	tok->start = NULL;
	blankline = 0;

	/* Get indentation level */
	if (tok->atbol) {
		register int col = 0;
		tok->atbol = 0;
		/* Compute the column of the first non-blank character;
		   a tab advances to the next multiple of tok->tabsize */
		for (;;) {
			c = tok_nextc(tok);
			if (c == ' ')
				col++;
			else if (c == '\t')
				col = (col/tok->tabsize + 1) * tok->tabsize;
			else
				break;
		}
		tok_backup(tok, c);
		if (c == '#' || c == '\n') {
			/* Lines with only whitespace and/or comments
			   shouldn't affect the indentation and are
			   not passed to the parser as NEWLINE tokens,
			   except *totally* empty lines in interactive
			   mode, which signal the end of a command group. */
			if (col == 0 && c == '\n' && tok->prompt != NULL)
				blankline = 0; /* Let it through */
			else
				blankline = 1; /* Ignore completely */
			/* We can't jump back right here since we still
			   may need to skip to the end of a comment */
		}
		/* Indentation is only significant outside brackets */
		if (!blankline && tok->level == 0) {
			if (col == tok->indstack[tok->indent]) {
				/* No change */
			}
			else if (col > tok->indstack[tok->indent]) {
				/* Indent -- always one */
				if (tok->indent+1 >= MAXINDENT) {
					fprintf(stderr, "excessive indent\n");
					tok->done = E_TOKEN;
					tok->cur = tok->inp;
					return ERRORTOKEN;
				}
				tok->pendin++;
				tok->indstack[++tok->indent] = col;
			}
			else /* col < tok->indstack[tok->indent] */ {
				/* Dedent -- any number, must be consistent */
				while (tok->indent > 0 &&
					col < tok->indstack[tok->indent]) {
					tok->indent--;
					tok->pendin--;
				}
				/* The new column must equal one of the
				   enclosing indentation levels */
				if (col != tok->indstack[tok->indent]) {
					fprintf(stderr, "inconsistent dedent\n");
					tok->done = E_TOKEN;
					tok->cur = tok->inp;
					return ERRORTOKEN;
				}
			}
		}
	}

	tok->start = tok->cur;

	/* Return pending indents/dedents */
	if (tok->pendin != 0) {
		if (tok->pendin < 0) {
			tok->pendin++;
			return DEDENT;
		}
		else {
			tok->pendin--;
			return INDENT;
		}
	}

 again:
	/* No token in progress while skipping leading blanks */
	tok->start = NULL;
	/* Skip spaces */
	do {
		c = tok_nextc(tok);
	} while (c == ' ' || c == '\t');

	/* Set start of current token */
	tok->start = tok->cur - 1;

	/* Skip comment */
	if (c == '#') {
		/* Hack to allow overriding the tabsize in the file.
		   This is also recognized by vi, when it occurs near the
		   beginning or end of the file.  (Will vi never die...?)
		   For Python it must be at the beginning of the file! */
		/* XXX The real vi syntax is actually different :-( */
		/* XXX Should recognize Emacs syntax, too */
		int x;
		/* Only tab sizes from 1 to 40 are accepted */
		if (sscanf(tok->cur,
				" vi:set tabsize=%d:", &x) == 1 &&
						x >= 1 && x <= 40) {
			/* fprintf(stderr, "# vi:set tabsize=%d:\n", x); */
			tok->tabsize = x;
		}
		/* Discard the rest of the line; c ends up as the
		   terminating newline or EOF */
		do {
			c = tok_nextc(tok);
		} while (c != EOF && c != '\n');
	}

	/* Check for EOF and errors now */
	if (c == EOF) {
		return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
	}

	/* Identifier (most frequent token!) */
	if (isalpha(c) || c == '_') {
		do {
			c = tok_nextc(tok);
		} while (isalnum(c) || c == '_');
		tok_backup(tok, c);
		*p_start = tok->start;
		*p_end = tok->cur;
		return NAME;
	}

	/* Newline */
	if (c == '\n') {
		tok->atbol = 1;
		/* Newlines of blank/comment-only lines and newlines
		   inside brackets are not reported; scan the next line */
		if (blankline || tok->level > 0)
			goto nextline;
		*p_start = tok->start;
		*p_end = tok->cur - 1; /* Leave '\n' out of the string */
		return NEWLINE;
	}

	/* Period or number starting with period? */
	if (c == '.') {
		c = tok_nextc(tok);
		if (isdigit(c)) {
			/* Jumps into the number scanner below */
			goto fraction;
		}
		else {
			tok_backup(tok, c);
			*p_start = tok->start;
			*p_end = tok->cur;
			return DOT;
		}
	}

	/* Number */
	if (isdigit(c)) {
		if (c == '0') {
			/* Hex or octal */
			c = tok_nextc(tok);
			if (c == '.')
				goto fraction;
			if (c == 'x' || c == 'X') {
				/* Hex */
				do {
					c = tok_nextc(tok);
				} while (isxdigit(c));
			}
			else {
				/* XXX This is broken!  E.g.,
				   09.9 should be accepted as float! */
				/* Octal; c is first char of it */
				/* There's no 'isoctdigit' macro, sigh */
				while ('0' <= c && c < '8') {
					c = tok_nextc(tok);
				}
			}
			/* Optional long-integer suffix */
			if (c == 'l' || c == 'L')
				c = tok_nextc(tok);
		}
		else {
			/* Decimal */
			do {
				c = tok_nextc(tok);
			} while (isdigit(c));
			/* Optional long-integer suffix */
			if (c == 'l' || c == 'L')
				c = tok_nextc(tok);
			else {
				/* Accept floating point numbers.
				   XXX This accepts incomplete things like
				   XXX 12e or 1e+; worry about those at
				   XXX run-time */
				if (c == '.') {
		fraction:
					/* Fraction */
					do {
						c = tok_nextc(tok);
					} while (isdigit(c));
				}
				if (c == 'e' || c == 'E') {
					/* Exponent part */
					c = tok_nextc(tok);
					if (c == '+' || c == '-')
						c = tok_nextc(tok);
					while (isdigit(c)) {
						c = tok_nextc(tok);
					}
				}
			}
		}
		/* c is the first character past the number */
		tok_backup(tok, c);
		*p_start = tok->start;
		*p_end = tok->cur;
		return NUMBER;
	}

	/* String */
	if (c == '\'' || c == '"') {
		int quote = c;		/* The opening quote character */
		int triple = 0;		/* Nonzero inside a triple-quoted string */
		int tripcount = 0;	/* Consecutive quote characters seen */
		for (;;) {
			c = tok_nextc(tok);
			if (c == '\n') {
				/* Only triple-quoted strings may contain
				   a raw newline */
				if (!triple) {
					tok->done = E_TOKEN;
					tok_backup(tok, c);
					return ERRORTOKEN;
				}
				tripcount = 0;
			}
			else if (c == EOF) {
				/* Unterminated string */
				tok->done = E_TOKEN;
				tok->cur = tok->inp;
				return ERRORTOKEN;
			}
			else if (c == quote) {
				tripcount++;
				/* The quote just read is the second
				   character of the token: a third one
				   right after it opens a triple-quoted
				   string, anything else means the string
				   is empty */
				if (tok->cur == tok->start+2) {
					c = tok_nextc(tok);
					if (c == quote) {
						triple = 1;
						tripcount = 0;
						continue;
					}
					tok_backup(tok, c);
				}
				/* A plain string ends at the first quote, a
				   triple-quoted one at three in a row */
				if (!triple || tripcount == 3)
					break;
			}
			else if (c == '\\') {
				/* Skip the character after a backslash, so
				   that an escaped quote does not end the
				   string */
				tripcount = 0;
				c = tok_nextc(tok);
				if (c == EOF) {
					tok->done = E_TOKEN;
					tok->cur = tok->inp;
					return ERRORTOKEN;
				}
			}
			else
				tripcount = 0;
		}
		*p_start = tok->start;
		*p_end = tok->cur;
		return STRING;
	}

	/* Line continuation */
	if (c == '\\') {
		c = tok_nextc(tok);
		/* The backslash must be the last character on the line */
		if (c != '\n') {
			tok->done = E_TOKEN;
			tok->cur = tok->inp;
			return ERRORTOKEN;
		}
		goto again; /* Read next line */
	}

	/* Check for two-character token */
	{
		int c2 = tok_nextc(tok);
		int token = tok_2char(c, c2);
		/* tok_2char returns OP when the pair is not an operator */
		if (token != OP) {
			*p_start = tok->start;
			*p_end = tok->cur;
			return token;
		}
		tok_backup(tok, c2);
	}

	/* Keep track of parentheses nesting level */
	switch (c) {
	case '(':
	case '[':
	case '{':
		tok->level++;
		break;
	case ')':
	case ']':
	case '}':
		tok->level--;
		break;
	}

	/* Punctuation character */
	*p_start = tok->start;
	*p_end = tok->cur;
	return tok_1char(c);
}
700
701
702#ifdef DEBUG
703
704void
705tok_dump(type, start, end)
706 int type;
707 char *start, *end;
708{
709 printf("%s", tok_name[type]);
710 if (type == NAME || type == NUMBER || type == STRING || type == OP)
711 printf("(%.*s)", (int)(end - start), start);
712}
713
714#endif