/***********************************************************
Copyright 1991, 1992, 1993, 1994 by Stichting Mathematisch Centrum,
Amsterdam, The Netherlands.

                        All Rights Reserved

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of Stichting Mathematisch
Centrum or CWI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior permission.

STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

******************************************************************/

/* Tokenizer implementation */

Guido van Rossum3f5da241990-12-20 15:06:42 +000027#include "pgenheaders.h"
28
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000029#include <ctype.h>
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000030
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000031#include "tokenizer.h"
32#include "errcode.h"
33
Guido van Rossumf4b1a641994-08-29 12:43:07 +000034extern char *my_readline PROTO((char *));
35/* Return malloc'ed string including trailing \n;
36 empty malloc'ed string for EOF;
37 NULL if interrupted */
38
Guido van Rossum4fe87291992-02-26 15:24:44 +000039/* Don't ever change this -- it would break the portability of Python code */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000040#define TABSIZE 8
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000041
Guido van Rossum3f5da241990-12-20 15:06:42 +000042/* Forward */
43static struct tok_state *tok_new PROTO((void));
44static int tok_nextc PROTO((struct tok_state *tok));
45static void tok_backup PROTO((struct tok_state *tok, int c));
46
/* Token names */

/* Printable name for each token type, indexed by the token number.
   This table must match the #defines in token.h! */
char *tok_name[] = {
	"ENDMARKER",
	"NAME",
	"NUMBER",
	"STRING",
	"NEWLINE",
	"INDENT",
	"DEDENT",
	"LPAR",
	"RPAR",
	"LSQB",
	"RSQB",
	"COLON",
	"COMMA",
	"SEMI",
	"PLUS",
	"MINUS",
	"STAR",
	"SLASH",
	"VBAR",
	"AMPER",
	"LESS",
	"GREATER",
	"EQUAL",
	"DOT",
	"PERCENT",
	"BACKQUOTE",
	"LBRACE",
	"RBRACE",
	"EQEQUAL",
	"NOTEQUAL",
	"LESSEQUAL",
	"GREATEREQUAL",
	"TILDE",
	"CIRCUMFLEX",
	"LEFTSHIFT",
	"RIGHTSHIFT",
	"OP",
	"<ERRORTOKEN>",
	"<N_TOKENS>"
};


93/* Create and initialize a new tok_state structure */
94
95static struct tok_state *
96tok_new()
97{
98 struct tok_state *tok = NEW(struct tok_state, 1);
99 if (tok == NULL)
100 return NULL;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000101 tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102 tok->done = E_OK;
103 tok->fp = NULL;
104 tok->tabsize = TABSIZE;
105 tok->indent = 0;
106 tok->indstack[0] = 0;
107 tok->atbol = 1;
108 tok->pendin = 0;
109 tok->prompt = tok->nextprompt = NULL;
110 tok->lineno = 0;
Guido van Rossuma849b831993-05-12 11:35:44 +0000111 tok->level = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000112 return tok;
113}
114
115
116/* Set up tokenizer for string */
117
118struct tok_state *
119tok_setups(str)
120 char *str;
121{
122 struct tok_state *tok = tok_new();
123 if (tok == NULL)
124 return NULL;
Guido van Rossum1a817c01994-09-19 08:06:25 +0000125 tok->buf = tok->cur = tok->end = tok->inp = str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000126 return tok;
127}
128
129
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000130/* Set up tokenizer for file */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000131
132struct tok_state *
133tok_setupf(fp, ps1, ps2)
134 FILE *fp;
135 char *ps1, *ps2;
136{
137 struct tok_state *tok = tok_new();
138 if (tok == NULL)
139 return NULL;
140 if ((tok->buf = NEW(char, BUFSIZ)) == NULL) {
141 DEL(tok);
142 return NULL;
143 }
144 tok->cur = tok->inp = tok->buf;
145 tok->end = tok->buf + BUFSIZ;
146 tok->fp = fp;
147 tok->prompt = ps1;
148 tok->nextprompt = ps2;
149 return tok;
150}
151
152
153/* Free a tok_state structure */
154
155void
156tok_free(tok)
157 struct tok_state *tok;
158{
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000159 if (tok->fp != NULL && tok->buf != NULL)
160 DEL(tok->buf);
161 DEL(tok);
162}
163
164
165/* Get next char, updating state; error code goes into tok->done */
166
167static int
168tok_nextc(tok)
169 register struct tok_state *tok;
170{
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000171 for (;;) {
Guido van Rossum1a817c01994-09-19 08:06:25 +0000172 if (tok->cur != tok->inp) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000173 return *tok->cur++; /* Fast path */
Guido van Rossum1a817c01994-09-19 08:06:25 +0000174 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000175 if (tok->done != E_OK)
176 return EOF;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000177 if (tok->fp == NULL) {
Guido van Rossum1a817c01994-09-19 08:06:25 +0000178 char *end = strchr(tok->inp, '\n');
179 if (end != NULL)
180 end++;
181 else {
182 end = strchr(tok->inp, '\0');
183 if (end == tok->inp) {
184 tok->done = E_EOF;
185 return EOF;
186 }
187 }
188 if (tok->start == NULL)
189 tok->buf = tok->cur;
190 tok->lineno++;
191 tok->inp = end;
192 return *tok->cur++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000193 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000194 if (tok->prompt != NULL) {
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000195 char *new = my_readline(tok->prompt);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000196 if (tok->nextprompt != NULL)
197 tok->prompt = tok->nextprompt;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000198 if (new == NULL)
199 tok->done = E_INTR;
200 else if (*new == '\0') {
201 free(new);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000202 tok->done = E_EOF;
203 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000204 else if (tok->start != NULL) {
205 int start = tok->start - tok->buf;
206 int oldlen = tok->cur - tok->buf;
207 int newlen = oldlen + strlen(new);
208 char *buf = realloc(tok->buf, newlen+1);
209 tok->lineno++;
210 if (buf == NULL) {
211 free(tok->buf);
212 free(new);
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000213 tok->done = E_NOMEM;
214 return EOF;
215 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000216 tok->buf = buf;
217 tok->cur = tok->buf + oldlen;
218 strcpy(tok->buf + oldlen, new);
219 free(new);
220 tok->inp = tok->buf + newlen;
221 tok->end = tok->inp + 1;
222 tok->start = tok->buf + start;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000223 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000224 else {
225 tok->lineno++;
226 if (tok->buf != NULL)
227 free(tok->buf);
228 tok->buf = new;
229 tok->cur = tok->buf;
230 tok->inp = strchr(tok->buf, '\0');
231 tok->end = tok->inp + 1;
232 }
233 }
234 else {
235 int done = 0;
236 int cur = 0;
237 if (tok->start == NULL) {
238 if (tok->buf == NULL) {
239 tok->buf = NEW(char, BUFSIZ);
240 if (tok->buf == NULL) {
241 tok->done = E_NOMEM;
242 return EOF;
243 }
244 tok->end = tok->buf + BUFSIZ;
245 }
246 if (fgets(tok->buf, (int)(tok->end - tok->buf),
247 tok->fp) == NULL) {
248 tok->done = E_EOF;
249 done = 1;
250 }
251 else {
252 tok->done = E_OK;
253 tok->inp = strchr(tok->buf, '\0');
254 done = tok->inp[-1] == '\n';
255 }
256 }
257 else {
258 cur = tok->cur - tok->buf;
259 tok->done = E_OK;
260 }
261 tok->lineno++;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000262 /* Read until '\n' or EOF */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000263 while (!done) {
264 int curstart = tok->start == NULL ? -1 :
265 tok->start - tok->buf;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000266 int curvalid = tok->inp - tok->buf;
267 int cursize = tok->end - tok->buf;
268 int newsize = cursize + BUFSIZ;
269 char *newbuf = tok->buf;
270 RESIZE(newbuf, char, newsize);
271 if (newbuf == NULL) {
272 tok->done = E_NOMEM;
273 tok->cur = tok->inp;
274 return EOF;
275 }
276 tok->buf = newbuf;
277 tok->inp = tok->buf + curvalid;
278 tok->end = tok->buf + newsize;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000279 tok->start = curstart < 0 ? NULL :
280 tok->buf + curstart;
281 if (fgets(tok->inp,
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000282 (int)(tok->end - tok->inp),
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000283 tok->fp) == NULL) {
284 /* Last line does not end in \n,
285 fake one */
286 strcpy(tok->inp, "\n");
287 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000288 tok->inp = strchr(tok->inp, '\0');
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000289 done = tok->inp[-1] == '\n';
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000290 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000291 tok->cur = tok->buf + cur;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000292 }
293 if (tok->done != E_OK) {
294 if (tok->prompt != NULL)
295 fprintf(stderr, "\n");
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000296 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000297 return EOF;
298 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000299 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000300 /*NOTREACHED*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000301}
302
303
304/* Back-up one character */
305
306static void
307tok_backup(tok, c)
308 register struct tok_state *tok;
309 register int c;
310{
311 if (c != EOF) {
312 if (--tok->cur < tok->buf) {
313 fprintf(stderr, "tok_backup: begin of buffer\n");
314 abort();
315 }
316 if (*tok->cur != c)
317 *tok->cur = c;
318 }
319}
320
321
322/* Return the token corresponding to a single character */
323
324int
325tok_1char(c)
326 int c;
327{
328 switch (c) {
329 case '(': return LPAR;
330 case ')': return RPAR;
331 case '[': return LSQB;
332 case ']': return RSQB;
333 case ':': return COLON;
334 case ',': return COMMA;
335 case ';': return SEMI;
336 case '+': return PLUS;
337 case '-': return MINUS;
338 case '*': return STAR;
339 case '/': return SLASH;
340 case '|': return VBAR;
341 case '&': return AMPER;
342 case '<': return LESS;
343 case '>': return GREATER;
344 case '=': return EQUAL;
345 case '.': return DOT;
346 case '%': return PERCENT;
347 case '`': return BACKQUOTE;
348 case '{': return LBRACE;
349 case '}': return RBRACE;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000350 case '^': return CIRCUMFLEX;
351 case '~': return TILDE;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000352 default: return OP;
353 }
354}
355
356
Guido van Rossumfbab9051991-10-20 20:25:03 +0000357int
358tok_2char(c1, c2)
359 int c1, c2;
360{
361 switch (c1) {
362 case '=':
363 switch (c2) {
364 case '=': return EQEQUAL;
365 }
366 break;
367 case '!':
368 switch (c2) {
369 case '=': return NOTEQUAL;
370 }
371 break;
372 case '<':
373 switch (c2) {
374 case '>': return NOTEQUAL;
375 case '=': return LESSEQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000376 case '<': return LEFTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000377 }
378 break;
379 case '>':
380 switch (c2) {
381 case '=': return GREATEREQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000382 case '>': return RIGHTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000383 }
384 break;
385 }
386 return OP;
387}
388
389
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000390/* Get next token, after space stripping etc. */
391
392int
393tok_get(tok, p_start, p_end)
394 register struct tok_state *tok; /* In/out: tokenizer state */
395 char **p_start, **p_end; /* Out: point to start/end of token */
396{
397 register int c;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000398 int blankline;
399
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000400 *p_start = *p_end = NULL;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000401 nextline:
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000402 tok->start = NULL;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000403 blankline = 0;
404
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000405 /* Get indentation level */
406 if (tok->atbol) {
407 register int col = 0;
408 tok->atbol = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000409 for (;;) {
410 c = tok_nextc(tok);
411 if (c == ' ')
412 col++;
413 else if (c == '\t')
414 col = (col/tok->tabsize + 1) * tok->tabsize;
415 else
416 break;
417 }
418 tok_backup(tok, c);
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000419 if (c == '#' || c == '\n') {
420 /* Lines with only whitespace and/or comments
421 shouldn't affect the indentation and are
422 not passed to the parser as NEWLINE tokens,
423 except *totally* empty lines in interactive
424 mode, which signal the end of a command group. */
425 if (col == 0 && c == '\n' && tok->prompt != NULL)
426 blankline = 0; /* Let it through */
427 else
428 blankline = 1; /* Ignore completely */
429 /* We can't jump back right here since we still
430 may need to skip to the end of a comment */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000431 }
Guido van Rossuma849b831993-05-12 11:35:44 +0000432 if (!blankline && tok->level == 0) {
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000433 if (col == tok->indstack[tok->indent]) {
434 /* No change */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000435 }
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000436 else if (col > tok->indstack[tok->indent]) {
437 /* Indent -- always one */
438 if (tok->indent+1 >= MAXINDENT) {
439 fprintf(stderr, "excessive indent\n");
440 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000441 tok->cur = tok->inp;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000442 return ERRORTOKEN;
443 }
444 tok->pendin++;
445 tok->indstack[++tok->indent] = col;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000446 }
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000447 else /* col < tok->indstack[tok->indent] */ {
448 /* Dedent -- any number, must be consistent */
449 while (tok->indent > 0 &&
450 col < tok->indstack[tok->indent]) {
451 tok->indent--;
452 tok->pendin--;
453 }
454 if (col != tok->indstack[tok->indent]) {
455 fprintf(stderr, "inconsistent dedent\n");
456 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000457 tok->cur = tok->inp;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000458 return ERRORTOKEN;
459 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000460 }
461 }
462 }
463
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000464 tok->start = tok->cur;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000465
466 /* Return pending indents/dedents */
467 if (tok->pendin != 0) {
468 if (tok->pendin < 0) {
469 tok->pendin++;
470 return DEDENT;
471 }
472 else {
473 tok->pendin--;
474 return INDENT;
475 }
476 }
477
478 again:
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000479 tok->start = NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000480 /* Skip spaces */
481 do {
482 c = tok_nextc(tok);
483 } while (c == ' ' || c == '\t');
484
485 /* Set start of current token */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000486 tok->start = tok->cur - 1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000487
488 /* Skip comment */
489 if (c == '#') {
490 /* Hack to allow overriding the tabsize in the file.
491 This is also recognized by vi, when it occurs near the
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000492 beginning or end of the file. (Will vi never die...?)
493 For Python it must be at the beginning of the file! */
Guido van Rossum1a817c01994-09-19 08:06:25 +0000494 /* XXX The real vi syntax is actually different :-( */
495 /* XXX Should recognize Emacs syntax, too */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000496 int x;
Guido van Rossum1a817c01994-09-19 08:06:25 +0000497 if (sscanf(tok->cur,
Guido van Rossum3f5da241990-12-20 15:06:42 +0000498 " vi:set tabsize=%d:", &x) == 1 &&
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000499 x >= 1 && x <= 40) {
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000500 /* fprintf(stderr, "# vi:set tabsize=%d:\n", x); */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000501 tok->tabsize = x;
502 }
503 do {
504 c = tok_nextc(tok);
505 } while (c != EOF && c != '\n');
506 }
507
508 /* Check for EOF and errors now */
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000509 if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000510 return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000511 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000512
513 /* Identifier (most frequent token!) */
514 if (isalpha(c) || c == '_') {
515 do {
516 c = tok_nextc(tok);
517 } while (isalnum(c) || c == '_');
518 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000519 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000520 *p_end = tok->cur;
521 return NAME;
522 }
523
524 /* Newline */
525 if (c == '\n') {
526 tok->atbol = 1;
Guido van Rossuma849b831993-05-12 11:35:44 +0000527 if (blankline || tok->level > 0)
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000528 goto nextline;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000529 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000530 *p_end = tok->cur - 1; /* Leave '\n' out of the string */
531 return NEWLINE;
532 }
533
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000534 /* Period or number starting with period? */
535 if (c == '.') {
536 c = tok_nextc(tok);
537 if (isdigit(c)) {
538 goto fraction;
539 }
540 else {
541 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000542 *p_start = tok->start;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000543 *p_end = tok->cur;
544 return DOT;
545 }
546 }
547
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000548 /* Number */
549 if (isdigit(c)) {
550 if (c == '0') {
551 /* Hex or octal */
552 c = tok_nextc(tok);
553 if (c == '.')
554 goto fraction;
555 if (c == 'x' || c == 'X') {
556 /* Hex */
557 do {
558 c = tok_nextc(tok);
559 } while (isxdigit(c));
560 }
561 else {
Guido van Rossum94309451991-12-10 14:01:05 +0000562 /* XXX This is broken! E.g.,
563 09.9 should be accepted as float! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000564 /* Octal; c is first char of it */
565 /* There's no 'isoctdigit' macro, sigh */
566 while ('0' <= c && c < '8') {
567 c = tok_nextc(tok);
568 }
569 }
Guido van Rossumf023c461991-05-05 20:16:20 +0000570 if (c == 'l' || c == 'L')
571 c = tok_nextc(tok);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000572 }
573 else {
574 /* Decimal */
575 do {
576 c = tok_nextc(tok);
577 } while (isdigit(c));
Guido van Rossumf023c461991-05-05 20:16:20 +0000578 if (c == 'l' || c == 'L')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000579 c = tok_nextc(tok);
Guido van Rossumf023c461991-05-05 20:16:20 +0000580 else {
581 /* Accept floating point numbers.
582 XXX This accepts incomplete things like
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000583 XXX 12e or 1e+; worry run-time */
Guido van Rossumf023c461991-05-05 20:16:20 +0000584 if (c == '.') {
585 fraction:
586 /* Fraction */
587 do {
588 c = tok_nextc(tok);
589 } while (isdigit(c));
590 }
591 if (c == 'e' || c == 'E') {
592 /* Exponent part */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000593 c = tok_nextc(tok);
Guido van Rossumf023c461991-05-05 20:16:20 +0000594 if (c == '+' || c == '-')
595 c = tok_nextc(tok);
596 while (isdigit(c)) {
597 c = tok_nextc(tok);
598 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000599 }
600 }
601 }
602 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000603 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000604 *p_end = tok->cur;
605 return NUMBER;
606 }
607
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000608 /* String */
609 if (c == '\'' || c == '"') {
610 int quote = c;
611 int triple = 0;
612 int tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000613 for (;;) {
614 c = tok_nextc(tok);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000615 if (c == '\n') {
616 if (!triple) {
617 tok->done = E_TOKEN;
618 tok_backup(tok, c);
619 return ERRORTOKEN;
620 }
621 tripcount = 0;
622 }
623 else if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000624 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000625 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000626 return ERRORTOKEN;
627 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000628 else if (c == quote) {
629 tripcount++;
630 if (tok->cur == tok->start+2) {
631 c = tok_nextc(tok);
632 if (c == quote) {
633 triple = 1;
634 tripcount = 0;
635 continue;
636 }
637 tok_backup(tok, c);
638 }
639 if (!triple || tripcount == 3)
640 break;
641 }
642 else if (c == '\\') {
643 tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000644 c = tok_nextc(tok);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000645 if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000646 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000647 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000648 return ERRORTOKEN;
649 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000650 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000651 else
652 tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000653 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000654 *p_start = tok->start;
Guido van Rossum8054fad1993-10-26 15:19:44 +0000655 *p_end = tok->cur;
656 return STRING;
657 }
658
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000659 /* Line continuation */
660 if (c == '\\') {
661 c = tok_nextc(tok);
662 if (c != '\n') {
663 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000664 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000665 return ERRORTOKEN;
666 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000667 goto again; /* Read next line */
668 }
669
Guido van Rossumfbab9051991-10-20 20:25:03 +0000670 /* Check for two-character token */
671 {
672 int c2 = tok_nextc(tok);
673 int token = tok_2char(c, c2);
674 if (token != OP) {
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000675 *p_start = tok->start;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000676 *p_end = tok->cur;
677 return token;
678 }
679 tok_backup(tok, c2);
680 }
681
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000682 /* Keep track of parentheses nesting level */
Guido van Rossuma849b831993-05-12 11:35:44 +0000683 switch (c) {
684 case '(':
685 case '[':
686 case '{':
687 tok->level++;
688 break;
689 case ')':
690 case ']':
691 case '}':
692 tok->level--;
693 break;
694 }
695
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000696 /* Punctuation character */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000697 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000698 *p_end = tok->cur;
699 return tok_1char(c);
700}
701
702
#ifdef DEBUG

/* Print a token to stdout for debugging.

   type       Token type; used as an index into tok_name[].
   start,end  Token text range as produced by tok_get(); only read
              for NAME, NUMBER, STRING and OP tokens.

   The token name is printed, followed by the text in parentheses for
   those four types.  A type outside tok_name[] is reported instead of
   indexing past the table. */

void
tok_dump(type, start, end)
	int type;
	char *start, *end;
{
	if (type < 0 ||
	    type >= (int)(sizeof tok_name / sizeof tok_name[0])) {
		printf("<unknown token %d>", type);
		return;
	}
	printf("%s", tok_name[type]);
	if (type == NAME || type == NUMBER || type == STRING || type == OP)
		printf("(%.*s)", (int)(end - start), start);
}

#endif
715#endif