blob: 8b1b085961a563fa0685d0dbc554d6edfd3ec65c [file] [log] [blame]
/***********************************************************
Copyright 1991, 1992, 1993, 1994 by Stichting Mathematisch Centrum,
Amsterdam, The Netherlands.

                        All Rights Reserved

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of Stichting Mathematisch
Centrum or CWI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior permission.

STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

******************************************************************/
24
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000025/* Tokenizer implementation */
26
Guido van Rossum3f5da241990-12-20 15:06:42 +000027#include "pgenheaders.h"
28
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000029#include <ctype.h>
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000030
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000031#include "tokenizer.h"
32#include "errcode.h"
33
Guido van Rossumf4b1a641994-08-29 12:43:07 +000034extern char *my_readline PROTO((char *));
35/* Return malloc'ed string including trailing \n;
36 empty malloc'ed string for EOF;
37 NULL if interrupted */
38
Guido van Rossum4fe87291992-02-26 15:24:44 +000039/* Don't ever change this -- it would break the portability of Python code */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000040#define TABSIZE 8
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000041
Guido van Rossum3f5da241990-12-20 15:06:42 +000042/* Forward */
43static struct tok_state *tok_new PROTO((void));
44static int tok_nextc PROTO((struct tok_state *tok));
45static void tok_backup PROTO((struct tok_state *tok, int c));
46
/* Token names, indexed by token type.
   This table must match the #defines in token.h! */

char *tok_name[] = {
	"ENDMARKER",
	"NAME",
	"NUMBER",
	"STRING",
	"NEWLINE",
	"INDENT",
	"DEDENT",
	"LPAR",
	"RPAR",
	"LSQB",
	"RSQB",
	"COLON",
	"COMMA",
	"SEMI",
	"PLUS",
	"MINUS",
	"STAR",
	"SLASH",
	"VBAR",
	"AMPER",
	"LESS",
	"GREATER",
	"EQUAL",
	"DOT",
	"PERCENT",
	"BACKQUOTE",
	"LBRACE",
	"RBRACE",
	"EQEQUAL",
	"NOTEQUAL",
	"LESSEQUAL",
	"GREATEREQUAL",
	"TILDE",
	"CIRCUMFLEX",
	"LEFTSHIFT",
	"RIGHTSHIFT",
	/* Generic operator, followed by the two pseudo entries */
	"OP",
	"<ERRORTOKEN>",
	"<N_TOKENS>"
};
91
92
93/* Create and initialize a new tok_state structure */
94
95static struct tok_state *
96tok_new()
97{
98 struct tok_state *tok = NEW(struct tok_state, 1);
99 if (tok == NULL)
100 return NULL;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000101 tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102 tok->done = E_OK;
103 tok->fp = NULL;
104 tok->tabsize = TABSIZE;
105 tok->indent = 0;
106 tok->indstack[0] = 0;
107 tok->atbol = 1;
108 tok->pendin = 0;
109 tok->prompt = tok->nextprompt = NULL;
110 tok->lineno = 0;
Guido van Rossuma849b831993-05-12 11:35:44 +0000111 tok->level = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000112 return tok;
113}
114
115
116/* Set up tokenizer for string */
117
118struct tok_state *
119tok_setups(str)
120 char *str;
121{
122 struct tok_state *tok = tok_new();
123 if (tok == NULL)
124 return NULL;
125 tok->buf = tok->cur = str;
126 tok->end = tok->inp = strchr(str, '\0');
127 return tok;
128}
129
130
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000131/* Set up tokenizer for file */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000132
133struct tok_state *
134tok_setupf(fp, ps1, ps2)
135 FILE *fp;
136 char *ps1, *ps2;
137{
138 struct tok_state *tok = tok_new();
139 if (tok == NULL)
140 return NULL;
141 if ((tok->buf = NEW(char, BUFSIZ)) == NULL) {
142 DEL(tok);
143 return NULL;
144 }
145 tok->cur = tok->inp = tok->buf;
146 tok->end = tok->buf + BUFSIZ;
147 tok->fp = fp;
148 tok->prompt = ps1;
149 tok->nextprompt = ps2;
150 return tok;
151}
152
153
154/* Free a tok_state structure */
155
156void
157tok_free(tok)
158 struct tok_state *tok;
159{
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000160 if (tok->fp != NULL && tok->buf != NULL)
161 DEL(tok->buf);
162 DEL(tok);
163}
164
165
166/* Get next char, updating state; error code goes into tok->done */
167
168static int
169tok_nextc(tok)
170 register struct tok_state *tok;
171{
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000172 for (;;) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000173 if (tok->cur != tok->inp)
174 return *tok->cur++; /* Fast path */
175 if (tok->done != E_OK)
176 return EOF;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000177 if (tok->fp == NULL) {
178 tok->done = E_EOF;
179 return EOF;
180 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000181 if (tok->prompt != NULL) {
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000182 char *new = my_readline(tok->prompt);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000183 if (tok->nextprompt != NULL)
184 tok->prompt = tok->nextprompt;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000185 if (new == NULL)
186 tok->done = E_INTR;
187 else if (*new == '\0') {
188 free(new);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000189 tok->done = E_EOF;
190 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000191 else if (tok->start != NULL) {
192 int start = tok->start - tok->buf;
193 int oldlen = tok->cur - tok->buf;
194 int newlen = oldlen + strlen(new);
195 char *buf = realloc(tok->buf, newlen+1);
196 tok->lineno++;
197 if (buf == NULL) {
198 free(tok->buf);
199 free(new);
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000200 tok->done = E_NOMEM;
201 return EOF;
202 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000203 tok->buf = buf;
204 tok->cur = tok->buf + oldlen;
205 strcpy(tok->buf + oldlen, new);
206 free(new);
207 tok->inp = tok->buf + newlen;
208 tok->end = tok->inp + 1;
209 tok->start = tok->buf + start;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000210 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000211 else {
212 tok->lineno++;
213 if (tok->buf != NULL)
214 free(tok->buf);
215 tok->buf = new;
216 tok->cur = tok->buf;
217 tok->inp = strchr(tok->buf, '\0');
218 tok->end = tok->inp + 1;
219 }
220 }
221 else {
222 int done = 0;
223 int cur = 0;
224 if (tok->start == NULL) {
225 if (tok->buf == NULL) {
226 tok->buf = NEW(char, BUFSIZ);
227 if (tok->buf == NULL) {
228 tok->done = E_NOMEM;
229 return EOF;
230 }
231 tok->end = tok->buf + BUFSIZ;
232 }
233 if (fgets(tok->buf, (int)(tok->end - tok->buf),
234 tok->fp) == NULL) {
235 tok->done = E_EOF;
236 done = 1;
237 }
238 else {
239 tok->done = E_OK;
240 tok->inp = strchr(tok->buf, '\0');
241 done = tok->inp[-1] == '\n';
242 }
243 }
244 else {
245 cur = tok->cur - tok->buf;
246 tok->done = E_OK;
247 }
248 tok->lineno++;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000249 /* Read until '\n' or EOF */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000250 while (!done) {
251 int curstart = tok->start == NULL ? -1 :
252 tok->start - tok->buf;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000253 int curvalid = tok->inp - tok->buf;
254 int cursize = tok->end - tok->buf;
255 int newsize = cursize + BUFSIZ;
256 char *newbuf = tok->buf;
257 RESIZE(newbuf, char, newsize);
258 if (newbuf == NULL) {
259 tok->done = E_NOMEM;
260 tok->cur = tok->inp;
261 return EOF;
262 }
263 tok->buf = newbuf;
264 tok->inp = tok->buf + curvalid;
265 tok->end = tok->buf + newsize;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000266 tok->start = curstart < 0 ? NULL :
267 tok->buf + curstart;
268 if (fgets(tok->inp,
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000269 (int)(tok->end - tok->inp),
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000270 tok->fp) == NULL) {
271 /* Last line does not end in \n,
272 fake one */
273 strcpy(tok->inp, "\n");
274 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000275 tok->inp = strchr(tok->inp, '\0');
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000276 done = tok->inp[-1] == '\n';
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000277 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000278 tok->cur = tok->buf + cur;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000279 }
280 if (tok->done != E_OK) {
281 if (tok->prompt != NULL)
282 fprintf(stderr, "\n");
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000283 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000284 return EOF;
285 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000286 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000287 /*NOTREACHED*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000288}
289
290
291/* Back-up one character */
292
293static void
294tok_backup(tok, c)
295 register struct tok_state *tok;
296 register int c;
297{
298 if (c != EOF) {
299 if (--tok->cur < tok->buf) {
300 fprintf(stderr, "tok_backup: begin of buffer\n");
301 abort();
302 }
303 if (*tok->cur != c)
304 *tok->cur = c;
305 }
306}
307
308
309/* Return the token corresponding to a single character */
310
311int
312tok_1char(c)
313 int c;
314{
315 switch (c) {
316 case '(': return LPAR;
317 case ')': return RPAR;
318 case '[': return LSQB;
319 case ']': return RSQB;
320 case ':': return COLON;
321 case ',': return COMMA;
322 case ';': return SEMI;
323 case '+': return PLUS;
324 case '-': return MINUS;
325 case '*': return STAR;
326 case '/': return SLASH;
327 case '|': return VBAR;
328 case '&': return AMPER;
329 case '<': return LESS;
330 case '>': return GREATER;
331 case '=': return EQUAL;
332 case '.': return DOT;
333 case '%': return PERCENT;
334 case '`': return BACKQUOTE;
335 case '{': return LBRACE;
336 case '}': return RBRACE;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000337 case '^': return CIRCUMFLEX;
338 case '~': return TILDE;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000339 default: return OP;
340 }
341}
342
343
Guido van Rossumfbab9051991-10-20 20:25:03 +0000344int
345tok_2char(c1, c2)
346 int c1, c2;
347{
348 switch (c1) {
349 case '=':
350 switch (c2) {
351 case '=': return EQEQUAL;
352 }
353 break;
354 case '!':
355 switch (c2) {
356 case '=': return NOTEQUAL;
357 }
358 break;
359 case '<':
360 switch (c2) {
361 case '>': return NOTEQUAL;
362 case '=': return LESSEQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000363 case '<': return LEFTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000364 }
365 break;
366 case '>':
367 switch (c2) {
368 case '=': return GREATEREQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000369 case '>': return RIGHTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000370 }
371 break;
372 }
373 return OP;
374}
375
376
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000377/* Get next token, after space stripping etc. */
378
379int
380tok_get(tok, p_start, p_end)
381 register struct tok_state *tok; /* In/out: tokenizer state */
382 char **p_start, **p_end; /* Out: point to start/end of token */
383{
384 register int c;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000385 int blankline;
386
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000387 *p_start = *p_end = NULL;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000388 nextline:
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000389 tok->start = NULL;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000390 blankline = 0;
391
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000392 /* Get indentation level */
393 if (tok->atbol) {
394 register int col = 0;
395 tok->atbol = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000396 for (;;) {
397 c = tok_nextc(tok);
398 if (c == ' ')
399 col++;
400 else if (c == '\t')
401 col = (col/tok->tabsize + 1) * tok->tabsize;
402 else
403 break;
404 }
405 tok_backup(tok, c);
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000406 if (c == '#' || c == '\n') {
407 /* Lines with only whitespace and/or comments
408 shouldn't affect the indentation and are
409 not passed to the parser as NEWLINE tokens,
410 except *totally* empty lines in interactive
411 mode, which signal the end of a command group. */
412 if (col == 0 && c == '\n' && tok->prompt != NULL)
413 blankline = 0; /* Let it through */
414 else
415 blankline = 1; /* Ignore completely */
416 /* We can't jump back right here since we still
417 may need to skip to the end of a comment */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000418 }
Guido van Rossuma849b831993-05-12 11:35:44 +0000419 if (!blankline && tok->level == 0) {
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000420 if (col == tok->indstack[tok->indent]) {
421 /* No change */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000422 }
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000423 else if (col > tok->indstack[tok->indent]) {
424 /* Indent -- always one */
425 if (tok->indent+1 >= MAXINDENT) {
426 fprintf(stderr, "excessive indent\n");
427 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000428 tok->cur = tok->inp;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000429 return ERRORTOKEN;
430 }
431 tok->pendin++;
432 tok->indstack[++tok->indent] = col;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000433 }
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000434 else /* col < tok->indstack[tok->indent] */ {
435 /* Dedent -- any number, must be consistent */
436 while (tok->indent > 0 &&
437 col < tok->indstack[tok->indent]) {
438 tok->indent--;
439 tok->pendin--;
440 }
441 if (col != tok->indstack[tok->indent]) {
442 fprintf(stderr, "inconsistent dedent\n");
443 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000444 tok->cur = tok->inp;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000445 return ERRORTOKEN;
446 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000447 }
448 }
449 }
450
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000451 tok->start = tok->cur;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000452
453 /* Return pending indents/dedents */
454 if (tok->pendin != 0) {
455 if (tok->pendin < 0) {
456 tok->pendin++;
457 return DEDENT;
458 }
459 else {
460 tok->pendin--;
461 return INDENT;
462 }
463 }
464
465 again:
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000466 tok->start = NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000467 /* Skip spaces */
468 do {
469 c = tok_nextc(tok);
470 } while (c == ' ' || c == '\t');
471
472 /* Set start of current token */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000473 tok->start = tok->cur - 1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000474
475 /* Skip comment */
476 if (c == '#') {
477 /* Hack to allow overriding the tabsize in the file.
478 This is also recognized by vi, when it occurs near the
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000479 beginning or end of the file. (Will vi never die...?)
480 For Python it must be at the beginning of the file! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000481 int x;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000482 /* XXX The cast to (unsigned char *) is needed by THINK C 3.0 */
Guido van Rossumb156d721990-12-20 23:13:00 +0000483 if (sscanf(/*(unsigned char *)*/tok->cur,
Guido van Rossum3f5da241990-12-20 15:06:42 +0000484 " vi:set tabsize=%d:", &x) == 1 &&
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000485 x >= 1 && x <= 40) {
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000486 /* fprintf(stderr, "# vi:set tabsize=%d:\n", x); */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000487 tok->tabsize = x;
488 }
489 do {
490 c = tok_nextc(tok);
491 } while (c != EOF && c != '\n');
492 }
493
494 /* Check for EOF and errors now */
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000495 if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000496 return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000497 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000498
499 /* Identifier (most frequent token!) */
500 if (isalpha(c) || c == '_') {
501 do {
502 c = tok_nextc(tok);
503 } while (isalnum(c) || c == '_');
504 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000505 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000506 *p_end = tok->cur;
507 return NAME;
508 }
509
510 /* Newline */
511 if (c == '\n') {
512 tok->atbol = 1;
Guido van Rossuma849b831993-05-12 11:35:44 +0000513 if (blankline || tok->level > 0)
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000514 goto nextline;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000515 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000516 *p_end = tok->cur - 1; /* Leave '\n' out of the string */
517 return NEWLINE;
518 }
519
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000520 /* Period or number starting with period? */
521 if (c == '.') {
522 c = tok_nextc(tok);
523 if (isdigit(c)) {
524 goto fraction;
525 }
526 else {
527 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000528 *p_start = tok->start;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000529 *p_end = tok->cur;
530 return DOT;
531 }
532 }
533
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000534 /* Number */
535 if (isdigit(c)) {
536 if (c == '0') {
537 /* Hex or octal */
538 c = tok_nextc(tok);
539 if (c == '.')
540 goto fraction;
541 if (c == 'x' || c == 'X') {
542 /* Hex */
543 do {
544 c = tok_nextc(tok);
545 } while (isxdigit(c));
546 }
547 else {
Guido van Rossum94309451991-12-10 14:01:05 +0000548 /* XXX This is broken! E.g.,
549 09.9 should be accepted as float! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000550 /* Octal; c is first char of it */
551 /* There's no 'isoctdigit' macro, sigh */
552 while ('0' <= c && c < '8') {
553 c = tok_nextc(tok);
554 }
555 }
Guido van Rossumf023c461991-05-05 20:16:20 +0000556 if (c == 'l' || c == 'L')
557 c = tok_nextc(tok);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000558 }
559 else {
560 /* Decimal */
561 do {
562 c = tok_nextc(tok);
563 } while (isdigit(c));
Guido van Rossumf023c461991-05-05 20:16:20 +0000564 if (c == 'l' || c == 'L')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000565 c = tok_nextc(tok);
Guido van Rossumf023c461991-05-05 20:16:20 +0000566 else {
567 /* Accept floating point numbers.
568 XXX This accepts incomplete things like
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000569 XXX 12e or 1e+; worry run-time */
Guido van Rossumf023c461991-05-05 20:16:20 +0000570 if (c == '.') {
571 fraction:
572 /* Fraction */
573 do {
574 c = tok_nextc(tok);
575 } while (isdigit(c));
576 }
577 if (c == 'e' || c == 'E') {
578 /* Exponent part */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000579 c = tok_nextc(tok);
Guido van Rossumf023c461991-05-05 20:16:20 +0000580 if (c == '+' || c == '-')
581 c = tok_nextc(tok);
582 while (isdigit(c)) {
583 c = tok_nextc(tok);
584 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000585 }
586 }
587 }
588 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000589 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000590 *p_end = tok->cur;
591 return NUMBER;
592 }
593
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000594 /* String */
595 if (c == '\'' || c == '"') {
596 int quote = c;
597 int triple = 0;
598 int tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000599 for (;;) {
600 c = tok_nextc(tok);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000601 if (c == '\n') {
602 if (!triple) {
603 tok->done = E_TOKEN;
604 tok_backup(tok, c);
605 return ERRORTOKEN;
606 }
607 tripcount = 0;
608 }
609 else if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000610 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000611 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000612 return ERRORTOKEN;
613 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000614 else if (c == quote) {
615 tripcount++;
616 if (tok->cur == tok->start+2) {
617 c = tok_nextc(tok);
618 if (c == quote) {
619 triple = 1;
620 tripcount = 0;
621 continue;
622 }
623 tok_backup(tok, c);
624 }
625 if (!triple || tripcount == 3)
626 break;
627 }
628 else if (c == '\\') {
629 tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000630 c = tok_nextc(tok);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000631 if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000632 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000633 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000634 return ERRORTOKEN;
635 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000636 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000637 else
638 tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000639 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000640 *p_start = tok->start;
Guido van Rossum8054fad1993-10-26 15:19:44 +0000641 *p_end = tok->cur;
642 return STRING;
643 }
644
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000645 /* Line continuation */
646 if (c == '\\') {
647 c = tok_nextc(tok);
648 if (c != '\n') {
649 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000650 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000651 return ERRORTOKEN;
652 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000653 goto again; /* Read next line */
654 }
655
Guido van Rossumfbab9051991-10-20 20:25:03 +0000656 /* Check for two-character token */
657 {
658 int c2 = tok_nextc(tok);
659 int token = tok_2char(c, c2);
660 if (token != OP) {
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000661 *p_start = tok->start;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000662 *p_end = tok->cur;
663 return token;
664 }
665 tok_backup(tok, c2);
666 }
667
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000668 /* Keep track of parentheses nesting level */
Guido van Rossuma849b831993-05-12 11:35:44 +0000669 switch (c) {
670 case '(':
671 case '[':
672 case '{':
673 tok->level++;
674 break;
675 case ')':
676 case ']':
677 case '}':
678 tok->level--;
679 break;
680 }
681
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000682 /* Punctuation character */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000683 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000684 *p_end = tok->cur;
685 return tok_1char(c);
686}
687
688
#ifdef DEBUG

/* Debugging aid: print the name of token type `type' on stdout,
   followed, for NAME, NUMBER, STRING and OP tokens, by the token
   text (the characters from start up to end) in parentheses. */

void
tok_dump(type, start, end)
	int type;
	char *start, *end;
{
	int show_text;

	switch (type) {
	case NAME:
	case NUMBER:
	case STRING:
	case OP:
		show_text = 1;
		break;
	default:
		show_text = 0;
		break;
	}

	printf("%s", tok_name[type]);
	if (show_text)
		printf("(%.*s)", (int)(end - start), start);
}

#endif