blob: 896b0ce5362e65c04d8f8a46adb059db1bf0da2d [file] [log] [blame]
/***********************************************************
Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
The Netherlands.

                        All Rights Reserved

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of Stichting Mathematisch
Centrum or CWI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior permission.

STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

******************************************************************/
24
/* Tokenizer implementation */
26
Guido van Rossum3f5da241990-12-20 15:06:42 +000027#include "pgenheaders.h"
28
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000029#include <ctype.h>
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000030
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000031#include "tokenizer.h"
32#include "errcode.h"
33
extern char *my_readline PROTO((char *));
/* Interactive line reader; tok_nextc() passes it the current prompt.
   Return malloc'ed string including trailing \n;
   empty malloc'ed string for EOF;
   NULL if interrupted.
   tok_nextc() releases the returned string with free(). */

/* Initial tab stop width, stored in tok->tabsize by tok_new().
   Don't ever change this -- it would break the portability of Python code */
#define TABSIZE 8

/* Forward declarations of the file-local helpers defined below */
static struct tok_state *tok_new PROTO((void));
static int tok_nextc PROTO((struct tok_state *tok));
static void tok_backup PROTO((struct tok_state *tok, int c));
46
/* Token names */

/* Printable name of every token type, indexed by token number
   (tok_dump() below looks names up as tok_name[type]).
   This table must match the #defines in token.h! */

char *tok_name[] = {
	/* Structural tokens */
	"ENDMARKER", "NAME", "NUMBER", "STRING",
	"NEWLINE", "INDENT", "DEDENT",

	/* Brackets and separators */
	"LPAR", "RPAR", "LSQB", "RSQB",
	"COLON", "COMMA", "SEMI",

	/* Single-character operators */
	"PLUS", "MINUS", "STAR", "SLASH",
	"VBAR", "AMPER", "LESS", "GREATER",
	"EQUAL", "DOT", "PERCENT", "BACKQUOTE",
	"LBRACE", "RBRACE",

	/* Comparison operators made of two characters */
	"EQEQUAL", "NOTEQUAL", "LESSEQUAL", "GREATEREQUAL",

	/* Bitwise operators */
	"TILDE", "CIRCUMFLEX", "LEFTSHIFT", "RIGHTSHIFT",

	/* Generic operator and the two pseudo entries closing the table */
	"OP", "<ERRORTOKEN>", "<N_TOKENS>"
};
91
92
93/* Create and initialize a new tok_state structure */
94
95static struct tok_state *
96tok_new()
97{
98 struct tok_state *tok = NEW(struct tok_state, 1);
99 if (tok == NULL)
100 return NULL;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000101 tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102 tok->done = E_OK;
103 tok->fp = NULL;
104 tok->tabsize = TABSIZE;
105 tok->indent = 0;
106 tok->indstack[0] = 0;
107 tok->atbol = 1;
108 tok->pendin = 0;
109 tok->prompt = tok->nextprompt = NULL;
110 tok->lineno = 0;
Guido van Rossuma849b831993-05-12 11:35:44 +0000111 tok->level = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000112 return tok;
113}
114
115
116/* Set up tokenizer for string */
117
118struct tok_state *
119tok_setups(str)
120 char *str;
121{
122 struct tok_state *tok = tok_new();
123 if (tok == NULL)
124 return NULL;
Guido van Rossum1a817c01994-09-19 08:06:25 +0000125 tok->buf = tok->cur = tok->end = tok->inp = str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000126 return tok;
127}
128
129
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000130/* Set up tokenizer for file */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000131
132struct tok_state *
133tok_setupf(fp, ps1, ps2)
134 FILE *fp;
135 char *ps1, *ps2;
136{
137 struct tok_state *tok = tok_new();
138 if (tok == NULL)
139 return NULL;
140 if ((tok->buf = NEW(char, BUFSIZ)) == NULL) {
141 DEL(tok);
142 return NULL;
143 }
144 tok->cur = tok->inp = tok->buf;
145 tok->end = tok->buf + BUFSIZ;
146 tok->fp = fp;
147 tok->prompt = ps1;
148 tok->nextprompt = ps2;
149 return tok;
150}
151
152
153/* Free a tok_state structure */
154
155void
156tok_free(tok)
157 struct tok_state *tok;
158{
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000159 if (tok->fp != NULL && tok->buf != NULL)
160 DEL(tok->buf);
161 DEL(tok);
162}
163
164
165/* Get next char, updating state; error code goes into tok->done */
166
167static int
168tok_nextc(tok)
169 register struct tok_state *tok;
170{
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000171 for (;;) {
Guido van Rossum1a817c01994-09-19 08:06:25 +0000172 if (tok->cur != tok->inp) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000173 return *tok->cur++; /* Fast path */
Guido van Rossum1a817c01994-09-19 08:06:25 +0000174 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000175 if (tok->done != E_OK)
176 return EOF;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000177 if (tok->fp == NULL) {
Guido van Rossum1a817c01994-09-19 08:06:25 +0000178 char *end = strchr(tok->inp, '\n');
179 if (end != NULL)
180 end++;
181 else {
182 end = strchr(tok->inp, '\0');
183 if (end == tok->inp) {
184 tok->done = E_EOF;
185 return EOF;
186 }
187 }
188 if (tok->start == NULL)
189 tok->buf = tok->cur;
190 tok->lineno++;
191 tok->inp = end;
192 return *tok->cur++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000193 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000194 if (tok->prompt != NULL) {
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000195 char *new = my_readline(tok->prompt);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000196 if (tok->nextprompt != NULL)
197 tok->prompt = tok->nextprompt;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000198 if (new == NULL)
199 tok->done = E_INTR;
200 else if (*new == '\0') {
201 free(new);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000202 tok->done = E_EOF;
203 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000204 else if (tok->start != NULL) {
205 int start = tok->start - tok->buf;
206 int oldlen = tok->cur - tok->buf;
207 int newlen = oldlen + strlen(new);
208 char *buf = realloc(tok->buf, newlen+1);
209 tok->lineno++;
210 if (buf == NULL) {
211 free(tok->buf);
Guido van Rossum588633d1994-12-30 15:46:02 +0000212 tok->buf = NULL;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000213 free(new);
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000214 tok->done = E_NOMEM;
215 return EOF;
216 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000217 tok->buf = buf;
218 tok->cur = tok->buf + oldlen;
219 strcpy(tok->buf + oldlen, new);
220 free(new);
221 tok->inp = tok->buf + newlen;
222 tok->end = tok->inp + 1;
223 tok->start = tok->buf + start;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000224 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000225 else {
226 tok->lineno++;
227 if (tok->buf != NULL)
228 free(tok->buf);
229 tok->buf = new;
230 tok->cur = tok->buf;
231 tok->inp = strchr(tok->buf, '\0');
232 tok->end = tok->inp + 1;
233 }
234 }
235 else {
236 int done = 0;
237 int cur = 0;
Guido van Rossum2e96eb91995-06-14 18:26:02 +0000238 char *pt;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000239 if (tok->start == NULL) {
240 if (tok->buf == NULL) {
241 tok->buf = NEW(char, BUFSIZ);
242 if (tok->buf == NULL) {
243 tok->done = E_NOMEM;
244 return EOF;
245 }
246 tok->end = tok->buf + BUFSIZ;
247 }
248 if (fgets(tok->buf, (int)(tok->end - tok->buf),
249 tok->fp) == NULL) {
250 tok->done = E_EOF;
251 done = 1;
252 }
253 else {
254 tok->done = E_OK;
255 tok->inp = strchr(tok->buf, '\0');
256 done = tok->inp[-1] == '\n';
257 }
258 }
259 else {
260 cur = tok->cur - tok->buf;
Guido van Rossum78c05351995-01-17 16:12:13 +0000261 if (feof(tok->fp)) {
262 tok->done = E_EOF;
263 done = 1;
264 }
265 else
266 tok->done = E_OK;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000267 }
268 tok->lineno++;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000269 /* Read until '\n' or EOF */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000270 while (!done) {
271 int curstart = tok->start == NULL ? -1 :
272 tok->start - tok->buf;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000273 int curvalid = tok->inp - tok->buf;
274 int cursize = tok->end - tok->buf;
275 int newsize = cursize + BUFSIZ;
276 char *newbuf = tok->buf;
277 RESIZE(newbuf, char, newsize);
278 if (newbuf == NULL) {
279 tok->done = E_NOMEM;
280 tok->cur = tok->inp;
281 return EOF;
282 }
283 tok->buf = newbuf;
284 tok->inp = tok->buf + curvalid;
285 tok->end = tok->buf + newsize;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000286 tok->start = curstart < 0 ? NULL :
287 tok->buf + curstart;
288 if (fgets(tok->inp,
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000289 (int)(tok->end - tok->inp),
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000290 tok->fp) == NULL) {
291 /* Last line does not end in \n,
292 fake one */
293 strcpy(tok->inp, "\n");
294 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000295 tok->inp = strchr(tok->inp, '\0');
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000296 done = tok->inp[-1] == '\n';
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000297 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000298 tok->cur = tok->buf + cur;
Guido van Rossum2e96eb91995-06-14 18:26:02 +0000299 /* replace "\r\n" with "\n" */
300 pt = tok->inp - 2;
301 if (pt >= tok->buf && *pt == '\r') {
302 *pt++ = '\n';
303 *pt = '\0';
304 tok->inp = pt;
305 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000306 }
307 if (tok->done != E_OK) {
308 if (tok->prompt != NULL)
309 fprintf(stderr, "\n");
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000310 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000311 return EOF;
312 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000313 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000314 /*NOTREACHED*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000315}
316
317
318/* Back-up one character */
319
320static void
321tok_backup(tok, c)
322 register struct tok_state *tok;
323 register int c;
324{
325 if (c != EOF) {
Guido van Rossum588633d1994-12-30 15:46:02 +0000326 if (--tok->cur < tok->buf)
327 fatal("tok_backup: begin of buffer");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000328 if (*tok->cur != c)
329 *tok->cur = c;
330 }
331}
332
333
334/* Return the token corresponding to a single character */
335
336int
337tok_1char(c)
338 int c;
339{
340 switch (c) {
341 case '(': return LPAR;
342 case ')': return RPAR;
343 case '[': return LSQB;
344 case ']': return RSQB;
345 case ':': return COLON;
346 case ',': return COMMA;
347 case ';': return SEMI;
348 case '+': return PLUS;
349 case '-': return MINUS;
350 case '*': return STAR;
351 case '/': return SLASH;
352 case '|': return VBAR;
353 case '&': return AMPER;
354 case '<': return LESS;
355 case '>': return GREATER;
356 case '=': return EQUAL;
357 case '.': return DOT;
358 case '%': return PERCENT;
359 case '`': return BACKQUOTE;
360 case '{': return LBRACE;
361 case '}': return RBRACE;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000362 case '^': return CIRCUMFLEX;
363 case '~': return TILDE;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000364 default: return OP;
365 }
366}
367
368
Guido van Rossumfbab9051991-10-20 20:25:03 +0000369int
370tok_2char(c1, c2)
371 int c1, c2;
372{
373 switch (c1) {
374 case '=':
375 switch (c2) {
376 case '=': return EQEQUAL;
377 }
378 break;
379 case '!':
380 switch (c2) {
381 case '=': return NOTEQUAL;
382 }
383 break;
384 case '<':
385 switch (c2) {
386 case '>': return NOTEQUAL;
387 case '=': return LESSEQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000388 case '<': return LEFTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000389 }
390 break;
391 case '>':
392 switch (c2) {
393 case '=': return GREATEREQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000394 case '>': return RIGHTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000395 }
396 break;
397 }
398 return OP;
399}
400
401
/* Get next token, after space stripping etc.

   Returns the token type.  *p_start and *p_end are reset to NULL on
   entry and, for NAME, NUMBER, STRING, NEWLINE, DOT and the operator
   and punctuation tokens, set to the first character of the token and
   to one past its last character.  INDENT, DEDENT, ENDMARKER and
   ERRORTOKEN are returned with both still NULL.

   At end of input the result is ENDMARKER when tok->done is E_EOF and
   ERRORTOKEN otherwise.  Lexical errors detected here set tok->done
   to E_TOKEN and return ERRORTOKEN. */

int
tok_get(tok, p_start, p_end)
	register struct tok_state *tok; /* In/out: tokenizer state */
	char **p_start, **p_end; /* Out: point to start/end of token */
{
	register int c;
	int blankline;

	*p_start = *p_end = NULL;
  nextline:
	/* While tok->start is NULL, tok_nextc() does not have to keep
	   the text it has already delivered */
	tok->start = NULL;
	blankline = 0;

	/* Get indentation level */
	if (tok->atbol) {
		register int col = 0;
		tok->atbol = 0;
		/* Compute the column of the first non-blank character */
		for (;;) {
			c = tok_nextc(tok);
			if (c == ' ')
				col++;
			else if (c == '\t')
				/* Advance to the next tab stop */
				col = (col/tok->tabsize + 1) * tok->tabsize;
			else if (c == '\014') /* Control-L (formfeed) */
				col = 0; /* For Emacs users */
			else
				break;
		}
		tok_backup(tok, c);
		if (c == '#' || c == '\n') {
			/* Lines with only whitespace and/or comments
			   shouldn't affect the indentation and are
			   not passed to the parser as NEWLINE tokens,
			   except *totally* empty lines in interactive
			   mode, which signal the end of a command group. */
			if (col == 0 && c == '\n' && tok->prompt != NULL)
				blankline = 0; /* Let it through */
			else
				blankline = 1; /* Ignore completely */
			/* We can't jump back right here since we still
			   may need to skip to the end of a comment */
		}
		/* Indentation is not looked at inside brackets
		   (tok->level != 0) */
		if (!blankline && tok->level == 0) {
			if (col == tok->indstack[tok->indent]) {
				/* No change */
			}
			else if (col > tok->indstack[tok->indent]) {
				/* Indent -- always one */
				if (tok->indent+1 >= MAXINDENT) {
					fprintf(stderr, "excessive indent\n");
					tok->done = E_TOKEN;
					tok->cur = tok->inp;
					return ERRORTOKEN;
				}
				tok->pendin++;
				tok->indstack[++tok->indent] = col;
			}
			else /* col < tok->indstack[tok->indent] */ {
				/* Dedent -- any number, must be consistent */
				while (tok->indent > 0 &&
					col < tok->indstack[tok->indent]) {
					tok->indent--;
					tok->pendin--;
				}
				/* The new column must match one of the
				   enclosing indentation levels */
				if (col != tok->indstack[tok->indent]) {
					fprintf(stderr, "inconsistent dedent\n");
					tok->done = E_TOKEN;
					tok->cur = tok->inp;
					return ERRORTOKEN;
				}
			}
		}
	}

	tok->start = tok->cur;

	/* Return pending indents/dedents */
	/* tok->pendin is positive for INDENT tokens still to be
	   delivered and negative for DEDENT tokens; one is handed
	   out per call */
	if (tok->pendin != 0) {
		if (tok->pendin < 0) {
			tok->pendin++;
			return DEDENT;
		}
		else {
			tok->pendin--;
			return INDENT;
		}
	}

 again:
	tok->start = NULL;
	/* Skip spaces */
	do {
		c = tok_nextc(tok);
	} while (c == ' ' || c == '\t' || c == '\014');

	/* Set start of current token */
	tok->start = tok->cur - 1;

	/* Skip comment */
	if (c == '#') {
		/* Hack to allow overriding the tabsize in the file.
		   This is also recognized by vi, when it occurs near the
		   beginning or end of the file.  (Will vi never die...?)
		   For Python it must be at the beginning of the file! */
		/* XXX The real vi syntax is actually different :-( */
		/* XXX Should recognize Emacs syntax, too */
		int x;
		/* Only values from 1 to 40 are accepted */
		if (sscanf(tok->cur,
				" vi:set tabsize=%d:", &x) == 1 &&
						x >= 1 && x <= 40) {
			/* fprintf(stderr, "# vi:set tabsize=%d:\n", x); */
			tok->tabsize = x;
		}
		/* Consume the rest of the line; c ends up as the
		   newline (or EOF) and is handled below */
		do {
			c = tok_nextc(tok);
		} while (c != EOF && c != '\n');
	}

	/* Check for EOF and errors now */
	if (c == EOF) {
		return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
	}

	/* Identifier (most frequent token!) */
	if (isalpha(c) || c == '_') {
		do {
			c = tok_nextc(tok);
		} while (isalnum(c) || c == '_');
		tok_backup(tok, c);
		*p_start = tok->start;
		*p_end = tok->cur;
		return NAME;
	}

	/* Newline */
	if (c == '\n') {
		tok->atbol = 1;
		/* No NEWLINE token for ignorable lines or inside
		   brackets; just go on with the next line */
		if (blankline || tok->level > 0)
			goto nextline;
		*p_start = tok->start;
		*p_end = tok->cur - 1; /* Leave '\n' out of the string */
		return NEWLINE;
	}

	/* Period or number starting with period? */
	if (c == '.') {
		c = tok_nextc(tok);
		if (isdigit(c)) {
			/* Jumps into the decimal-number code below */
			goto fraction;
		}
		else {
			tok_backup(tok, c);
			*p_start = tok->start;
			*p_end = tok->cur;
			return DOT;
		}
	}

	/* Number */
	if (isdigit(c)) {
		if (c == '0') {
			/* Hex or octal */
			c = tok_nextc(tok);
			if (c == '.')
				goto fraction;
			if (c == 'x' || c == 'X') {
				/* Hex */
				do {
					c = tok_nextc(tok);
				} while (isxdigit(c));
			}
			else {
				/* XXX This is broken!  E.g.,
				   09.9 should be accepted as float! */
				/* Octal; c is first char of it */
				/* There's no 'isoctdigit' macro, sigh */
				while ('0' <= c && c < '8') {
					c = tok_nextc(tok);
				}
			}
			/* Optional 'l' or 'L' suffix */
			if (c == 'l' || c == 'L')
				c = tok_nextc(tok);
		}
		else {
			/* Decimal */
			do {
				c = tok_nextc(tok);
			} while (isdigit(c));
			if (c == 'l' || c == 'L')
				c = tok_nextc(tok);
			else {
				/* Accept floating point numbers.
				   XXX This accepts incomplete things like
				   XXX 12e or 1e+; worry run-time */
				if (c == '.') {
		fraction:
					/* Fraction */
					do {
						c = tok_nextc(tok);
					} while (isdigit(c));
				}
				if (c == 'e' || c == 'E') {
					/* Exponent part */
					c = tok_nextc(tok);
					if (c == '+' || c == '-')
						c = tok_nextc(tok);
					while (isdigit(c)) {
						c = tok_nextc(tok);
					}
				}
			}
		}
		tok_backup(tok, c);
		*p_start = tok->start;
		*p_end = tok->cur;
		return NUMBER;
	}

	/* String */
	if (c == '\'' || c == '"') {
		int quote = c;		/* the opening quote character */
		int triple = 0;		/* set for a triple-quoted string */
		int tripcount = 0;	/* consecutive quotes just seen */
		for (;;) {
			c = tok_nextc(tok);
			if (c == '\n') {
				/* Only triple-quoted strings may span
				   lines */
				if (!triple) {
					tok->done = E_TOKEN;
					tok_backup(tok, c);
					return ERRORTOKEN;
				}
				tripcount = 0;
			}
			else if (c == EOF) {
				/* Unterminated string */
				tok->done = E_TOKEN;
				tok->cur = tok->inp;
				return ERRORTOKEN;
			}
			else if (c == quote) {
				tripcount++;
				/* A second quote right after the opening
				   one: empty string, or the start of a
				   triple-quoted string if a third quote
				   follows */
				if (tok->cur == tok->start+2) {
					c = tok_nextc(tok);
					if (c == quote) {
						triple = 1;
						tripcount = 0;
						continue;
					}
					tok_backup(tok, c);
				}
				/* Done at the first quote of an ordinary
				   string, or at three in a row of a
				   triple-quoted one */
				if (!triple || tripcount == 3)
					break;
			}
			else if (c == '\\') {
				/* Skip the escaped character, whatever
				   it is */
				tripcount = 0;
				c = tok_nextc(tok);
				if (c == EOF) {
					tok->done = E_TOKEN;
					tok->cur = tok->inp;
					return ERRORTOKEN;
				}
			}
			else
				tripcount = 0;
		}
		*p_start = tok->start;
		*p_end = tok->cur;
		return STRING;
	}

	/* Line continuation */
	if (c == '\\') {
		/* The backslash must be the last character on the line */
		c = tok_nextc(tok);
		if (c != '\n') {
			tok->done = E_TOKEN;
			tok->cur = tok->inp;
			return ERRORTOKEN;
		}
		goto again; /* Read next line */
	}

	/* Check for two-character token */
	{
		int c2 = tok_nextc(tok);
		int token = tok_2char(c, c2);
		if (token != OP) {
			*p_start = tok->start;
			*p_end = tok->cur;
			return token;
		}
		/* Not a pair: put the second character back */
		tok_backup(tok, c2);
	}

	/* Keep track of parentheses nesting level */
	switch (c) {
	case '(':
	case '[':
	case '{':
		tok->level++;
		break;
	case ')':
	case ']':
	case '}':
		tok->level--;
		break;
	}

	/* Punctuation character */
	*p_start = tok->start;
	*p_end = tok->cur;
	return tok_1char(c);
}
715
716
#ifdef DEBUG

/* Print a token on stdout for debugging: the token name and, for NAME,
   NUMBER, STRING and OP tokens, the token text (the characters from
   start up to, but not including, end) in parentheses. */

void
tok_dump(type, start, end)
	int type;
	char *start, *end;
{
	printf("%s", tok_name[type]);
	switch (type) {
	case NAME:
	case NUMBER:
	case STRING:
	case OP:
		printf("(%.*s)", (int)(end - start), start);
		break;
	default:
		break;
	}
}

#endif