blob: 942072dc1f0ee03d4e85f459c2e37c5202e27870 [file] [log] [blame]
/***********************************************************
Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
The Netherlands.

                        All Rights Reserved

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of Stichting Mathematisch
Centrum or CWI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior permission.

STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

******************************************************************/
24
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000025/* Tokenizer implementation */
26
Guido van Rossum3f5da241990-12-20 15:06:42 +000027#include "pgenheaders.h"
28
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000029#include <ctype.h>
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000030
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000031#include "tokenizer.h"
32#include "errcode.h"
33
Guido van Rossumf4b1a641994-08-29 12:43:07 +000034extern char *my_readline PROTO((char *));
35/* Return malloc'ed string including trailing \n;
36 empty malloc'ed string for EOF;
37 NULL if interrupted */
38
Guido van Rossum4fe87291992-02-26 15:24:44 +000039/* Don't ever change this -- it would break the portability of Python code */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000040#define TABSIZE 8
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000041
Guido van Rossum3f5da241990-12-20 15:06:42 +000042/* Forward */
43static struct tok_state *tok_new PROTO((void));
44static int tok_nextc PROTO((struct tok_state *tok));
45static void tok_backup PROTO((struct tok_state *tok, int c));
46
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000047/* Token names */
48
/* Printable name of every token type, indexed by token number.
   This table must match the #defines in token.h! */
char *tok_name[] = {
	/* Basic token kinds */
	"ENDMARKER", "NAME", "NUMBER", "STRING",
	"NEWLINE", "INDENT", "DEDENT",
	/* Single-character punctuation */
	"LPAR", "RPAR", "LSQB", "RSQB",
	"COLON", "COMMA", "SEMI",
	"PLUS", "MINUS", "STAR", "SLASH",
	"VBAR", "AMPER", "LESS", "GREATER",
	"EQUAL", "DOT", "PERCENT", "BACKQUOTE",
	"LBRACE", "RBRACE",
	/* Two-character operators and later additions */
	"EQEQUAL", "NOTEQUAL", "LESSEQUAL", "GREATEREQUAL",
	"TILDE", "CIRCUMFLEX", "LEFTSHIFT", "RIGHTSHIFT",
	"DOUBLESTAR",
	/* Generic operator and the pseudo-entries closing the table */
	"OP",
	"<ERRORTOKEN>",
	"<N_TOKENS>"
};
92
93
94/* Create and initialize a new tok_state structure */
95
96static struct tok_state *
97tok_new()
98{
99 struct tok_state *tok = NEW(struct tok_state, 1);
100 if (tok == NULL)
101 return NULL;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000102 tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103 tok->done = E_OK;
104 tok->fp = NULL;
105 tok->tabsize = TABSIZE;
106 tok->indent = 0;
107 tok->indstack[0] = 0;
108 tok->atbol = 1;
109 tok->pendin = 0;
110 tok->prompt = tok->nextprompt = NULL;
111 tok->lineno = 0;
Guido van Rossuma849b831993-05-12 11:35:44 +0000112 tok->level = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000113 return tok;
114}
115
116
117/* Set up tokenizer for string */
118
119struct tok_state *
120tok_setups(str)
121 char *str;
122{
123 struct tok_state *tok = tok_new();
124 if (tok == NULL)
125 return NULL;
Guido van Rossum1a817c01994-09-19 08:06:25 +0000126 tok->buf = tok->cur = tok->end = tok->inp = str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000127 return tok;
128}
129
130
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000131/* Set up tokenizer for file */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000132
133struct tok_state *
134tok_setupf(fp, ps1, ps2)
135 FILE *fp;
136 char *ps1, *ps2;
137{
138 struct tok_state *tok = tok_new();
139 if (tok == NULL)
140 return NULL;
141 if ((tok->buf = NEW(char, BUFSIZ)) == NULL) {
142 DEL(tok);
143 return NULL;
144 }
145 tok->cur = tok->inp = tok->buf;
146 tok->end = tok->buf + BUFSIZ;
147 tok->fp = fp;
148 tok->prompt = ps1;
149 tok->nextprompt = ps2;
150 return tok;
151}
152
153
154/* Free a tok_state structure */
155
156void
157tok_free(tok)
158 struct tok_state *tok;
159{
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000160 if (tok->fp != NULL && tok->buf != NULL)
161 DEL(tok->buf);
162 DEL(tok);
163}
164
165
166/* Get next char, updating state; error code goes into tok->done */
167
168static int
169tok_nextc(tok)
170 register struct tok_state *tok;
171{
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000172 for (;;) {
Guido van Rossum1a817c01994-09-19 08:06:25 +0000173 if (tok->cur != tok->inp) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000174 return *tok->cur++; /* Fast path */
Guido van Rossum1a817c01994-09-19 08:06:25 +0000175 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000176 if (tok->done != E_OK)
177 return EOF;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000178 if (tok->fp == NULL) {
Guido van Rossum1a817c01994-09-19 08:06:25 +0000179 char *end = strchr(tok->inp, '\n');
180 if (end != NULL)
181 end++;
182 else {
183 end = strchr(tok->inp, '\0');
184 if (end == tok->inp) {
185 tok->done = E_EOF;
186 return EOF;
187 }
188 }
189 if (tok->start == NULL)
190 tok->buf = tok->cur;
191 tok->lineno++;
192 tok->inp = end;
193 return *tok->cur++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000194 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000195 if (tok->prompt != NULL) {
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000196 char *new = my_readline(tok->prompt);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000197 if (tok->nextprompt != NULL)
198 tok->prompt = tok->nextprompt;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000199 if (new == NULL)
200 tok->done = E_INTR;
201 else if (*new == '\0') {
202 free(new);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000203 tok->done = E_EOF;
204 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000205 else if (tok->start != NULL) {
206 int start = tok->start - tok->buf;
207 int oldlen = tok->cur - tok->buf;
208 int newlen = oldlen + strlen(new);
209 char *buf = realloc(tok->buf, newlen+1);
210 tok->lineno++;
211 if (buf == NULL) {
212 free(tok->buf);
Guido van Rossum588633d1994-12-30 15:46:02 +0000213 tok->buf = NULL;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000214 free(new);
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000215 tok->done = E_NOMEM;
216 return EOF;
217 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000218 tok->buf = buf;
219 tok->cur = tok->buf + oldlen;
220 strcpy(tok->buf + oldlen, new);
221 free(new);
222 tok->inp = tok->buf + newlen;
223 tok->end = tok->inp + 1;
224 tok->start = tok->buf + start;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000225 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000226 else {
227 tok->lineno++;
228 if (tok->buf != NULL)
229 free(tok->buf);
230 tok->buf = new;
231 tok->cur = tok->buf;
232 tok->inp = strchr(tok->buf, '\0');
233 tok->end = tok->inp + 1;
234 }
235 }
236 else {
237 int done = 0;
238 int cur = 0;
Guido van Rossum2e96eb91995-06-14 18:26:02 +0000239 char *pt;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000240 if (tok->start == NULL) {
241 if (tok->buf == NULL) {
242 tok->buf = NEW(char, BUFSIZ);
243 if (tok->buf == NULL) {
244 tok->done = E_NOMEM;
245 return EOF;
246 }
247 tok->end = tok->buf + BUFSIZ;
248 }
249 if (fgets(tok->buf, (int)(tok->end - tok->buf),
250 tok->fp) == NULL) {
251 tok->done = E_EOF;
252 done = 1;
253 }
254 else {
255 tok->done = E_OK;
256 tok->inp = strchr(tok->buf, '\0');
257 done = tok->inp[-1] == '\n';
258 }
259 }
260 else {
261 cur = tok->cur - tok->buf;
Guido van Rossum78c05351995-01-17 16:12:13 +0000262 if (feof(tok->fp)) {
263 tok->done = E_EOF;
264 done = 1;
265 }
266 else
267 tok->done = E_OK;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000268 }
269 tok->lineno++;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000270 /* Read until '\n' or EOF */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000271 while (!done) {
272 int curstart = tok->start == NULL ? -1 :
273 tok->start - tok->buf;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000274 int curvalid = tok->inp - tok->buf;
275 int cursize = tok->end - tok->buf;
Guido van Rossum3f6bb861995-09-21 20:36:34 +0000276 int newsize = curvalid + BUFSIZ;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000277 char *newbuf = tok->buf;
278 RESIZE(newbuf, char, newsize);
279 if (newbuf == NULL) {
280 tok->done = E_NOMEM;
281 tok->cur = tok->inp;
282 return EOF;
283 }
284 tok->buf = newbuf;
285 tok->inp = tok->buf + curvalid;
286 tok->end = tok->buf + newsize;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000287 tok->start = curstart < 0 ? NULL :
288 tok->buf + curstart;
289 if (fgets(tok->inp,
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000290 (int)(tok->end - tok->inp),
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000291 tok->fp) == NULL) {
292 /* Last line does not end in \n,
293 fake one */
294 strcpy(tok->inp, "\n");
295 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000296 tok->inp = strchr(tok->inp, '\0');
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000297 done = tok->inp[-1] == '\n';
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000298 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000299 tok->cur = tok->buf + cur;
Guido van Rossum2e96eb91995-06-14 18:26:02 +0000300 /* replace "\r\n" with "\n" */
301 pt = tok->inp - 2;
302 if (pt >= tok->buf && *pt == '\r') {
303 *pt++ = '\n';
304 *pt = '\0';
305 tok->inp = pt;
306 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000307 }
308 if (tok->done != E_OK) {
309 if (tok->prompt != NULL)
310 fprintf(stderr, "\n");
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000311 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000312 return EOF;
313 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000314 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000315 /*NOTREACHED*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000316}
317
318
319/* Back-up one character */
320
321static void
322tok_backup(tok, c)
323 register struct tok_state *tok;
324 register int c;
325{
326 if (c != EOF) {
Guido van Rossum588633d1994-12-30 15:46:02 +0000327 if (--tok->cur < tok->buf)
328 fatal("tok_backup: begin of buffer");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000329 if (*tok->cur != c)
330 *tok->cur = c;
331 }
332}
333
334
335/* Return the token corresponding to a single character */
336
337int
338tok_1char(c)
339 int c;
340{
341 switch (c) {
342 case '(': return LPAR;
343 case ')': return RPAR;
344 case '[': return LSQB;
345 case ']': return RSQB;
346 case ':': return COLON;
347 case ',': return COMMA;
348 case ';': return SEMI;
349 case '+': return PLUS;
350 case '-': return MINUS;
351 case '*': return STAR;
352 case '/': return SLASH;
353 case '|': return VBAR;
354 case '&': return AMPER;
355 case '<': return LESS;
356 case '>': return GREATER;
357 case '=': return EQUAL;
358 case '.': return DOT;
359 case '%': return PERCENT;
360 case '`': return BACKQUOTE;
361 case '{': return LBRACE;
362 case '}': return RBRACE;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000363 case '^': return CIRCUMFLEX;
364 case '~': return TILDE;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000365 default: return OP;
366 }
367}
368
369
Guido van Rossumfbab9051991-10-20 20:25:03 +0000370int
371tok_2char(c1, c2)
372 int c1, c2;
373{
374 switch (c1) {
375 case '=':
376 switch (c2) {
377 case '=': return EQEQUAL;
378 }
379 break;
380 case '!':
381 switch (c2) {
382 case '=': return NOTEQUAL;
383 }
384 break;
385 case '<':
386 switch (c2) {
387 case '>': return NOTEQUAL;
388 case '=': return LESSEQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000389 case '<': return LEFTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000390 }
391 break;
392 case '>':
393 switch (c2) {
394 case '=': return GREATEREQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000395 case '>': return RIGHTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000396 }
397 break;
Guido van Rossumf595fde1996-01-12 01:31:58 +0000398 case '*':
399 switch (c2) {
400 case '*': return DOUBLESTAR;
401 }
402 break;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000403 }
404 return OP;
405}
406
407
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000408/* Get next token, after space stripping etc. */
409
410int
411tok_get(tok, p_start, p_end)
412 register struct tok_state *tok; /* In/out: tokenizer state */
413 char **p_start, **p_end; /* Out: point to start/end of token */
414{
415 register int c;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000416 int blankline;
417
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000418 *p_start = *p_end = NULL;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000419 nextline:
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000420 tok->start = NULL;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000421 blankline = 0;
422
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000423 /* Get indentation level */
424 if (tok->atbol) {
425 register int col = 0;
426 tok->atbol = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000427 for (;;) {
428 c = tok_nextc(tok);
429 if (c == ' ')
430 col++;
431 else if (c == '\t')
432 col = (col/tok->tabsize + 1) * tok->tabsize;
Guido van Rossum94d32b11995-07-07 22:27:27 +0000433 else if (c == '\014') /* Control-L (formfeed) */
434 col = 0; /* For Emacs users */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000435 else
436 break;
437 }
438 tok_backup(tok, c);
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000439 if (c == '#' || c == '\n') {
440 /* Lines with only whitespace and/or comments
441 shouldn't affect the indentation and are
442 not passed to the parser as NEWLINE tokens,
443 except *totally* empty lines in interactive
444 mode, which signal the end of a command group. */
445 if (col == 0 && c == '\n' && tok->prompt != NULL)
446 blankline = 0; /* Let it through */
447 else
448 blankline = 1; /* Ignore completely */
449 /* We can't jump back right here since we still
450 may need to skip to the end of a comment */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000451 }
Guido van Rossuma849b831993-05-12 11:35:44 +0000452 if (!blankline && tok->level == 0) {
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000453 if (col == tok->indstack[tok->indent]) {
454 /* No change */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000455 }
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000456 else if (col > tok->indstack[tok->indent]) {
457 /* Indent -- always one */
458 if (tok->indent+1 >= MAXINDENT) {
459 fprintf(stderr, "excessive indent\n");
460 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000461 tok->cur = tok->inp;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000462 return ERRORTOKEN;
463 }
464 tok->pendin++;
465 tok->indstack[++tok->indent] = col;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000466 }
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000467 else /* col < tok->indstack[tok->indent] */ {
468 /* Dedent -- any number, must be consistent */
469 while (tok->indent > 0 &&
470 col < tok->indstack[tok->indent]) {
471 tok->indent--;
472 tok->pendin--;
473 }
474 if (col != tok->indstack[tok->indent]) {
475 fprintf(stderr, "inconsistent dedent\n");
476 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000477 tok->cur = tok->inp;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000478 return ERRORTOKEN;
479 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000480 }
481 }
482 }
483
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000484 tok->start = tok->cur;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000485
486 /* Return pending indents/dedents */
487 if (tok->pendin != 0) {
488 if (tok->pendin < 0) {
489 tok->pendin++;
490 return DEDENT;
491 }
492 else {
493 tok->pendin--;
494 return INDENT;
495 }
496 }
497
498 again:
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000499 tok->start = NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000500 /* Skip spaces */
501 do {
502 c = tok_nextc(tok);
Guido van Rossum94d32b11995-07-07 22:27:27 +0000503 } while (c == ' ' || c == '\t' || c == '\014');
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000504
505 /* Set start of current token */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000506 tok->start = tok->cur - 1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000507
508 /* Skip comment */
509 if (c == '#') {
510 /* Hack to allow overriding the tabsize in the file.
511 This is also recognized by vi, when it occurs near the
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000512 beginning or end of the file. (Will vi never die...?)
513 For Python it must be at the beginning of the file! */
Guido van Rossum1a817c01994-09-19 08:06:25 +0000514 /* XXX The real vi syntax is actually different :-( */
515 /* XXX Should recognize Emacs syntax, too */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000516 int x;
Guido van Rossum1a817c01994-09-19 08:06:25 +0000517 if (sscanf(tok->cur,
Guido van Rossum3f5da241990-12-20 15:06:42 +0000518 " vi:set tabsize=%d:", &x) == 1 &&
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000519 x >= 1 && x <= 40) {
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000520 /* fprintf(stderr, "# vi:set tabsize=%d:\n", x); */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000521 tok->tabsize = x;
522 }
523 do {
524 c = tok_nextc(tok);
525 } while (c != EOF && c != '\n');
526 }
527
528 /* Check for EOF and errors now */
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000529 if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000530 return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000531 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000532
533 /* Identifier (most frequent token!) */
534 if (isalpha(c) || c == '_') {
535 do {
536 c = tok_nextc(tok);
537 } while (isalnum(c) || c == '_');
538 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000539 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000540 *p_end = tok->cur;
541 return NAME;
542 }
543
544 /* Newline */
545 if (c == '\n') {
546 tok->atbol = 1;
Guido van Rossuma849b831993-05-12 11:35:44 +0000547 if (blankline || tok->level > 0)
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000548 goto nextline;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000549 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000550 *p_end = tok->cur - 1; /* Leave '\n' out of the string */
551 return NEWLINE;
552 }
553
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000554 /* Period or number starting with period? */
555 if (c == '.') {
556 c = tok_nextc(tok);
557 if (isdigit(c)) {
558 goto fraction;
559 }
560 else {
561 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000562 *p_start = tok->start;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000563 *p_end = tok->cur;
564 return DOT;
565 }
566 }
Guido van Rossumf595fde1996-01-12 01:31:58 +0000567
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000568 /* Number */
569 if (isdigit(c)) {
570 if (c == '0') {
571 /* Hex or octal */
572 c = tok_nextc(tok);
573 if (c == '.')
574 goto fraction;
Guido van Rossumf595fde1996-01-12 01:31:58 +0000575#ifndef WITHOUT_COMPLEX
Guido van Rossumfaa436c1996-01-26 18:59:07 +0000576 if (c == 'j' || c == 'J')
Guido van Rossumf595fde1996-01-12 01:31:58 +0000577 goto imaginary;
578#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000579 if (c == 'x' || c == 'X') {
580 /* Hex */
581 do {
582 c = tok_nextc(tok);
583 } while (isxdigit(c));
584 }
585 else {
Guido van Rossum94309451991-12-10 14:01:05 +0000586 /* XXX This is broken! E.g.,
587 09.9 should be accepted as float! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000588 /* Octal; c is first char of it */
589 /* There's no 'isoctdigit' macro, sigh */
590 while ('0' <= c && c < '8') {
591 c = tok_nextc(tok);
592 }
593 }
Guido van Rossumf023c461991-05-05 20:16:20 +0000594 if (c == 'l' || c == 'L')
595 c = tok_nextc(tok);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000596 }
597 else {
598 /* Decimal */
599 do {
600 c = tok_nextc(tok);
601 } while (isdigit(c));
Guido van Rossumf023c461991-05-05 20:16:20 +0000602 if (c == 'l' || c == 'L')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000603 c = tok_nextc(tok);
Guido van Rossumf023c461991-05-05 20:16:20 +0000604 else {
605 /* Accept floating point numbers.
606 XXX This accepts incomplete things like
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000607 XXX 12e or 1e+; worry run-time */
Guido van Rossumf023c461991-05-05 20:16:20 +0000608 if (c == '.') {
609 fraction:
610 /* Fraction */
611 do {
612 c = tok_nextc(tok);
613 } while (isdigit(c));
614 }
615 if (c == 'e' || c == 'E') {
616 /* Exponent part */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000617 c = tok_nextc(tok);
Guido van Rossumf023c461991-05-05 20:16:20 +0000618 if (c == '+' || c == '-')
619 c = tok_nextc(tok);
620 while (isdigit(c)) {
621 c = tok_nextc(tok);
622 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000623 }
Guido van Rossumf595fde1996-01-12 01:31:58 +0000624#ifndef WITHOUT_COMPLEX
Guido van Rossumfaa436c1996-01-26 18:59:07 +0000625 if (c == 'j' || c == 'J')
Guido van Rossumf595fde1996-01-12 01:31:58 +0000626 /* Imaginary part */
627 imaginary:
628 c = tok_nextc(tok);
629#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000630 }
631 }
632 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000633 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000634 *p_end = tok->cur;
635 return NUMBER;
636 }
637
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000638 /* String */
639 if (c == '\'' || c == '"') {
640 int quote = c;
641 int triple = 0;
642 int tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000643 for (;;) {
644 c = tok_nextc(tok);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000645 if (c == '\n') {
646 if (!triple) {
647 tok->done = E_TOKEN;
648 tok_backup(tok, c);
649 return ERRORTOKEN;
650 }
651 tripcount = 0;
652 }
653 else if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000654 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000655 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000656 return ERRORTOKEN;
657 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000658 else if (c == quote) {
659 tripcount++;
660 if (tok->cur == tok->start+2) {
661 c = tok_nextc(tok);
662 if (c == quote) {
663 triple = 1;
664 tripcount = 0;
665 continue;
666 }
667 tok_backup(tok, c);
668 }
669 if (!triple || tripcount == 3)
670 break;
671 }
672 else if (c == '\\') {
673 tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000674 c = tok_nextc(tok);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000675 if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000676 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000677 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000678 return ERRORTOKEN;
679 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000680 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000681 else
682 tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000683 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000684 *p_start = tok->start;
Guido van Rossum8054fad1993-10-26 15:19:44 +0000685 *p_end = tok->cur;
686 return STRING;
687 }
688
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000689 /* Line continuation */
690 if (c == '\\') {
691 c = tok_nextc(tok);
692 if (c != '\n') {
693 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000694 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000695 return ERRORTOKEN;
696 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000697 goto again; /* Read next line */
698 }
699
Guido van Rossumfbab9051991-10-20 20:25:03 +0000700 /* Check for two-character token */
701 {
702 int c2 = tok_nextc(tok);
703 int token = tok_2char(c, c2);
704 if (token != OP) {
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000705 *p_start = tok->start;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000706 *p_end = tok->cur;
707 return token;
708 }
709 tok_backup(tok, c2);
710 }
711
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000712 /* Keep track of parentheses nesting level */
Guido van Rossuma849b831993-05-12 11:35:44 +0000713 switch (c) {
714 case '(':
715 case '[':
716 case '{':
717 tok->level++;
718 break;
719 case ')':
720 case ']':
721 case '}':
722 tok->level--;
723 break;
724 }
725
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000726 /* Punctuation character */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000727 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000728 *p_end = tok->cur;
729 return tok_1char(c);
730}
731
732
733#ifdef DEBUG
734
735void
736tok_dump(type, start, end)
737 int type;
738 char *start, *end;
739{
740 printf("%s", tok_name[type]);
741 if (type == NAME || type == NUMBER || type == STRING || type == OP)
742 printf("(%.*s)", (int)(end - start), start);
743}
744
745#endif