/***********************************************************
Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
The Netherlands.

                        All Rights Reserved

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of Stichting Mathematisch
Centrum or CWI or Corporation for National Research Initiatives or
CNRI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.

While CWI is the initial source for this software, a modified version
is made available by the Corporation for National Research Initiatives
(CNRI) at the Internet address ftp://ftp.python.org.

STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.

******************************************************************/
31
/* Tokenizer implementation */
33
Guido van Rossum3f5da241990-12-20 15:06:42 +000034#include "pgenheaders.h"
35
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000036#include <ctype.h>
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000037
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038#include "tokenizer.h"
39#include "errcode.h"
40
Guido van Rossumf4b1a641994-08-29 12:43:07 +000041extern char *my_readline PROTO((char *));
42/* Return malloc'ed string including trailing \n;
43 empty malloc'ed string for EOF;
44 NULL if interrupted */
45
Guido van Rossum4fe87291992-02-26 15:24:44 +000046/* Don't ever change this -- it would break the portability of Python code */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000047#define TABSIZE 8
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000048
Guido van Rossum3f5da241990-12-20 15:06:42 +000049/* Forward */
50static struct tok_state *tok_new PROTO((void));
51static int tok_nextc PROTO((struct tok_state *tok));
52static void tok_backup PROTO((struct tok_state *tok, int c));
53
/* Printable names of the token types, indexed by token number.
   This table must match the #defines in token.h! */

char *tok_name[] = {
	/* End of input, names, literals and layout tokens */
	"ENDMARKER", "NAME", "NUMBER", "STRING",
	"NEWLINE", "INDENT", "DEDENT",

	/* Single-character tokens */
	"LPAR", "RPAR", "LSQB", "RSQB",
	"COLON", "COMMA", "SEMI",
	"PLUS", "MINUS", "STAR", "SLASH",
	"VBAR", "AMPER", "LESS", "GREATER",
	"EQUAL", "DOT", "PERCENT", "BACKQUOTE",
	"LBRACE", "RBRACE",

	/* Two-character comparison operators */
	"EQEQUAL", "NOTEQUAL", "LESSEQUAL", "GREATEREQUAL",

	/* Later additions */
	"TILDE", "CIRCUMFLEX", "LEFTSHIFT", "RIGHTSHIFT",
	"DOUBLESTAR",

	/* Catch-all operator and the two pseudo entries */
	"OP",
	"<ERRORTOKEN>",
	"<N_TOKENS>"
};
99
100
101/* Create and initialize a new tok_state structure */
102
103static struct tok_state *
104tok_new()
105{
106 struct tok_state *tok = NEW(struct tok_state, 1);
107 if (tok == NULL)
108 return NULL;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000109 tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000110 tok->done = E_OK;
111 tok->fp = NULL;
112 tok->tabsize = TABSIZE;
113 tok->indent = 0;
114 tok->indstack[0] = 0;
115 tok->atbol = 1;
116 tok->pendin = 0;
117 tok->prompt = tok->nextprompt = NULL;
118 tok->lineno = 0;
Guido van Rossuma849b831993-05-12 11:35:44 +0000119 tok->level = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000120 return tok;
121}
122
123
124/* Set up tokenizer for string */
125
126struct tok_state *
127tok_setups(str)
128 char *str;
129{
130 struct tok_state *tok = tok_new();
131 if (tok == NULL)
132 return NULL;
Guido van Rossum1a817c01994-09-19 08:06:25 +0000133 tok->buf = tok->cur = tok->end = tok->inp = str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000134 return tok;
135}
136
137
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000138/* Set up tokenizer for file */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000139
140struct tok_state *
141tok_setupf(fp, ps1, ps2)
142 FILE *fp;
143 char *ps1, *ps2;
144{
145 struct tok_state *tok = tok_new();
146 if (tok == NULL)
147 return NULL;
148 if ((tok->buf = NEW(char, BUFSIZ)) == NULL) {
149 DEL(tok);
150 return NULL;
151 }
152 tok->cur = tok->inp = tok->buf;
153 tok->end = tok->buf + BUFSIZ;
154 tok->fp = fp;
155 tok->prompt = ps1;
156 tok->nextprompt = ps2;
157 return tok;
158}
159
160
161/* Free a tok_state structure */
162
163void
164tok_free(tok)
165 struct tok_state *tok;
166{
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000167 if (tok->fp != NULL && tok->buf != NULL)
168 DEL(tok->buf);
169 DEL(tok);
170}
171
172
173/* Get next char, updating state; error code goes into tok->done */
174
175static int
176tok_nextc(tok)
177 register struct tok_state *tok;
178{
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000179 for (;;) {
Guido van Rossum1a817c01994-09-19 08:06:25 +0000180 if (tok->cur != tok->inp) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000181 return *tok->cur++; /* Fast path */
Guido van Rossum1a817c01994-09-19 08:06:25 +0000182 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000183 if (tok->done != E_OK)
184 return EOF;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000185 if (tok->fp == NULL) {
Guido van Rossum1a817c01994-09-19 08:06:25 +0000186 char *end = strchr(tok->inp, '\n');
187 if (end != NULL)
188 end++;
189 else {
190 end = strchr(tok->inp, '\0');
191 if (end == tok->inp) {
192 tok->done = E_EOF;
193 return EOF;
194 }
195 }
196 if (tok->start == NULL)
197 tok->buf = tok->cur;
198 tok->lineno++;
199 tok->inp = end;
200 return *tok->cur++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000201 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000202 if (tok->prompt != NULL) {
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000203 char *new = my_readline(tok->prompt);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000204 if (tok->nextprompt != NULL)
205 tok->prompt = tok->nextprompt;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000206 if (new == NULL)
207 tok->done = E_INTR;
208 else if (*new == '\0') {
209 free(new);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000210 tok->done = E_EOF;
211 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000212 else if (tok->start != NULL) {
213 int start = tok->start - tok->buf;
214 int oldlen = tok->cur - tok->buf;
215 int newlen = oldlen + strlen(new);
216 char *buf = realloc(tok->buf, newlen+1);
217 tok->lineno++;
218 if (buf == NULL) {
219 free(tok->buf);
Guido van Rossum588633d1994-12-30 15:46:02 +0000220 tok->buf = NULL;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000221 free(new);
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000222 tok->done = E_NOMEM;
223 return EOF;
224 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000225 tok->buf = buf;
226 tok->cur = tok->buf + oldlen;
227 strcpy(tok->buf + oldlen, new);
228 free(new);
229 tok->inp = tok->buf + newlen;
230 tok->end = tok->inp + 1;
231 tok->start = tok->buf + start;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000232 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000233 else {
234 tok->lineno++;
235 if (tok->buf != NULL)
236 free(tok->buf);
237 tok->buf = new;
238 tok->cur = tok->buf;
239 tok->inp = strchr(tok->buf, '\0');
240 tok->end = tok->inp + 1;
241 }
242 }
243 else {
244 int done = 0;
245 int cur = 0;
Guido van Rossum2e96eb91995-06-14 18:26:02 +0000246 char *pt;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000247 if (tok->start == NULL) {
248 if (tok->buf == NULL) {
249 tok->buf = NEW(char, BUFSIZ);
250 if (tok->buf == NULL) {
251 tok->done = E_NOMEM;
252 return EOF;
253 }
254 tok->end = tok->buf + BUFSIZ;
255 }
256 if (fgets(tok->buf, (int)(tok->end - tok->buf),
257 tok->fp) == NULL) {
258 tok->done = E_EOF;
259 done = 1;
260 }
261 else {
262 tok->done = E_OK;
263 tok->inp = strchr(tok->buf, '\0');
264 done = tok->inp[-1] == '\n';
265 }
266 }
267 else {
268 cur = tok->cur - tok->buf;
Guido van Rossum78c05351995-01-17 16:12:13 +0000269 if (feof(tok->fp)) {
270 tok->done = E_EOF;
271 done = 1;
272 }
273 else
274 tok->done = E_OK;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000275 }
276 tok->lineno++;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000277 /* Read until '\n' or EOF */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000278 while (!done) {
279 int curstart = tok->start == NULL ? -1 :
280 tok->start - tok->buf;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000281 int curvalid = tok->inp - tok->buf;
282 int cursize = tok->end - tok->buf;
Guido van Rossum3f6bb861995-09-21 20:36:34 +0000283 int newsize = curvalid + BUFSIZ;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000284 char *newbuf = tok->buf;
285 RESIZE(newbuf, char, newsize);
286 if (newbuf == NULL) {
287 tok->done = E_NOMEM;
288 tok->cur = tok->inp;
289 return EOF;
290 }
291 tok->buf = newbuf;
292 tok->inp = tok->buf + curvalid;
293 tok->end = tok->buf + newsize;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000294 tok->start = curstart < 0 ? NULL :
295 tok->buf + curstart;
296 if (fgets(tok->inp,
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000297 (int)(tok->end - tok->inp),
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000298 tok->fp) == NULL) {
299 /* Last line does not end in \n,
300 fake one */
301 strcpy(tok->inp, "\n");
302 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000303 tok->inp = strchr(tok->inp, '\0');
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000304 done = tok->inp[-1] == '\n';
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000305 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000306 tok->cur = tok->buf + cur;
Guido van Rossum2e96eb91995-06-14 18:26:02 +0000307 /* replace "\r\n" with "\n" */
308 pt = tok->inp - 2;
309 if (pt >= tok->buf && *pt == '\r') {
310 *pt++ = '\n';
311 *pt = '\0';
312 tok->inp = pt;
313 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000314 }
315 if (tok->done != E_OK) {
316 if (tok->prompt != NULL)
317 fprintf(stderr, "\n");
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000318 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000319 return EOF;
320 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000321 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000322 /*NOTREACHED*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000323}
324
325
326/* Back-up one character */
327
328static void
329tok_backup(tok, c)
330 register struct tok_state *tok;
331 register int c;
332{
333 if (c != EOF) {
Guido van Rossum588633d1994-12-30 15:46:02 +0000334 if (--tok->cur < tok->buf)
335 fatal("tok_backup: begin of buffer");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000336 if (*tok->cur != c)
337 *tok->cur = c;
338 }
339}
340
341
342/* Return the token corresponding to a single character */
343
344int
345tok_1char(c)
346 int c;
347{
348 switch (c) {
349 case '(': return LPAR;
350 case ')': return RPAR;
351 case '[': return LSQB;
352 case ']': return RSQB;
353 case ':': return COLON;
354 case ',': return COMMA;
355 case ';': return SEMI;
356 case '+': return PLUS;
357 case '-': return MINUS;
358 case '*': return STAR;
359 case '/': return SLASH;
360 case '|': return VBAR;
361 case '&': return AMPER;
362 case '<': return LESS;
363 case '>': return GREATER;
364 case '=': return EQUAL;
365 case '.': return DOT;
366 case '%': return PERCENT;
367 case '`': return BACKQUOTE;
368 case '{': return LBRACE;
369 case '}': return RBRACE;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000370 case '^': return CIRCUMFLEX;
371 case '~': return TILDE;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000372 default: return OP;
373 }
374}
375
376
Guido van Rossumfbab9051991-10-20 20:25:03 +0000377int
378tok_2char(c1, c2)
379 int c1, c2;
380{
381 switch (c1) {
382 case '=':
383 switch (c2) {
384 case '=': return EQEQUAL;
385 }
386 break;
387 case '!':
388 switch (c2) {
389 case '=': return NOTEQUAL;
390 }
391 break;
392 case '<':
393 switch (c2) {
394 case '>': return NOTEQUAL;
395 case '=': return LESSEQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000396 case '<': return LEFTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000397 }
398 break;
399 case '>':
400 switch (c2) {
401 case '=': return GREATEREQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000402 case '>': return RIGHTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000403 }
404 break;
Guido van Rossumf595fde1996-01-12 01:31:58 +0000405 case '*':
406 switch (c2) {
407 case '*': return DOUBLESTAR;
408 }
409 break;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000410 }
411 return OP;
412}
413
414
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000415/* Get next token, after space stripping etc. */
416
417int
418tok_get(tok, p_start, p_end)
419 register struct tok_state *tok; /* In/out: tokenizer state */
420 char **p_start, **p_end; /* Out: point to start/end of token */
421{
422 register int c;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000423 int blankline;
424
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000425 *p_start = *p_end = NULL;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000426 nextline:
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000427 tok->start = NULL;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000428 blankline = 0;
429
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000430 /* Get indentation level */
431 if (tok->atbol) {
432 register int col = 0;
433 tok->atbol = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000434 for (;;) {
435 c = tok_nextc(tok);
436 if (c == ' ')
437 col++;
438 else if (c == '\t')
439 col = (col/tok->tabsize + 1) * tok->tabsize;
Guido van Rossum94d32b11995-07-07 22:27:27 +0000440 else if (c == '\014') /* Control-L (formfeed) */
441 col = 0; /* For Emacs users */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000442 else
443 break;
444 }
445 tok_backup(tok, c);
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000446 if (c == '#' || c == '\n') {
447 /* Lines with only whitespace and/or comments
448 shouldn't affect the indentation and are
449 not passed to the parser as NEWLINE tokens,
450 except *totally* empty lines in interactive
451 mode, which signal the end of a command group. */
452 if (col == 0 && c == '\n' && tok->prompt != NULL)
453 blankline = 0; /* Let it through */
454 else
455 blankline = 1; /* Ignore completely */
456 /* We can't jump back right here since we still
457 may need to skip to the end of a comment */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000458 }
Guido van Rossuma849b831993-05-12 11:35:44 +0000459 if (!blankline && tok->level == 0) {
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000460 if (col == tok->indstack[tok->indent]) {
461 /* No change */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000462 }
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000463 else if (col > tok->indstack[tok->indent]) {
464 /* Indent -- always one */
465 if (tok->indent+1 >= MAXINDENT) {
466 fprintf(stderr, "excessive indent\n");
467 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000468 tok->cur = tok->inp;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000469 return ERRORTOKEN;
470 }
471 tok->pendin++;
472 tok->indstack[++tok->indent] = col;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000473 }
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000474 else /* col < tok->indstack[tok->indent] */ {
475 /* Dedent -- any number, must be consistent */
476 while (tok->indent > 0 &&
477 col < tok->indstack[tok->indent]) {
478 tok->indent--;
479 tok->pendin--;
480 }
481 if (col != tok->indstack[tok->indent]) {
482 fprintf(stderr, "inconsistent dedent\n");
483 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000484 tok->cur = tok->inp;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000485 return ERRORTOKEN;
486 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000487 }
488 }
489 }
490
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000491 tok->start = tok->cur;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000492
493 /* Return pending indents/dedents */
494 if (tok->pendin != 0) {
495 if (tok->pendin < 0) {
496 tok->pendin++;
497 return DEDENT;
498 }
499 else {
500 tok->pendin--;
501 return INDENT;
502 }
503 }
504
505 again:
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000506 tok->start = NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000507 /* Skip spaces */
508 do {
509 c = tok_nextc(tok);
Guido van Rossum94d32b11995-07-07 22:27:27 +0000510 } while (c == ' ' || c == '\t' || c == '\014');
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000511
512 /* Set start of current token */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000513 tok->start = tok->cur - 1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000514
515 /* Skip comment */
516 if (c == '#') {
517 /* Hack to allow overriding the tabsize in the file.
518 This is also recognized by vi, when it occurs near the
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000519 beginning or end of the file. (Will vi never die...?)
520 For Python it must be at the beginning of the file! */
Guido van Rossum1a817c01994-09-19 08:06:25 +0000521 /* XXX The real vi syntax is actually different :-( */
522 /* XXX Should recognize Emacs syntax, too */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000523 int x;
Guido van Rossum1a817c01994-09-19 08:06:25 +0000524 if (sscanf(tok->cur,
Guido van Rossum3f5da241990-12-20 15:06:42 +0000525 " vi:set tabsize=%d:", &x) == 1 &&
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000526 x >= 1 && x <= 40) {
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000527 /* fprintf(stderr, "# vi:set tabsize=%d:\n", x); */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000528 tok->tabsize = x;
529 }
530 do {
531 c = tok_nextc(tok);
532 } while (c != EOF && c != '\n');
533 }
534
535 /* Check for EOF and errors now */
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000536 if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000537 return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000538 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000539
540 /* Identifier (most frequent token!) */
541 if (isalpha(c) || c == '_') {
542 do {
543 c = tok_nextc(tok);
544 } while (isalnum(c) || c == '_');
545 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000546 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000547 *p_end = tok->cur;
548 return NAME;
549 }
550
551 /* Newline */
552 if (c == '\n') {
553 tok->atbol = 1;
Guido van Rossuma849b831993-05-12 11:35:44 +0000554 if (blankline || tok->level > 0)
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000555 goto nextline;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000556 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000557 *p_end = tok->cur - 1; /* Leave '\n' out of the string */
558 return NEWLINE;
559 }
560
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000561 /* Period or number starting with period? */
562 if (c == '.') {
563 c = tok_nextc(tok);
564 if (isdigit(c)) {
565 goto fraction;
566 }
567 else {
568 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000569 *p_start = tok->start;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000570 *p_end = tok->cur;
571 return DOT;
572 }
573 }
Guido van Rossumf595fde1996-01-12 01:31:58 +0000574
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000575 /* Number */
576 if (isdigit(c)) {
577 if (c == '0') {
578 /* Hex or octal */
579 c = tok_nextc(tok);
580 if (c == '.')
581 goto fraction;
Guido van Rossumf595fde1996-01-12 01:31:58 +0000582#ifndef WITHOUT_COMPLEX
Guido van Rossumfaa436c1996-01-26 18:59:07 +0000583 if (c == 'j' || c == 'J')
Guido van Rossumf595fde1996-01-12 01:31:58 +0000584 goto imaginary;
585#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000586 if (c == 'x' || c == 'X') {
587 /* Hex */
588 do {
589 c = tok_nextc(tok);
590 } while (isxdigit(c));
591 }
592 else {
Guido van Rossum94309451991-12-10 14:01:05 +0000593 /* XXX This is broken! E.g.,
594 09.9 should be accepted as float! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000595 /* Octal; c is first char of it */
596 /* There's no 'isoctdigit' macro, sigh */
597 while ('0' <= c && c < '8') {
598 c = tok_nextc(tok);
599 }
600 }
Guido van Rossumf023c461991-05-05 20:16:20 +0000601 if (c == 'l' || c == 'L')
602 c = tok_nextc(tok);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000603 }
604 else {
605 /* Decimal */
606 do {
607 c = tok_nextc(tok);
608 } while (isdigit(c));
Guido van Rossumf023c461991-05-05 20:16:20 +0000609 if (c == 'l' || c == 'L')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000610 c = tok_nextc(tok);
Guido van Rossumf023c461991-05-05 20:16:20 +0000611 else {
612 /* Accept floating point numbers.
613 XXX This accepts incomplete things like
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000614 XXX 12e or 1e+; worry run-time */
Guido van Rossumf023c461991-05-05 20:16:20 +0000615 if (c == '.') {
616 fraction:
617 /* Fraction */
618 do {
619 c = tok_nextc(tok);
620 } while (isdigit(c));
621 }
622 if (c == 'e' || c == 'E') {
623 /* Exponent part */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000624 c = tok_nextc(tok);
Guido van Rossumf023c461991-05-05 20:16:20 +0000625 if (c == '+' || c == '-')
626 c = tok_nextc(tok);
627 while (isdigit(c)) {
628 c = tok_nextc(tok);
629 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000630 }
Guido van Rossumf595fde1996-01-12 01:31:58 +0000631#ifndef WITHOUT_COMPLEX
Guido van Rossumfaa436c1996-01-26 18:59:07 +0000632 if (c == 'j' || c == 'J')
Guido van Rossumf595fde1996-01-12 01:31:58 +0000633 /* Imaginary part */
634 imaginary:
635 c = tok_nextc(tok);
636#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000637 }
638 }
639 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000640 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000641 *p_end = tok->cur;
642 return NUMBER;
643 }
644
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000645 /* String */
646 if (c == '\'' || c == '"') {
647 int quote = c;
648 int triple = 0;
649 int tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000650 for (;;) {
651 c = tok_nextc(tok);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000652 if (c == '\n') {
653 if (!triple) {
654 tok->done = E_TOKEN;
655 tok_backup(tok, c);
656 return ERRORTOKEN;
657 }
658 tripcount = 0;
659 }
660 else if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000661 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000662 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000663 return ERRORTOKEN;
664 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000665 else if (c == quote) {
666 tripcount++;
667 if (tok->cur == tok->start+2) {
668 c = tok_nextc(tok);
669 if (c == quote) {
670 triple = 1;
671 tripcount = 0;
672 continue;
673 }
674 tok_backup(tok, c);
675 }
676 if (!triple || tripcount == 3)
677 break;
678 }
679 else if (c == '\\') {
680 tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000681 c = tok_nextc(tok);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000682 if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000683 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000684 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000685 return ERRORTOKEN;
686 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000687 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000688 else
689 tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000690 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000691 *p_start = tok->start;
Guido van Rossum8054fad1993-10-26 15:19:44 +0000692 *p_end = tok->cur;
693 return STRING;
694 }
695
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000696 /* Line continuation */
697 if (c == '\\') {
698 c = tok_nextc(tok);
699 if (c != '\n') {
700 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000701 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000702 return ERRORTOKEN;
703 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000704 goto again; /* Read next line */
705 }
706
Guido van Rossumfbab9051991-10-20 20:25:03 +0000707 /* Check for two-character token */
708 {
709 int c2 = tok_nextc(tok);
710 int token = tok_2char(c, c2);
711 if (token != OP) {
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000712 *p_start = tok->start;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000713 *p_end = tok->cur;
714 return token;
715 }
716 tok_backup(tok, c2);
717 }
718
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000719 /* Keep track of parentheses nesting level */
Guido van Rossuma849b831993-05-12 11:35:44 +0000720 switch (c) {
721 case '(':
722 case '[':
723 case '{':
724 tok->level++;
725 break;
726 case ')':
727 case ']':
728 case '}':
729 tok->level--;
730 break;
731 }
732
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000733 /* Punctuation character */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000734 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000735 *p_end = tok->cur;
736 return tok_1char(c);
737}
738
739
#ifdef DEBUG

/* Print the name of a token on stdout.  For NAME, NUMBER, STRING and
   OP tokens the token text from start up to (not including) end
   follows in parentheses. */

void
tok_dump(type, start, end)
	int type;
	char *start, *end;
{
	printf("%s", tok_name[type]);

	switch (type) {
	case NAME:
	case NUMBER:
	case STRING:
	case OP:
		printf("(%.*s)", (int)(end - start), start);
		break;
	default:
		break;
	}
}

#endif