blob: 8a726e0ea3f13fc2d3469f82bd89937dd52c9bfd [file] [log] [blame]
/***********************************************************
Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
The Netherlands.

                        All Rights Reserved

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of Stichting Mathematisch
Centrum or CWI or Corporation for National Research Initiatives or
CNRI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.

While CWI is the initial source for this software, a modified version
is made available by the Corporation for National Research Initiatives
(CNRI) at the Internet address ftp://ftp.python.org.

STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.

******************************************************************/
31
/* Tokenizer implementation */
33
Guido van Rossum3f5da241990-12-20 15:06:42 +000034#include "pgenheaders.h"
35
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000036#include <ctype.h>
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000037
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038#include "tokenizer.h"
39#include "errcode.h"
40
Guido van Rossumf4b1a641994-08-29 12:43:07 +000041extern char *my_readline PROTO((char *));
42/* Return malloc'ed string including trailing \n;
43 empty malloc'ed string for EOF;
44 NULL if interrupted */
45
Guido van Rossum4fe87291992-02-26 15:24:44 +000046/* Don't ever change this -- it would break the portability of Python code */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000047#define TABSIZE 8
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000048
Guido van Rossum3f5da241990-12-20 15:06:42 +000049/* Forward */
50static struct tok_state *tok_new PROTO((void));
51static int tok_nextc PROTO((struct tok_state *tok));
52static void tok_backup PROTO((struct tok_state *tok, int c));
53
/* Token names, indexed by token number.
   This table must match the #defines in token.h! */

char *tok_name[] = {
	"ENDMARKER",
	"NAME",
	"NUMBER",
	"STRING",
	"NEWLINE",
	"INDENT",
	"DEDENT",
	"LPAR",
	"RPAR",
	"LSQB",
	"RSQB",
	"COLON",
	"COMMA",
	"SEMI",
	"PLUS",
	"MINUS",
	"STAR",
	"SLASH",
	"VBAR",
	"AMPER",
	"LESS",
	"GREATER",
	"EQUAL",
	"DOT",
	"PERCENT",
	"BACKQUOTE",
	"LBRACE",
	"RBRACE",
	"EQEQUAL",
	"NOTEQUAL",
	"LESSEQUAL",
	"GREATEREQUAL",
	"TILDE",
	"CIRCUMFLEX",
	"LEFTSHIFT",
	"RIGHTSHIFT",
	"DOUBLESTAR",
	/* This table must match the #defines in token.h! */
	"OP",
	"<ERRORTOKEN>",
	"<N_TOKENS>"
};
99
100
101/* Create and initialize a new tok_state structure */
102
103static struct tok_state *
104tok_new()
105{
106 struct tok_state *tok = NEW(struct tok_state, 1);
107 if (tok == NULL)
108 return NULL;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000109 tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000110 tok->done = E_OK;
111 tok->fp = NULL;
112 tok->tabsize = TABSIZE;
113 tok->indent = 0;
114 tok->indstack[0] = 0;
115 tok->atbol = 1;
116 tok->pendin = 0;
117 tok->prompt = tok->nextprompt = NULL;
118 tok->lineno = 0;
Guido van Rossuma849b831993-05-12 11:35:44 +0000119 tok->level = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000120 return tok;
121}
122
123
124/* Set up tokenizer for string */
125
126struct tok_state *
127tok_setups(str)
128 char *str;
129{
130 struct tok_state *tok = tok_new();
131 if (tok == NULL)
132 return NULL;
Guido van Rossum1a817c01994-09-19 08:06:25 +0000133 tok->buf = tok->cur = tok->end = tok->inp = str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000134 return tok;
135}
136
137
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000138/* Set up tokenizer for file */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000139
140struct tok_state *
141tok_setupf(fp, ps1, ps2)
142 FILE *fp;
143 char *ps1, *ps2;
144{
145 struct tok_state *tok = tok_new();
146 if (tok == NULL)
147 return NULL;
148 if ((tok->buf = NEW(char, BUFSIZ)) == NULL) {
149 DEL(tok);
150 return NULL;
151 }
152 tok->cur = tok->inp = tok->buf;
153 tok->end = tok->buf + BUFSIZ;
154 tok->fp = fp;
155 tok->prompt = ps1;
156 tok->nextprompt = ps2;
157 return tok;
158}
159
160
161/* Free a tok_state structure */
162
163void
164tok_free(tok)
165 struct tok_state *tok;
166{
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000167 if (tok->fp != NULL && tok->buf != NULL)
168 DEL(tok->buf);
169 DEL(tok);
170}
171
172
173/* Get next char, updating state; error code goes into tok->done */
174
175static int
176tok_nextc(tok)
177 register struct tok_state *tok;
178{
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000179 for (;;) {
Guido van Rossum1a817c01994-09-19 08:06:25 +0000180 if (tok->cur != tok->inp) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000181 return *tok->cur++; /* Fast path */
Guido van Rossum1a817c01994-09-19 08:06:25 +0000182 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000183 if (tok->done != E_OK)
184 return EOF;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000185 if (tok->fp == NULL) {
Guido van Rossum1a817c01994-09-19 08:06:25 +0000186 char *end = strchr(tok->inp, '\n');
187 if (end != NULL)
188 end++;
189 else {
190 end = strchr(tok->inp, '\0');
191 if (end == tok->inp) {
192 tok->done = E_EOF;
193 return EOF;
194 }
195 }
196 if (tok->start == NULL)
197 tok->buf = tok->cur;
198 tok->lineno++;
199 tok->inp = end;
200 return *tok->cur++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000201 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000202 if (tok->prompt != NULL) {
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000203 char *new = my_readline(tok->prompt);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000204 if (tok->nextprompt != NULL)
205 tok->prompt = tok->nextprompt;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000206 if (new == NULL)
207 tok->done = E_INTR;
208 else if (*new == '\0') {
209 free(new);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000210 tok->done = E_EOF;
211 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000212 else if (tok->start != NULL) {
213 int start = tok->start - tok->buf;
214 int oldlen = tok->cur - tok->buf;
215 int newlen = oldlen + strlen(new);
216 char *buf = realloc(tok->buf, newlen+1);
217 tok->lineno++;
218 if (buf == NULL) {
219 free(tok->buf);
Guido van Rossum588633d1994-12-30 15:46:02 +0000220 tok->buf = NULL;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000221 free(new);
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000222 tok->done = E_NOMEM;
223 return EOF;
224 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000225 tok->buf = buf;
226 tok->cur = tok->buf + oldlen;
227 strcpy(tok->buf + oldlen, new);
228 free(new);
229 tok->inp = tok->buf + newlen;
230 tok->end = tok->inp + 1;
231 tok->start = tok->buf + start;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000232 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000233 else {
234 tok->lineno++;
235 if (tok->buf != NULL)
236 free(tok->buf);
237 tok->buf = new;
238 tok->cur = tok->buf;
239 tok->inp = strchr(tok->buf, '\0');
240 tok->end = tok->inp + 1;
241 }
242 }
243 else {
244 int done = 0;
245 int cur = 0;
Guido van Rossum2e96eb91995-06-14 18:26:02 +0000246 char *pt;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000247 if (tok->start == NULL) {
248 if (tok->buf == NULL) {
249 tok->buf = NEW(char, BUFSIZ);
250 if (tok->buf == NULL) {
251 tok->done = E_NOMEM;
252 return EOF;
253 }
254 tok->end = tok->buf + BUFSIZ;
255 }
256 if (fgets(tok->buf, (int)(tok->end - tok->buf),
257 tok->fp) == NULL) {
258 tok->done = E_EOF;
259 done = 1;
260 }
261 else {
262 tok->done = E_OK;
263 tok->inp = strchr(tok->buf, '\0');
264 done = tok->inp[-1] == '\n';
265 }
266 }
267 else {
268 cur = tok->cur - tok->buf;
Guido van Rossum78c05351995-01-17 16:12:13 +0000269 if (feof(tok->fp)) {
270 tok->done = E_EOF;
271 done = 1;
272 }
273 else
274 tok->done = E_OK;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000275 }
276 tok->lineno++;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000277 /* Read until '\n' or EOF */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000278 while (!done) {
279 int curstart = tok->start == NULL ? -1 :
280 tok->start - tok->buf;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000281 int curvalid = tok->inp - tok->buf;
Guido van Rossum3f6bb861995-09-21 20:36:34 +0000282 int newsize = curvalid + BUFSIZ;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000283 char *newbuf = tok->buf;
284 RESIZE(newbuf, char, newsize);
285 if (newbuf == NULL) {
286 tok->done = E_NOMEM;
287 tok->cur = tok->inp;
288 return EOF;
289 }
290 tok->buf = newbuf;
291 tok->inp = tok->buf + curvalid;
292 tok->end = tok->buf + newsize;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000293 tok->start = curstart < 0 ? NULL :
294 tok->buf + curstart;
295 if (fgets(tok->inp,
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000296 (int)(tok->end - tok->inp),
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000297 tok->fp) == NULL) {
298 /* Last line does not end in \n,
299 fake one */
300 strcpy(tok->inp, "\n");
301 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000302 tok->inp = strchr(tok->inp, '\0');
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000303 done = tok->inp[-1] == '\n';
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000304 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000305 tok->cur = tok->buf + cur;
Guido van Rossum2e96eb91995-06-14 18:26:02 +0000306 /* replace "\r\n" with "\n" */
307 pt = tok->inp - 2;
308 if (pt >= tok->buf && *pt == '\r') {
309 *pt++ = '\n';
310 *pt = '\0';
311 tok->inp = pt;
312 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000313 }
314 if (tok->done != E_OK) {
315 if (tok->prompt != NULL)
316 fprintf(stderr, "\n");
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000317 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000318 return EOF;
319 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000320 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000321 /*NOTREACHED*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000322}
323
324
325/* Back-up one character */
326
327static void
328tok_backup(tok, c)
329 register struct tok_state *tok;
330 register int c;
331{
332 if (c != EOF) {
Guido van Rossum588633d1994-12-30 15:46:02 +0000333 if (--tok->cur < tok->buf)
334 fatal("tok_backup: begin of buffer");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000335 if (*tok->cur != c)
336 *tok->cur = c;
337 }
338}
339
340
341/* Return the token corresponding to a single character */
342
343int
344tok_1char(c)
345 int c;
346{
347 switch (c) {
348 case '(': return LPAR;
349 case ')': return RPAR;
350 case '[': return LSQB;
351 case ']': return RSQB;
352 case ':': return COLON;
353 case ',': return COMMA;
354 case ';': return SEMI;
355 case '+': return PLUS;
356 case '-': return MINUS;
357 case '*': return STAR;
358 case '/': return SLASH;
359 case '|': return VBAR;
360 case '&': return AMPER;
361 case '<': return LESS;
362 case '>': return GREATER;
363 case '=': return EQUAL;
364 case '.': return DOT;
365 case '%': return PERCENT;
366 case '`': return BACKQUOTE;
367 case '{': return LBRACE;
368 case '}': return RBRACE;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000369 case '^': return CIRCUMFLEX;
370 case '~': return TILDE;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000371 default: return OP;
372 }
373}
374
375
Guido van Rossumfbab9051991-10-20 20:25:03 +0000376int
377tok_2char(c1, c2)
378 int c1, c2;
379{
380 switch (c1) {
381 case '=':
382 switch (c2) {
383 case '=': return EQEQUAL;
384 }
385 break;
386 case '!':
387 switch (c2) {
388 case '=': return NOTEQUAL;
389 }
390 break;
391 case '<':
392 switch (c2) {
393 case '>': return NOTEQUAL;
394 case '=': return LESSEQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000395 case '<': return LEFTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000396 }
397 break;
398 case '>':
399 switch (c2) {
400 case '=': return GREATEREQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000401 case '>': return RIGHTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000402 }
403 break;
Guido van Rossumf595fde1996-01-12 01:31:58 +0000404 case '*':
405 switch (c2) {
406 case '*': return DOUBLESTAR;
407 }
408 break;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000409 }
410 return OP;
411}
412
413
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000414/* Get next token, after space stripping etc. */
415
416int
417tok_get(tok, p_start, p_end)
418 register struct tok_state *tok; /* In/out: tokenizer state */
419 char **p_start, **p_end; /* Out: point to start/end of token */
420{
421 register int c;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000422 int blankline;
423
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000424 *p_start = *p_end = NULL;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000425 nextline:
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000426 tok->start = NULL;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000427 blankline = 0;
428
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000429 /* Get indentation level */
430 if (tok->atbol) {
431 register int col = 0;
432 tok->atbol = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000433 for (;;) {
434 c = tok_nextc(tok);
435 if (c == ' ')
436 col++;
437 else if (c == '\t')
438 col = (col/tok->tabsize + 1) * tok->tabsize;
Guido van Rossum94d32b11995-07-07 22:27:27 +0000439 else if (c == '\014') /* Control-L (formfeed) */
440 col = 0; /* For Emacs users */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000441 else
442 break;
443 }
444 tok_backup(tok, c);
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000445 if (c == '#' || c == '\n') {
446 /* Lines with only whitespace and/or comments
447 shouldn't affect the indentation and are
448 not passed to the parser as NEWLINE tokens,
449 except *totally* empty lines in interactive
450 mode, which signal the end of a command group. */
451 if (col == 0 && c == '\n' && tok->prompt != NULL)
452 blankline = 0; /* Let it through */
453 else
454 blankline = 1; /* Ignore completely */
455 /* We can't jump back right here since we still
456 may need to skip to the end of a comment */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000457 }
Guido van Rossuma849b831993-05-12 11:35:44 +0000458 if (!blankline && tok->level == 0) {
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000459 if (col == tok->indstack[tok->indent]) {
460 /* No change */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000461 }
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000462 else if (col > tok->indstack[tok->indent]) {
463 /* Indent -- always one */
464 if (tok->indent+1 >= MAXINDENT) {
465 fprintf(stderr, "excessive indent\n");
466 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000467 tok->cur = tok->inp;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000468 return ERRORTOKEN;
469 }
470 tok->pendin++;
471 tok->indstack[++tok->indent] = col;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000472 }
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000473 else /* col < tok->indstack[tok->indent] */ {
474 /* Dedent -- any number, must be consistent */
475 while (tok->indent > 0 &&
476 col < tok->indstack[tok->indent]) {
477 tok->indent--;
478 tok->pendin--;
479 }
480 if (col != tok->indstack[tok->indent]) {
481 fprintf(stderr, "inconsistent dedent\n");
482 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000483 tok->cur = tok->inp;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000484 return ERRORTOKEN;
485 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000486 }
487 }
488 }
489
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000490 tok->start = tok->cur;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000491
492 /* Return pending indents/dedents */
493 if (tok->pendin != 0) {
494 if (tok->pendin < 0) {
495 tok->pendin++;
496 return DEDENT;
497 }
498 else {
499 tok->pendin--;
500 return INDENT;
501 }
502 }
503
504 again:
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000505 tok->start = NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000506 /* Skip spaces */
507 do {
508 c = tok_nextc(tok);
Guido van Rossum94d32b11995-07-07 22:27:27 +0000509 } while (c == ' ' || c == '\t' || c == '\014');
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000510
511 /* Set start of current token */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000512 tok->start = tok->cur - 1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000513
514 /* Skip comment */
515 if (c == '#') {
516 /* Hack to allow overriding the tabsize in the file.
517 This is also recognized by vi, when it occurs near the
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000518 beginning or end of the file. (Will vi never die...?)
519 For Python it must be at the beginning of the file! */
Guido van Rossum1a817c01994-09-19 08:06:25 +0000520 /* XXX The real vi syntax is actually different :-( */
521 /* XXX Should recognize Emacs syntax, too */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000522 int x;
Guido van Rossum1a817c01994-09-19 08:06:25 +0000523 if (sscanf(tok->cur,
Guido van Rossum3f5da241990-12-20 15:06:42 +0000524 " vi:set tabsize=%d:", &x) == 1 &&
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000525 x >= 1 && x <= 40) {
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000526 /* fprintf(stderr, "# vi:set tabsize=%d:\n", x); */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000527 tok->tabsize = x;
528 }
529 do {
530 c = tok_nextc(tok);
531 } while (c != EOF && c != '\n');
532 }
533
534 /* Check for EOF and errors now */
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000535 if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000536 return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000537 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000538
539 /* Identifier (most frequent token!) */
540 if (isalpha(c) || c == '_') {
541 do {
542 c = tok_nextc(tok);
543 } while (isalnum(c) || c == '_');
544 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000545 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000546 *p_end = tok->cur;
547 return NAME;
548 }
549
550 /* Newline */
551 if (c == '\n') {
552 tok->atbol = 1;
Guido van Rossuma849b831993-05-12 11:35:44 +0000553 if (blankline || tok->level > 0)
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000554 goto nextline;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000555 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000556 *p_end = tok->cur - 1; /* Leave '\n' out of the string */
557 return NEWLINE;
558 }
559
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000560 /* Period or number starting with period? */
561 if (c == '.') {
562 c = tok_nextc(tok);
563 if (isdigit(c)) {
564 goto fraction;
565 }
566 else {
567 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000568 *p_start = tok->start;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000569 *p_end = tok->cur;
570 return DOT;
571 }
572 }
Guido van Rossumf595fde1996-01-12 01:31:58 +0000573
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000574 /* Number */
575 if (isdigit(c)) {
576 if (c == '0') {
577 /* Hex or octal */
578 c = tok_nextc(tok);
579 if (c == '.')
580 goto fraction;
Guido van Rossumf595fde1996-01-12 01:31:58 +0000581#ifndef WITHOUT_COMPLEX
Guido van Rossumfaa436c1996-01-26 18:59:07 +0000582 if (c == 'j' || c == 'J')
Guido van Rossumf595fde1996-01-12 01:31:58 +0000583 goto imaginary;
584#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000585 if (c == 'x' || c == 'X') {
586 /* Hex */
587 do {
588 c = tok_nextc(tok);
589 } while (isxdigit(c));
590 }
591 else {
Guido van Rossum94309451991-12-10 14:01:05 +0000592 /* XXX This is broken! E.g.,
593 09.9 should be accepted as float! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000594 /* Octal; c is first char of it */
595 /* There's no 'isoctdigit' macro, sigh */
596 while ('0' <= c && c < '8') {
597 c = tok_nextc(tok);
598 }
599 }
Guido van Rossumf023c461991-05-05 20:16:20 +0000600 if (c == 'l' || c == 'L')
601 c = tok_nextc(tok);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000602 }
603 else {
604 /* Decimal */
605 do {
606 c = tok_nextc(tok);
607 } while (isdigit(c));
Guido van Rossumf023c461991-05-05 20:16:20 +0000608 if (c == 'l' || c == 'L')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000609 c = tok_nextc(tok);
Guido van Rossumf023c461991-05-05 20:16:20 +0000610 else {
611 /* Accept floating point numbers.
612 XXX This accepts incomplete things like
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000613 XXX 12e or 1e+; worry run-time */
Guido van Rossumf023c461991-05-05 20:16:20 +0000614 if (c == '.') {
615 fraction:
616 /* Fraction */
617 do {
618 c = tok_nextc(tok);
619 } while (isdigit(c));
620 }
621 if (c == 'e' || c == 'E') {
622 /* Exponent part */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000623 c = tok_nextc(tok);
Guido van Rossumf023c461991-05-05 20:16:20 +0000624 if (c == '+' || c == '-')
625 c = tok_nextc(tok);
626 while (isdigit(c)) {
627 c = tok_nextc(tok);
628 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000629 }
Guido van Rossumf595fde1996-01-12 01:31:58 +0000630#ifndef WITHOUT_COMPLEX
Guido van Rossumfaa436c1996-01-26 18:59:07 +0000631 if (c == 'j' || c == 'J')
Guido van Rossumf595fde1996-01-12 01:31:58 +0000632 /* Imaginary part */
633 imaginary:
634 c = tok_nextc(tok);
635#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000636 }
637 }
638 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000639 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000640 *p_end = tok->cur;
641 return NUMBER;
642 }
643
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000644 /* String */
645 if (c == '\'' || c == '"') {
646 int quote = c;
647 int triple = 0;
648 int tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000649 for (;;) {
650 c = tok_nextc(tok);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000651 if (c == '\n') {
652 if (!triple) {
653 tok->done = E_TOKEN;
654 tok_backup(tok, c);
655 return ERRORTOKEN;
656 }
657 tripcount = 0;
658 }
659 else if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000660 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000661 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000662 return ERRORTOKEN;
663 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000664 else if (c == quote) {
665 tripcount++;
666 if (tok->cur == tok->start+2) {
667 c = tok_nextc(tok);
668 if (c == quote) {
669 triple = 1;
670 tripcount = 0;
671 continue;
672 }
673 tok_backup(tok, c);
674 }
675 if (!triple || tripcount == 3)
676 break;
677 }
678 else if (c == '\\') {
679 tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000680 c = tok_nextc(tok);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000681 if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000682 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000683 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000684 return ERRORTOKEN;
685 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000686 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000687 else
688 tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000689 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000690 *p_start = tok->start;
Guido van Rossum8054fad1993-10-26 15:19:44 +0000691 *p_end = tok->cur;
692 return STRING;
693 }
694
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000695 /* Line continuation */
696 if (c == '\\') {
697 c = tok_nextc(tok);
698 if (c != '\n') {
699 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000700 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000701 return ERRORTOKEN;
702 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000703 goto again; /* Read next line */
704 }
705
Guido van Rossumfbab9051991-10-20 20:25:03 +0000706 /* Check for two-character token */
707 {
708 int c2 = tok_nextc(tok);
709 int token = tok_2char(c, c2);
710 if (token != OP) {
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000711 *p_start = tok->start;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000712 *p_end = tok->cur;
713 return token;
714 }
715 tok_backup(tok, c2);
716 }
717
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000718 /* Keep track of parentheses nesting level */
Guido van Rossuma849b831993-05-12 11:35:44 +0000719 switch (c) {
720 case '(':
721 case '[':
722 case '{':
723 tok->level++;
724 break;
725 case ')':
726 case ']':
727 case '}':
728 tok->level--;
729 break;
730 }
731
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000732 /* Punctuation character */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000733 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000734 *p_end = tok->cur;
735 return tok_1char(c);
736}
737
738
#ifdef DEBUG

/* Print a token to stdout: its name from tok_name[], followed by
   the token text in parentheses for NAME, NUMBER, STRING and OP.
   start and end delimit the text; type indexes tok_name[] and is
   not range-checked. */

void
tok_dump(type, start, end)
	int type;
	char *start, *end;
{
	printf("%s", tok_name[type]);
	if (type == NAME || type == NUMBER || type == STRING || type == OP)
		printf("(%.*s)", (int)(end - start), start);
}

#endif