blob: 3bf0fee0c70f20fa66b215cbdfeac6d1286e9ad3 [file] [log] [blame]
/***********************************************************
Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
The Netherlands.

                        All Rights Reserved

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of Stichting Mathematisch
Centrum or CWI or Corporation for National Research Initiatives or
CNRI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.

While CWI is the initial source for this software, a modified version
is made available by the Corporation for National Research Initiatives
(CNRI) at the Internet address ftp://ftp.python.org.

STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.

******************************************************************/
31
/* Tokenizer implementation */
33
Guido van Rossum3f5da241990-12-20 15:06:42 +000034#include "pgenheaders.h"
35
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000036#include <ctype.h>
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000037
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038#include "tokenizer.h"
39#include "errcode.h"
40
Guido van Rossumf4b1a641994-08-29 12:43:07 +000041extern char *my_readline PROTO((char *));
42/* Return malloc'ed string including trailing \n;
43 empty malloc'ed string for EOF;
44 NULL if interrupted */
45
Guido van Rossum4fe87291992-02-26 15:24:44 +000046/* Don't ever change this -- it would break the portability of Python code */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000047#define TABSIZE 8
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000048
Guido van Rossum3f5da241990-12-20 15:06:42 +000049/* Forward */
50static struct tok_state *tok_new PROTO((void));
51static int tok_nextc PROTO((struct tok_state *tok));
52static void tok_backup PROTO((struct tok_state *tok, int c));
53
/* Token names, indexed by token type (see tok_dump).
   The number in front of each row is the index of its first entry. */

char *tok_name[] = {
	/*  0 */ "ENDMARKER", "NAME", "NUMBER", "STRING",
	/*  4 */ "NEWLINE", "INDENT", "DEDENT",
	/*  7 */ "LPAR", "RPAR", "LSQB", "RSQB",
	/* 11 */ "COLON", "COMMA", "SEMI",
	/* 14 */ "PLUS", "MINUS", "STAR", "SLASH",
	/* 18 */ "VBAR", "AMPER", "LESS", "GREATER",
	/* 22 */ "EQUAL", "DOT", "PERCENT", "BACKQUOTE",
	/* 26 */ "LBRACE", "RBRACE",
	/* 28 */ "EQEQUAL", "NOTEQUAL", "LESSEQUAL", "GREATEREQUAL",
	/* 32 */ "TILDE", "CIRCUMFLEX", "LEFTSHIFT", "RIGHTSHIFT",
	/* 36 */ "DOUBLESTAR",
	/* This table must match the #defines in token.h! */
	/* 37 */ "OP", "<ERRORTOKEN>", "<N_TOKENS>"
};
99
100
101/* Create and initialize a new tok_state structure */
102
103static struct tok_state *
104tok_new()
105{
106 struct tok_state *tok = NEW(struct tok_state, 1);
107 if (tok == NULL)
108 return NULL;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000109 tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000110 tok->done = E_OK;
111 tok->fp = NULL;
112 tok->tabsize = TABSIZE;
113 tok->indent = 0;
114 tok->indstack[0] = 0;
115 tok->atbol = 1;
116 tok->pendin = 0;
117 tok->prompt = tok->nextprompt = NULL;
118 tok->lineno = 0;
Guido van Rossuma849b831993-05-12 11:35:44 +0000119 tok->level = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000120 return tok;
121}
122
123
124/* Set up tokenizer for string */
125
126struct tok_state *
127tok_setups(str)
128 char *str;
129{
130 struct tok_state *tok = tok_new();
131 if (tok == NULL)
132 return NULL;
Guido van Rossum1a817c01994-09-19 08:06:25 +0000133 tok->buf = tok->cur = tok->end = tok->inp = str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000134 return tok;
135}
136
137
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000138/* Set up tokenizer for file */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000139
140struct tok_state *
141tok_setupf(fp, ps1, ps2)
142 FILE *fp;
143 char *ps1, *ps2;
144{
145 struct tok_state *tok = tok_new();
146 if (tok == NULL)
147 return NULL;
148 if ((tok->buf = NEW(char, BUFSIZ)) == NULL) {
149 DEL(tok);
150 return NULL;
151 }
152 tok->cur = tok->inp = tok->buf;
153 tok->end = tok->buf + BUFSIZ;
154 tok->fp = fp;
155 tok->prompt = ps1;
156 tok->nextprompt = ps2;
157 return tok;
158}
159
160
161/* Free a tok_state structure */
162
163void
164tok_free(tok)
165 struct tok_state *tok;
166{
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000167 if (tok->fp != NULL && tok->buf != NULL)
168 DEL(tok->buf);
169 DEL(tok);
170}
171
172
173/* Get next char, updating state; error code goes into tok->done */
174
175static int
176tok_nextc(tok)
177 register struct tok_state *tok;
178{
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000179 for (;;) {
Guido van Rossum1a817c01994-09-19 08:06:25 +0000180 if (tok->cur != tok->inp) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000181 return *tok->cur++; /* Fast path */
Guido van Rossum1a817c01994-09-19 08:06:25 +0000182 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000183 if (tok->done != E_OK)
184 return EOF;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000185 if (tok->fp == NULL) {
Guido van Rossum1a817c01994-09-19 08:06:25 +0000186 char *end = strchr(tok->inp, '\n');
187 if (end != NULL)
188 end++;
189 else {
190 end = strchr(tok->inp, '\0');
191 if (end == tok->inp) {
192 tok->done = E_EOF;
193 return EOF;
194 }
195 }
196 if (tok->start == NULL)
197 tok->buf = tok->cur;
198 tok->lineno++;
199 tok->inp = end;
200 return *tok->cur++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000201 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000202 if (tok->prompt != NULL) {
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000203 char *new = my_readline(tok->prompt);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000204 if (tok->nextprompt != NULL)
205 tok->prompt = tok->nextprompt;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000206 if (new == NULL)
207 tok->done = E_INTR;
208 else if (*new == '\0') {
209 free(new);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000210 tok->done = E_EOF;
211 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000212 else if (tok->start != NULL) {
213 int start = tok->start - tok->buf;
214 int oldlen = tok->cur - tok->buf;
215 int newlen = oldlen + strlen(new);
216 char *buf = realloc(tok->buf, newlen+1);
217 tok->lineno++;
218 if (buf == NULL) {
219 free(tok->buf);
Guido van Rossum588633d1994-12-30 15:46:02 +0000220 tok->buf = NULL;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000221 free(new);
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000222 tok->done = E_NOMEM;
223 return EOF;
224 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000225 tok->buf = buf;
226 tok->cur = tok->buf + oldlen;
227 strcpy(tok->buf + oldlen, new);
228 free(new);
229 tok->inp = tok->buf + newlen;
230 tok->end = tok->inp + 1;
231 tok->start = tok->buf + start;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000232 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000233 else {
234 tok->lineno++;
235 if (tok->buf != NULL)
236 free(tok->buf);
237 tok->buf = new;
238 tok->cur = tok->buf;
239 tok->inp = strchr(tok->buf, '\0');
240 tok->end = tok->inp + 1;
241 }
242 }
243 else {
244 int done = 0;
245 int cur = 0;
Guido van Rossum2e96eb91995-06-14 18:26:02 +0000246 char *pt;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000247 if (tok->start == NULL) {
248 if (tok->buf == NULL) {
249 tok->buf = NEW(char, BUFSIZ);
250 if (tok->buf == NULL) {
251 tok->done = E_NOMEM;
252 return EOF;
253 }
254 tok->end = tok->buf + BUFSIZ;
255 }
256 if (fgets(tok->buf, (int)(tok->end - tok->buf),
257 tok->fp) == NULL) {
258 tok->done = E_EOF;
259 done = 1;
260 }
261 else {
262 tok->done = E_OK;
263 tok->inp = strchr(tok->buf, '\0');
264 done = tok->inp[-1] == '\n';
265 }
266 }
267 else {
268 cur = tok->cur - tok->buf;
Guido van Rossum78c05351995-01-17 16:12:13 +0000269 if (feof(tok->fp)) {
270 tok->done = E_EOF;
271 done = 1;
272 }
273 else
274 tok->done = E_OK;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000275 }
276 tok->lineno++;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000277 /* Read until '\n' or EOF */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000278 while (!done) {
279 int curstart = tok->start == NULL ? -1 :
280 tok->start - tok->buf;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000281 int curvalid = tok->inp - tok->buf;
Guido van Rossum3f6bb861995-09-21 20:36:34 +0000282 int newsize = curvalid + BUFSIZ;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000283 char *newbuf = tok->buf;
284 RESIZE(newbuf, char, newsize);
285 if (newbuf == NULL) {
286 tok->done = E_NOMEM;
287 tok->cur = tok->inp;
288 return EOF;
289 }
290 tok->buf = newbuf;
291 tok->inp = tok->buf + curvalid;
292 tok->end = tok->buf + newsize;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000293 tok->start = curstart < 0 ? NULL :
294 tok->buf + curstart;
295 if (fgets(tok->inp,
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000296 (int)(tok->end - tok->inp),
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000297 tok->fp) == NULL) {
298 /* Last line does not end in \n,
299 fake one */
300 strcpy(tok->inp, "\n");
301 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000302 tok->inp = strchr(tok->inp, '\0');
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000303 done = tok->inp[-1] == '\n';
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000304 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000305 tok->cur = tok->buf + cur;
Guido van Rossum2d45be11997-04-11 19:16:25 +0000306#ifndef macintosh
Guido van Rossum2e96eb91995-06-14 18:26:02 +0000307 /* replace "\r\n" with "\n" */
Guido van Rossum2d45be11997-04-11 19:16:25 +0000308 /* For Mac we leave the \r, giving a syntax error */
Guido van Rossum2e96eb91995-06-14 18:26:02 +0000309 pt = tok->inp - 2;
310 if (pt >= tok->buf && *pt == '\r') {
311 *pt++ = '\n';
312 *pt = '\0';
313 tok->inp = pt;
314 }
Guido van Rossum2d45be11997-04-11 19:16:25 +0000315#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000316 }
317 if (tok->done != E_OK) {
318 if (tok->prompt != NULL)
319 fprintf(stderr, "\n");
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000320 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000321 return EOF;
322 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000323 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000324 /*NOTREACHED*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000325}
326
327
328/* Back-up one character */
329
330static void
331tok_backup(tok, c)
332 register struct tok_state *tok;
333 register int c;
334{
335 if (c != EOF) {
Guido van Rossum588633d1994-12-30 15:46:02 +0000336 if (--tok->cur < tok->buf)
337 fatal("tok_backup: begin of buffer");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000338 if (*tok->cur != c)
339 *tok->cur = c;
340 }
341}
342
343
344/* Return the token corresponding to a single character */
345
346int
347tok_1char(c)
348 int c;
349{
350 switch (c) {
351 case '(': return LPAR;
352 case ')': return RPAR;
353 case '[': return LSQB;
354 case ']': return RSQB;
355 case ':': return COLON;
356 case ',': return COMMA;
357 case ';': return SEMI;
358 case '+': return PLUS;
359 case '-': return MINUS;
360 case '*': return STAR;
361 case '/': return SLASH;
362 case '|': return VBAR;
363 case '&': return AMPER;
364 case '<': return LESS;
365 case '>': return GREATER;
366 case '=': return EQUAL;
367 case '.': return DOT;
368 case '%': return PERCENT;
369 case '`': return BACKQUOTE;
370 case '{': return LBRACE;
371 case '}': return RBRACE;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000372 case '^': return CIRCUMFLEX;
373 case '~': return TILDE;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000374 default: return OP;
375 }
376}
377
378
Guido van Rossumfbab9051991-10-20 20:25:03 +0000379int
380tok_2char(c1, c2)
381 int c1, c2;
382{
383 switch (c1) {
384 case '=':
385 switch (c2) {
386 case '=': return EQEQUAL;
387 }
388 break;
389 case '!':
390 switch (c2) {
391 case '=': return NOTEQUAL;
392 }
393 break;
394 case '<':
395 switch (c2) {
396 case '>': return NOTEQUAL;
397 case '=': return LESSEQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000398 case '<': return LEFTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000399 }
400 break;
401 case '>':
402 switch (c2) {
403 case '=': return GREATEREQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000404 case '>': return RIGHTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000405 }
406 break;
Guido van Rossumf595fde1996-01-12 01:31:58 +0000407 case '*':
408 switch (c2) {
409 case '*': return DOUBLESTAR;
410 }
411 break;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000412 }
413 return OP;
414}
415
416
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000417/* Get next token, after space stripping etc. */
418
419int
420tok_get(tok, p_start, p_end)
421 register struct tok_state *tok; /* In/out: tokenizer state */
422 char **p_start, **p_end; /* Out: point to start/end of token */
423{
424 register int c;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000425 int blankline;
426
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000427 *p_start = *p_end = NULL;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000428 nextline:
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000429 tok->start = NULL;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000430 blankline = 0;
431
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000432 /* Get indentation level */
433 if (tok->atbol) {
434 register int col = 0;
435 tok->atbol = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000436 for (;;) {
437 c = tok_nextc(tok);
438 if (c == ' ')
439 col++;
440 else if (c == '\t')
441 col = (col/tok->tabsize + 1) * tok->tabsize;
Guido van Rossum94d32b11995-07-07 22:27:27 +0000442 else if (c == '\014') /* Control-L (formfeed) */
443 col = 0; /* For Emacs users */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000444 else
445 break;
446 }
447 tok_backup(tok, c);
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000448 if (c == '#' || c == '\n') {
449 /* Lines with only whitespace and/or comments
450 shouldn't affect the indentation and are
451 not passed to the parser as NEWLINE tokens,
452 except *totally* empty lines in interactive
453 mode, which signal the end of a command group. */
454 if (col == 0 && c == '\n' && tok->prompt != NULL)
455 blankline = 0; /* Let it through */
456 else
457 blankline = 1; /* Ignore completely */
458 /* We can't jump back right here since we still
459 may need to skip to the end of a comment */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000460 }
Guido van Rossuma849b831993-05-12 11:35:44 +0000461 if (!blankline && tok->level == 0) {
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000462 if (col == tok->indstack[tok->indent]) {
463 /* No change */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000464 }
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000465 else if (col > tok->indstack[tok->indent]) {
466 /* Indent -- always one */
467 if (tok->indent+1 >= MAXINDENT) {
468 fprintf(stderr, "excessive indent\n");
469 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000470 tok->cur = tok->inp;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000471 return ERRORTOKEN;
472 }
473 tok->pendin++;
474 tok->indstack[++tok->indent] = col;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000475 }
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000476 else /* col < tok->indstack[tok->indent] */ {
477 /* Dedent -- any number, must be consistent */
478 while (tok->indent > 0 &&
479 col < tok->indstack[tok->indent]) {
480 tok->indent--;
481 tok->pendin--;
482 }
483 if (col != tok->indstack[tok->indent]) {
484 fprintf(stderr, "inconsistent dedent\n");
485 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000486 tok->cur = tok->inp;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000487 return ERRORTOKEN;
488 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000489 }
490 }
491 }
492
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000493 tok->start = tok->cur;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000494
495 /* Return pending indents/dedents */
496 if (tok->pendin != 0) {
497 if (tok->pendin < 0) {
498 tok->pendin++;
499 return DEDENT;
500 }
501 else {
502 tok->pendin--;
503 return INDENT;
504 }
505 }
506
507 again:
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000508 tok->start = NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000509 /* Skip spaces */
510 do {
511 c = tok_nextc(tok);
Guido van Rossum94d32b11995-07-07 22:27:27 +0000512 } while (c == ' ' || c == '\t' || c == '\014');
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000513
514 /* Set start of current token */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000515 tok->start = tok->cur - 1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000516
517 /* Skip comment */
518 if (c == '#') {
519 /* Hack to allow overriding the tabsize in the file.
520 This is also recognized by vi, when it occurs near the
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000521 beginning or end of the file. (Will vi never die...?)
522 For Python it must be at the beginning of the file! */
Guido van Rossum1a817c01994-09-19 08:06:25 +0000523 /* XXX The real vi syntax is actually different :-( */
524 /* XXX Should recognize Emacs syntax, too */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000525 int x;
Guido van Rossum1a817c01994-09-19 08:06:25 +0000526 if (sscanf(tok->cur,
Guido van Rossum3f5da241990-12-20 15:06:42 +0000527 " vi:set tabsize=%d:", &x) == 1 &&
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000528 x >= 1 && x <= 40) {
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000529 /* fprintf(stderr, "# vi:set tabsize=%d:\n", x); */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000530 tok->tabsize = x;
531 }
532 do {
533 c = tok_nextc(tok);
534 } while (c != EOF && c != '\n');
535 }
536
537 /* Check for EOF and errors now */
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000538 if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000539 return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000540 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000541
542 /* Identifier (most frequent token!) */
543 if (isalpha(c) || c == '_') {
Guido van Rossum24dacb31997-04-06 03:46:20 +0000544 c = tok_nextc(tok);
545 if (c == '"' || c == '\'')
546 goto letter_quote;
547 while (isalnum(c) || c == '_') {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000548 c = tok_nextc(tok);
Guido van Rossum24dacb31997-04-06 03:46:20 +0000549 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000550 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000551 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000552 *p_end = tok->cur;
553 return NAME;
554 }
555
556 /* Newline */
557 if (c == '\n') {
558 tok->atbol = 1;
Guido van Rossuma849b831993-05-12 11:35:44 +0000559 if (blankline || tok->level > 0)
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000560 goto nextline;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000561 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000562 *p_end = tok->cur - 1; /* Leave '\n' out of the string */
563 return NEWLINE;
564 }
565
Guido van Rossum2d45be11997-04-11 19:16:25 +0000566#ifdef macintosh
567 if (c == '\r') {
568 fprintf(stderr, "File contains \\r characters (incorrect line endings?)\n");
569 tok->done = E_TOKEN;
570 tok->cur = tok->inp;
571 return ERRORTOKEN;
572 }
573#endif
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000574 /* Period or number starting with period? */
575 if (c == '.') {
576 c = tok_nextc(tok);
577 if (isdigit(c)) {
578 goto fraction;
579 }
580 else {
581 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000582 *p_start = tok->start;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000583 *p_end = tok->cur;
584 return DOT;
585 }
586 }
Guido van Rossumf595fde1996-01-12 01:31:58 +0000587
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000588 /* Number */
589 if (isdigit(c)) {
590 if (c == '0') {
591 /* Hex or octal */
592 c = tok_nextc(tok);
593 if (c == '.')
594 goto fraction;
Guido van Rossumf595fde1996-01-12 01:31:58 +0000595#ifndef WITHOUT_COMPLEX
Guido van Rossumfaa436c1996-01-26 18:59:07 +0000596 if (c == 'j' || c == 'J')
Guido van Rossumf595fde1996-01-12 01:31:58 +0000597 goto imaginary;
598#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000599 if (c == 'x' || c == 'X') {
600 /* Hex */
601 do {
602 c = tok_nextc(tok);
603 } while (isxdigit(c));
604 }
605 else {
Guido van Rossum94309451991-12-10 14:01:05 +0000606 /* XXX This is broken! E.g.,
607 09.9 should be accepted as float! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000608 /* Octal; c is first char of it */
609 /* There's no 'isoctdigit' macro, sigh */
610 while ('0' <= c && c < '8') {
611 c = tok_nextc(tok);
612 }
613 }
Guido van Rossumf023c461991-05-05 20:16:20 +0000614 if (c == 'l' || c == 'L')
615 c = tok_nextc(tok);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000616 }
617 else {
618 /* Decimal */
619 do {
620 c = tok_nextc(tok);
621 } while (isdigit(c));
Guido van Rossumf023c461991-05-05 20:16:20 +0000622 if (c == 'l' || c == 'L')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000623 c = tok_nextc(tok);
Guido van Rossumf023c461991-05-05 20:16:20 +0000624 else {
625 /* Accept floating point numbers.
626 XXX This accepts incomplete things like
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000627 XXX 12e or 1e+; worry run-time */
Guido van Rossumf023c461991-05-05 20:16:20 +0000628 if (c == '.') {
629 fraction:
630 /* Fraction */
631 do {
632 c = tok_nextc(tok);
633 } while (isdigit(c));
634 }
635 if (c == 'e' || c == 'E') {
636 /* Exponent part */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000637 c = tok_nextc(tok);
Guido van Rossumf023c461991-05-05 20:16:20 +0000638 if (c == '+' || c == '-')
639 c = tok_nextc(tok);
640 while (isdigit(c)) {
641 c = tok_nextc(tok);
642 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000643 }
Guido van Rossumf595fde1996-01-12 01:31:58 +0000644#ifndef WITHOUT_COMPLEX
Guido van Rossumfaa436c1996-01-26 18:59:07 +0000645 if (c == 'j' || c == 'J')
Guido van Rossumf595fde1996-01-12 01:31:58 +0000646 /* Imaginary part */
647 imaginary:
648 c = tok_nextc(tok);
649#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000650 }
651 }
652 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000653 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000654 *p_end = tok->cur;
655 return NUMBER;
656 }
Guido van Rossum24dacb31997-04-06 03:46:20 +0000657
658 letter_quote:
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000659 /* String */
660 if (c == '\'' || c == '"') {
Guido van Rossum24dacb31997-04-06 03:46:20 +0000661 char *quote2 = tok->cur+1;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000662 int quote = c;
663 int triple = 0;
664 int tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000665 for (;;) {
666 c = tok_nextc(tok);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000667 if (c == '\n') {
668 if (!triple) {
669 tok->done = E_TOKEN;
670 tok_backup(tok, c);
671 return ERRORTOKEN;
672 }
673 tripcount = 0;
674 }
675 else if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000676 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000677 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000678 return ERRORTOKEN;
679 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000680 else if (c == quote) {
681 tripcount++;
Guido van Rossum24dacb31997-04-06 03:46:20 +0000682 if (tok->cur == quote2) {
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000683 c = tok_nextc(tok);
684 if (c == quote) {
685 triple = 1;
686 tripcount = 0;
687 continue;
688 }
689 tok_backup(tok, c);
690 }
691 if (!triple || tripcount == 3)
692 break;
693 }
694 else if (c == '\\') {
695 tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000696 c = tok_nextc(tok);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000697 if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000698 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000699 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000700 return ERRORTOKEN;
701 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000702 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000703 else
704 tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000705 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000706 *p_start = tok->start;
Guido van Rossum8054fad1993-10-26 15:19:44 +0000707 *p_end = tok->cur;
708 return STRING;
709 }
710
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000711 /* Line continuation */
712 if (c == '\\') {
713 c = tok_nextc(tok);
714 if (c != '\n') {
715 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000716 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000717 return ERRORTOKEN;
718 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000719 goto again; /* Read next line */
720 }
721
Guido van Rossumfbab9051991-10-20 20:25:03 +0000722 /* Check for two-character token */
723 {
724 int c2 = tok_nextc(tok);
725 int token = tok_2char(c, c2);
726 if (token != OP) {
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000727 *p_start = tok->start;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000728 *p_end = tok->cur;
729 return token;
730 }
731 tok_backup(tok, c2);
732 }
733
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000734 /* Keep track of parentheses nesting level */
Guido van Rossuma849b831993-05-12 11:35:44 +0000735 switch (c) {
736 case '(':
737 case '[':
738 case '{':
739 tok->level++;
740 break;
741 case ')':
742 case ']':
743 case '}':
744 tok->level--;
745 break;
746 }
747
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000748 /* Punctuation character */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000749 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000750 *p_end = tok->cur;
751 return tok_1char(c);
752}
753
754
#ifdef Py_DEBUG

/* Debugging aid: print the name of a token type and, for the token
   types listed below, the token text from start up to (not including)
   end in parentheses. */

void
tok_dump(type, start, end)
	int type;
	char *start, *end;
{
	printf("%s", tok_name[type]);
	switch (type) {
	case NAME:
	case NUMBER:
	case STRING:
	case OP:
		printf("(%.*s)", (int)(end - start), start);
		break;
	default:
		break;
	}
}

#endif