blob: e4b058e08577ee78944dce7597848573130b870e [file] [log] [blame]
/***********************************************************
Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
The Netherlands.

                        All Rights Reserved

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of Stichting Mathematisch
Centrum or CWI or Corporation for National Research Initiatives or
CNRI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.

While CWI is the initial source for this software, a modified version
is made available by the Corporation for National Research Initiatives
(CNRI) at the Internet address ftp://ftp.python.org.

STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.

******************************************************************/
31
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000032/* Tokenizer implementation */
33
Guido van Rossum3f5da241990-12-20 15:06:42 +000034#include "pgenheaders.h"
35
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000036#include <ctype.h>
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000037
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038#include "tokenizer.h"
39#include "errcode.h"
40
Guido van Rossum86bea461997-04-29 21:03:06 +000041extern char *PyOS_Readline Py_PROTO((char *));
Guido van Rossumf4b1a641994-08-29 12:43:07 +000042/* Return malloc'ed string including trailing \n;
43 empty malloc'ed string for EOF;
44 NULL if interrupted */
45
/* Width of a tab stop used when computing indentation columns.
   Don't ever change this -- it would break the portability of Python code */
#define TABSIZE 8

/* Py_CHARMASK(c): turn a char that may be signed into a nonnegative int.
   XXX This assumes characters are 8 bits wide */
#ifndef __CHAR_UNSIGNED__
#define Py_CHARMASK(c) ((c) & 0xff)
#else
#define Py_CHARMASK(c) (c)
#endif
56
Guido van Rossum3f5da241990-12-20 15:06:42 +000057/* Forward */
Guido van Rossum86bea461997-04-29 21:03:06 +000058static struct tok_state *tok_new Py_PROTO((void));
59static int tok_nextc Py_PROTO((struct tok_state *tok));
60static void tok_backup Py_PROTO((struct tok_state *tok, int c));
Guido van Rossum3f5da241990-12-20 15:06:42 +000061
/* Printable names of the token types, indexed by token number.
   This table must match the #defines in token.h! */

char *_PyParser_TokenNames[] = {
	/* Structural tokens */
	"ENDMARKER", "NAME", "NUMBER", "STRING",
	"NEWLINE", "INDENT", "DEDENT",
	/* Single-character operators and delimiters */
	"LPAR", "RPAR", "LSQB", "RSQB",
	"COLON", "COMMA", "SEMI",
	"PLUS", "MINUS", "STAR", "SLASH",
	"VBAR", "AMPER", "LESS", "GREATER",
	"EQUAL", "DOT", "PERCENT", "BACKQUOTE",
	"LBRACE", "RBRACE",
	/* Two-character operators, plus the later single-character additions */
	"EQEQUAL", "NOTEQUAL", "LESSEQUAL", "GREATEREQUAL",
	"TILDE", "CIRCUMFLEX",
	"LEFTSHIFT", "RIGHTSHIFT", "DOUBLESTAR",
	/* Catch-all and sentinels */
	"OP",
	"<ERRORTOKEN>",
	"<N_TOKENS>"
};
107
108
109/* Create and initialize a new tok_state structure */
110
111static struct tok_state *
112tok_new()
113{
Guido van Rossum86bea461997-04-29 21:03:06 +0000114 struct tok_state *tok = PyMem_NEW(struct tok_state, 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000115 if (tok == NULL)
116 return NULL;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000117 tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000118 tok->done = E_OK;
119 tok->fp = NULL;
120 tok->tabsize = TABSIZE;
121 tok->indent = 0;
122 tok->indstack[0] = 0;
123 tok->atbol = 1;
124 tok->pendin = 0;
125 tok->prompt = tok->nextprompt = NULL;
126 tok->lineno = 0;
Guido van Rossuma849b831993-05-12 11:35:44 +0000127 tok->level = 0;
Guido van Rossum926f13a1998-04-09 21:38:06 +0000128 tok->filename = NULL;
129 tok->altwarning = 0;
130 tok->alterror = 0;
131 tok->alttabsize = 1;
132 tok->altindstack[0] = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000133 return tok;
134}
135
136
137/* Set up tokenizer for string */
138
139struct tok_state *
Guido van Rossum86bea461997-04-29 21:03:06 +0000140PyTokenizer_FromString(str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000141 char *str;
142{
143 struct tok_state *tok = tok_new();
144 if (tok == NULL)
145 return NULL;
Guido van Rossum1a817c01994-09-19 08:06:25 +0000146 tok->buf = tok->cur = tok->end = tok->inp = str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000147 return tok;
148}
149
150
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000151/* Set up tokenizer for file */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152
153struct tok_state *
Guido van Rossum86bea461997-04-29 21:03:06 +0000154PyTokenizer_FromFile(fp, ps1, ps2)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000155 FILE *fp;
156 char *ps1, *ps2;
157{
158 struct tok_state *tok = tok_new();
159 if (tok == NULL)
160 return NULL;
Guido van Rossum86bea461997-04-29 21:03:06 +0000161 if ((tok->buf = PyMem_NEW(char, BUFSIZ)) == NULL) {
162 PyMem_DEL(tok);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000163 return NULL;
164 }
165 tok->cur = tok->inp = tok->buf;
166 tok->end = tok->buf + BUFSIZ;
167 tok->fp = fp;
168 tok->prompt = ps1;
169 tok->nextprompt = ps2;
170 return tok;
171}
172
173
174/* Free a tok_state structure */
175
176void
Guido van Rossum86bea461997-04-29 21:03:06 +0000177PyTokenizer_Free(tok)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000178 struct tok_state *tok;
179{
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000180 if (tok->fp != NULL && tok->buf != NULL)
Guido van Rossum86bea461997-04-29 21:03:06 +0000181 PyMem_DEL(tok->buf);
182 PyMem_DEL(tok);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000183}
184
185
186/* Get next char, updating state; error code goes into tok->done */
187
188static int
189tok_nextc(tok)
190 register struct tok_state *tok;
191{
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000192 for (;;) {
Guido van Rossum1a817c01994-09-19 08:06:25 +0000193 if (tok->cur != tok->inp) {
Guido van Rossumcf57d8b1998-01-19 22:07:46 +0000194 return Py_CHARMASK(*tok->cur++); /* Fast path */
Guido van Rossum1a817c01994-09-19 08:06:25 +0000195 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000196 if (tok->done != E_OK)
197 return EOF;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000198 if (tok->fp == NULL) {
Guido van Rossum1a817c01994-09-19 08:06:25 +0000199 char *end = strchr(tok->inp, '\n');
200 if (end != NULL)
201 end++;
202 else {
203 end = strchr(tok->inp, '\0');
204 if (end == tok->inp) {
205 tok->done = E_EOF;
206 return EOF;
207 }
208 }
209 if (tok->start == NULL)
210 tok->buf = tok->cur;
211 tok->lineno++;
212 tok->inp = end;
Guido van Rossumcf57d8b1998-01-19 22:07:46 +0000213 return Py_CHARMASK(*tok->cur++);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000214 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000215 if (tok->prompt != NULL) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000216 char *new = PyOS_Readline(tok->prompt);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000217 if (tok->nextprompt != NULL)
218 tok->prompt = tok->nextprompt;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000219 if (new == NULL)
220 tok->done = E_INTR;
221 else if (*new == '\0') {
222 free(new);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000223 tok->done = E_EOF;
224 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000225 else if (tok->start != NULL) {
226 int start = tok->start - tok->buf;
227 int oldlen = tok->cur - tok->buf;
228 int newlen = oldlen + strlen(new);
229 char *buf = realloc(tok->buf, newlen+1);
230 tok->lineno++;
231 if (buf == NULL) {
232 free(tok->buf);
Guido van Rossum588633d1994-12-30 15:46:02 +0000233 tok->buf = NULL;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000234 free(new);
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000235 tok->done = E_NOMEM;
236 return EOF;
237 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000238 tok->buf = buf;
239 tok->cur = tok->buf + oldlen;
240 strcpy(tok->buf + oldlen, new);
241 free(new);
242 tok->inp = tok->buf + newlen;
243 tok->end = tok->inp + 1;
244 tok->start = tok->buf + start;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000245 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000246 else {
247 tok->lineno++;
248 if (tok->buf != NULL)
249 free(tok->buf);
250 tok->buf = new;
251 tok->cur = tok->buf;
252 tok->inp = strchr(tok->buf, '\0');
253 tok->end = tok->inp + 1;
254 }
255 }
256 else {
257 int done = 0;
258 int cur = 0;
Guido van Rossum2e96eb91995-06-14 18:26:02 +0000259 char *pt;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000260 if (tok->start == NULL) {
261 if (tok->buf == NULL) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000262 tok->buf = PyMem_NEW(char, BUFSIZ);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000263 if (tok->buf == NULL) {
264 tok->done = E_NOMEM;
265 return EOF;
266 }
267 tok->end = tok->buf + BUFSIZ;
268 }
269 if (fgets(tok->buf, (int)(tok->end - tok->buf),
270 tok->fp) == NULL) {
271 tok->done = E_EOF;
272 done = 1;
273 }
274 else {
275 tok->done = E_OK;
276 tok->inp = strchr(tok->buf, '\0');
277 done = tok->inp[-1] == '\n';
278 }
279 }
280 else {
281 cur = tok->cur - tok->buf;
Guido van Rossum78c05351995-01-17 16:12:13 +0000282 if (feof(tok->fp)) {
283 tok->done = E_EOF;
284 done = 1;
285 }
286 else
287 tok->done = E_OK;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000288 }
289 tok->lineno++;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000290 /* Read until '\n' or EOF */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000291 while (!done) {
292 int curstart = tok->start == NULL ? -1 :
293 tok->start - tok->buf;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000294 int curvalid = tok->inp - tok->buf;
Guido van Rossum3f6bb861995-09-21 20:36:34 +0000295 int newsize = curvalid + BUFSIZ;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000296 char *newbuf = tok->buf;
Guido van Rossum86bea461997-04-29 21:03:06 +0000297 PyMem_RESIZE(newbuf, char, newsize);
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000298 if (newbuf == NULL) {
299 tok->done = E_NOMEM;
300 tok->cur = tok->inp;
301 return EOF;
302 }
303 tok->buf = newbuf;
304 tok->inp = tok->buf + curvalid;
305 tok->end = tok->buf + newsize;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000306 tok->start = curstart < 0 ? NULL :
307 tok->buf + curstart;
308 if (fgets(tok->inp,
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000309 (int)(tok->end - tok->inp),
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000310 tok->fp) == NULL) {
311 /* Last line does not end in \n,
312 fake one */
313 strcpy(tok->inp, "\n");
314 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000315 tok->inp = strchr(tok->inp, '\0');
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000316 done = tok->inp[-1] == '\n';
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000317 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000318 tok->cur = tok->buf + cur;
Guido van Rossum2d45be11997-04-11 19:16:25 +0000319#ifndef macintosh
Guido van Rossum2e96eb91995-06-14 18:26:02 +0000320 /* replace "\r\n" with "\n" */
Guido van Rossum2d45be11997-04-11 19:16:25 +0000321 /* For Mac we leave the \r, giving a syntax error */
Guido van Rossum2e96eb91995-06-14 18:26:02 +0000322 pt = tok->inp - 2;
323 if (pt >= tok->buf && *pt == '\r') {
324 *pt++ = '\n';
325 *pt = '\0';
326 tok->inp = pt;
327 }
Guido van Rossum2d45be11997-04-11 19:16:25 +0000328#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000329 }
330 if (tok->done != E_OK) {
331 if (tok->prompt != NULL)
Guido van Rossum6e73bf41998-08-25 18:13:04 +0000332 PySys_WriteStderr("\n");
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000333 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000334 return EOF;
335 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000336 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000337 /*NOTREACHED*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000338}
339
340
341/* Back-up one character */
342
343static void
344tok_backup(tok, c)
345 register struct tok_state *tok;
346 register int c;
347{
348 if (c != EOF) {
Guido van Rossum588633d1994-12-30 15:46:02 +0000349 if (--tok->cur < tok->buf)
Guido van Rossum86bea461997-04-29 21:03:06 +0000350 Py_FatalError("tok_backup: begin of buffer");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000351 if (*tok->cur != c)
352 *tok->cur = c;
353 }
354}
355
356
357/* Return the token corresponding to a single character */
358
359int
Guido van Rossum86bea461997-04-29 21:03:06 +0000360PyToken_OneChar(c)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000361 int c;
362{
363 switch (c) {
364 case '(': return LPAR;
365 case ')': return RPAR;
366 case '[': return LSQB;
367 case ']': return RSQB;
368 case ':': return COLON;
369 case ',': return COMMA;
370 case ';': return SEMI;
371 case '+': return PLUS;
372 case '-': return MINUS;
373 case '*': return STAR;
374 case '/': return SLASH;
375 case '|': return VBAR;
376 case '&': return AMPER;
377 case '<': return LESS;
378 case '>': return GREATER;
379 case '=': return EQUAL;
380 case '.': return DOT;
381 case '%': return PERCENT;
382 case '`': return BACKQUOTE;
383 case '{': return LBRACE;
384 case '}': return RBRACE;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000385 case '^': return CIRCUMFLEX;
386 case '~': return TILDE;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000387 default: return OP;
388 }
389}
390
391
Guido van Rossumfbab9051991-10-20 20:25:03 +0000392int
Guido van Rossum86bea461997-04-29 21:03:06 +0000393PyToken_TwoChars(c1, c2)
Guido van Rossumfbab9051991-10-20 20:25:03 +0000394 int c1, c2;
395{
396 switch (c1) {
397 case '=':
398 switch (c2) {
399 case '=': return EQEQUAL;
400 }
401 break;
402 case '!':
403 switch (c2) {
404 case '=': return NOTEQUAL;
405 }
406 break;
407 case '<':
408 switch (c2) {
409 case '>': return NOTEQUAL;
410 case '=': return LESSEQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000411 case '<': return LEFTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000412 }
413 break;
414 case '>':
415 switch (c2) {
416 case '=': return GREATEREQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000417 case '>': return RIGHTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000418 }
419 break;
Guido van Rossumf595fde1996-01-12 01:31:58 +0000420 case '*':
421 switch (c2) {
422 case '*': return DOUBLESTAR;
423 }
424 break;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000425 }
426 return OP;
427}
428
429
Guido van Rossum926f13a1998-04-09 21:38:06 +0000430static int
431indenterror(tok)
432 struct tok_state *tok;
433{
434 if (tok->alterror) {
435 tok->done = E_INDENT;
436 tok->cur = tok->inp;
437 return 1;
438 }
439 if (tok->altwarning) {
Guido van Rossum6e73bf41998-08-25 18:13:04 +0000440 PySys_WriteStderr("%s: inconsistent tab/space usage\n",
Guido van Rossum926f13a1998-04-09 21:38:06 +0000441 tok->filename);
442 tok->altwarning = 0;
443 }
444 return 0;
445}
446
447
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000448/* Get next token, after space stripping etc. */
449
/* Scan and return the type of the next token.

   tok      In/out: tokenizer state.
   p_start  Out: set to the first character of the token text.
   p_end    Out: set to one past the last character of the token text.

   Both output pointers are set to NULL on entry and stay NULL for
   tokens returned without text (INDENT, DEDENT, ENDMARKER and
   ERRORTOKEN).  At end of input ENDMARKER is returned if tok->done is
   E_EOF; any other failure returns ERRORTOKEN, with tok->done holding
   the error code (E_TOKEN or E_INDENT when detected here).

   Lines holding only whitespace and/or a comment produce no tokens,
   and NEWLINE is suppressed while inside brackets (tok->level > 0). */
450int
Guido van Rossum86bea461997-04-29 21:03:06 +0000451PyTokenizer_Get(tok, p_start, p_end)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000452 register struct tok_state *tok; /* In/out: tokenizer state */
453 char **p_start, **p_end; /* Out: point to start/end of token */
454{
455 register int c;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000456 int blankline;
457
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000458 *p_start = *p_end = NULL;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000459 nextline:
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000460 tok->start = NULL;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000461 blankline = 0;
462
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000463 /* Get indentation level */
464 if (tok->atbol) {
465 register int col = 0;
Guido van Rossum926f13a1998-04-09 21:38:06 +0000466 register int altcol = 0;
	/* The indentation column is computed twice: col uses tok->tabsize
	   and altcol uses tok->alttabsize.  Further down, a change that
	   shows up in one measure but not in the other is passed to
	   indenterror(). */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000467 tok->atbol = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000468 for (;;) {
469 c = tok_nextc(tok);
470 if (c == ' ')
Guido van Rossum926f13a1998-04-09 21:38:06 +0000471 col++, altcol++;
472 else if (c == '\t') {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000473 col = (col/tok->tabsize + 1) * tok->tabsize;
Guido van Rossum926f13a1998-04-09 21:38:06 +0000474 altcol = (altcol/tok->alttabsize + 1)
475 * tok->alttabsize;
476 }
Guido van Rossum94d32b11995-07-07 22:27:27 +0000477 else if (c == '\014') /* Control-L (formfeed) */
Guido van Rossum926f13a1998-04-09 21:38:06 +0000478 col = altcol = 0; /* For Emacs users */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000479 else
480 break;
481 }
482 tok_backup(tok, c);
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000483 if (c == '#' || c == '\n') {
484 /* Lines with only whitespace and/or comments
485 shouldn't affect the indentation and are
486 not passed to the parser as NEWLINE tokens,
487 except *totally* empty lines in interactive
488 mode, which signal the end of a command group. */
489 if (col == 0 && c == '\n' && tok->prompt != NULL)
490 blankline = 0; /* Let it through */
491 else
492 blankline = 1; /* Ignore completely */
493 /* We can't jump back right here since we still
494 may need to skip to the end of a comment */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000495 }
	/* The indentation stack is only updated for non-blank lines that
	   are outside all brackets (tok->level == 0). */
Guido van Rossuma849b831993-05-12 11:35:44 +0000496 if (!blankline && tok->level == 0) {
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000497 if (col == tok->indstack[tok->indent]) {
498 /* No change */
Guido van Rossum926f13a1998-04-09 21:38:06 +0000499 if (altcol != tok->altindstack[tok->indent]) {
500 if (indenterror(tok))
501 return ERRORTOKEN;
502 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000503 }
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000504 else if (col > tok->indstack[tok->indent]) {
505 /* Indent -- always one */
506 if (tok->indent+1 >= MAXINDENT) {
Guido van Rossum6e73bf41998-08-25 18:13:04 +0000507 PySys_WriteStderr(
508 "excessive indent\n");
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000509 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000510 tok->cur = tok->inp;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000511 return ERRORTOKEN;
512 }
Guido van Rossum926f13a1998-04-09 21:38:06 +0000513 if (altcol <= tok->altindstack[tok->indent]) {
514 if (indenterror(tok))
515 return ERRORTOKEN;
516 }
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000517 tok->pendin++;
518 tok->indstack[++tok->indent] = col;
Guido van Rossum926f13a1998-04-09 21:38:06 +0000519 tok->altindstack[tok->indent] = altcol;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000520 }
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000521 else /* col < tok->indstack[tok->indent] */ {
522 /* Dedent -- any number, must be consistent */
523 while (tok->indent > 0 &&
524 col < tok->indstack[tok->indent]) {
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000525 tok->pendin--;
Guido van Rossum54758fa1998-02-16 22:18:00 +0000526 tok->indent--;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000527 }
528 if (col != tok->indstack[tok->indent]) {
Guido van Rossumd5516bc1998-12-04 18:51:01 +0000529 PySys_WriteStderr(
Guido van Rossum86bea461997-04-29 21:03:06 +0000530 "inconsistent dedent\n");
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000531 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000532 tok->cur = tok->inp;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000533 return ERRORTOKEN;
534 }
Guido van Rossum926f13a1998-04-09 21:38:06 +0000535 if (altcol != tok->altindstack[tok->indent]) {
536 if (indenterror(tok))
537 return ERRORTOKEN;
538 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000539 }
540 }
541 }
542
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000543 tok->start = tok->cur;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000544
545 /* Return pending indents/dedents */
	/* tok->pendin > 0 counts INDENT tokens still owed to the caller,
	   tok->pendin < 0 counts DEDENT tokens; one is handed out per call
	   until the counter is back at zero. */
546 if (tok->pendin != 0) {
547 if (tok->pendin < 0) {
548 tok->pendin++;
549 return DEDENT;
550 }
551 else {
552 tok->pendin--;
553 return INDENT;
554 }
555 }
556
557 again:
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000558 tok->start = NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000559 /* Skip spaces */
560 do {
561 c = tok_nextc(tok);
Guido van Rossum94d32b11995-07-07 22:27:27 +0000562 } while (c == ' ' || c == '\t' || c == '\014');
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000563
564 /* Set start of current token */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000565 tok->start = tok->cur - 1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000566
Guido van Rossumab5ca152000-03-31 00:52:27 +0000567 /* Skip comment, while looking for tab-setting magic */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000568 if (c == '#') {
Guido van Rossumab5ca152000-03-31 00:52:27 +0000569 static char *tabforms[] = {
570 "tab-width:", /* Emacs */
571 ":tabstop=", /* vim, full form */
572 ":ts=", /* vim, abbreviated form */
573 "set tabsize=", /* will vi never die? */
574 /* more templates can be added here to support other editors */
575 };
	/* Up to sizeof(cbuf)-1 characters following the '#' are copied into
	   cbuf and searched for each tabforms[] pattern.  A number from 1
	   to 40 after a pattern becomes the new tok->tabsize.  Whatever is
	   left of the comment is then skipped without being examined. */
576 char cbuf[80];
577 char *tp, **cp;
578 tp = cbuf;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000579 do {
Guido van Rossumab5ca152000-03-31 00:52:27 +0000580 *tp++ = c = tok_nextc(tok);
581 } while (c != EOF && c != '\n' &&
582 tp - cbuf + 1 < sizeof(cbuf));
583 *tp = '\0';
584 for (cp = tabforms;
585 cp < tabforms + sizeof(tabforms)/sizeof(tabforms[0]);
586 cp++) {
587 if ((tp = strstr(cbuf, *cp))) {
588 int newsize = atoi(tp + strlen(*cp));
589
590 if (newsize >= 1 && newsize <= 40) {
591 tok->tabsize = newsize;
Guido van Rossum6c981ad2000-04-03 23:02:17 +0000592 if (Py_VerboseFlag)
593 PySys_WriteStderr(
Guido van Rossumab5ca152000-03-31 00:52:27 +0000594 "Tab size set to %d\n",
595 newsize);
596 }
597 }
598 }
599 while (c != EOF && c != '\n')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000600 c = tok_nextc(tok);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000601 }
602
603 /* Check for EOF and errors now */
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000604 if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000605 return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000606 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000607
608 /* Identifier (most frequent token!) */
609 if (isalpha(c) || c == '_') {
Guido van Rossum86016cb2000-03-10 22:56:54 +0000610 /* Process r"", u"" and ur"" */
Guido van Rossum5026cb41997-04-25 17:32:00 +0000611 switch (c) {
612 case 'r':
613 case 'R':
614 c = tok_nextc(tok);
615 if (c == '"' || c == '\'')
616 goto letter_quote;
Guido van Rossum86016cb2000-03-10 22:56:54 +0000617 break;
618 case 'u':
619 case 'U':
620 c = tok_nextc(tok);
621 if (c == 'r' || c == 'R')
622 c = tok_nextc(tok);
623 if (c == '"' || c == '\'')
624 goto letter_quote;
625 break;
Guido van Rossum5026cb41997-04-25 17:32:00 +0000626 }
Guido van Rossum24dacb31997-04-06 03:46:20 +0000627 while (isalnum(c) || c == '_') {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000628 c = tok_nextc(tok);
Guido van Rossum24dacb31997-04-06 03:46:20 +0000629 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000630 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000631 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000632 *p_end = tok->cur;
633 return NAME;
634 }
635
636 /* Newline */
637 if (c == '\n') {
638 tok->atbol = 1;
Guido van Rossuma849b831993-05-12 11:35:44 +0000639 if (blankline || tok->level > 0)
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000640 goto nextline;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000641 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000642 *p_end = tok->cur - 1; /* Leave '\n' out of the string */
643 return NEWLINE;
644 }
645
Guido van Rossum2d45be11997-04-11 19:16:25 +0000646#ifdef macintosh
647 if (c == '\r') {
Guido van Rossum6e73bf41998-08-25 18:13:04 +0000648 PySys_WriteStderr(
Guido van Rossum86bea461997-04-29 21:03:06 +0000649 "File contains \\r characters (incorrect line endings?)\n");
Guido van Rossum2d45be11997-04-11 19:16:25 +0000650 tok->done = E_TOKEN;
651 tok->cur = tok->inp;
652 return ERRORTOKEN;
653 }
654#endif
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000655 /* Period or number starting with period? */
656 if (c == '.') {
657 c = tok_nextc(tok);
658 if (isdigit(c)) {
659 goto fraction;
660 }
661 else {
662 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000663 *p_start = tok->start;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000664 *p_end = tok->cur;
665 return DOT;
666 }
667 }
Guido van Rossumf595fde1996-01-12 01:31:58 +0000668
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000669 /* Number */
670 if (isdigit(c)) {
671 if (c == '0') {
672 /* Hex or octal */
673 c = tok_nextc(tok);
674 if (c == '.')
675 goto fraction;
Guido van Rossumf595fde1996-01-12 01:31:58 +0000676#ifndef WITHOUT_COMPLEX
Guido van Rossumfaa436c1996-01-26 18:59:07 +0000677 if (c == 'j' || c == 'J')
Guido van Rossumf595fde1996-01-12 01:31:58 +0000678 goto imaginary;
679#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000680 if (c == 'x' || c == 'X') {
681 /* Hex */
682 do {
683 c = tok_nextc(tok);
684 } while (isxdigit(c));
685 }
686 else {
Guido van Rossum94309451991-12-10 14:01:05 +0000687 /* XXX This is broken! E.g.,
688 09.9 should be accepted as float! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000689 /* Octal; c is first char of it */
690 /* There's no 'isoctdigit' macro, sigh */
691 while ('0' <= c && c < '8') {
692 c = tok_nextc(tok);
693 }
694 }
Guido van Rossumf023c461991-05-05 20:16:20 +0000695 if (c == 'l' || c == 'L')
696 c = tok_nextc(tok);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000697 }
698 else {
699 /* Decimal */
700 do {
701 c = tok_nextc(tok);
702 } while (isdigit(c));
Guido van Rossumf023c461991-05-05 20:16:20 +0000703 if (c == 'l' || c == 'L')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000704 c = tok_nextc(tok);
Guido van Rossumf023c461991-05-05 20:16:20 +0000705 else {
706 /* Accept floating point numbers.
707 XXX This accepts incomplete things like
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000708 XXX 12e or 1e+; worry run-time */
Guido van Rossumf023c461991-05-05 20:16:20 +0000709 if (c == '.') {
710 fraction:
711 /* Fraction */
712 do {
713 c = tok_nextc(tok);
714 } while (isdigit(c));
715 }
716 if (c == 'e' || c == 'E') {
717 /* Exponent part */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000718 c = tok_nextc(tok);
Guido van Rossumf023c461991-05-05 20:16:20 +0000719 if (c == '+' || c == '-')
720 c = tok_nextc(tok);
721 while (isdigit(c)) {
722 c = tok_nextc(tok);
723 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000724 }
Guido van Rossumf595fde1996-01-12 01:31:58 +0000725#ifndef WITHOUT_COMPLEX
Guido van Rossumfaa436c1996-01-26 18:59:07 +0000726 if (c == 'j' || c == 'J')
Guido van Rossumf595fde1996-01-12 01:31:58 +0000727 /* Imaginary part */
728 imaginary:
729 c = tok_nextc(tok);
730#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000731 }
732 }
733 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000734 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000735 *p_end = tok->cur;
736 return NUMBER;
737 }
Guido van Rossum24dacb31997-04-06 03:46:20 +0000738
739 letter_quote:
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000740 /* String */
741 if (c == '\'' || c == '"') {
	/* quote2 is the value tok->cur - tok->start has right after the
	   character immediately following the opening quote has been read.
	   It identifies a closing-quote character that directly follows the
	   opener, i.e. either an empty string or the start of a
	   triple-quoted string.  tripcount counts consecutive quote
	   characters seen inside a triple-quoted string; three end it. */
Guido van Rossum35685241998-02-16 15:42:50 +0000742 int quote2 = tok->cur - tok->start + 1;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000743 int quote = c;
744 int triple = 0;
745 int tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000746 for (;;) {
747 c = tok_nextc(tok);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000748 if (c == '\n') {
749 if (!triple) {
750 tok->done = E_TOKEN;
751 tok_backup(tok, c);
752 return ERRORTOKEN;
753 }
754 tripcount = 0;
755 }
756 else if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000757 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000758 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000759 return ERRORTOKEN;
760 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000761 else if (c == quote) {
762 tripcount++;
Guido van Rossum35685241998-02-16 15:42:50 +0000763 if (tok->cur - tok->start == quote2) {
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000764 c = tok_nextc(tok);
765 if (c == quote) {
766 triple = 1;
767 tripcount = 0;
768 continue;
769 }
770 tok_backup(tok, c);
771 }
772 if (!triple || tripcount == 3)
773 break;
774 }
775 else if (c == '\\') {
776 tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000777 c = tok_nextc(tok);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000778 if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000779 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000780 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000781 return ERRORTOKEN;
782 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000783 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000784 else
785 tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000786 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000787 *p_start = tok->start;
Guido van Rossum8054fad1993-10-26 15:19:44 +0000788 *p_end = tok->cur;
789 return STRING;
790 }
791
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792 /* Line continuation */
793 if (c == '\\') {
794 c = tok_nextc(tok);
795 if (c != '\n') {
796 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000797 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000798 return ERRORTOKEN;
799 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000800 goto again; /* Read next line */
801 }
802
Guido van Rossumfbab9051991-10-20 20:25:03 +0000803 /* Check for two-character token */
804 {
805 int c2 = tok_nextc(tok);
Guido van Rossum86bea461997-04-29 21:03:06 +0000806 int token = PyToken_TwoChars(c, c2);
Guido van Rossumfbab9051991-10-20 20:25:03 +0000807 if (token != OP) {
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000808 *p_start = tok->start;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000809 *p_end = tok->cur;
810 return token;
811 }
812 tok_backup(tok, c2);
813 }
814
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000815 /* Keep track of parentheses nesting level */
	/* tok->level is tested above: while it is > 0 a '\n' yields no
	   NEWLINE token, and unless it is 0 indentation is not processed. */
Guido van Rossuma849b831993-05-12 11:35:44 +0000816 switch (c) {
817 case '(':
818 case '[':
819 case '{':
820 tok->level++;
821 break;
822 case ')':
823 case ']':
824 case '}':
825 tok->level--;
826 break;
827 }
828
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000829 /* Punctuation character */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000830 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831 *p_end = tok->cur;
Guido van Rossum86bea461997-04-29 21:03:06 +0000832 return PyToken_OneChar(c);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000833}
834
835
Guido van Rossum408027e1996-12-30 16:17:54 +0000836#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000837
838void
839tok_dump(type, start, end)
840 int type;
841 char *start, *end;
842{
Guido van Rossum86bea461997-04-29 21:03:06 +0000843 printf("%s", _PyParser_TokenNames[type]);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000844 if (type == NAME || type == NUMBER || type == STRING || type == OP)
845 printf("(%.*s)", (int)(end - start), start);
846}
847
848#endif