blob: 3337c75a1b92b3cc3d41973a3ae03754929744f9 [file] [log] [blame]
/***********************************************************
Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
The Netherlands.

                        All Rights Reserved

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of Stichting Mathematisch
Centrum or CWI or Corporation for National Research Initiatives or
CNRI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.

While CWI is the initial source for this software, a modified version
is made available by the Corporation for National Research Initiatives
(CNRI) at the Internet address ftp://ftp.python.org.

STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.

******************************************************************/
31
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000032/* Tokenizer implementation */
33
Guido van Rossum3f5da241990-12-20 15:06:42 +000034#include "pgenheaders.h"
35
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000036#include <ctype.h>
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000037
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038#include "tokenizer.h"
39#include "errcode.h"
40
Guido van Rossum86bea461997-04-29 21:03:06 +000041extern char *PyOS_Readline Py_PROTO((char *));
Guido van Rossumf4b1a641994-08-29 12:43:07 +000042/* Return malloc'ed string including trailing \n;
43 empty malloc'ed string for EOF;
44 NULL if interrupted */
45
Guido van Rossum4fe87291992-02-26 15:24:44 +000046/* Don't ever change this -- it would break the portability of Python code */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000047#define TABSIZE 8
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000048
/* Convert a possibly signed character to a nonnegative int.
   The mask keeps the low 8 bits and the cast pins the result to the
   unsigned char range, so the same definition is correct whether plain
   char is signed or unsigned and no compiler-specific macro such as
   __CHAR_UNSIGNED__ has to be consulted.  The argument is evaluated
   exactly once, so expressions with side effects (*p++) are safe. */
/* XXX This assumes characters are 8 bits wide */
#define Py_CHARMASK(c) ((unsigned char)((c) & 0xff))
56
Guido van Rossum3f5da241990-12-20 15:06:42 +000057/* Forward */
Guido van Rossum86bea461997-04-29 21:03:06 +000058static struct tok_state *tok_new Py_PROTO((void));
59static int tok_nextc Py_PROTO((struct tok_state *tok));
60static void tok_backup Py_PROTO((struct tok_state *tok, int c));
Guido van Rossum3f5da241990-12-20 15:06:42 +000061
/* Printable token names, indexed by token type.
   This table must match the #defines in token.h! */

char *_PyParser_TokenNames[] = {
    /* 0: end marker, atoms and layout tokens */
    "ENDMARKER", "NAME", "NUMBER", "STRING",
    "NEWLINE", "INDENT", "DEDENT",

    /* 7: brackets and separators */
    "LPAR", "RPAR", "LSQB", "RSQB",
    "COLON", "COMMA", "SEMI",

    /* 14: single-character operators */
    "PLUS", "MINUS", "STAR", "SLASH",
    "VBAR", "AMPER", "LESS", "GREATER",
    "EQUAL", "DOT", "PERCENT", "BACKQUOTE",
    "LBRACE", "RBRACE",

    /* 28: comparison operators */
    "EQEQUAL", "NOTEQUAL", "LESSEQUAL", "GREATEREQUAL",

    /* 32: remaining operators */
    "TILDE", "CIRCUMFLEX", "LEFTSHIFT", "RIGHTSHIFT",
    "DOUBLESTAR",

    /* 37: generic operator and the two pseudo entries */
    "OP",
    "<ERRORTOKEN>",
    "<N_TOKENS>"
};
107
108
109/* Create and initialize a new tok_state structure */
110
111static struct tok_state *
112tok_new()
113{
Guido van Rossum86bea461997-04-29 21:03:06 +0000114 struct tok_state *tok = PyMem_NEW(struct tok_state, 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000115 if (tok == NULL)
116 return NULL;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000117 tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000118 tok->done = E_OK;
119 tok->fp = NULL;
120 tok->tabsize = TABSIZE;
121 tok->indent = 0;
122 tok->indstack[0] = 0;
123 tok->atbol = 1;
124 tok->pendin = 0;
125 tok->prompt = tok->nextprompt = NULL;
126 tok->lineno = 0;
Guido van Rossuma849b831993-05-12 11:35:44 +0000127 tok->level = 0;
Guido van Rossum926f13a1998-04-09 21:38:06 +0000128 tok->filename = NULL;
129 tok->altwarning = 0;
130 tok->alterror = 0;
131 tok->alttabsize = 1;
132 tok->altindstack[0] = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000133 return tok;
134}
135
136
137/* Set up tokenizer for string */
138
139struct tok_state *
Guido van Rossum86bea461997-04-29 21:03:06 +0000140PyTokenizer_FromString(str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000141 char *str;
142{
143 struct tok_state *tok = tok_new();
144 if (tok == NULL)
145 return NULL;
Guido van Rossum1a817c01994-09-19 08:06:25 +0000146 tok->buf = tok->cur = tok->end = tok->inp = str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000147 return tok;
148}
149
150
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000151/* Set up tokenizer for file */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152
153struct tok_state *
Guido van Rossum86bea461997-04-29 21:03:06 +0000154PyTokenizer_FromFile(fp, ps1, ps2)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000155 FILE *fp;
156 char *ps1, *ps2;
157{
158 struct tok_state *tok = tok_new();
159 if (tok == NULL)
160 return NULL;
Guido van Rossum86bea461997-04-29 21:03:06 +0000161 if ((tok->buf = PyMem_NEW(char, BUFSIZ)) == NULL) {
162 PyMem_DEL(tok);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000163 return NULL;
164 }
165 tok->cur = tok->inp = tok->buf;
166 tok->end = tok->buf + BUFSIZ;
167 tok->fp = fp;
168 tok->prompt = ps1;
169 tok->nextprompt = ps2;
170 return tok;
171}
172
173
174/* Free a tok_state structure */
175
176void
Guido van Rossum86bea461997-04-29 21:03:06 +0000177PyTokenizer_Free(tok)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000178 struct tok_state *tok;
179{
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000180 if (tok->fp != NULL && tok->buf != NULL)
Guido van Rossum86bea461997-04-29 21:03:06 +0000181 PyMem_DEL(tok->buf);
182 PyMem_DEL(tok);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000183}
184
185
186/* Get next char, updating state; error code goes into tok->done */
187
188static int
189tok_nextc(tok)
190 register struct tok_state *tok;
191{
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000192 for (;;) {
Guido van Rossum1a817c01994-09-19 08:06:25 +0000193 if (tok->cur != tok->inp) {
Guido van Rossumcf57d8b1998-01-19 22:07:46 +0000194 return Py_CHARMASK(*tok->cur++); /* Fast path */
Guido van Rossum1a817c01994-09-19 08:06:25 +0000195 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000196 if (tok->done != E_OK)
197 return EOF;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000198 if (tok->fp == NULL) {
Guido van Rossum1a817c01994-09-19 08:06:25 +0000199 char *end = strchr(tok->inp, '\n');
200 if (end != NULL)
201 end++;
202 else {
203 end = strchr(tok->inp, '\0');
204 if (end == tok->inp) {
205 tok->done = E_EOF;
206 return EOF;
207 }
208 }
209 if (tok->start == NULL)
210 tok->buf = tok->cur;
211 tok->lineno++;
212 tok->inp = end;
Guido van Rossumcf57d8b1998-01-19 22:07:46 +0000213 return Py_CHARMASK(*tok->cur++);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000214 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000215 if (tok->prompt != NULL) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000216 char *new = PyOS_Readline(tok->prompt);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000217 if (tok->nextprompt != NULL)
218 tok->prompt = tok->nextprompt;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000219 if (new == NULL)
220 tok->done = E_INTR;
221 else if (*new == '\0') {
Guido van Rossumb18618d2000-05-03 23:44:39 +0000222 PyMem_FREE(new);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000223 tok->done = E_EOF;
224 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000225 else if (tok->start != NULL) {
Guido van Rossum6da34342000-06-28 22:00:02 +0000226 size_t start = tok->start - tok->buf;
227 size_t oldlen = tok->cur - tok->buf;
228 size_t newlen = oldlen + strlen(new);
Guido van Rossumb18618d2000-05-03 23:44:39 +0000229 char *buf = tok->buf;
230 PyMem_RESIZE(buf, char, newlen+1);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000231 tok->lineno++;
232 if (buf == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +0000233 PyMem_DEL(tok->buf);
Guido van Rossum588633d1994-12-30 15:46:02 +0000234 tok->buf = NULL;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000235 PyMem_FREE(new);
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000236 tok->done = E_NOMEM;
237 return EOF;
238 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000239 tok->buf = buf;
240 tok->cur = tok->buf + oldlen;
241 strcpy(tok->buf + oldlen, new);
Guido van Rossumb18618d2000-05-03 23:44:39 +0000242 PyMem_FREE(new);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000243 tok->inp = tok->buf + newlen;
244 tok->end = tok->inp + 1;
245 tok->start = tok->buf + start;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000246 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000247 else {
248 tok->lineno++;
249 if (tok->buf != NULL)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000250 PyMem_DEL(tok->buf);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000251 tok->buf = new;
252 tok->cur = tok->buf;
253 tok->inp = strchr(tok->buf, '\0');
254 tok->end = tok->inp + 1;
255 }
256 }
257 else {
258 int done = 0;
259 int cur = 0;
Guido van Rossum2e96eb91995-06-14 18:26:02 +0000260 char *pt;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000261 if (tok->start == NULL) {
262 if (tok->buf == NULL) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000263 tok->buf = PyMem_NEW(char, BUFSIZ);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000264 if (tok->buf == NULL) {
265 tok->done = E_NOMEM;
266 return EOF;
267 }
268 tok->end = tok->buf + BUFSIZ;
269 }
270 if (fgets(tok->buf, (int)(tok->end - tok->buf),
271 tok->fp) == NULL) {
272 tok->done = E_EOF;
273 done = 1;
274 }
275 else {
276 tok->done = E_OK;
277 tok->inp = strchr(tok->buf, '\0');
278 done = tok->inp[-1] == '\n';
279 }
280 }
281 else {
282 cur = tok->cur - tok->buf;
Guido van Rossum78c05351995-01-17 16:12:13 +0000283 if (feof(tok->fp)) {
284 tok->done = E_EOF;
285 done = 1;
286 }
287 else
288 tok->done = E_OK;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000289 }
290 tok->lineno++;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000291 /* Read until '\n' or EOF */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000292 while (!done) {
293 int curstart = tok->start == NULL ? -1 :
294 tok->start - tok->buf;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000295 int curvalid = tok->inp - tok->buf;
Guido van Rossum3f6bb861995-09-21 20:36:34 +0000296 int newsize = curvalid + BUFSIZ;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000297 char *newbuf = tok->buf;
Guido van Rossum86bea461997-04-29 21:03:06 +0000298 PyMem_RESIZE(newbuf, char, newsize);
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000299 if (newbuf == NULL) {
300 tok->done = E_NOMEM;
301 tok->cur = tok->inp;
302 return EOF;
303 }
304 tok->buf = newbuf;
305 tok->inp = tok->buf + curvalid;
306 tok->end = tok->buf + newsize;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000307 tok->start = curstart < 0 ? NULL :
308 tok->buf + curstart;
309 if (fgets(tok->inp,
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000310 (int)(tok->end - tok->inp),
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000311 tok->fp) == NULL) {
312 /* Last line does not end in \n,
313 fake one */
314 strcpy(tok->inp, "\n");
315 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000316 tok->inp = strchr(tok->inp, '\0');
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000317 done = tok->inp[-1] == '\n';
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000318 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000319 tok->cur = tok->buf + cur;
Guido van Rossum2d45be11997-04-11 19:16:25 +0000320#ifndef macintosh
Guido van Rossum2e96eb91995-06-14 18:26:02 +0000321 /* replace "\r\n" with "\n" */
Guido van Rossum2d45be11997-04-11 19:16:25 +0000322 /* For Mac we leave the \r, giving a syntax error */
Guido van Rossum2e96eb91995-06-14 18:26:02 +0000323 pt = tok->inp - 2;
324 if (pt >= tok->buf && *pt == '\r') {
325 *pt++ = '\n';
326 *pt = '\0';
327 tok->inp = pt;
328 }
Guido van Rossum2d45be11997-04-11 19:16:25 +0000329#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000330 }
331 if (tok->done != E_OK) {
332 if (tok->prompt != NULL)
Guido van Rossum6e73bf41998-08-25 18:13:04 +0000333 PySys_WriteStderr("\n");
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000334 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000335 return EOF;
336 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000337 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000338 /*NOTREACHED*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000339}
340
341
342/* Back-up one character */
343
344static void
345tok_backup(tok, c)
346 register struct tok_state *tok;
347 register int c;
348{
349 if (c != EOF) {
Guido van Rossum588633d1994-12-30 15:46:02 +0000350 if (--tok->cur < tok->buf)
Guido van Rossum86bea461997-04-29 21:03:06 +0000351 Py_FatalError("tok_backup: begin of buffer");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000352 if (*tok->cur != c)
353 *tok->cur = c;
354 }
355}
356
357
358/* Return the token corresponding to a single character */
359
360int
Guido van Rossum86bea461997-04-29 21:03:06 +0000361PyToken_OneChar(c)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000362 int c;
363{
364 switch (c) {
365 case '(': return LPAR;
366 case ')': return RPAR;
367 case '[': return LSQB;
368 case ']': return RSQB;
369 case ':': return COLON;
370 case ',': return COMMA;
371 case ';': return SEMI;
372 case '+': return PLUS;
373 case '-': return MINUS;
374 case '*': return STAR;
375 case '/': return SLASH;
376 case '|': return VBAR;
377 case '&': return AMPER;
378 case '<': return LESS;
379 case '>': return GREATER;
380 case '=': return EQUAL;
381 case '.': return DOT;
382 case '%': return PERCENT;
383 case '`': return BACKQUOTE;
384 case '{': return LBRACE;
385 case '}': return RBRACE;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000386 case '^': return CIRCUMFLEX;
387 case '~': return TILDE;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000388 default: return OP;
389 }
390}
391
392
Guido van Rossumfbab9051991-10-20 20:25:03 +0000393int
Guido van Rossum86bea461997-04-29 21:03:06 +0000394PyToken_TwoChars(c1, c2)
Guido van Rossumfbab9051991-10-20 20:25:03 +0000395 int c1, c2;
396{
397 switch (c1) {
398 case '=':
399 switch (c2) {
400 case '=': return EQEQUAL;
401 }
402 break;
403 case '!':
404 switch (c2) {
405 case '=': return NOTEQUAL;
406 }
407 break;
408 case '<':
409 switch (c2) {
410 case '>': return NOTEQUAL;
411 case '=': return LESSEQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000412 case '<': return LEFTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000413 }
414 break;
415 case '>':
416 switch (c2) {
417 case '=': return GREATEREQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000418 case '>': return RIGHTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000419 }
420 break;
Guido van Rossumf595fde1996-01-12 01:31:58 +0000421 case '*':
422 switch (c2) {
423 case '*': return DOUBLESTAR;
424 }
425 break;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000426 }
427 return OP;
428}
429
430
Guido van Rossum926f13a1998-04-09 21:38:06 +0000431static int
432indenterror(tok)
433 struct tok_state *tok;
434{
435 if (tok->alterror) {
436 tok->done = E_INDENT;
437 tok->cur = tok->inp;
438 return 1;
439 }
440 if (tok->altwarning) {
Guido van Rossum6e73bf41998-08-25 18:13:04 +0000441 PySys_WriteStderr("%s: inconsistent tab/space usage\n",
Guido van Rossum926f13a1998-04-09 21:38:06 +0000442 tok->filename);
443 tok->altwarning = 0;
444 }
445 return 0;
446}
447
448
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000449/* Get next token, after space stripping etc. */
450
451int
Guido van Rossum86bea461997-04-29 21:03:06 +0000452PyTokenizer_Get(tok, p_start, p_end)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000453 register struct tok_state *tok; /* In/out: tokenizer state */
454 char **p_start, **p_end; /* Out: point to start/end of token */
455{
456 register int c;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000457 int blankline;
458
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000459 *p_start = *p_end = NULL;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000460 nextline:
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000461 tok->start = NULL;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000462 blankline = 0;
463
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000464 /* Get indentation level */
465 if (tok->atbol) {
466 register int col = 0;
Guido van Rossum926f13a1998-04-09 21:38:06 +0000467 register int altcol = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000468 tok->atbol = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000469 for (;;) {
470 c = tok_nextc(tok);
471 if (c == ' ')
Guido van Rossum926f13a1998-04-09 21:38:06 +0000472 col++, altcol++;
473 else if (c == '\t') {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000474 col = (col/tok->tabsize + 1) * tok->tabsize;
Guido van Rossum926f13a1998-04-09 21:38:06 +0000475 altcol = (altcol/tok->alttabsize + 1)
476 * tok->alttabsize;
477 }
Guido van Rossum94d32b11995-07-07 22:27:27 +0000478 else if (c == '\014') /* Control-L (formfeed) */
Guido van Rossum926f13a1998-04-09 21:38:06 +0000479 col = altcol = 0; /* For Emacs users */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000480 else
481 break;
482 }
483 tok_backup(tok, c);
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000484 if (c == '#' || c == '\n') {
485 /* Lines with only whitespace and/or comments
486 shouldn't affect the indentation and are
487 not passed to the parser as NEWLINE tokens,
488 except *totally* empty lines in interactive
489 mode, which signal the end of a command group. */
490 if (col == 0 && c == '\n' && tok->prompt != NULL)
491 blankline = 0; /* Let it through */
492 else
493 blankline = 1; /* Ignore completely */
494 /* We can't jump back right here since we still
495 may need to skip to the end of a comment */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000496 }
Guido van Rossuma849b831993-05-12 11:35:44 +0000497 if (!blankline && tok->level == 0) {
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000498 if (col == tok->indstack[tok->indent]) {
499 /* No change */
Guido van Rossum926f13a1998-04-09 21:38:06 +0000500 if (altcol != tok->altindstack[tok->indent]) {
501 if (indenterror(tok))
502 return ERRORTOKEN;
503 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000504 }
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000505 else if (col > tok->indstack[tok->indent]) {
506 /* Indent -- always one */
507 if (tok->indent+1 >= MAXINDENT) {
Guido van Rossum6e73bf41998-08-25 18:13:04 +0000508 PySys_WriteStderr(
509 "excessive indent\n");
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000510 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000511 tok->cur = tok->inp;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000512 return ERRORTOKEN;
513 }
Guido van Rossum926f13a1998-04-09 21:38:06 +0000514 if (altcol <= tok->altindstack[tok->indent]) {
515 if (indenterror(tok))
516 return ERRORTOKEN;
517 }
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000518 tok->pendin++;
519 tok->indstack[++tok->indent] = col;
Guido van Rossum926f13a1998-04-09 21:38:06 +0000520 tok->altindstack[tok->indent] = altcol;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000521 }
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000522 else /* col < tok->indstack[tok->indent] */ {
523 /* Dedent -- any number, must be consistent */
524 while (tok->indent > 0 &&
525 col < tok->indstack[tok->indent]) {
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000526 tok->pendin--;
Guido van Rossum54758fa1998-02-16 22:18:00 +0000527 tok->indent--;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000528 }
529 if (col != tok->indstack[tok->indent]) {
Guido van Rossumd5516bc1998-12-04 18:51:01 +0000530 PySys_WriteStderr(
Guido van Rossum86bea461997-04-29 21:03:06 +0000531 "inconsistent dedent\n");
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000532 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000533 tok->cur = tok->inp;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000534 return ERRORTOKEN;
535 }
Guido van Rossum926f13a1998-04-09 21:38:06 +0000536 if (altcol != tok->altindstack[tok->indent]) {
537 if (indenterror(tok))
538 return ERRORTOKEN;
539 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000540 }
541 }
542 }
543
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000544 tok->start = tok->cur;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000545
546 /* Return pending indents/dedents */
547 if (tok->pendin != 0) {
548 if (tok->pendin < 0) {
549 tok->pendin++;
550 return DEDENT;
551 }
552 else {
553 tok->pendin--;
554 return INDENT;
555 }
556 }
557
558 again:
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000559 tok->start = NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000560 /* Skip spaces */
561 do {
562 c = tok_nextc(tok);
Guido van Rossum94d32b11995-07-07 22:27:27 +0000563 } while (c == ' ' || c == '\t' || c == '\014');
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000564
565 /* Set start of current token */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000566 tok->start = tok->cur - 1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000567
Guido van Rossumab5ca152000-03-31 00:52:27 +0000568 /* Skip comment, while looking for tab-setting magic */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000569 if (c == '#') {
Guido van Rossumab5ca152000-03-31 00:52:27 +0000570 static char *tabforms[] = {
571 "tab-width:", /* Emacs */
572 ":tabstop=", /* vim, full form */
573 ":ts=", /* vim, abbreviated form */
574 "set tabsize=", /* will vi never die? */
575 /* more templates can be added here to support other editors */
576 };
577 char cbuf[80];
578 char *tp, **cp;
579 tp = cbuf;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000580 do {
Guido van Rossumab5ca152000-03-31 00:52:27 +0000581 *tp++ = c = tok_nextc(tok);
582 } while (c != EOF && c != '\n' &&
583 tp - cbuf + 1 < sizeof(cbuf));
584 *tp = '\0';
585 for (cp = tabforms;
586 cp < tabforms + sizeof(tabforms)/sizeof(tabforms[0]);
587 cp++) {
588 if ((tp = strstr(cbuf, *cp))) {
589 int newsize = atoi(tp + strlen(*cp));
590
591 if (newsize >= 1 && newsize <= 40) {
592 tok->tabsize = newsize;
Guido van Rossum6c981ad2000-04-03 23:02:17 +0000593 if (Py_VerboseFlag)
594 PySys_WriteStderr(
Guido van Rossumab5ca152000-03-31 00:52:27 +0000595 "Tab size set to %d\n",
596 newsize);
597 }
598 }
599 }
600 while (c != EOF && c != '\n')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000601 c = tok_nextc(tok);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000602 }
603
604 /* Check for EOF and errors now */
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000605 if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000606 return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000607 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000608
609 /* Identifier (most frequent token!) */
610 if (isalpha(c) || c == '_') {
Guido van Rossum86016cb2000-03-10 22:56:54 +0000611 /* Process r"", u"" and ur"" */
Guido van Rossum5026cb41997-04-25 17:32:00 +0000612 switch (c) {
613 case 'r':
614 case 'R':
615 c = tok_nextc(tok);
616 if (c == '"' || c == '\'')
617 goto letter_quote;
Guido van Rossum86016cb2000-03-10 22:56:54 +0000618 break;
619 case 'u':
620 case 'U':
621 c = tok_nextc(tok);
622 if (c == 'r' || c == 'R')
623 c = tok_nextc(tok);
624 if (c == '"' || c == '\'')
625 goto letter_quote;
626 break;
Guido van Rossum5026cb41997-04-25 17:32:00 +0000627 }
Guido van Rossum24dacb31997-04-06 03:46:20 +0000628 while (isalnum(c) || c == '_') {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000629 c = tok_nextc(tok);
Guido van Rossum24dacb31997-04-06 03:46:20 +0000630 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000631 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000632 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000633 *p_end = tok->cur;
634 return NAME;
635 }
636
637 /* Newline */
638 if (c == '\n') {
639 tok->atbol = 1;
Guido van Rossuma849b831993-05-12 11:35:44 +0000640 if (blankline || tok->level > 0)
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000641 goto nextline;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000642 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000643 *p_end = tok->cur - 1; /* Leave '\n' out of the string */
644 return NEWLINE;
645 }
646
Guido van Rossum2d45be11997-04-11 19:16:25 +0000647#ifdef macintosh
648 if (c == '\r') {
Guido van Rossum6e73bf41998-08-25 18:13:04 +0000649 PySys_WriteStderr(
Guido van Rossum86bea461997-04-29 21:03:06 +0000650 "File contains \\r characters (incorrect line endings?)\n");
Guido van Rossum2d45be11997-04-11 19:16:25 +0000651 tok->done = E_TOKEN;
652 tok->cur = tok->inp;
653 return ERRORTOKEN;
654 }
655#endif
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000656 /* Period or number starting with period? */
657 if (c == '.') {
658 c = tok_nextc(tok);
659 if (isdigit(c)) {
660 goto fraction;
661 }
662 else {
663 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000664 *p_start = tok->start;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000665 *p_end = tok->cur;
666 return DOT;
667 }
668 }
Guido van Rossumf595fde1996-01-12 01:31:58 +0000669
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000670 /* Number */
671 if (isdigit(c)) {
672 if (c == '0') {
673 /* Hex or octal */
674 c = tok_nextc(tok);
675 if (c == '.')
676 goto fraction;
Guido van Rossumf595fde1996-01-12 01:31:58 +0000677#ifndef WITHOUT_COMPLEX
Guido van Rossumfaa436c1996-01-26 18:59:07 +0000678 if (c == 'j' || c == 'J')
Guido van Rossumf595fde1996-01-12 01:31:58 +0000679 goto imaginary;
680#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000681 if (c == 'x' || c == 'X') {
682 /* Hex */
683 do {
684 c = tok_nextc(tok);
685 } while (isxdigit(c));
686 }
687 else {
Guido van Rossum94309451991-12-10 14:01:05 +0000688 /* XXX This is broken! E.g.,
689 09.9 should be accepted as float! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000690 /* Octal; c is first char of it */
691 /* There's no 'isoctdigit' macro, sigh */
692 while ('0' <= c && c < '8') {
693 c = tok_nextc(tok);
694 }
695 }
Guido van Rossumf023c461991-05-05 20:16:20 +0000696 if (c == 'l' || c == 'L')
697 c = tok_nextc(tok);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000698 }
699 else {
700 /* Decimal */
701 do {
702 c = tok_nextc(tok);
703 } while (isdigit(c));
Guido van Rossumf023c461991-05-05 20:16:20 +0000704 if (c == 'l' || c == 'L')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000705 c = tok_nextc(tok);
Guido van Rossumf023c461991-05-05 20:16:20 +0000706 else {
707 /* Accept floating point numbers.
708 XXX This accepts incomplete things like
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000709 XXX 12e or 1e+; worry run-time */
Guido van Rossumf023c461991-05-05 20:16:20 +0000710 if (c == '.') {
711 fraction:
712 /* Fraction */
713 do {
714 c = tok_nextc(tok);
715 } while (isdigit(c));
716 }
717 if (c == 'e' || c == 'E') {
718 /* Exponent part */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000719 c = tok_nextc(tok);
Guido van Rossumf023c461991-05-05 20:16:20 +0000720 if (c == '+' || c == '-')
721 c = tok_nextc(tok);
722 while (isdigit(c)) {
723 c = tok_nextc(tok);
724 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000725 }
Guido van Rossumf595fde1996-01-12 01:31:58 +0000726#ifndef WITHOUT_COMPLEX
Guido van Rossumfaa436c1996-01-26 18:59:07 +0000727 if (c == 'j' || c == 'J')
Guido van Rossumf595fde1996-01-12 01:31:58 +0000728 /* Imaginary part */
729 imaginary:
730 c = tok_nextc(tok);
731#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000732 }
733 }
734 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000735 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000736 *p_end = tok->cur;
737 return NUMBER;
738 }
Guido van Rossum24dacb31997-04-06 03:46:20 +0000739
740 letter_quote:
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000741 /* String */
742 if (c == '\'' || c == '"') {
Guido van Rossum35685241998-02-16 15:42:50 +0000743 int quote2 = tok->cur - tok->start + 1;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000744 int quote = c;
745 int triple = 0;
746 int tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000747 for (;;) {
748 c = tok_nextc(tok);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000749 if (c == '\n') {
750 if (!triple) {
751 tok->done = E_TOKEN;
752 tok_backup(tok, c);
753 return ERRORTOKEN;
754 }
755 tripcount = 0;
756 }
757 else if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000758 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000759 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000760 return ERRORTOKEN;
761 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000762 else if (c == quote) {
763 tripcount++;
Guido van Rossum35685241998-02-16 15:42:50 +0000764 if (tok->cur - tok->start == quote2) {
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000765 c = tok_nextc(tok);
766 if (c == quote) {
767 triple = 1;
768 tripcount = 0;
769 continue;
770 }
771 tok_backup(tok, c);
772 }
773 if (!triple || tripcount == 3)
774 break;
775 }
776 else if (c == '\\') {
777 tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000778 c = tok_nextc(tok);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000779 if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000780 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000781 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000782 return ERRORTOKEN;
783 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000784 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000785 else
786 tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000787 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000788 *p_start = tok->start;
Guido van Rossum8054fad1993-10-26 15:19:44 +0000789 *p_end = tok->cur;
790 return STRING;
791 }
792
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000793 /* Line continuation */
794 if (c == '\\') {
795 c = tok_nextc(tok);
796 if (c != '\n') {
797 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000798 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000799 return ERRORTOKEN;
800 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000801 goto again; /* Read next line */
802 }
803
Guido van Rossumfbab9051991-10-20 20:25:03 +0000804 /* Check for two-character token */
805 {
806 int c2 = tok_nextc(tok);
Guido van Rossum86bea461997-04-29 21:03:06 +0000807 int token = PyToken_TwoChars(c, c2);
Guido van Rossumfbab9051991-10-20 20:25:03 +0000808 if (token != OP) {
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000809 *p_start = tok->start;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000810 *p_end = tok->cur;
811 return token;
812 }
813 tok_backup(tok, c2);
814 }
815
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000816 /* Keep track of parentheses nesting level */
Guido van Rossuma849b831993-05-12 11:35:44 +0000817 switch (c) {
818 case '(':
819 case '[':
820 case '{':
821 tok->level++;
822 break;
823 case ')':
824 case ']':
825 case '}':
826 tok->level--;
827 break;
828 }
829
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000830 /* Punctuation character */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000831 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000832 *p_end = tok->cur;
Guido van Rossum86bea461997-04-29 21:03:06 +0000833 return PyToken_OneChar(c);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000834}
835
836
Guido van Rossum408027e1996-12-30 16:17:54 +0000837#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000838
839void
840tok_dump(type, start, end)
841 int type;
842 char *start, *end;
843{
Guido van Rossum86bea461997-04-29 21:03:06 +0000844 printf("%s", _PyParser_TokenNames[type]);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000845 if (type == NAME || type == NUMBER || type == STRING || type == OP)
846 printf("(%.*s)", (int)(end - start), start);
847}
848
849#endif