blob: 160f6ba8e02908c63aa418d90d0aa846f4a0007b [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00002Copyright (c) 2000, BeOpen.com.
3Copyright (c) 1995-2000, Corporation for National Research Initiatives.
4Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
5All rights reserved.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00006
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00007See the file "Misc/COPYRIGHT" for information on usage and
8redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009******************************************************************/
10
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000011/* Tokenizer implementation */
12
Guido van Rossum3f5da241990-12-20 15:06:42 +000013#include "pgenheaders.h"
14
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000015#include <ctype.h>
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000016
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000017#include "tokenizer.h"
18#include "errcode.h"
19
Tim Petersdbd9ba62000-07-09 03:09:57 +000020extern char *PyOS_Readline(char *);
Guido van Rossumf4b1a641994-08-29 12:43:07 +000021/* Return malloc'ed string including trailing \n;
22 empty malloc'ed string for EOF;
23 NULL if interrupted */
24
Guido van Rossum4fe87291992-02-26 15:24:44 +000025/* Don't ever change this -- it would break the portability of Python code */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000026#define TABSIZE 8
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000027
Guido van Rossumcf57d8b1998-01-19 22:07:46 +000028/* Convert a possibly signed character to a nonnegative int */
29/* XXX This assumes characters are 8 bits wide */
30#ifdef __CHAR_UNSIGNED__
31#define Py_CHARMASK(c) (c)
32#else
33#define Py_CHARMASK(c) ((c) & 0xff)
34#endif
35
Guido van Rossum3f5da241990-12-20 15:06:42 +000036/* Forward */
Tim Petersdbd9ba62000-07-09 03:09:57 +000037static struct tok_state *tok_new(void);
38static int tok_nextc(struct tok_state *tok);
39static void tok_backup(struct tok_state *tok, int c);
Guido van Rossum3f5da241990-12-20 15:06:42 +000040
/* Token names.

   Printable name for every token type, indexed by token number.
   This table must match the #defines in token.h! */

char *_PyParser_TokenNames[] = {
	"ENDMARKER",		/*  0 */
	"NAME",			/*  1 */
	"NUMBER",		/*  2 */
	"STRING",		/*  3 */
	"NEWLINE",		/*  4 */
	"INDENT",		/*  5 */
	"DEDENT",		/*  6 */
	"LPAR",			/*  7 */
	"RPAR",			/*  8 */
	"LSQB",			/*  9 */
	"RSQB",			/* 10 */
	"COLON",		/* 11 */
	"COMMA",		/* 12 */
	"SEMI",			/* 13 */
	"PLUS",			/* 14 */
	"MINUS",		/* 15 */
	"STAR",			/* 16 */
	"SLASH",		/* 17 */
	"VBAR",			/* 18 */
	"AMPER",		/* 19 */
	"LESS",			/* 20 */
	"GREATER",		/* 21 */
	"EQUAL",		/* 22 */
	"DOT",			/* 23 */
	"PERCENT",		/* 24 */
	"BACKQUOTE",		/* 25 */
	"LBRACE",		/* 26 */
	"RBRACE",		/* 27 */
	"EQEQUAL",		/* 28 */
	"NOTEQUAL",		/* 29 */
	"LESSEQUAL",		/* 30 */
	"GREATEREQUAL",		/* 31 */
	"TILDE",		/* 32 */
	"CIRCUMFLEX",		/* 33 */
	"LEFTSHIFT",		/* 34 */
	"RIGHTSHIFT",		/* 35 */
	"DOUBLESTAR",		/* 36 */
	"OP",			/* 37 */
	"<ERRORTOKEN>",		/* 38 */
	"<N_TOKENS>"		/* 39 */
};
86
87
88/* Create and initialize a new tok_state structure */
89
90static struct tok_state *
Thomas Wouters23c9e002000-07-22 19:20:54 +000091tok_new(void)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000092{
Guido van Rossum86bea461997-04-29 21:03:06 +000093 struct tok_state *tok = PyMem_NEW(struct tok_state, 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000094 if (tok == NULL)
95 return NULL;
Guido van Rossumf4b1a641994-08-29 12:43:07 +000096 tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000097 tok->done = E_OK;
98 tok->fp = NULL;
99 tok->tabsize = TABSIZE;
100 tok->indent = 0;
101 tok->indstack[0] = 0;
102 tok->atbol = 1;
103 tok->pendin = 0;
104 tok->prompt = tok->nextprompt = NULL;
105 tok->lineno = 0;
Guido van Rossuma849b831993-05-12 11:35:44 +0000106 tok->level = 0;
Guido van Rossum926f13a1998-04-09 21:38:06 +0000107 tok->filename = NULL;
108 tok->altwarning = 0;
109 tok->alterror = 0;
110 tok->alttabsize = 1;
111 tok->altindstack[0] = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000112 return tok;
113}
114
115
116/* Set up tokenizer for string */
117
118struct tok_state *
Thomas Wouters23c9e002000-07-22 19:20:54 +0000119PyTokenizer_FromString(char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000120{
121 struct tok_state *tok = tok_new();
122 if (tok == NULL)
123 return NULL;
Guido van Rossum1a817c01994-09-19 08:06:25 +0000124 tok->buf = tok->cur = tok->end = tok->inp = str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000125 return tok;
126}
127
128
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000129/* Set up tokenizer for file */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000130
131struct tok_state *
Thomas Wouters23c9e002000-07-22 19:20:54 +0000132PyTokenizer_FromFile(FILE *fp, char *ps1, char *ps2)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000133{
134 struct tok_state *tok = tok_new();
135 if (tok == NULL)
136 return NULL;
Guido van Rossum86bea461997-04-29 21:03:06 +0000137 if ((tok->buf = PyMem_NEW(char, BUFSIZ)) == NULL) {
138 PyMem_DEL(tok);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000139 return NULL;
140 }
141 tok->cur = tok->inp = tok->buf;
142 tok->end = tok->buf + BUFSIZ;
143 tok->fp = fp;
144 tok->prompt = ps1;
145 tok->nextprompt = ps2;
146 return tok;
147}
148
149
150/* Free a tok_state structure */
151
152void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000153PyTokenizer_Free(struct tok_state *tok)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000154{
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000155 if (tok->fp != NULL && tok->buf != NULL)
Guido van Rossum86bea461997-04-29 21:03:06 +0000156 PyMem_DEL(tok->buf);
157 PyMem_DEL(tok);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000158}
159
160
161/* Get next char, updating state; error code goes into tok->done */
162
163static int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000164tok_nextc(register struct tok_state *tok)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000165{
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000166 for (;;) {
Guido van Rossum1a817c01994-09-19 08:06:25 +0000167 if (tok->cur != tok->inp) {
Guido van Rossumcf57d8b1998-01-19 22:07:46 +0000168 return Py_CHARMASK(*tok->cur++); /* Fast path */
Guido van Rossum1a817c01994-09-19 08:06:25 +0000169 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000170 if (tok->done != E_OK)
171 return EOF;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000172 if (tok->fp == NULL) {
Guido van Rossum1a817c01994-09-19 08:06:25 +0000173 char *end = strchr(tok->inp, '\n');
174 if (end != NULL)
175 end++;
176 else {
177 end = strchr(tok->inp, '\0');
178 if (end == tok->inp) {
179 tok->done = E_EOF;
180 return EOF;
181 }
182 }
183 if (tok->start == NULL)
184 tok->buf = tok->cur;
185 tok->lineno++;
186 tok->inp = end;
Guido van Rossumcf57d8b1998-01-19 22:07:46 +0000187 return Py_CHARMASK(*tok->cur++);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000188 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000189 if (tok->prompt != NULL) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000190 char *new = PyOS_Readline(tok->prompt);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000191 if (tok->nextprompt != NULL)
192 tok->prompt = tok->nextprompt;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000193 if (new == NULL)
194 tok->done = E_INTR;
195 else if (*new == '\0') {
Guido van Rossumb18618d2000-05-03 23:44:39 +0000196 PyMem_FREE(new);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000197 tok->done = E_EOF;
198 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000199 else if (tok->start != NULL) {
Guido van Rossum6da34342000-06-28 22:00:02 +0000200 size_t start = tok->start - tok->buf;
201 size_t oldlen = tok->cur - tok->buf;
202 size_t newlen = oldlen + strlen(new);
Guido van Rossumb18618d2000-05-03 23:44:39 +0000203 char *buf = tok->buf;
204 PyMem_RESIZE(buf, char, newlen+1);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000205 tok->lineno++;
206 if (buf == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +0000207 PyMem_DEL(tok->buf);
Guido van Rossum588633d1994-12-30 15:46:02 +0000208 tok->buf = NULL;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000209 PyMem_FREE(new);
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000210 tok->done = E_NOMEM;
211 return EOF;
212 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000213 tok->buf = buf;
214 tok->cur = tok->buf + oldlen;
215 strcpy(tok->buf + oldlen, new);
Guido van Rossumb18618d2000-05-03 23:44:39 +0000216 PyMem_FREE(new);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000217 tok->inp = tok->buf + newlen;
218 tok->end = tok->inp + 1;
219 tok->start = tok->buf + start;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000220 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000221 else {
222 tok->lineno++;
223 if (tok->buf != NULL)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000224 PyMem_DEL(tok->buf);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000225 tok->buf = new;
226 tok->cur = tok->buf;
227 tok->inp = strchr(tok->buf, '\0');
228 tok->end = tok->inp + 1;
229 }
230 }
231 else {
232 int done = 0;
233 int cur = 0;
Guido van Rossum2e96eb91995-06-14 18:26:02 +0000234 char *pt;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000235 if (tok->start == NULL) {
236 if (tok->buf == NULL) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000237 tok->buf = PyMem_NEW(char, BUFSIZ);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000238 if (tok->buf == NULL) {
239 tok->done = E_NOMEM;
240 return EOF;
241 }
242 tok->end = tok->buf + BUFSIZ;
243 }
244 if (fgets(tok->buf, (int)(tok->end - tok->buf),
245 tok->fp) == NULL) {
246 tok->done = E_EOF;
247 done = 1;
248 }
249 else {
250 tok->done = E_OK;
251 tok->inp = strchr(tok->buf, '\0');
252 done = tok->inp[-1] == '\n';
253 }
254 }
255 else {
256 cur = tok->cur - tok->buf;
Guido van Rossum78c05351995-01-17 16:12:13 +0000257 if (feof(tok->fp)) {
258 tok->done = E_EOF;
259 done = 1;
260 }
261 else
262 tok->done = E_OK;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000263 }
264 tok->lineno++;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000265 /* Read until '\n' or EOF */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000266 while (!done) {
267 int curstart = tok->start == NULL ? -1 :
268 tok->start - tok->buf;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000269 int curvalid = tok->inp - tok->buf;
Guido van Rossum3f6bb861995-09-21 20:36:34 +0000270 int newsize = curvalid + BUFSIZ;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000271 char *newbuf = tok->buf;
Guido van Rossum86bea461997-04-29 21:03:06 +0000272 PyMem_RESIZE(newbuf, char, newsize);
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000273 if (newbuf == NULL) {
274 tok->done = E_NOMEM;
275 tok->cur = tok->inp;
276 return EOF;
277 }
278 tok->buf = newbuf;
279 tok->inp = tok->buf + curvalid;
280 tok->end = tok->buf + newsize;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000281 tok->start = curstart < 0 ? NULL :
282 tok->buf + curstart;
283 if (fgets(tok->inp,
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000284 (int)(tok->end - tok->inp),
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000285 tok->fp) == NULL) {
286 /* Last line does not end in \n,
287 fake one */
288 strcpy(tok->inp, "\n");
289 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000290 tok->inp = strchr(tok->inp, '\0');
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000291 done = tok->inp[-1] == '\n';
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000292 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000293 tok->cur = tok->buf + cur;
Guido van Rossum2d45be11997-04-11 19:16:25 +0000294#ifndef macintosh
Guido van Rossum2e96eb91995-06-14 18:26:02 +0000295 /* replace "\r\n" with "\n" */
Guido van Rossum2d45be11997-04-11 19:16:25 +0000296 /* For Mac we leave the \r, giving a syntax error */
Guido van Rossum2e96eb91995-06-14 18:26:02 +0000297 pt = tok->inp - 2;
298 if (pt >= tok->buf && *pt == '\r') {
299 *pt++ = '\n';
300 *pt = '\0';
301 tok->inp = pt;
302 }
Guido van Rossum2d45be11997-04-11 19:16:25 +0000303#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000304 }
305 if (tok->done != E_OK) {
306 if (tok->prompt != NULL)
Guido van Rossum6e73bf41998-08-25 18:13:04 +0000307 PySys_WriteStderr("\n");
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000308 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000309 return EOF;
310 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000311 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000312 /*NOTREACHED*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000313}
314
315
316/* Back-up one character */
317
318static void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000319tok_backup(register struct tok_state *tok, register int c)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000320{
321 if (c != EOF) {
Guido van Rossum588633d1994-12-30 15:46:02 +0000322 if (--tok->cur < tok->buf)
Guido van Rossum86bea461997-04-29 21:03:06 +0000323 Py_FatalError("tok_backup: begin of buffer");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000324 if (*tok->cur != c)
325 *tok->cur = c;
326 }
327}
328
329
330/* Return the token corresponding to a single character */
331
332int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000333PyToken_OneChar(int c)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000334{
335 switch (c) {
336 case '(': return LPAR;
337 case ')': return RPAR;
338 case '[': return LSQB;
339 case ']': return RSQB;
340 case ':': return COLON;
341 case ',': return COMMA;
342 case ';': return SEMI;
343 case '+': return PLUS;
344 case '-': return MINUS;
345 case '*': return STAR;
346 case '/': return SLASH;
347 case '|': return VBAR;
348 case '&': return AMPER;
349 case '<': return LESS;
350 case '>': return GREATER;
351 case '=': return EQUAL;
352 case '.': return DOT;
353 case '%': return PERCENT;
354 case '`': return BACKQUOTE;
355 case '{': return LBRACE;
356 case '}': return RBRACE;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000357 case '^': return CIRCUMFLEX;
358 case '~': return TILDE;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000359 default: return OP;
360 }
361}
362
363
Guido van Rossumfbab9051991-10-20 20:25:03 +0000364int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000365PyToken_TwoChars(int c1, int c2)
Guido van Rossumfbab9051991-10-20 20:25:03 +0000366{
367 switch (c1) {
368 case '=':
369 switch (c2) {
370 case '=': return EQEQUAL;
371 }
372 break;
373 case '!':
374 switch (c2) {
375 case '=': return NOTEQUAL;
376 }
377 break;
378 case '<':
379 switch (c2) {
380 case '>': return NOTEQUAL;
381 case '=': return LESSEQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000382 case '<': return LEFTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000383 }
384 break;
385 case '>':
386 switch (c2) {
387 case '=': return GREATEREQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000388 case '>': return RIGHTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000389 }
390 break;
Guido van Rossumf595fde1996-01-12 01:31:58 +0000391 case '*':
392 switch (c2) {
393 case '*': return DOUBLESTAR;
394 }
395 break;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000396 }
397 return OP;
398}
399
400
Guido van Rossum926f13a1998-04-09 21:38:06 +0000401static int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000402indenterror(struct tok_state *tok)
Guido van Rossum926f13a1998-04-09 21:38:06 +0000403{
404 if (tok->alterror) {
Fred Drake85f36392000-07-11 17:53:00 +0000405 tok->done = E_TABSPACE;
Guido van Rossum926f13a1998-04-09 21:38:06 +0000406 tok->cur = tok->inp;
407 return 1;
408 }
409 if (tok->altwarning) {
Fred Drake85f36392000-07-11 17:53:00 +0000410 PySys_WriteStderr("%s: inconsistent use of tabs and spaces "
411 "in indentation\n", tok->filename);
Guido van Rossum926f13a1998-04-09 21:38:06 +0000412 tok->altwarning = 0;
413 }
414 return 0;
415}
416
417
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000418/* Get next token, after space stripping etc. */
419
420int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000421PyTokenizer_Get(register struct tok_state *tok, char **p_start,
422 char **p_end)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000423{
424 register int c;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000425 int blankline;
426
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000427 *p_start = *p_end = NULL;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000428 nextline:
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000429 tok->start = NULL;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000430 blankline = 0;
431
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000432 /* Get indentation level */
433 if (tok->atbol) {
434 register int col = 0;
Guido van Rossum926f13a1998-04-09 21:38:06 +0000435 register int altcol = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000436 tok->atbol = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000437 for (;;) {
438 c = tok_nextc(tok);
439 if (c == ' ')
Guido van Rossum926f13a1998-04-09 21:38:06 +0000440 col++, altcol++;
441 else if (c == '\t') {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000442 col = (col/tok->tabsize + 1) * tok->tabsize;
Guido van Rossum926f13a1998-04-09 21:38:06 +0000443 altcol = (altcol/tok->alttabsize + 1)
444 * tok->alttabsize;
445 }
Guido van Rossum94d32b11995-07-07 22:27:27 +0000446 else if (c == '\014') /* Control-L (formfeed) */
Guido van Rossum926f13a1998-04-09 21:38:06 +0000447 col = altcol = 0; /* For Emacs users */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000448 else
449 break;
450 }
451 tok_backup(tok, c);
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000452 if (c == '#' || c == '\n') {
453 /* Lines with only whitespace and/or comments
454 shouldn't affect the indentation and are
455 not passed to the parser as NEWLINE tokens,
456 except *totally* empty lines in interactive
457 mode, which signal the end of a command group. */
458 if (col == 0 && c == '\n' && tok->prompt != NULL)
459 blankline = 0; /* Let it through */
460 else
461 blankline = 1; /* Ignore completely */
462 /* We can't jump back right here since we still
463 may need to skip to the end of a comment */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000464 }
Guido van Rossuma849b831993-05-12 11:35:44 +0000465 if (!blankline && tok->level == 0) {
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000466 if (col == tok->indstack[tok->indent]) {
467 /* No change */
Guido van Rossum926f13a1998-04-09 21:38:06 +0000468 if (altcol != tok->altindstack[tok->indent]) {
469 if (indenterror(tok))
470 return ERRORTOKEN;
471 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000472 }
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000473 else if (col > tok->indstack[tok->indent]) {
474 /* Indent -- always one */
475 if (tok->indent+1 >= MAXINDENT) {
Fred Drake85f36392000-07-11 17:53:00 +0000476 tok->done = E_TOODEEP;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000477 tok->cur = tok->inp;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000478 return ERRORTOKEN;
479 }
Guido van Rossum926f13a1998-04-09 21:38:06 +0000480 if (altcol <= tok->altindstack[tok->indent]) {
481 if (indenterror(tok))
482 return ERRORTOKEN;
483 }
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000484 tok->pendin++;
485 tok->indstack[++tok->indent] = col;
Guido van Rossum926f13a1998-04-09 21:38:06 +0000486 tok->altindstack[tok->indent] = altcol;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000487 }
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000488 else /* col < tok->indstack[tok->indent] */ {
489 /* Dedent -- any number, must be consistent */
490 while (tok->indent > 0 &&
491 col < tok->indstack[tok->indent]) {
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000492 tok->pendin--;
Guido van Rossum54758fa1998-02-16 22:18:00 +0000493 tok->indent--;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000494 }
495 if (col != tok->indstack[tok->indent]) {
Fred Drake85f36392000-07-11 17:53:00 +0000496 tok->done = E_DEDENT;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000497 tok->cur = tok->inp;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000498 return ERRORTOKEN;
499 }
Guido van Rossum926f13a1998-04-09 21:38:06 +0000500 if (altcol != tok->altindstack[tok->indent]) {
501 if (indenterror(tok))
502 return ERRORTOKEN;
503 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000504 }
505 }
506 }
507
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000508 tok->start = tok->cur;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000509
510 /* Return pending indents/dedents */
511 if (tok->pendin != 0) {
512 if (tok->pendin < 0) {
513 tok->pendin++;
514 return DEDENT;
515 }
516 else {
517 tok->pendin--;
518 return INDENT;
519 }
520 }
521
522 again:
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000523 tok->start = NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000524 /* Skip spaces */
525 do {
526 c = tok_nextc(tok);
Guido van Rossum94d32b11995-07-07 22:27:27 +0000527 } while (c == ' ' || c == '\t' || c == '\014');
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000528
529 /* Set start of current token */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000530 tok->start = tok->cur - 1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000531
Guido van Rossumab5ca152000-03-31 00:52:27 +0000532 /* Skip comment, while looking for tab-setting magic */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000533 if (c == '#') {
Guido van Rossumab5ca152000-03-31 00:52:27 +0000534 static char *tabforms[] = {
535 "tab-width:", /* Emacs */
536 ":tabstop=", /* vim, full form */
537 ":ts=", /* vim, abbreviated form */
538 "set tabsize=", /* will vi never die? */
539 /* more templates can be added here to support other editors */
540 };
541 char cbuf[80];
542 char *tp, **cp;
543 tp = cbuf;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000544 do {
Guido van Rossumab5ca152000-03-31 00:52:27 +0000545 *tp++ = c = tok_nextc(tok);
546 } while (c != EOF && c != '\n' &&
547 tp - cbuf + 1 < sizeof(cbuf));
548 *tp = '\0';
549 for (cp = tabforms;
550 cp < tabforms + sizeof(tabforms)/sizeof(tabforms[0]);
551 cp++) {
552 if ((tp = strstr(cbuf, *cp))) {
553 int newsize = atoi(tp + strlen(*cp));
554
555 if (newsize >= 1 && newsize <= 40) {
556 tok->tabsize = newsize;
Guido van Rossum6c981ad2000-04-03 23:02:17 +0000557 if (Py_VerboseFlag)
558 PySys_WriteStderr(
Guido van Rossumab5ca152000-03-31 00:52:27 +0000559 "Tab size set to %d\n",
560 newsize);
561 }
562 }
563 }
564 while (c != EOF && c != '\n')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000565 c = tok_nextc(tok);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000566 }
567
568 /* Check for EOF and errors now */
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000569 if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000570 return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000571 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000572
573 /* Identifier (most frequent token!) */
574 if (isalpha(c) || c == '_') {
Guido van Rossum86016cb2000-03-10 22:56:54 +0000575 /* Process r"", u"" and ur"" */
Guido van Rossum5026cb41997-04-25 17:32:00 +0000576 switch (c) {
577 case 'r':
578 case 'R':
579 c = tok_nextc(tok);
580 if (c == '"' || c == '\'')
581 goto letter_quote;
Guido van Rossum86016cb2000-03-10 22:56:54 +0000582 break;
583 case 'u':
584 case 'U':
585 c = tok_nextc(tok);
586 if (c == 'r' || c == 'R')
587 c = tok_nextc(tok);
588 if (c == '"' || c == '\'')
589 goto letter_quote;
590 break;
Guido van Rossum5026cb41997-04-25 17:32:00 +0000591 }
Guido van Rossum24dacb31997-04-06 03:46:20 +0000592 while (isalnum(c) || c == '_') {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000593 c = tok_nextc(tok);
Guido van Rossum24dacb31997-04-06 03:46:20 +0000594 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000595 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000596 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000597 *p_end = tok->cur;
598 return NAME;
599 }
600
601 /* Newline */
602 if (c == '\n') {
603 tok->atbol = 1;
Guido van Rossuma849b831993-05-12 11:35:44 +0000604 if (blankline || tok->level > 0)
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000605 goto nextline;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000606 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000607 *p_end = tok->cur - 1; /* Leave '\n' out of the string */
608 return NEWLINE;
609 }
610
Guido van Rossum2d45be11997-04-11 19:16:25 +0000611#ifdef macintosh
612 if (c == '\r') {
Guido van Rossum6e73bf41998-08-25 18:13:04 +0000613 PySys_WriteStderr(
Guido van Rossum86bea461997-04-29 21:03:06 +0000614 "File contains \\r characters (incorrect line endings?)\n");
Guido van Rossum2d45be11997-04-11 19:16:25 +0000615 tok->done = E_TOKEN;
616 tok->cur = tok->inp;
617 return ERRORTOKEN;
618 }
619#endif
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000620 /* Period or number starting with period? */
621 if (c == '.') {
622 c = tok_nextc(tok);
623 if (isdigit(c)) {
624 goto fraction;
625 }
626 else {
627 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000628 *p_start = tok->start;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000629 *p_end = tok->cur;
630 return DOT;
631 }
632 }
Guido van Rossumf595fde1996-01-12 01:31:58 +0000633
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000634 /* Number */
635 if (isdigit(c)) {
636 if (c == '0') {
637 /* Hex or octal */
638 c = tok_nextc(tok);
639 if (c == '.')
640 goto fraction;
Guido van Rossumf595fde1996-01-12 01:31:58 +0000641#ifndef WITHOUT_COMPLEX
Guido van Rossumfaa436c1996-01-26 18:59:07 +0000642 if (c == 'j' || c == 'J')
Guido van Rossumf595fde1996-01-12 01:31:58 +0000643 goto imaginary;
644#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000645 if (c == 'x' || c == 'X') {
646 /* Hex */
647 do {
648 c = tok_nextc(tok);
649 } while (isxdigit(c));
650 }
651 else {
Guido van Rossum94309451991-12-10 14:01:05 +0000652 /* XXX This is broken! E.g.,
653 09.9 should be accepted as float! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000654 /* Octal; c is first char of it */
655 /* There's no 'isoctdigit' macro, sigh */
656 while ('0' <= c && c < '8') {
657 c = tok_nextc(tok);
658 }
659 }
Guido van Rossumf023c461991-05-05 20:16:20 +0000660 if (c == 'l' || c == 'L')
661 c = tok_nextc(tok);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000662 }
663 else {
664 /* Decimal */
665 do {
666 c = tok_nextc(tok);
667 } while (isdigit(c));
Guido van Rossumf023c461991-05-05 20:16:20 +0000668 if (c == 'l' || c == 'L')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000669 c = tok_nextc(tok);
Guido van Rossumf023c461991-05-05 20:16:20 +0000670 else {
671 /* Accept floating point numbers.
672 XXX This accepts incomplete things like
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000673 XXX 12e or 1e+; worry run-time */
Guido van Rossumf023c461991-05-05 20:16:20 +0000674 if (c == '.') {
675 fraction:
676 /* Fraction */
677 do {
678 c = tok_nextc(tok);
679 } while (isdigit(c));
680 }
681 if (c == 'e' || c == 'E') {
682 /* Exponent part */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000683 c = tok_nextc(tok);
Guido van Rossumf023c461991-05-05 20:16:20 +0000684 if (c == '+' || c == '-')
685 c = tok_nextc(tok);
686 while (isdigit(c)) {
687 c = tok_nextc(tok);
688 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000689 }
Guido van Rossumf595fde1996-01-12 01:31:58 +0000690#ifndef WITHOUT_COMPLEX
Guido van Rossumfaa436c1996-01-26 18:59:07 +0000691 if (c == 'j' || c == 'J')
Guido van Rossumf595fde1996-01-12 01:31:58 +0000692 /* Imaginary part */
693 imaginary:
694 c = tok_nextc(tok);
695#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000696 }
697 }
698 tok_backup(tok, c);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000699 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000700 *p_end = tok->cur;
701 return NUMBER;
702 }
Guido van Rossum24dacb31997-04-06 03:46:20 +0000703
704 letter_quote:
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000705 /* String */
706 if (c == '\'' || c == '"') {
Guido van Rossum35685241998-02-16 15:42:50 +0000707 int quote2 = tok->cur - tok->start + 1;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000708 int quote = c;
709 int triple = 0;
710 int tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000711 for (;;) {
712 c = tok_nextc(tok);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000713 if (c == '\n') {
714 if (!triple) {
715 tok->done = E_TOKEN;
716 tok_backup(tok, c);
717 return ERRORTOKEN;
718 }
719 tripcount = 0;
720 }
721 else if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000722 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000723 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000724 return ERRORTOKEN;
725 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000726 else if (c == quote) {
727 tripcount++;
Guido van Rossum35685241998-02-16 15:42:50 +0000728 if (tok->cur - tok->start == quote2) {
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000729 c = tok_nextc(tok);
730 if (c == quote) {
731 triple = 1;
732 tripcount = 0;
733 continue;
734 }
735 tok_backup(tok, c);
736 }
737 if (!triple || tripcount == 3)
738 break;
739 }
740 else if (c == '\\') {
741 tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000742 c = tok_nextc(tok);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000743 if (c == EOF) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000744 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000745 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000746 return ERRORTOKEN;
747 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000748 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000749 else
750 tripcount = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000751 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000752 *p_start = tok->start;
Guido van Rossum8054fad1993-10-26 15:19:44 +0000753 *p_end = tok->cur;
754 return STRING;
755 }
756
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000757 /* Line continuation */
758 if (c == '\\') {
759 c = tok_nextc(tok);
760 if (c != '\n') {
761 tok->done = E_TOKEN;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000762 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000763 return ERRORTOKEN;
764 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000765 goto again; /* Read next line */
766 }
767
Guido van Rossumfbab9051991-10-20 20:25:03 +0000768 /* Check for two-character token */
769 {
770 int c2 = tok_nextc(tok);
Guido van Rossum86bea461997-04-29 21:03:06 +0000771 int token = PyToken_TwoChars(c, c2);
Guido van Rossumfbab9051991-10-20 20:25:03 +0000772 if (token != OP) {
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000773 *p_start = tok->start;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000774 *p_end = tok->cur;
775 return token;
776 }
777 tok_backup(tok, c2);
778 }
779
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000780 /* Keep track of parentheses nesting level */
Guido van Rossuma849b831993-05-12 11:35:44 +0000781 switch (c) {
782 case '(':
783 case '[':
784 case '{':
785 tok->level++;
786 break;
787 case ')':
788 case ']':
789 case '}':
790 tok->level--;
791 break;
792 }
793
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000794 /* Punctuation character */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000795 *p_start = tok->start;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000796 *p_end = tok->cur;
Guido van Rossum86bea461997-04-29 21:03:06 +0000797 return PyToken_OneChar(c);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000798}
799
800
Guido van Rossum408027e1996-12-30 16:17:54 +0000801#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000802
803void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000804tok_dump(int type, char *start, char *end)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000805{
Guido van Rossum86bea461997-04-29 21:03:06 +0000806 printf("%s", _PyParser_TokenNames[type]);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000807 if (type == NAME || type == NUMBER || type == STRING || type == OP)
808 printf("(%.*s)", (int)(end - start), start);
809}
810
811#endif