blob: 5a9bcc04e7f4be12e2c3109bb41f29e1c69ecc18 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* Tokenizer implementation */
3
Jack Jansen7b8c7542002-04-14 20:12:41 +00004#include "Python.h"
Guido van Rossum3f5da241990-12-20 15:06:42 +00005#include "pgenheaders.h"
6
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00007#include <ctype.h>
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +00008#include <assert.h>
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00009
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000010#include "tokenizer.h"
11#include "errcode.h"
12
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +000013#ifndef PGEN
14#include "unicodeobject.h"
15#include "stringobject.h"
16#include "fileobject.h"
17#include "codecs.h"
18#include "abstract.h"
19#endif /* PGEN */
20
Martin v. Löwis566f6af2002-10-26 14:39:10 +000021extern char *PyOS_Readline(FILE *, FILE *, char *);
Guido van Rossumf4b1a641994-08-29 12:43:07 +000022/* Return malloc'ed string including trailing \n;
23 empty malloc'ed string for EOF;
24 NULL if interrupted */
25
Guido van Rossum4fe87291992-02-26 15:24:44 +000026/* Don't ever change this -- it would break the portability of Python code */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000027#define TABSIZE 8
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000028
/* Map a char, which may be a signed type on this platform, to a
   nonnegative int. */
/* XXX Assumes characters are 8 bits wide. */
#ifndef __CHAR_UNSIGNED__
/* plain char may be negative here: keep only the low eight bits */
#define Py_CHARMASK(c) ((c) & 0xff)
#else
/* plain char is already unsigned: nothing to do */
#define Py_CHARMASK(c) (c)
#endif
36
Guido van Rossum3f5da241990-12-20 15:06:42 +000037/* Forward */
Tim Petersdbd9ba62000-07-09 03:09:57 +000038static struct tok_state *tok_new(void);
39static int tok_nextc(struct tok_state *tok);
40static void tok_backup(struct tok_state *tok, int c);
Guido van Rossum3f5da241990-12-20 15:06:42 +000041
/* Printable names of the token types, indexed by token number. */
/* This table must match the #defines in token.h! */

char *_PyParser_TokenNames[] = {
    /* structural tokens */
    "ENDMARKER", "NAME", "NUMBER", "STRING",
    "NEWLINE", "INDENT", "DEDENT",

    /* brackets and punctuation */
    "LPAR", "RPAR", "LSQB", "RSQB",
    "COLON", "COMMA", "SEMI",

    /* single-character operators */
    "PLUS", "MINUS", "STAR", "SLASH",
    "VBAR", "AMPER", "LESS", "GREATER",
    "EQUAL", "DOT", "PERCENT", "BACKQUOTE",
    "LBRACE", "RBRACE",

    /* comparisons and other multi-character operators */
    "EQEQUAL", "NOTEQUAL", "LESSEQUAL", "GREATEREQUAL",
    "TILDE", "CIRCUMFLEX", "LEFTSHIFT", "RIGHTSHIFT",
    "DOUBLESTAR",

    /* augmented assignment */
    "PLUSEQUAL", "MINEQUAL", "STAREQUAL", "SLASHEQUAL",
    "PERCENTEQUAL", "AMPEREQUAL", "VBAREQUAL", "CIRCUMFLEXEQUAL",
    "LEFTSHIFTEQUAL", "RIGHTSHIFTEQUAL", "DOUBLESTAREQUAL",

    "DOUBLESLASH", "DOUBLESLASHEQUAL",
    "AT",

    /* pseudo entries that close the table */
    "OP",
    "<ERRORTOKEN>",
    "<N_TOKENS>"
};
101
102
103/* Create and initialize a new tok_state structure */
104
105static struct tok_state *
Thomas Wouters23c9e002000-07-22 19:20:54 +0000106tok_new(void)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000107{
Guido van Rossum86bea461997-04-29 21:03:06 +0000108 struct tok_state *tok = PyMem_NEW(struct tok_state, 1);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000109 if (tok == NULL)
110 return NULL;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000111 tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000112 tok->done = E_OK;
113 tok->fp = NULL;
114 tok->tabsize = TABSIZE;
115 tok->indent = 0;
116 tok->indstack[0] = 0;
117 tok->atbol = 1;
118 tok->pendin = 0;
119 tok->prompt = tok->nextprompt = NULL;
120 tok->lineno = 0;
Guido van Rossuma849b831993-05-12 11:35:44 +0000121 tok->level = 0;
Guido van Rossum926f13a1998-04-09 21:38:06 +0000122 tok->filename = NULL;
123 tok->altwarning = 0;
124 tok->alterror = 0;
125 tok->alttabsize = 1;
126 tok->altindstack[0] = 0;
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000127 tok->decoding_state = 0;
128 tok->decoding_erred = 0;
129 tok->read_coding_spec = 0;
130 tok->issued_encoding_warning = 0;
131 tok->encoding = NULL;
Martin v. Löwisf62a89b2002-09-03 11:52:44 +0000132 tok->cont_line = 0;
Martin v. Löwis1ee99d32002-08-04 20:10:29 +0000133#ifndef PGEN
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000134 tok->decoding_readline = NULL;
135 tok->decoding_buffer = NULL;
Martin v. Löwis1ee99d32002-08-04 20:10:29 +0000136#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000137 return tok;
138}
139
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000140#ifdef PGEN
141
/* PGEN build: read one line from tok->fp with plain fgets(); no encoding
   detection or decoding takes place.  Returns whatever fgets() returns. */
142static char *
143decoding_fgets(char *s, int size, struct tok_state *tok)
144{
145 return fgets(s, size, tok->fp);
146}
147
/* PGEN build: report the end-of-file state of tok->fp, as given by feof(). */
148static int
149decoding_feof(struct tok_state *tok)
150{
151 return feof(tok->fp);
152}
153
/* PGEN build: no decoding is performed; STR is handed back unchanged
   and TOK is not touched. */
static const char *
decode_str(const char *str, struct tok_state *tok)
{
    (void)tok;
    return str;
}
159
160#else /* PGEN */
161
162static char *
163error_ret(struct tok_state *tok) /* XXX */
164{
165 tok->decoding_erred = 1;
166 if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
167 PyMem_DEL(tok->buf);
168 tok->buf = NULL;
169 return NULL; /* as if it were EOF */
170}
171
172static char *
173new_string(const char *s, int len)
174{
175 char* result = PyMem_NEW(char, len + 1);
176 if (result != NULL) {
177 memcpy(result, s, len);
178 result[len] = '\0';
179 }
180 return result;
181}
182
/* Normalize the spelling of the two encodings the tokenizer handles
   natively (utf-8 and latin-1).

   At most the first 12 characters of S are examined, with '_' treated
   as '-' and letters lowercased.  Returns the string constant "utf-8"
   or "iso-8859-1" when S names one of those encodings (optionally
   followed by a '-' suffix); otherwise returns S itself.  Callers can
   therefore detect a match by comparing the result with S. */
static char *
get_normal_name(char *s) /* for utf-8 and latin-1 */
{
    char buf[13];
    int i;

    for (i = 0; i < 12; i++) {
        int c = s[i];
        if (c == '\0')
            break;
        else if (c == '_')
            buf[i] = '-';
        else
            /* <ctype.h> functions require a value representable as
               unsigned char; a plain char may be negative. */
            buf[i] = (char)tolower((unsigned char)c);
    }
    buf[i] = '\0';

    if (strcmp(buf, "utf-8") == 0 ||
        strncmp(buf, "utf-8-", 6) == 0)
        return "utf-8";
    else if (strcmp(buf, "latin-1") == 0 ||
             strcmp(buf, "iso-8859-1") == 0 ||
             strcmp(buf, "iso-latin-1") == 0 ||
             strncmp(buf, "latin-1-", 8) == 0 ||
             strncmp(buf, "iso-8859-1-", 11) == 0 ||
             strncmp(buf, "iso-latin-1-", 12) == 0)
        return "iso-8859-1";
    else
        return s;
}
205
/* Return the coding spec found in the first SIZE bytes of S, or NULL
   if none is found (or if memory for the result cannot be allocated).

   The spec must appear in a comment that is the only thing on the
   line: anything other than blanks, tabs or form feeds before the '#'
   disqualifies the line.  Inside the comment the text "coding"
   followed by ':' or '=' introduces the encoding name, which consists
   of alphanumerics, '-', '_' and '.'.

   The result is a newly allocated string (see new_string) that the
   caller must release with PyMem_DEL; utf-8 and latin-1 spellings are
   normalized through get_normal_name. */

static char *
get_coding_spec(const char *s, int size)
{
    int i;

    /* Coding spec must be in a comment, and that comment must be
     * the only statement on the source code line. */
    for (i = 0; i < size - 6; i++) {
        if (s[i] == '#')
            break;
        if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014')
            return NULL;
    }
    for (; i < size - 6; i++) { /* XXX inefficient search */
        const char *t = s + i;
        if (strncmp(t, "coding", 6) == 0) {
            const char *begin = NULL;
            t += 6;
            if (t[0] != ':' && t[0] != '=')
                continue;
            do {
                t++;
            } while (t[0] == '\x20' || t[0] == '\t');

            begin = t;
            /* Cast before isalnum(): a negative char value is not a
               valid argument for the <ctype.h> functions. */
            while (isalnum((unsigned char)t[0]) ||
                   t[0] == '-' || t[0] == '_' || t[0] == '.')
                t++;

            if (begin < t) {
                char *r = new_string(begin, (int)(t - begin));
                char *q;
                if (r == NULL)
                    return NULL; /* out of memory: report "no spec" */
                q = get_normal_name(r);
                if (r != q) {
                    /* q is a string constant; hand out a private copy */
                    PyMem_DEL(r);
                    r = new_string(q, strlen(q));
                }
                return r;
            }
        }
    }
    return NULL;
}
249
250/* Check whether the line contains a coding spec. If it does,
251 invoke the set_readline function for the new encoding.
252 This function receives the tok_state and the new encoding.
253 Return 1 on success, 0 on failure. */
254
255static int
256check_coding_spec(const char* line, int size, struct tok_state *tok,
257 int set_readline(struct tok_state *, const char *))
258{
Tim Peters17db21f2002-09-03 15:39:58 +0000259 char * cs;
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000260 int r = 1;
Tim Peters17db21f2002-09-03 15:39:58 +0000261
Martin v. Löwisf62a89b2002-09-03 11:52:44 +0000262 if (tok->cont_line)
263 /* It's a continuation line, so it can't be a coding spec. */
264 return 1;
Tim Peters17db21f2002-09-03 15:39:58 +0000265 cs = get_coding_spec(line, size);
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000266 if (cs != NULL) {
267 tok->read_coding_spec = 1;
268 if (tok->encoding == NULL) {
269 assert(tok->decoding_state == 1); /* raw */
270 if (strcmp(cs, "utf-8") == 0 ||
271 strcmp(cs, "iso-8859-1") == 0) {
272 tok->encoding = cs;
273 } else {
Martin v. Löwis019934b2002-08-07 12:33:18 +0000274#ifdef Py_USING_UNICODE
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000275 r = set_readline(tok, cs);
276 if (r) {
277 tok->encoding = cs;
278 tok->decoding_state = -1;
279 }
Neal Norwitzc0d5faa2005-10-21 06:05:33 +0000280 else
281 PyMem_DEL(cs);
Martin v. Löwis019934b2002-08-07 12:33:18 +0000282#else
283 /* Without Unicode support, we cannot
284 process the coding spec. Since there
285 won't be any Unicode literals, that
286 won't matter. */
Neal Norwitzc0d5faa2005-10-21 06:05:33 +0000287 PyMem_DEL(cs);
Martin v. Löwis019934b2002-08-07 12:33:18 +0000288#endif
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000289 }
290 } else { /* then, compare cs with BOM */
291 r = (strcmp(tok->encoding, cs) == 0);
292 PyMem_DEL(cs);
293 }
294 }
295 return r;
296}
297
298/* See whether the file starts with a BOM. If it does,
299 invoke the set_readline function with the new encoding.
300 Return 1 on success, 0 on failure. */
301
302static int
303check_bom(int get_char(struct tok_state *),
304 void unget_char(int, struct tok_state *),
305 int set_readline(struct tok_state *, const char *),
306 struct tok_state *tok)
307{
308 int ch = get_char(tok);
309 tok->decoding_state = 1;
310 if (ch == EOF) {
311 return 1;
312 } else if (ch == 0xEF) {
313 ch = get_char(tok); if (ch != 0xBB) goto NON_BOM;
314 ch = get_char(tok); if (ch != 0xBF) goto NON_BOM;
315#if 0
316 /* Disable support for UTF-16 BOMs until a decision
317 is made whether this needs to be supported. */
318 } else if (ch == 0xFE) {
319 ch = get_char(tok); if (ch != 0xFF) goto NON_BOM;
320 if (!set_readline(tok, "utf-16-be")) return 0;
321 tok->decoding_state = -1;
322 } else if (ch == 0xFF) {
323 ch = get_char(tok); if (ch != 0xFE) goto NON_BOM;
324 if (!set_readline(tok, "utf-16-le")) return 0;
325 tok->decoding_state = -1;
326#endif
327 } else {
328 unget_char(ch, tok);
329 return 1;
330 }
331 tok->encoding = new_string("utf-8", 5); /* resulting is in utf-8 */
332 return 1;
333 NON_BOM:
334 /* any token beginning with '\xEF', '\xFE', '\xFF' is a bad token */
335 unget_char(0xFF, tok); /* XXX this will cause a syntax error */
336 return 1;
337}
338
/* Read a line of text from TOK into S (room for SIZE bytes, including
   the terminating NUL), using the codec readline stored in TOK.
   Return NULL on failure or at end of input, else S.

   On entry, tok->decoding_buffer will be one of:
   1) NULL: tok->decoding_readline must be called to get a new line.
   2) A unicode object: decoding_feof has already called
      tok->decoding_readline and parked the result there.
   3) A string object: a previous call to fp_readl did not have enough
      room in S for the whole line; the buffer holds the overflow,
      already encoded as UTF-8.  fp_readl is then called in a loop
      (with an expanded buffer) until the buffer ends with a '\n' or
      the end of the file is reached: see tok_nextc and its calls to
      decoding_fgets.
*/

static char *
fp_readl(char *s, int size, struct tok_state *tok)
{
#ifndef Py_USING_UNICODE
    /* In a non-Unicode build, this should never be called. */
    Py_FatalError("fp_readl should not be called in this build.");
    return NULL; /* Keep compiler happy (not reachable) */
#else
    PyObject *pending = tok->decoding_buffer;
    PyObject *encoded = NULL;
    char *bytes;
    int nbytes;

    /* Ask for one less byte so we can terminate it */
    assert(size > 0);
    size--;

    if (pending != NULL) {
        /* Take over the parked object. */
        tok->decoding_buffer = NULL;
        if (PyString_CheckExact(pending))
            encoded = pending;      /* case 3: already UTF-8 */
    }
    else {
        pending = PyObject_CallObject(tok->decoding_readline, NULL);
        if (pending == NULL)
            return error_ret(tok);
    }

    if (encoded == NULL) {
        encoded = PyUnicode_AsUTF8String(pending);
        Py_DECREF(pending);
        if (encoded == NULL)
            return error_ret(tok);
    }

    bytes = PyString_AsString(encoded);
    nbytes = PyString_GET_SIZE(encoded);
    if (nbytes > size) {
        /* Does not fit: keep the tail for the next call. */
        tok->decoding_buffer =
            PyString_FromStringAndSize(bytes + size, nbytes - size);
        if (tok->decoding_buffer == NULL) {
            Py_DECREF(encoded);
            return error_ret(tok);
        }
        nbytes = size;
    }
    memcpy(s, bytes, nbytes);
    s[nbytes] = '\0';
    Py_DECREF(encoded);
    return nbytes == 0 ? NULL /* EOF */ : s;
#endif
}
403
404/* Set the readline function for TOK to a StreamReader's
405 readline function. The StreamReader is named ENC.
406
407 This function is called from check_bom and check_coding_spec.
408
409 ENC is usually identical to the future value of tok->encoding,
410 except for the (currently unsupported) case of UTF-16.
411
412 Return 1 on success, 0 on failure. */
413
414static int
415fp_setreadl(struct tok_state *tok, const char* enc)
416{
417 PyObject *reader, *stream, *readline;
418
Martin v. Löwis95292d62002-12-11 14:04:59 +0000419 /* XXX: constify filename argument. */
420 stream = PyFile_FromFile(tok->fp, (char*)tok->filename, "rb", NULL);
Martin v. Löwiscd280fb2002-08-04 18:28:44 +0000421 if (stream == NULL)
422 return 0;
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000423
424 reader = PyCodec_StreamReader(enc, stream, NULL);
425 Py_DECREF(stream);
Martin v. Löwiscd280fb2002-08-04 18:28:44 +0000426 if (reader == NULL)
427 return 0;
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000428
429 readline = PyObject_GetAttrString(reader, "readline");
430 Py_DECREF(reader);
Martin v. Löwiscd280fb2002-08-04 18:28:44 +0000431 if (readline == NULL)
432 return 0;
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000433
434 tok->decoding_readline = readline;
435 return 1;
436}
437
438/* Fetch the next byte from TOK. */
/* Reads from the FILE stream tok->fp and returns getc()'s result
   unchanged.  Passed to check_bom as its get_char callback by
   decoding_fgets. */
439
440static int fp_getc(struct tok_state *tok) {
441 return getc(tok->fp);
442}
443
444/* Unfetch the last byte back into TOK. */
/* Pushes C back onto the FILE stream tok->fp with ungetc(); the result
   of ungetc() is not checked.  Passed to check_bom as its unget_char
   callback by decoding_fgets. */
445
446static void fp_ungetc(int c, struct tok_state *tok) {
447 ungetc(c, tok->fp);
448}
449
450/* Read a line of input from TOK. Determine encoding
451 if necessary. */
452
453static char *
454decoding_fgets(char *s, int size, struct tok_state *tok)
455{
Martin v. Löwis2863c102002-08-07 15:18:57 +0000456 char *line = NULL;
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000457 int warn = 0, badchar = 0;
Martin v. Löwiscd280fb2002-08-04 18:28:44 +0000458 for (;;) {
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000459 if (tok->decoding_state < 0) {
460 /* We already have a codec associated with
461 this input. */
462 line = fp_readl(s, size, tok);
463 break;
464 } else if (tok->decoding_state > 0) {
465 /* We want a 'raw' read. */
466 line = Py_UniversalNewlineFgets(s, size,
467 tok->fp, NULL);
468 warn = 1;
469 break;
470 } else {
471 /* We have not yet determined the encoding.
472 If an encoding is found, use the file-pointer
473 reader functions from now on. */
474 if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok))
475 return error_ret(tok);
476 assert(tok->decoding_state != 0);
477 }
Martin v. Löwiscd280fb2002-08-04 18:28:44 +0000478 }
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000479 if (line != NULL && tok->lineno < 2 && !tok->read_coding_spec) {
480 if (!check_coding_spec(line, strlen(line), tok, fp_setreadl)) {
481 return error_ret(tok);
482 }
483 }
484#ifndef PGEN
485 if (warn && line && !tok->issued_encoding_warning && !tok->encoding) {
486 unsigned char *c;
Jack Jansencf0a2cf2002-08-05 14:14:05 +0000487 for (c = (unsigned char *)line; *c; c++)
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000488 if (*c > 127) {
489 badchar = *c;
490 break;
491 }
492 }
493 if (badchar) {
Marc-André Lemburg1fb14002003-02-17 18:31:57 +0000494 char buf[500];
Martin v. Löwis725bb232002-08-05 01:49:16 +0000495 /* Need to add 1 to the line number, since this line
496 has not been counted, yet. */
Marc-André Lemburg1fb14002003-02-17 18:31:57 +0000497 sprintf(buf,
498 "Non-ASCII character '\\x%.2x' "
499 "in file %.200s on line %i, "
500 "but no encoding declared; "
501 "see http://www.python.org/peps/pep-0263.html for details",
502 badchar, tok->filename, tok->lineno + 1);
503 /* We don't use PyErr_WarnExplicit() here because
504 printing the line in question to e.g. a log file
505 could result in sensitive information being
506 exposed. */
507 PyErr_Warn(PyExc_DeprecationWarning, buf);
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000508 tok->issued_encoding_warning = 1;
509 }
510#endif
511 return line;
512}
513
514static int
515decoding_feof(struct tok_state *tok)
516{
517 if (tok->decoding_state >= 0) {
518 return feof(tok->fp);
519 } else {
520 PyObject* buf = tok->decoding_buffer;
521 if (buf == NULL) {
Walter Dörwaldc1f5fff2005-07-12 21:53:43 +0000522 buf = PyObject_CallObject(tok->decoding_readline, NULL);
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000523 if (buf == NULL) {
524 error_ret(tok);
525 return 1;
526 } else {
527 tok->decoding_buffer = buf;
528 }
529 }
530 return PyObject_Length(buf) == 0;
531 }
532}
533
534/* Fetch a byte from TOK, using the string buffer. */
/* Returns the byte at tok->str, masked with Py_CHARMASK, and advances
   tok->str by one.  No end-of-string check is made here.  Passed to
   check_bom as its get_char callback by decode_str. */
535
536static int buf_getc(struct tok_state *tok) {
Just van Rossumf032f862003-02-09 20:38:48 +0000537 return Py_CHARMASK(*tok->str++);
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000538}
539
540/* Unfetch a byte from TOK, using the string buffer. */
/* Only steps tok->str back by one; C is never written into the buffer.
   The assert checks that C equals the byte already stored at that
   position. */
541
542static void buf_ungetc(int c, struct tok_state *tok) {
543 tok->str--;
Just van Rossumf032f862003-02-09 20:38:48 +0000544 assert(Py_CHARMASK(*tok->str) == c); /* tok->cur may point to read-only segment */
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000545}
546
547/* Set the readline function for TOK to ENC. For the string-based
548 tokenizer, this means to just record the encoding. */
/* The ENC pointer itself is stored in tok->enc (no copy is made), so
   the string must stay valid for as long as tok->enc is used.
   Always returns 1 (success). */
549
550static int buf_setreadl(struct tok_state *tok, const char* enc) {
551 tok->enc = enc;
552 return 1;
553}
554
/* Decode the NUL-terminated C byte string STR from the encoding ENC
   and return it re-encoded as a UTF-8 Python string object.
   Returns a new reference, or NULL if either step fails. */

#ifdef Py_USING_UNICODE
static PyObject *
translate_into_utf8(const char* str, const char* enc) {
    PyObject *encoded = NULL;
    PyObject *decoded = PyUnicode_Decode(str, strlen(str), enc, NULL);

    if (decoded != NULL) {
        encoded = PyUnicode_AsUTF8String(decoded);
        Py_DECREF(decoded);
    }
    return encoded;
}
#endif
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000570
571/* Decode a byte string STR for use as the buffer of TOK.
572 Look for encoding declarations inside STR, and record them
573 inside TOK. */
574
575static const char *
576decode_str(const char *str, struct tok_state *tok)
577{
578 PyObject* utf8 = NULL;
579 const char *s;
580 int lineno = 0;
581 tok->enc = NULL;
582 tok->str = str;
583 if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
584 return NULL;
585 str = tok->str; /* string after BOM if any */
Tim Peters2c3f9c62002-08-04 17:58:34 +0000586 assert(str);
Martin v. Löwis019934b2002-08-07 12:33:18 +0000587#ifdef Py_USING_UNICODE
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000588 if (tok->enc != NULL) {
589 utf8 = translate_into_utf8(str, tok->enc);
590 if (utf8 == NULL)
591 return NULL;
592 str = PyString_AsString(utf8);
593 }
Martin v. Löwis019934b2002-08-07 12:33:18 +0000594#endif
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000595 for (s = str;; s++) {
596 if (*s == '\0') break;
597 else if (*s == '\n') {
598 lineno++;
599 if (lineno == 2) break;
600 }
601 }
602 tok->enc = NULL;
603 if (!check_coding_spec(str, s - str, tok, buf_setreadl))
604 return NULL;
Martin v. Löwis019934b2002-08-07 12:33:18 +0000605#ifdef Py_USING_UNICODE
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000606 if (tok->enc != NULL) {
607 assert(utf8 == NULL);
608 utf8 = translate_into_utf8(str, tok->enc);
Neal Norwitz40d37812005-10-02 01:48:49 +0000609 if (utf8 == NULL) {
610 PyErr_Format(PyExc_SyntaxError,
611 "unknown encoding: %s", tok->enc);
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000612 return NULL;
Neal Norwitz40d37812005-10-02 01:48:49 +0000613 }
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000614 str = PyString_AsString(utf8);
615 }
Martin v. Löwis019934b2002-08-07 12:33:18 +0000616#endif
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000617 assert(tok->decoding_buffer == NULL);
618 tok->decoding_buffer = utf8; /* CAUTION */
619 return str;
620}
621
622#endif /* PGEN */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000623
624/* Set up tokenizer for string */
625
626struct tok_state *
Martin v. Löwis95292d62002-12-11 14:04:59 +0000627PyTokenizer_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000628{
629 struct tok_state *tok = tok_new();
630 if (tok == NULL)
631 return NULL;
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000632 str = (char *)decode_str(str, tok);
633 if (str == NULL)
634 return NULL;
Martin v. Löwis95292d62002-12-11 14:04:59 +0000635 /* XXX: constify members. */
636 tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000637 return tok;
638}
639
640
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000641/* Set up tokenizer for file */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000642
643struct tok_state *
Thomas Wouters23c9e002000-07-22 19:20:54 +0000644PyTokenizer_FromFile(FILE *fp, char *ps1, char *ps2)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000645{
646 struct tok_state *tok = tok_new();
647 if (tok == NULL)
648 return NULL;
Guido van Rossum86bea461997-04-29 21:03:06 +0000649 if ((tok->buf = PyMem_NEW(char, BUFSIZ)) == NULL) {
650 PyMem_DEL(tok);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000651 return NULL;
652 }
653 tok->cur = tok->inp = tok->buf;
654 tok->end = tok->buf + BUFSIZ;
655 tok->fp = fp;
656 tok->prompt = ps1;
657 tok->nextprompt = ps2;
658 return tok;
659}
660
661
662/* Free a tok_state structure */
663
664void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000665PyTokenizer_Free(struct tok_state *tok)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000666{
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000667 if (tok->encoding != NULL)
668 PyMem_DEL(tok->encoding);
Martin v. Löwis1ee99d32002-08-04 20:10:29 +0000669#ifndef PGEN
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000670 Py_XDECREF(tok->decoding_readline);
671 Py_XDECREF(tok->decoding_buffer);
Martin v. Löwis1ee99d32002-08-04 20:10:29 +0000672#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000673 if (tok->fp != NULL && tok->buf != NULL)
Guido van Rossum86bea461997-04-29 21:03:06 +0000674 PyMem_DEL(tok->buf);
675 PyMem_DEL(tok);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000676}
677
#if !defined(PGEN) && defined(Py_USING_UNICODE)
/* Convert the line *INP, as read from the interactive prompt, from
   the encoding of sys.stdin to UTF-8.

   Nothing happens (and 0 is returned) when sys.stdin is not the C
   stdin, is not a file object, or has no string encoding, and also
   when decoding or re-encoding fails; in the latter case the error is
   cleared.  On success *INP is freed and replaced by a newly allocated
   UTF-8 copy, and tok->encoding is set to a copy of the encoding name.

   Returns 0 normally, -1 with tok->done set to E_NOMEM when memory
   runs out.  The caller keeps ownership of *INP in every case. */
static int
tok_stdin_decode(struct tok_state *tok, char **inp)
{
    PyObject *stdin_obj, *enc_obj, *unicode_line, *utf8_line;
    const char *enc_name;
    char *replacement;

    if (PySys_GetFile((char *)"stdin", NULL) != stdin)
        return 0;
    stdin_obj = PySys_GetObject("stdin");
    if (stdin_obj == NULL || !PyFile_Check(stdin_obj))
        return 0;

    enc_obj = ((PyFileObject *)stdin_obj)->f_encoding;
    if (enc_obj == NULL || !PyString_Check(enc_obj))
        return 0;
    Py_INCREF(enc_obj);
    enc_name = PyString_AsString(enc_obj);

    /* stdin encoding -> unicode -> UTF-8 */
    utf8_line = NULL;
    unicode_line = PyUnicode_Decode(*inp, strlen(*inp), enc_name, NULL);
    if (unicode_line != NULL) {
        utf8_line = PyUnicode_AsEncodedString(unicode_line, "utf-8", NULL);
        Py_DECREF(unicode_line);
    }
    if (utf8_line == NULL) {
        /* Fallback to iso-8859-1: for backward compatibility */
        Py_DECREF(enc_obj);
        PyErr_Clear();
        return 0;
    }

    replacement = new_string(PyString_AsString(utf8_line),
                             PyString_Size(utf8_line));
    Py_DECREF(utf8_line);
    if (replacement != NULL) {
        PyMem_FREE(*inp);
        *inp = replacement;
        if (tok->encoding != NULL)
            PyMem_DEL(tok->encoding);
        tok->encoding = new_string(enc_name, strlen(enc_name));
        if (tok->encoding != NULL) {
            Py_DECREF(enc_obj);
            return 0;
        }
    }

    /* One of the two copies could not be allocated. */
    Py_DECREF(enc_obj);
    tok->done = E_NOMEM;
    return -1;
}
#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000735
736/* Get next char, updating state; error code goes into tok->done */
737
738static int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000739tok_nextc(register struct tok_state *tok)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000740{
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000741 for (;;) {
Guido van Rossum1a817c01994-09-19 08:06:25 +0000742 if (tok->cur != tok->inp) {
Guido van Rossumcf57d8b1998-01-19 22:07:46 +0000743 return Py_CHARMASK(*tok->cur++); /* Fast path */
Guido van Rossum1a817c01994-09-19 08:06:25 +0000744 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000745 if (tok->done != E_OK)
746 return EOF;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000747 if (tok->fp == NULL) {
Guido van Rossum1a817c01994-09-19 08:06:25 +0000748 char *end = strchr(tok->inp, '\n');
749 if (end != NULL)
750 end++;
751 else {
752 end = strchr(tok->inp, '\0');
753 if (end == tok->inp) {
754 tok->done = E_EOF;
755 return EOF;
756 }
757 }
758 if (tok->start == NULL)
759 tok->buf = tok->cur;
760 tok->lineno++;
761 tok->inp = end;
Guido van Rossumcf57d8b1998-01-19 22:07:46 +0000762 return Py_CHARMASK(*tok->cur++);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000763 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000764 if (tok->prompt != NULL) {
Martin v. Löwis566f6af2002-10-26 14:39:10 +0000765 char *new = PyOS_Readline(stdin, stdout, tok->prompt);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000766 if (tok->nextprompt != NULL)
767 tok->prompt = tok->nextprompt;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000768 if (new == NULL)
769 tok->done = E_INTR;
770 else if (*new == '\0') {
Guido van Rossumb18618d2000-05-03 23:44:39 +0000771 PyMem_FREE(new);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000772 tok->done = E_EOF;
773 }
Hye-Shik Chang7df44b32004-08-04 17:36:41 +0000774#if !defined(PGEN) && defined(Py_USING_UNICODE)
775 else if (tok_stdin_decode(tok, &new) != 0)
776 PyMem_FREE(new);
777#endif
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000778 else if (tok->start != NULL) {
Guido van Rossum6da34342000-06-28 22:00:02 +0000779 size_t start = tok->start - tok->buf;
780 size_t oldlen = tok->cur - tok->buf;
781 size_t newlen = oldlen + strlen(new);
Guido van Rossumb18618d2000-05-03 23:44:39 +0000782 char *buf = tok->buf;
783 PyMem_RESIZE(buf, char, newlen+1);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000784 tok->lineno++;
785 if (buf == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +0000786 PyMem_DEL(tok->buf);
Guido van Rossum588633d1994-12-30 15:46:02 +0000787 tok->buf = NULL;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000788 PyMem_FREE(new);
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000789 tok->done = E_NOMEM;
790 return EOF;
791 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000792 tok->buf = buf;
793 tok->cur = tok->buf + oldlen;
794 strcpy(tok->buf + oldlen, new);
Guido van Rossumb18618d2000-05-03 23:44:39 +0000795 PyMem_FREE(new);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000796 tok->inp = tok->buf + newlen;
797 tok->end = tok->inp + 1;
798 tok->start = tok->buf + start;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000799 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000800 else {
801 tok->lineno++;
802 if (tok->buf != NULL)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000803 PyMem_DEL(tok->buf);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000804 tok->buf = new;
805 tok->cur = tok->buf;
806 tok->inp = strchr(tok->buf, '\0');
807 tok->end = tok->inp + 1;
808 }
809 }
810 else {
811 int done = 0;
812 int cur = 0;
Guido van Rossum2e96eb91995-06-14 18:26:02 +0000813 char *pt;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000814 if (tok->start == NULL) {
815 if (tok->buf == NULL) {
Guido van Rossum86bea461997-04-29 21:03:06 +0000816 tok->buf = PyMem_NEW(char, BUFSIZ);
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000817 if (tok->buf == NULL) {
818 tok->done = E_NOMEM;
819 return EOF;
820 }
821 tok->end = tok->buf + BUFSIZ;
822 }
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000823 if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf),
824 tok) == NULL) {
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000825 tok->done = E_EOF;
826 done = 1;
827 }
828 else {
829 tok->done = E_OK;
830 tok->inp = strchr(tok->buf, '\0');
831 done = tok->inp[-1] == '\n';
832 }
833 }
834 else {
835 cur = tok->cur - tok->buf;
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000836 if (decoding_feof(tok)) {
Guido van Rossum78c05351995-01-17 16:12:13 +0000837 tok->done = E_EOF;
838 done = 1;
839 }
840 else
841 tok->done = E_OK;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000842 }
843 tok->lineno++;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000844 /* Read until '\n' or EOF */
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000845 while (!done) {
846 int curstart = tok->start == NULL ? -1 :
847 tok->start - tok->buf;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000848 int curvalid = tok->inp - tok->buf;
Guido van Rossum3f6bb861995-09-21 20:36:34 +0000849 int newsize = curvalid + BUFSIZ;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000850 char *newbuf = tok->buf;
Guido van Rossum86bea461997-04-29 21:03:06 +0000851 PyMem_RESIZE(newbuf, char, newsize);
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000852 if (newbuf == NULL) {
853 tok->done = E_NOMEM;
854 tok->cur = tok->inp;
855 return EOF;
856 }
857 tok->buf = newbuf;
858 tok->inp = tok->buf + curvalid;
859 tok->end = tok->buf + newsize;
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000860 tok->start = curstart < 0 ? NULL :
861 tok->buf + curstart;
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000862 if (decoding_fgets(tok->inp,
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000863 (int)(tok->end - tok->inp),
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000864 tok) == NULL) {
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000865 /* Last line does not end in \n,
866 fake one */
867 strcpy(tok->inp, "\n");
868 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000869 tok->inp = strchr(tok->inp, '\0');
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000870 done = tok->inp[-1] == '\n';
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000871 }
Guido van Rossumf4b1a641994-08-29 12:43:07 +0000872 tok->cur = tok->buf + cur;
Guido van Rossum2e96eb91995-06-14 18:26:02 +0000873 /* replace "\r\n" with "\n" */
Guido van Rossum2d45be11997-04-11 19:16:25 +0000874 /* For Mac we leave the \r, giving a syntax error */
Guido van Rossum2e96eb91995-06-14 18:26:02 +0000875 pt = tok->inp - 2;
876 if (pt >= tok->buf && *pt == '\r') {
877 *pt++ = '\n';
878 *pt = '\0';
879 tok->inp = pt;
880 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000881 }
882 if (tok->done != E_OK) {
883 if (tok->prompt != NULL)
Guido van Rossum6e73bf41998-08-25 18:13:04 +0000884 PySys_WriteStderr("\n");
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000885 tok->cur = tok->inp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000886 return EOF;
887 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000888 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000889 /*NOTREACHED*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000890}
891
892
893/* Back-up one character */
894
895static void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000896tok_backup(register struct tok_state *tok, register int c)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000897{
898 if (c != EOF) {
Guido van Rossum588633d1994-12-30 15:46:02 +0000899 if (--tok->cur < tok->buf)
Guido van Rossum86bea461997-04-29 21:03:06 +0000900 Py_FatalError("tok_backup: begin of buffer");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000901 if (*tok->cur != c)
902 *tok->cur = c;
903 }
904}
905
906
907/* Return the token corresponding to a single character */
908
909int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000910PyToken_OneChar(int c)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000911{
912 switch (c) {
913 case '(': return LPAR;
914 case ')': return RPAR;
915 case '[': return LSQB;
916 case ']': return RSQB;
917 case ':': return COLON;
918 case ',': return COMMA;
919 case ';': return SEMI;
920 case '+': return PLUS;
921 case '-': return MINUS;
922 case '*': return STAR;
923 case '/': return SLASH;
924 case '|': return VBAR;
925 case '&': return AMPER;
926 case '<': return LESS;
927 case '>': return GREATER;
928 case '=': return EQUAL;
929 case '.': return DOT;
930 case '%': return PERCENT;
931 case '`': return BACKQUOTE;
932 case '{': return LBRACE;
933 case '}': return RBRACE;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000934 case '^': return CIRCUMFLEX;
935 case '~': return TILDE;
Anthony Baxterc2a5a632004-08-02 06:10:11 +0000936 case '@': return AT;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000937 default: return OP;
938 }
939}
940
941
Guido van Rossumfbab9051991-10-20 20:25:03 +0000942int
Thomas Wouters23c9e002000-07-22 19:20:54 +0000943PyToken_TwoChars(int c1, int c2)
Guido van Rossumfbab9051991-10-20 20:25:03 +0000944{
945 switch (c1) {
946 case '=':
947 switch (c2) {
948 case '=': return EQEQUAL;
949 }
950 break;
951 case '!':
952 switch (c2) {
953 case '=': return NOTEQUAL;
954 }
955 break;
956 case '<':
957 switch (c2) {
958 case '>': return NOTEQUAL;
959 case '=': return LESSEQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000960 case '<': return LEFTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000961 }
962 break;
963 case '>':
964 switch (c2) {
965 case '=': return GREATEREQUAL;
Guido van Rossumbaf0ebf1991-10-24 14:59:40 +0000966 case '>': return RIGHTSHIFT;
Guido van Rossumfbab9051991-10-20 20:25:03 +0000967 }
968 break;
Thomas Wouters434d0822000-08-24 20:11:32 +0000969 case '+':
970 switch (c2) {
971 case '=': return PLUSEQUAL;
972 }
973 break;
974 case '-':
975 switch (c2) {
976 case '=': return MINEQUAL;
977 }
978 break;
Guido van Rossumf595fde1996-01-12 01:31:58 +0000979 case '*':
980 switch (c2) {
981 case '*': return DOUBLESTAR;
Thomas Wouters434d0822000-08-24 20:11:32 +0000982 case '=': return STAREQUAL;
983 }
984 break;
985 case '/':
986 switch (c2) {
Guido van Rossum4668b002001-08-08 05:00:18 +0000987 case '/': return DOUBLESLASH;
Thomas Wouters434d0822000-08-24 20:11:32 +0000988 case '=': return SLASHEQUAL;
989 }
990 break;
991 case '|':
992 switch (c2) {
993 case '=': return VBAREQUAL;
994 }
995 break;
996 case '%':
997 switch (c2) {
998 case '=': return PERCENTEQUAL;
999 }
1000 break;
1001 case '&':
1002 switch (c2) {
1003 case '=': return AMPEREQUAL;
1004 }
1005 break;
1006 case '^':
1007 switch (c2) {
1008 case '=': return CIRCUMFLEXEQUAL;
Guido van Rossumf595fde1996-01-12 01:31:58 +00001009 }
1010 break;
Guido van Rossumfbab9051991-10-20 20:25:03 +00001011 }
1012 return OP;
1013}
1014
Thomas Wouters434d0822000-08-24 20:11:32 +00001015int
1016PyToken_ThreeChars(int c1, int c2, int c3)
1017{
1018 switch (c1) {
1019 case '<':
1020 switch (c2) {
1021 case '<':
1022 switch (c3) {
1023 case '=':
1024 return LEFTSHIFTEQUAL;
Thomas Wouters434d0822000-08-24 20:11:32 +00001025 }
1026 break;
1027 }
1028 break;
1029 case '>':
1030 switch (c2) {
1031 case '>':
1032 switch (c3) {
1033 case '=':
1034 return RIGHTSHIFTEQUAL;
Thomas Wouters434d0822000-08-24 20:11:32 +00001035 }
1036 break;
1037 }
1038 break;
1039 case '*':
1040 switch (c2) {
1041 case '*':
1042 switch (c3) {
1043 case '=':
1044 return DOUBLESTAREQUAL;
Thomas Wouters434d0822000-08-24 20:11:32 +00001045 }
1046 break;
1047 }
1048 break;
Guido van Rossum4668b002001-08-08 05:00:18 +00001049 case '/':
1050 switch (c2) {
1051 case '/':
1052 switch (c3) {
1053 case '=':
1054 return DOUBLESLASHEQUAL;
1055 }
1056 break;
1057 }
1058 break;
Thomas Wouters434d0822000-08-24 20:11:32 +00001059 }
1060 return OP;
1061}
Guido van Rossumfbab9051991-10-20 20:25:03 +00001062
Guido van Rossum926f13a1998-04-09 21:38:06 +00001063static int
Thomas Wouters23c9e002000-07-22 19:20:54 +00001064indenterror(struct tok_state *tok)
Guido van Rossum926f13a1998-04-09 21:38:06 +00001065{
1066 if (tok->alterror) {
Fred Drake85f36392000-07-11 17:53:00 +00001067 tok->done = E_TABSPACE;
Guido van Rossum926f13a1998-04-09 21:38:06 +00001068 tok->cur = tok->inp;
1069 return 1;
1070 }
1071 if (tok->altwarning) {
Fred Drake85f36392000-07-11 17:53:00 +00001072 PySys_WriteStderr("%s: inconsistent use of tabs and spaces "
1073 "in indentation\n", tok->filename);
Guido van Rossum926f13a1998-04-09 21:38:06 +00001074 tok->altwarning = 0;
1075 }
1076 return 0;
1077}
1078
1079
/* Get next token, after space stripping etc.

   Scans one token from tok and returns its type.  For tokens that carry
   text (NAME, NUMBER, STRING, NEWLINE, DOT, operators and punctuation)
   *p_start and *p_end are set to delimit that text inside tok's buffer;
   for INDENT, DEDENT, ENDMARKER and ERRORTOKEN they are left NULL.

   When this function itself detects an error it stores an E_* code in
   tok->done and returns ERRORTOKEN.  When tok_nextc() yields EOF the
   result is ENDMARKER if tok->done is E_EOF and ERRORTOKEN otherwise. */

static int
tok_get(register struct tok_state *tok, char **p_start, char **p_end)
{
	register int c;
	int blankline;	/* set when the current line holds only
			   whitespace and/or a comment */

	*p_start = *p_end = NULL;
  nextline:
	tok->start = NULL;
	blankline = 0;

	/* Get indentation level */
	if (tok->atbol) {
		/* Two column counts are kept: col uses tok->tabsize and
		   altcol uses tok->alttabsize.  Disagreement between them
		   is reported through indenterror(). */
		register int col = 0;
		register int altcol = 0;
		tok->atbol = 0;
		for (;;) {
			c = tok_nextc(tok);
			if (c == ' ')
				col++, altcol++;
			else if (c == '\t') {
				/* Advance to the next tab stop */
				col = (col/tok->tabsize + 1) * tok->tabsize;
				altcol = (altcol/tok->alttabsize + 1)
					* tok->alttabsize;
			}
			else if (c == '\014') /* Control-L (formfeed) */
				col = altcol = 0; /* For Emacs users */
			else
				break;
		}
		/* Un-read the first non-indentation character */
		tok_backup(tok, c);
		if (c == '#' || c == '\n') {
			/* Lines with only whitespace and/or comments
			   shouldn't affect the indentation and are
			   not passed to the parser as NEWLINE tokens,
			   except *totally* empty lines in interactive
			   mode, which signal the end of a command group. */
			if (col == 0 && c == '\n' && tok->prompt != NULL)
				blankline = 0; /* Let it through */
			else
				blankline = 1; /* Ignore completely */
			/* We can't jump back right here since we still
			   may need to skip to the end of a comment */
		}
		/* Indentation is only examined outside brackets
		   (tok->level == 0) and on non-blank lines */
		if (!blankline && tok->level == 0) {
			if (col == tok->indstack[tok->indent]) {
				/* No change */
				if (altcol != tok->altindstack[tok->indent]) {
					if (indenterror(tok))
						return ERRORTOKEN;
				}
			}
			else if (col > tok->indstack[tok->indent]) {
				/* Indent -- always one */
				if (tok->indent+1 >= MAXINDENT) {
					tok->done = E_TOODEEP;
					tok->cur = tok->inp;
					return ERRORTOKEN;
				}
				if (altcol <= tok->altindstack[tok->indent]) {
					if (indenterror(tok))
						return ERRORTOKEN;
				}
				/* Push the new level on both stacks */
				tok->pendin++;
				tok->indstack[++tok->indent] = col;
				tok->altindstack[tok->indent] = altcol;
			}
			else /* col < tok->indstack[tok->indent] */ {
				/* Dedent -- any number, must be consistent */
				while (tok->indent > 0 &&
					col < tok->indstack[tok->indent]) {
					tok->pendin--;
					tok->indent--;
				}
				/* The new column must match a level that
				   is still on the stack */
				if (col != tok->indstack[tok->indent]) {
					tok->done = E_DEDENT;
					tok->cur = tok->inp;
					return ERRORTOKEN;
				}
				if (altcol != tok->altindstack[tok->indent]) {
					if (indenterror(tok))
						return ERRORTOKEN;
				}
			}
		}
	}

	tok->start = tok->cur;

	/* Return pending indents/dedents.  tok->pendin > 0 counts INDENT
	   tokens still to be returned, < 0 counts DEDENT tokens; one is
	   returned per call. */
	if (tok->pendin != 0) {
		if (tok->pendin < 0) {
			tok->pendin++;
			return DEDENT;
		}
		else {
			tok->pendin--;
			return INDENT;
		}
	}

 again:
	tok->start = NULL;
	/* Skip spaces */
	do {
		c = tok_nextc(tok);
	} while (c == ' ' || c == '\t' || c == '\014');

	/* Set start of current token */
	tok->start = tok->cur - 1;

	/* Skip comment, while looking for tab-setting magic */
	if (c == '#') {
		static char *tabforms[] = {
			"tab-width:",		/* Emacs */
			":tabstop=",		/* vim, full form */
			":ts=",			/* vim, abbreviated form */
			"set tabsize=",		/* will vi never die? */
		/* more templates can be added here to support other editors */
		};
		char cbuf[80];
		char *tp, **cp;
		tp = cbuf;
		/* Copy the text following '#' into cbuf, stopping at
		   newline, EOF, or when only room for the terminating
		   '\0' is left */
		do {
			*tp++ = c = tok_nextc(tok);
		} while (c != EOF && c != '\n' &&
			 tp - cbuf + 1 < sizeof(cbuf));
		*tp = '\0';
		/* Look for each template; the number following it becomes
		   tok->tabsize when it lies in the range 1..40 */
		for (cp = tabforms;
		     cp < tabforms + sizeof(tabforms)/sizeof(tabforms[0]);
		     cp++) {
			if ((tp = strstr(cbuf, *cp))) {
				int newsize = atoi(tp + strlen(*cp));

				if (newsize >= 1 && newsize <= 40) {
					tok->tabsize = newsize;
					if (Py_VerboseFlag)
					    PySys_WriteStderr(
						"Tab size set to %d\n",
						newsize);
				}
			}
		}
		/* Discard the part of the comment that did not fit
		   in cbuf */
		while (c != EOF && c != '\n')
			c = tok_nextc(tok);
	}

	/* Check for EOF and errors now */
	if (c == EOF) {
		return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
	}

	/* Identifier (most frequent token!) */
	if (isalpha(c) || c == '_') {
		/* Process r"", u"" and ur"": a quote right after the
		   prefix letter(s) means this is a string literal,
		   not a name */
		switch (c) {
		case 'r':
		case 'R':
			c = tok_nextc(tok);
			if (c == '"' || c == '\'')
				goto letter_quote;
			break;
		case 'u':
		case 'U':
			c = tok_nextc(tok);
			if (c == 'r' || c == 'R')
				c = tok_nextc(tok);
			if (c == '"' || c == '\'')
				goto letter_quote;
			break;
		}
		/* Consume the rest of the identifier */
		while (isalnum(c) || c == '_') {
			c = tok_nextc(tok);
		}
		tok_backup(tok, c);
		*p_start = tok->start;
		*p_end = tok->cur;
		return NAME;
	}

	/* Newline */
	if (c == '\n') {
		tok->atbol = 1;
		/* No NEWLINE token for blank lines or inside brackets;
		   start over on the next line instead */
		if (blankline || tok->level > 0)
			goto nextline;
		*p_start = tok->start;
		*p_end = tok->cur - 1; /* Leave '\n' out of the string */
		tok->cont_line = 0;
		return NEWLINE;
	}

	/* Period or number starting with period? */
	if (c == '.') {
		c = tok_nextc(tok);
		if (isdigit(c)) {
			goto fraction;
		}
		else {
			tok_backup(tok, c);
			*p_start = tok->start;
			*p_end = tok->cur;
			return DOT;
		}
	}

	/* Number.  The labels fraction, exponent and imaginary below are
	   jumped to from several places, including the '.' case above. */
	if (isdigit(c)) {
		if (c == '0') {
			/* Hex or octal -- maybe. */
			c = tok_nextc(tok);
			if (c == '.')
				goto fraction;
#ifndef WITHOUT_COMPLEX
			if (c == 'j' || c == 'J')
				goto imaginary;
#endif
			if (c == 'x' || c == 'X') {
				/* Hex */
				do {
					c = tok_nextc(tok);
				} while (isxdigit(c));
			}
			else {
				/* Set when a digit outside '0'..'7' follows
				   the leading zero */
				int found_decimal = 0;
				/* Octal; c is first char of it */
				/* There's no 'isoctdigit' macro, sigh */
				while ('0' <= c && c < '8') {
					c = tok_nextc(tok);
				}
				if (isdigit(c)) {
					found_decimal = 1;
					do {
						c = tok_nextc(tok);
					} while (isdigit(c));
				}
				if (c == '.')
					goto fraction;
				else if (c == 'e' || c == 'E')
					goto exponent;
#ifndef WITHOUT_COMPLEX
				else if (c == 'j' || c == 'J')
					goto imaginary;
#endif
				else if (found_decimal) {
					/* Digits 8/9 after a leading zero
					   are an error unless a fraction,
					   exponent or imaginary suffix
					   follows */
					tok->done = E_TOKEN;
					tok_backup(tok, c);
					return ERRORTOKEN;
				}
			}
			/* Optional 'l'/'L' suffix */
			if (c == 'l' || c == 'L')
				c = tok_nextc(tok);
		}
		else {
			/* Decimal */
			do {
				c = tok_nextc(tok);
			} while (isdigit(c));
			/* Optional 'l'/'L' suffix */
			if (c == 'l' || c == 'L')
				c = tok_nextc(tok);
			else {
				/* Accept floating point numbers. */
				if (c == '.') {
		fraction:
					/* Fraction */
					do {
						c = tok_nextc(tok);
					} while (isdigit(c));
				}
				if (c == 'e' || c == 'E') {
		exponent:
					/* Exponent part: optional sign, then
					   at least one digit is required */
					c = tok_nextc(tok);
					if (c == '+' || c == '-')
						c = tok_nextc(tok);
					if (!isdigit(c)) {
						tok->done = E_TOKEN;
						tok_backup(tok, c);
						return ERRORTOKEN;
					}
					do {
						c = tok_nextc(tok);
					} while (isdigit(c));
				}
#ifndef WITHOUT_COMPLEX
				if (c == 'j' || c == 'J')
					/* Imaginary part */
		imaginary:
					c = tok_nextc(tok);
#endif
			}
		}
		/* c is the first character past the number */
		tok_backup(tok, c);
		*p_start = tok->start;
		*p_end = tok->cur;
		return NUMBER;
	}

  letter_quote:
	/* String */
	if (c == '\'' || c == '"') {
		/* quote2 is the value tok->cur - tok->start will have just
		   after a quote that immediately follows the opening one;
		   it is used to recognise an empty string or the start of
		   a triple-quoted string. */
		int quote2 = tok->cur - tok->start + 1;
		int quote = c;
		int triple = 0;		/* inside a triple-quoted string? */
		int tripcount = 0;	/* consecutive quote chars seen */
		for (;;) {
			c = tok_nextc(tok);
			if (c == '\n') {
				/* Newlines are only allowed inside
				   triple-quoted strings */
				if (!triple) {
					tok->done = E_EOLS;
					tok_backup(tok, c);
					return ERRORTOKEN;
				}
				tripcount = 0;
				tok->cont_line = 1; /* multiline string. */
			}
			else if (c == EOF) {
				if (triple)
					tok->done = E_EOFS;
				else
					tok->done = E_EOLS;
				tok->cur = tok->inp;
				return ERRORTOKEN;
			}
			else if (c == quote) {
				tripcount++;
				if (tok->cur - tok->start == quote2) {
					/* Second quote right after the
					   opening one: a third quote opens
					   a triple-quoted string, anything
					   else means an empty string */
					c = tok_nextc(tok);
					if (c == quote) {
						triple = 1;
						tripcount = 0;
						continue;
					}
					tok_backup(tok, c);
				}
				/* End of string: one quote normally, three
				   in a row when triple-quoted */
				if (!triple || tripcount == 3)
					break;
			}
			else if (c == '\\') {
				/* Skip the character after a backslash so
				   that it cannot terminate the string */
				tripcount = 0;
				c = tok_nextc(tok);
				if (c == EOF) {
					tok->done = E_EOLS;
					tok->cur = tok->inp;
					return ERRORTOKEN;
				}
			}
			else
				tripcount = 0;
		}
		*p_start = tok->start;
		*p_end = tok->cur;
		return STRING;
	}

	/* Line continuation */
	if (c == '\\') {
		c = tok_nextc(tok);
		/* The backslash must be directly followed by a newline */
		if (c != '\n') {
			tok->done = E_LINECONT;
			tok->cur = tok->inp;
			return ERRORTOKEN;
		}
		tok->cont_line = 1;
		goto again; /* Read next line */
	}

	/* Check for two-character token */
	{
		int c2 = tok_nextc(tok);
		int token = PyToken_TwoChars(c, c2);
		if (token != OP) {
			/* Try to extend it to a three-character token */
			int c3 = tok_nextc(tok);
			int token3 = PyToken_ThreeChars(c, c2, c3);
			if (token3 != OP) {
				token = token3;
			} else {
				tok_backup(tok, c3);
			}
			*p_start = tok->start;
			*p_end = tok->cur;
			return token;
		}
		tok_backup(tok, c2);
	}

	/* Keep track of parentheses nesting level */
	switch (c) {
	case '(':
	case '[':
	case '{':
		tok->level++;
		break;
	case ')':
	case ']':
	case '}':
		tok->level--;
		break;
	}

	/* Punctuation character */
	*p_start = tok->start;
	*p_end = tok->cur;
	return PyToken_OneChar(c);
}
1486
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +00001487int
1488PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
1489{
1490 int result = tok_get(tok, p_start, p_end);
1491 if (tok->decoding_erred) {
1492 result = ERRORTOKEN;
1493 tok->done = E_DECODE;
1494 }
1495 return result;
1496}
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001497
Guido van Rossum408027e1996-12-30 16:17:54 +00001498#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001499
1500void
Thomas Wouters23c9e002000-07-22 19:20:54 +00001501tok_dump(int type, char *start, char *end)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001502{
Guido van Rossum86bea461997-04-29 21:03:06 +00001503 printf("%s", _PyParser_TokenNames[type]);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001504 if (type == NAME || type == NUMBER || type == STRING || type == OP)
1505 printf("(%.*s)", (int)(end - start), start);
1506}
1507
1508#endif