blob: a29ba472aa3224ce467ae8da9c9d28d145131be3 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* Tokenizer implementation */
3
Jack Jansen7b8c7542002-04-14 20:12:41 +00004#include "Python.h"
Guido van Rossum3f5da241990-12-20 15:06:42 +00005#include "pgenheaders.h"
6
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00007#include <ctype.h>
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +00008#include <assert.h>
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00009
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000010#include "tokenizer.h"
11#include "errcode.h"
12
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +000013#ifndef PGEN
14#include "unicodeobject.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +000015#include "bytesobject.h"
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +000016#include "fileobject.h"
17#include "codecs.h"
18#include "abstract.h"
19#endif /* PGEN */
20
/* True when the value C may begin an identifier: an ASCII letter, '_',
   or any value >= 128.
   Every use of the argument is parenthesized so that an expression
   argument (for example a conditional expression) is classified as a
   whole.  The argument is still evaluated several times, so it must not
   have side effects. */
#define is_potential_identifier_start(c) (\
              ((c) >= 'a' && (c) <= 'z')\
               || ((c) >= 'A' && (c) <= 'Z')\
               || (c) == '_'\
               || ((c) >= 128))
Martin v. Löwis5b222132007-06-10 09:51:05 +000026
/* True when the value C may appear inside an identifier: an ASCII letter
   or digit, '_', or any value >= 128.
   Every use of the argument is parenthesized so that an expression
   argument (for example a conditional expression) is classified as a
   whole.  The argument is still evaluated several times, so it must not
   have side effects. */
#define is_potential_identifier_char(c) (\
              ((c) >= 'a' && (c) <= 'z')\
               || ((c) >= 'A' && (c) <= 'Z')\
               || ((c) >= '0' && (c) <= '9')\
               || (c) == '_'\
               || ((c) >= 128))
Martin v. Löwis5b222132007-06-10 09:51:05 +000033
/* PyOS_Readline returns a malloc'ed string including the trailing \n,
   an empty malloc'ed string for EOF, or NULL if interrupted. */
extern char *PyOS_Readline(FILE *, FILE *, const char *);

/* Don't ever change this -- it would break the portability of Python code */
#define TABSIZE 8

/* Forward declarations */
static struct tok_state *tok_new(void);
static int tok_nextc(struct tok_state *tok);
static void tok_backup(struct tok_state *tok, int c);
Guido van Rossum3f5da241990-12-20 15:06:42 +000046
Brett Cannond5ec98c2007-10-20 02:54:14 +000047
/* Token names, indexed by token number.
   This table must match the #defines in token.h! */

const char *_PyParser_TokenNames[] = {
    /* 0 */
    "ENDMARKER", "NAME", "NUMBER", "STRING", "NEWLINE",
    "INDENT", "DEDENT",
    /* 7 */
    "LPAR", "RPAR", "LSQB", "RSQB", "COLON", "COMMA", "SEMI",
    /* 14 */
    "PLUS", "MINUS", "STAR", "SLASH", "VBAR", "AMPER",
    "LESS", "GREATER", "EQUAL", "DOT", "PERCENT",
    "LBRACE", "RBRACE",
    /* 27 */
    "EQEQUAL", "NOTEQUAL", "LESSEQUAL", "GREATEREQUAL",
    "TILDE", "CIRCUMFLEX", "LEFTSHIFT", "RIGHTSHIFT", "DOUBLESTAR",
    /* 36 */
    "PLUSEQUAL", "MINEQUAL", "STAREQUAL", "SLASHEQUAL",
    "PERCENTEQUAL", "AMPEREQUAL", "VBAREQUAL", "CIRCUMFLEXEQUAL",
    "LEFTSHIFTEQUAL", "RIGHTSHIFTEQUAL", "DOUBLESTAREQUAL",
    "DOUBLESLASH", "DOUBLESLASHEQUAL",
    /* 49 */
    "AT", "ATEQUAL", "RARROW", "ELLIPSIS",
    /* 53 */
    "OP", "AWAIT", "ASYNC",
    /* 56 */
    "<ERRORTOKEN>",
    "<N_TOKENS>"
};
111
112
113/* Create and initialize a new tok_state structure */
114
115static struct tok_state *
Thomas Wouters23c9e002000-07-22 19:20:54 +0000116tok_new(void)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000117{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000118 struct tok_state *tok = (struct tok_state *)PyMem_MALLOC(
119 sizeof(struct tok_state));
120 if (tok == NULL)
121 return NULL;
122 tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
123 tok->done = E_OK;
124 tok->fp = NULL;
125 tok->input = NULL;
126 tok->tabsize = TABSIZE;
127 tok->indent = 0;
128 tok->indstack[0] = 0;
Yury Selivanov75445082015-05-11 22:57:16 -0400129
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000130 tok->atbol = 1;
131 tok->pendin = 0;
132 tok->prompt = tok->nextprompt = NULL;
133 tok->lineno = 0;
134 tok->level = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000135 tok->altwarning = 1;
136 tok->alterror = 1;
137 tok->alttabsize = 1;
138 tok->altindstack[0] = 0;
139 tok->decoding_state = STATE_INIT;
140 tok->decoding_erred = 0;
141 tok->read_coding_spec = 0;
142 tok->enc = NULL;
143 tok->encoding = NULL;
144 tok->cont_line = 0;
Martin v. Löwis1ee99d32002-08-04 20:10:29 +0000145#ifndef PGEN
Victor Stinner7f2fee32011-04-05 00:39:01 +0200146 tok->filename = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 tok->decoding_readline = NULL;
148 tok->decoding_buffer = NULL;
Martin v. Löwis1ee99d32002-08-04 20:10:29 +0000149#endif
Yury Selivanov96ec9342015-07-23 15:01:58 +0300150
151 tok->async_def = 0;
152 tok->async_def_indent = 0;
153 tok->async_def_nl = 0;
154
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000155 return tok;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000156}
157
Benjamin Petersonaeaa5922009-11-13 00:17:59 +0000158static char *
Benjamin Peterson2dbfd882013-07-15 19:15:34 -0700159new_string(const char *s, Py_ssize_t len, struct tok_state *tok)
Benjamin Petersonaeaa5922009-11-13 00:17:59 +0000160{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 char* result = (char *)PyMem_MALLOC(len + 1);
Benjamin Peterson2dbfd882013-07-15 19:15:34 -0700162 if (!result) {
163 tok->done = E_NOMEM;
164 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 }
Benjamin Peterson2dbfd882013-07-15 19:15:34 -0700166 memcpy(result, s, len);
167 result[len] = '\0';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000168 return result;
Benjamin Petersonaeaa5922009-11-13 00:17:59 +0000169}
170
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000171#ifdef PGEN
172
173static char *
174decoding_fgets(char *s, int size, struct tok_state *tok)
175{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000176 return fgets(s, size, tok->fp);
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000177}
178
179static int
180decoding_feof(struct tok_state *tok)
181{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 return feof(tok->fp);
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000183}
184
/* PGEN build: return an allocated copy of STR (via new_string).
   EXEC_INPUT is not used here. */
static char *
decode_str(const char *str, int exec_input, struct tok_state *tok)
{
    size_t length = strlen(str);

    return new_string(str, length, tok);
}
190
191#else /* PGEN */
192
193static char *
194error_ret(struct tok_state *tok) /* XXX */
195{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000196 tok->decoding_erred = 1;
197 if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
198 PyMem_FREE(tok->buf);
Serhiy Storchaka0d441112015-11-14 15:10:35 +0200199 tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
200 tok->done = E_DECODE;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 return NULL; /* as if it were EOF */
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000202}
203
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000204
/* Map the common spellings of UTF-8 and Latin-1 to one canonical name.

   At most the first 12 bytes of S are examined; they are lower-cased and
   '_' is treated as '-'.  Returns the literal "utf-8" or "iso-8859-1" when
   the normalized prefix names one of those encodings (optionally followed
   by a "-suffix"); otherwise returns S itself, unchanged.  Callers can
   therefore test `result != s` to learn whether a replacement happened. */
static const char *
get_normal_name(const char *s)  /* for utf-8 and latin-1 */
{
    char buf[13];
    int i;
    for (i = 0; i < 12; i++) {
        /* Go through unsigned char: handing tolower() a negative plain
           char value is undefined behaviour. */
        unsigned char c = (unsigned char)s[i];
        if (c == '\0')
            break;
        else if (c == '_')
            buf[i] = '-';
        else
            buf[i] = (char)tolower(c);
    }
    buf[i] = '\0';
    if (strcmp(buf, "utf-8") == 0 ||
        strncmp(buf, "utf-8-", 6) == 0)
        return "utf-8";
    else if (strcmp(buf, "latin-1") == 0 ||
             strcmp(buf, "iso-8859-1") == 0 ||
             strcmp(buf, "iso-latin-1") == 0 ||
             strncmp(buf, "latin-1-", 8) == 0 ||
             strncmp(buf, "iso-8859-1-", 11) == 0 ||
             strncmp(buf, "iso-latin-1-", 12) == 0)
        return "iso-8859-1";
    else
        return s;
}
233
234/* Return the coding spec in S, or NULL if none is found. */
235
Benjamin Peterson2dbfd882013-07-15 19:15:34 -0700236static int
237get_coding_spec(const char *s, char **spec, Py_ssize_t size, struct tok_state *tok)
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000238{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000239 Py_ssize_t i;
Benjamin Peterson2dbfd882013-07-15 19:15:34 -0700240 *spec = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000241 /* Coding spec must be in a comment, and that comment must be
242 * the only statement on the source code line. */
243 for (i = 0; i < size - 6; i++) {
244 if (s[i] == '#')
245 break;
246 if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014')
Benjamin Peterson2dbfd882013-07-15 19:15:34 -0700247 return 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 }
249 for (; i < size - 6; i++) { /* XXX inefficient search */
250 const char* t = s + i;
251 if (strncmp(t, "coding", 6) == 0) {
252 const char* begin = NULL;
253 t += 6;
254 if (t[0] != ':' && t[0] != '=')
255 continue;
256 do {
257 t++;
258 } while (t[0] == '\x20' || t[0] == '\t');
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000259
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000260 begin = t;
261 while (Py_ISALNUM(t[0]) ||
262 t[0] == '-' || t[0] == '_' || t[0] == '.')
263 t++;
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000264
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000265 if (begin < t) {
Benjamin Peterson2dbfd882013-07-15 19:15:34 -0700266 char* r = new_string(begin, t - begin, tok);
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200267 const char* q;
Benjamin Peterson2dbfd882013-07-15 19:15:34 -0700268 if (!r)
269 return 0;
Benjamin Peterson265fba42013-07-15 20:50:22 -0700270 q = get_normal_name(r);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000271 if (r != q) {
272 PyMem_FREE(r);
Benjamin Peterson2dbfd882013-07-15 19:15:34 -0700273 r = new_string(q, strlen(q), tok);
274 if (!r)
275 return 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000276 }
Benjamin Peterson2dbfd882013-07-15 19:15:34 -0700277 *spec = r;
Serhiy Storchakae431d3c2016-03-20 23:36:29 +0200278 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000279 }
280 }
281 }
Benjamin Peterson2dbfd882013-07-15 19:15:34 -0700282 return 1;
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000283}
284
285/* Check whether the line contains a coding spec. If it does,
286 invoke the set_readline function for the new encoding.
287 This function receives the tok_state and the new encoding.
288 Return 1 on success, 0 on failure. */
289
290static int
Martin v. Löwis18e16552006-02-15 17:27:45 +0000291check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000292 int set_readline(struct tok_state *, const char *))
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000293{
Benjamin Peterson2dbfd882013-07-15 19:15:34 -0700294 char *cs;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000295 int r = 1;
Tim Peters17db21f2002-09-03 15:39:58 +0000296
Serhiy Storchaka768c16c2014-01-09 18:36:09 +0200297 if (tok->cont_line) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000298 /* It's a continuation line, so it can't be a coding spec. */
Serhiy Storchaka768c16c2014-01-09 18:36:09 +0200299 tok->read_coding_spec = 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000300 return 1;
Serhiy Storchaka768c16c2014-01-09 18:36:09 +0200301 }
Benjamin Peterson2dbfd882013-07-15 19:15:34 -0700302 if (!get_coding_spec(line, &cs, size, tok))
303 return 0;
Serhiy Storchaka768c16c2014-01-09 18:36:09 +0200304 if (!cs) {
305 Py_ssize_t i;
306 for (i = 0; i < size; i++) {
307 if (line[i] == '#' || line[i] == '\n' || line[i] == '\r')
308 break;
309 if (line[i] != ' ' && line[i] != '\t' && line[i] != '\014') {
310 /* Stop checking coding spec after a line containing
311 * anything except a comment. */
312 tok->read_coding_spec = 1;
313 break;
314 }
315 }
Benjamin Peterson2dbfd882013-07-15 19:15:34 -0700316 return 1;
Serhiy Storchaka768c16c2014-01-09 18:36:09 +0200317 }
Benjamin Peterson2dbfd882013-07-15 19:15:34 -0700318 tok->read_coding_spec = 1;
319 if (tok->encoding == NULL) {
320 assert(tok->decoding_state == STATE_RAW);
321 if (strcmp(cs, "utf-8") == 0) {
322 tok->encoding = cs;
323 } else {
324 r = set_readline(tok, cs);
325 if (r) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000326 tok->encoding = cs;
Benjamin Peterson2dbfd882013-07-15 19:15:34 -0700327 tok->decoding_state = STATE_NORMAL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000328 }
Benjamin Peterson2dbfd882013-07-15 19:15:34 -0700329 else {
Serhiy Storchaka3af14aa2013-06-09 16:51:52 +0300330 PyErr_Format(PyExc_SyntaxError,
Benjamin Peterson2dbfd882013-07-15 19:15:34 -0700331 "encoding problem: %s", cs);
332 PyMem_FREE(cs);
333 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000334 }
Benjamin Peterson2dbfd882013-07-15 19:15:34 -0700335 } else { /* then, compare cs with BOM */
336 r = (strcmp(tok->encoding, cs) == 0);
337 if (!r)
338 PyErr_Format(PyExc_SyntaxError,
339 "encoding problem: %s with BOM", cs);
340 PyMem_FREE(cs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000341 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000342 return r;
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000343}
344
345/* See whether the file starts with a BOM. If it does,
346 invoke the set_readline function with the new encoding.
347 Return 1 on success, 0 on failure. */
348
349static int
350check_bom(int get_char(struct tok_state *),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000351 void unget_char(int, struct tok_state *),
352 int set_readline(struct tok_state *, const char *),
353 struct tok_state *tok)
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000354{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000355 int ch1, ch2, ch3;
356 ch1 = get_char(tok);
357 tok->decoding_state = STATE_RAW;
358 if (ch1 == EOF) {
359 return 1;
360 } else if (ch1 == 0xEF) {
361 ch2 = get_char(tok);
362 if (ch2 != 0xBB) {
363 unget_char(ch2, tok);
364 unget_char(ch1, tok);
365 return 1;
366 }
367 ch3 = get_char(tok);
368 if (ch3 != 0xBF) {
369 unget_char(ch3, tok);
370 unget_char(ch2, tok);
371 unget_char(ch1, tok);
372 return 1;
373 }
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000374#if 0
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000375 /* Disable support for UTF-16 BOMs until a decision
376 is made whether this needs to be supported. */
377 } else if (ch1 == 0xFE) {
378 ch2 = get_char(tok);
379 if (ch2 != 0xFF) {
380 unget_char(ch2, tok);
381 unget_char(ch1, tok);
382 return 1;
383 }
384 if (!set_readline(tok, "utf-16-be"))
385 return 0;
386 tok->decoding_state = STATE_NORMAL;
387 } else if (ch1 == 0xFF) {
388 ch2 = get_char(tok);
389 if (ch2 != 0xFE) {
390 unget_char(ch2, tok);
391 unget_char(ch1, tok);
392 return 1;
393 }
394 if (!set_readline(tok, "utf-16-le"))
395 return 0;
396 tok->decoding_state = STATE_NORMAL;
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000397#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000398 } else {
399 unget_char(ch1, tok);
400 return 1;
401 }
402 if (tok->encoding != NULL)
403 PyMem_FREE(tok->encoding);
Benjamin Peterson2dbfd882013-07-15 19:15:34 -0700404 tok->encoding = new_string("utf-8", 5, tok);
405 if (!tok->encoding)
406 return 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000407 /* No need to set_readline: input is already utf-8 */
408 return 1;
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000409}
410
411/* Read a line of text from TOK into S, using the stream in TOK.
Walter Dörwaldc1f5fff2005-07-12 21:53:43 +0000412 Return NULL on failure, else S.
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000413
Walter Dörwaldc1f5fff2005-07-12 21:53:43 +0000414 On entry, tok->decoding_buffer will be one of:
415 1) NULL: need to call tok->decoding_readline to get a new line
416 2) PyUnicodeObject *: decoding_feof has called tok->decoding_readline and
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000417 stored the result in tok->decoding_buffer
Christian Heimes9c4756e2008-05-26 13:22:05 +0000418 3) PyByteArrayObject *: previous call to fp_readl did not have enough room
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000419 (in the s buffer) to copy entire contents of the line read
420 by tok->decoding_readline. tok->decoding_buffer has the overflow.
421 In this case, fp_readl is called in a loop (with an expanded buffer)
422 until the buffer ends with a '\n' (or until the end of the file is
423 reached): see tok_nextc and its calls to decoding_fgets.
Walter Dörwaldc1f5fff2005-07-12 21:53:43 +0000424*/
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000425
426static char *
427fp_readl(char *s, int size, struct tok_state *tok)
428{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000429 PyObject* bufobj;
430 const char *buf;
431 Py_ssize_t buflen;
Walter Dörwaldc1f5fff2005-07-12 21:53:43 +0000432
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000433 /* Ask for one less byte so we can terminate it */
434 assert(size > 0);
435 size--;
Walter Dörwaldc1f5fff2005-07-12 21:53:43 +0000436
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000437 if (tok->decoding_buffer) {
438 bufobj = tok->decoding_buffer;
439 Py_INCREF(bufobj);
440 }
441 else
442 {
443 bufobj = PyObject_CallObject(tok->decoding_readline, NULL);
444 if (bufobj == NULL)
445 goto error;
446 }
447 if (PyUnicode_CheckExact(bufobj))
448 {
449 buf = _PyUnicode_AsStringAndSize(bufobj, &buflen);
450 if (buf == NULL) {
451 goto error;
452 }
453 }
454 else
455 {
456 buf = PyByteArray_AsString(bufobj);
457 if (buf == NULL) {
458 goto error;
459 }
460 buflen = PyByteArray_GET_SIZE(bufobj);
461 }
Amaury Forgeot d'Arc65f9ace2007-11-15 23:19:43 +0000462
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000463 Py_XDECREF(tok->decoding_buffer);
464 if (buflen > size) {
465 /* Too many chars, the rest goes into tok->decoding_buffer */
466 tok->decoding_buffer = PyByteArray_FromStringAndSize(buf+size,
467 buflen-size);
468 if (tok->decoding_buffer == NULL)
469 goto error;
470 buflen = size;
471 }
472 else
473 tok->decoding_buffer = NULL;
Amaury Forgeot d'Arc65f9ace2007-11-15 23:19:43 +0000474
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000475 memcpy(s, buf, buflen);
476 s[buflen] = '\0';
477 if (buflen == 0) /* EOF */
478 s = NULL;
479 Py_DECREF(bufobj);
480 return s;
Neal Norwitz41eaedd2007-08-12 00:03:22 +0000481
482error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000483 Py_XDECREF(bufobj);
484 return error_ret(tok);
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000485}
486
487/* Set the readline function for TOK to a StreamReader's
488 readline function. The StreamReader is named ENC.
489
490 This function is called from check_bom and check_coding_spec.
491
492 ENC is usually identical to the future value of tok->encoding,
493 except for the (currently unsupported) case of UTF-16.
494
495 Return 1 on success, 0 on failure. */
496
497static int
498fp_setreadl(struct tok_state *tok, const char* enc)
499{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000500 PyObject *readline = NULL, *stream = NULL, *io = NULL;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200501 _Py_IDENTIFIER(open);
502 _Py_IDENTIFIER(readline);
Victor Stinner22a351a2010-10-14 12:04:34 +0000503 int fd;
Serhiy Storchaka768c16c2014-01-09 18:36:09 +0200504 long pos;
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000505
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000506 io = PyImport_ImportModuleNoBlock("io");
507 if (io == NULL)
508 goto cleanup;
Guido van Rossum9cbfffd2007-06-07 00:54:15 +0000509
Victor Stinner22a351a2010-10-14 12:04:34 +0000510 fd = fileno(tok->fp);
Serhiy Storchaka768c16c2014-01-09 18:36:09 +0200511 /* Due to buffering the file offset for fd can be different from the file
Martin v. Löwis815b41b2014-02-28 15:27:29 +0100512 * position of tok->fp. If tok->fp was opened in text mode on Windows,
513 * its file position counts CRLF as one char and can't be directly mapped
514 * to the file offset for fd. Instead we step back one byte and read to
515 * the end of line.*/
Serhiy Storchaka768c16c2014-01-09 18:36:09 +0200516 pos = ftell(tok->fp);
Martin v. Löwis815b41b2014-02-28 15:27:29 +0100517 if (pos == -1 ||
518 lseek(fd, (off_t)(pos > 0 ? pos - 1 : pos), SEEK_SET) == (off_t)-1) {
Victor Stinner22a351a2010-10-14 12:04:34 +0000519 PyErr_SetFromErrnoWithFilename(PyExc_OSError, NULL);
520 goto cleanup;
521 }
522
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200523 stream = _PyObject_CallMethodId(io, &PyId_open, "isisOOO",
Victor Stinner22a351a2010-10-14 12:04:34 +0000524 fd, "r", -1, enc, Py_None, Py_None, Py_False);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000525 if (stream == NULL)
526 goto cleanup;
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000527
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +0200528 readline = _PyObject_GetAttrId(stream, &PyId_readline);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300529 Py_XSETREF(tok->decoding_readline, readline);
Martin v. Löwis815b41b2014-02-28 15:27:29 +0100530 if (pos > 0) {
531 if (PyObject_CallObject(readline, NULL) == NULL) {
532 readline = NULL;
533 goto cleanup;
534 }
535 }
Guido van Rossum9cbfffd2007-06-07 00:54:15 +0000536
537 cleanup:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000538 Py_XDECREF(stream);
539 Py_XDECREF(io);
540 return readline != NULL;
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000541}
542
543/* Fetch the next byte from TOK. */
544
545static int fp_getc(struct tok_state *tok) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000546 return getc(tok->fp);
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000547}
548
549/* Unfetch the last byte back into TOK. */
550
551static void fp_ungetc(int c, struct tok_state *tok) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000552 ungetc(c, tok->fp);
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000553}
554
/* Check whether the bytes at S start a structurally valid UTF-8
   sequence: a lead byte followed by the right number of continuation
   bytes (0x80..0xBF).  Return the number of bytes forming the sequence
   (1 to 4) if yes, 0 if not.

   S must be NUL-terminated.  Only the lead/continuation structure is
   checked, not the decoded code point. */
static int valid_utf8(const unsigned char* s)
{
    int expected;
    int i;
    if (*s < 0x80)
        /* single-byte code */
        return 1;
    if (*s < 0xc0)
        /* following byte */
        return 0;
    if (*s < 0xE0)
        expected = 1;
    else if (*s < 0xF0)
        expected = 2;
    else if (*s < 0xF8)
        expected = 3;
    else
        return 0;
    /* Walk forward from the lead byte.  A NUL terminator inside a
       truncated sequence fails the range test, so the scan stops there
       and never reads a byte beyond the end of the string. */
    for (i = 1; i <= expected; i++)
        if (s[i] < 0x80 || s[i] >= 0xC0)
            return 0;
    return expected + 1;
}
582
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000583/* Read a line of input from TOK. Determine encoding
584 if necessary. */
585
586static char *
587decoding_fgets(char *s, int size, struct tok_state *tok)
588{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000589 char *line = NULL;
590 int badchar = 0;
591 for (;;) {
592 if (tok->decoding_state == STATE_NORMAL) {
593 /* We already have a codec associated with
594 this input. */
595 line = fp_readl(s, size, tok);
596 break;
597 } else if (tok->decoding_state == STATE_RAW) {
598 /* We want a 'raw' read. */
599 line = Py_UniversalNewlineFgets(s, size,
600 tok->fp, NULL);
601 break;
602 } else {
603 /* We have not yet determined the encoding.
604 If an encoding is found, use the file-pointer
605 reader functions from now on. */
606 if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok))
607 return error_ret(tok);
608 assert(tok->decoding_state != STATE_INIT);
609 }
610 }
611 if (line != NULL && tok->lineno < 2 && !tok->read_coding_spec) {
612 if (!check_coding_spec(line, strlen(line), tok, fp_setreadl)) {
613 return error_ret(tok);
614 }
615 }
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000616#ifndef PGEN
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000617 /* The default encoding is UTF-8, so make sure we don't have any
618 non-UTF-8 sequences in it. */
619 if (line && !tok->encoding) {
620 unsigned char *c;
621 int length;
622 for (c = (unsigned char *)line; *c; c += length)
623 if (!(length = valid_utf8(c))) {
624 badchar = *c;
625 break;
626 }
627 }
628 if (badchar) {
629 /* Need to add 1 to the line number, since this line
630 has not been counted, yet. */
Jesus Ceac1935d22011-04-25 04:03:58 +0200631 PyErr_Format(PyExc_SyntaxError,
632 "Non-UTF-8 code starting with '\\x%.2x' "
633 "in file %U on line %i, "
634 "but no encoding declared; "
635 "see http://python.org/dev/peps/pep-0263/ for details",
636 badchar, tok->filename, tok->lineno + 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000637 return error_ret(tok);
638 }
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000639#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000640 return line;
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000641}
642
643static int
644decoding_feof(struct tok_state *tok)
645{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000646 if (tok->decoding_state != STATE_NORMAL) {
647 return feof(tok->fp);
648 } else {
649 PyObject* buf = tok->decoding_buffer;
650 if (buf == NULL) {
651 buf = PyObject_CallObject(tok->decoding_readline, NULL);
652 if (buf == NULL) {
653 error_ret(tok);
654 return 1;
655 } else {
656 tok->decoding_buffer = buf;
657 }
658 }
659 return PyObject_Length(buf) == 0;
660 }
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000661}
662
663/* Fetch a byte from TOK, using the string buffer. */
664
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000665static int
666buf_getc(struct tok_state *tok) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000667 return Py_CHARMASK(*tok->str++);
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000668}
669
670/* Unfetch a byte from TOK, using the string buffer. */
671
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000672static void
673buf_ungetc(int c, struct tok_state *tok) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000674 tok->str--;
675 assert(Py_CHARMASK(*tok->str) == c); /* tok->cur may point to read-only segment */
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000676}
677
678/* Set the readline function for TOK to ENC. For the string-based
679 tokenizer, this means to just record the encoding. */
680
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000681static int
682buf_setreadl(struct tok_state *tok, const char* enc) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000683 tok->enc = enc;
684 return 1;
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000685}
686
687/* Return a UTF-8 encoding Python string object from the
688 C byte string STR, which is encoded with ENC. */
689
690static PyObject *
691translate_into_utf8(const char* str, const char* enc) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000692 PyObject *utf8;
693 PyObject* buf = PyUnicode_Decode(str, strlen(str), enc, NULL);
694 if (buf == NULL)
695 return NULL;
696 utf8 = PyUnicode_AsUTF8String(buf);
697 Py_DECREF(buf);
698 return utf8;
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000699}
700
Benjamin Petersonaeaa5922009-11-13 00:17:59 +0000701
702static char *
703translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
Victor Stinner79697732013-06-05 00:44:00 +0200704 int skip_next_lf = 0;
705 size_t needed_length = strlen(s) + 2, final_length;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000706 char *buf, *current;
707 char c = '\0';
708 buf = PyMem_MALLOC(needed_length);
709 if (buf == NULL) {
710 tok->done = E_NOMEM;
711 return NULL;
712 }
713 for (current = buf; *s; s++, current++) {
714 c = *s;
715 if (skip_next_lf) {
716 skip_next_lf = 0;
717 if (c == '\n') {
718 c = *++s;
719 if (!c)
720 break;
721 }
722 }
723 if (c == '\r') {
724 skip_next_lf = 1;
725 c = '\n';
726 }
727 *current = c;
728 }
729 /* If this is exec input, add a newline to the end of the string if
730 there isn't one already. */
731 if (exec_input && c != '\n') {
732 *current = '\n';
733 current++;
734 }
735 *current = '\0';
736 final_length = current - buf + 1;
737 if (final_length < needed_length && final_length)
738 /* should never fail */
739 buf = PyMem_REALLOC(buf, final_length);
740 return buf;
Benjamin Petersonaeaa5922009-11-13 00:17:59 +0000741}
742
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000743/* Decode a byte string STR for use as the buffer of TOK.
744 Look for encoding declarations inside STR, and record them
745 inside TOK. */
746
747static const char *
Benjamin Petersonaeaa5922009-11-13 00:17:59 +0000748decode_str(const char *input, int single, struct tok_state *tok)
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000749{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000750 PyObject* utf8 = NULL;
751 const char *str;
752 const char *s;
753 const char *newl[2] = {NULL, NULL};
754 int lineno = 0;
755 tok->input = str = translate_newlines(input, single, tok);
756 if (str == NULL)
757 return NULL;
758 tok->enc = NULL;
759 tok->str = str;
760 if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
761 return error_ret(tok);
762 str = tok->str; /* string after BOM if any */
763 assert(str);
764 if (tok->enc != NULL) {
765 utf8 = translate_into_utf8(str, tok->enc);
766 if (utf8 == NULL)
767 return error_ret(tok);
768 str = PyBytes_AsString(utf8);
769 }
770 for (s = str;; s++) {
771 if (*s == '\0') break;
772 else if (*s == '\n') {
773 assert(lineno < 2);
774 newl[lineno] = s;
775 lineno++;
776 if (lineno == 2) break;
777 }
778 }
779 tok->enc = NULL;
780 /* need to check line 1 and 2 separately since check_coding_spec
781 assumes a single line as input */
782 if (newl[0]) {
783 if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl))
784 return error_ret(tok);
Serhiy Storchaka768c16c2014-01-09 18:36:09 +0200785 if (tok->enc == NULL && !tok->read_coding_spec && newl[1]) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000786 if (!check_coding_spec(newl[0]+1, newl[1] - newl[0],
787 tok, buf_setreadl))
788 return error_ret(tok);
789 }
790 }
791 if (tok->enc != NULL) {
792 assert(utf8 == NULL);
793 utf8 = translate_into_utf8(str, tok->enc);
794 if (utf8 == NULL)
795 return error_ret(tok);
796 str = PyBytes_AS_STRING(utf8);
797 }
798 assert(tok->decoding_buffer == NULL);
799 tok->decoding_buffer = utf8; /* CAUTION */
800 return str;
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +0000801}
802
803#endif /* PGEN */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000804
805/* Set up tokenizer for string */
806
807struct tok_state *
Benjamin Petersonaeaa5922009-11-13 00:17:59 +0000808PyTokenizer_FromString(const char *str, int exec_input)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000809{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000810 struct tok_state *tok = tok_new();
811 if (tok == NULL)
812 return NULL;
Serhiy Storchakac6792272013-10-19 21:03:34 +0300813 str = decode_str(str, exec_input, tok);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000814 if (str == NULL) {
815 PyTokenizer_Free(tok);
816 return NULL;
817 }
Neal Norwitzdee2fd52005-11-16 05:12:59 +0000818
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000819 /* XXX: constify members. */
820 tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
821 return tok;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000822}
823
Benjamin Petersonf5b52242009-03-02 23:31:26 +0000824struct tok_state *
Benjamin Petersonaeaa5922009-11-13 00:17:59 +0000825PyTokenizer_FromUTF8(const char *str, int exec_input)
Benjamin Petersonf5b52242009-03-02 23:31:26 +0000826{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000827 struct tok_state *tok = tok_new();
828 if (tok == NULL)
829 return NULL;
Benjamin Petersonaeaa5922009-11-13 00:17:59 +0000830#ifndef PGEN
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000831 tok->input = str = translate_newlines(str, exec_input, tok);
Benjamin Petersonaeaa5922009-11-13 00:17:59 +0000832#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000833 if (str == NULL) {
834 PyTokenizer_Free(tok);
835 return NULL;
836 }
837 tok->decoding_state = STATE_RAW;
838 tok->read_coding_spec = 1;
839 tok->enc = NULL;
840 tok->str = str;
841 tok->encoding = (char *)PyMem_MALLOC(6);
842 if (!tok->encoding) {
843 PyTokenizer_Free(tok);
844 return NULL;
845 }
846 strcpy(tok->encoding, "utf-8");
Benjamin Petersonf5b52242009-03-02 23:31:26 +0000847
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000848 /* XXX: constify members. */
849 tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
850 return tok;
Benjamin Petersonf5b52242009-03-02 23:31:26 +0000851}
852
Guido van Rossum8c11a5c1991-07-27 21:42:56 +0000853/* Set up tokenizer for file */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000854
855struct tok_state *
Serhiy Storchakac6792272013-10-19 21:03:34 +0300856PyTokenizer_FromFile(FILE *fp, const char* enc,
857 const char *ps1, const char *ps2)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000858{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000859 struct tok_state *tok = tok_new();
860 if (tok == NULL)
861 return NULL;
862 if ((tok->buf = (char *)PyMem_MALLOC(BUFSIZ)) == NULL) {
863 PyTokenizer_Free(tok);
864 return NULL;
865 }
866 tok->cur = tok->inp = tok->buf;
867 tok->end = tok->buf + BUFSIZ;
868 tok->fp = fp;
869 tok->prompt = ps1;
870 tok->nextprompt = ps2;
871 if (enc != NULL) {
872 /* Must copy encoding declaration since it
873 gets copied into the parse tree. */
874 tok->encoding = PyMem_MALLOC(strlen(enc)+1);
875 if (!tok->encoding) {
876 PyTokenizer_Free(tok);
877 return NULL;
878 }
879 strcpy(tok->encoding, enc);
880 tok->decoding_state = STATE_NORMAL;
881 }
882 return tok;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000883}
884
885
886/* Free a tok_state structure */
887
888void
Thomas Wouters23c9e002000-07-22 19:20:54 +0000889PyTokenizer_Free(struct tok_state *tok)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000890{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000891 if (tok->encoding != NULL)
892 PyMem_FREE(tok->encoding);
Martin v. Löwis1ee99d32002-08-04 20:10:29 +0000893#ifndef PGEN
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000894 Py_XDECREF(tok->decoding_readline);
895 Py_XDECREF(tok->decoding_buffer);
Victor Stinner7f2fee32011-04-05 00:39:01 +0200896 Py_XDECREF(tok->filename);
Martin v. Löwis1ee99d32002-08-04 20:10:29 +0000897#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000898 if (tok->fp != NULL && tok->buf != NULL)
899 PyMem_FREE(tok->buf);
900 if (tok->input)
901 PyMem_FREE((char *)tok->input);
902 PyMem_FREE(tok);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000903}
904
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000905/* Get next char, updating state; error code goes into tok->done */
906
907static int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200908tok_nextc(struct tok_state *tok)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000909{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000910 for (;;) {
911 if (tok->cur != tok->inp) {
912 return Py_CHARMASK(*tok->cur++); /* Fast path */
913 }
914 if (tok->done != E_OK)
915 return EOF;
916 if (tok->fp == NULL) {
917 char *end = strchr(tok->inp, '\n');
918 if (end != NULL)
919 end++;
920 else {
921 end = strchr(tok->inp, '\0');
922 if (end == tok->inp) {
923 tok->done = E_EOF;
924 return EOF;
925 }
926 }
927 if (tok->start == NULL)
928 tok->buf = tok->cur;
929 tok->line_start = tok->cur;
930 tok->lineno++;
931 tok->inp = end;
932 return Py_CHARMASK(*tok->cur++);
933 }
934 if (tok->prompt != NULL) {
935 char *newtok = PyOS_Readline(stdin, stdout, tok->prompt);
Victor Stinner034c7532011-01-07 18:56:19 +0000936#ifndef PGEN
Victor Stinner89e34362011-01-07 18:47:22 +0000937 if (newtok != NULL) {
938 char *translated = translate_newlines(newtok, 0, tok);
939 PyMem_FREE(newtok);
940 if (translated == NULL)
941 return EOF;
942 newtok = translated;
943 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000944 if (tok->encoding && newtok && *newtok) {
945 /* Recode to UTF-8 */
946 Py_ssize_t buflen;
947 const char* buf;
948 PyObject *u = translate_into_utf8(newtok, tok->encoding);
949 PyMem_FREE(newtok);
950 if (!u) {
951 tok->done = E_DECODE;
952 return EOF;
953 }
954 buflen = PyBytes_GET_SIZE(u);
955 buf = PyBytes_AS_STRING(u);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000956 newtok = PyMem_MALLOC(buflen+1);
957 strcpy(newtok, buf);
958 Py_DECREF(u);
959 }
Martin v. Löwis85bcc662007-09-04 09:18:06 +0000960#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000961 if (tok->nextprompt != NULL)
962 tok->prompt = tok->nextprompt;
963 if (newtok == NULL)
964 tok->done = E_INTR;
965 else if (*newtok == '\0') {
966 PyMem_FREE(newtok);
967 tok->done = E_EOF;
968 }
969 else if (tok->start != NULL) {
970 size_t start = tok->start - tok->buf;
971 size_t oldlen = tok->cur - tok->buf;
972 size_t newlen = oldlen + strlen(newtok);
973 char *buf = tok->buf;
974 buf = (char *)PyMem_REALLOC(buf, newlen+1);
975 tok->lineno++;
976 if (buf == NULL) {
977 PyMem_FREE(tok->buf);
978 tok->buf = NULL;
979 PyMem_FREE(newtok);
980 tok->done = E_NOMEM;
981 return EOF;
982 }
983 tok->buf = buf;
984 tok->cur = tok->buf + oldlen;
985 tok->line_start = tok->cur;
986 strcpy(tok->buf + oldlen, newtok);
987 PyMem_FREE(newtok);
988 tok->inp = tok->buf + newlen;
989 tok->end = tok->inp + 1;
990 tok->start = tok->buf + start;
991 }
992 else {
993 tok->lineno++;
994 if (tok->buf != NULL)
995 PyMem_FREE(tok->buf);
996 tok->buf = newtok;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000997 tok->cur = tok->buf;
998 tok->line_start = tok->buf;
999 tok->inp = strchr(tok->buf, '\0');
1000 tok->end = tok->inp + 1;
1001 }
1002 }
1003 else {
1004 int done = 0;
1005 Py_ssize_t cur = 0;
1006 char *pt;
1007 if (tok->start == NULL) {
1008 if (tok->buf == NULL) {
1009 tok->buf = (char *)
1010 PyMem_MALLOC(BUFSIZ);
1011 if (tok->buf == NULL) {
1012 tok->done = E_NOMEM;
1013 return EOF;
1014 }
1015 tok->end = tok->buf + BUFSIZ;
1016 }
1017 if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf),
1018 tok) == NULL) {
Serhiy Storchaka0d441112015-11-14 15:10:35 +02001019 if (!tok->decoding_erred)
1020 tok->done = E_EOF;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001021 done = 1;
1022 }
1023 else {
1024 tok->done = E_OK;
1025 tok->inp = strchr(tok->buf, '\0');
1026 done = tok->inp[-1] == '\n';
1027 }
1028 }
1029 else {
1030 cur = tok->cur - tok->buf;
1031 if (decoding_feof(tok)) {
1032 tok->done = E_EOF;
1033 done = 1;
1034 }
1035 else
1036 tok->done = E_OK;
1037 }
1038 tok->lineno++;
1039 /* Read until '\n' or EOF */
1040 while (!done) {
1041 Py_ssize_t curstart = tok->start == NULL ? -1 :
1042 tok->start - tok->buf;
1043 Py_ssize_t curvalid = tok->inp - tok->buf;
1044 Py_ssize_t newsize = curvalid + BUFSIZ;
1045 char *newbuf = tok->buf;
1046 newbuf = (char *)PyMem_REALLOC(newbuf,
1047 newsize);
1048 if (newbuf == NULL) {
1049 tok->done = E_NOMEM;
1050 tok->cur = tok->inp;
1051 return EOF;
1052 }
1053 tok->buf = newbuf;
Serhiy Storchaka0d441112015-11-14 15:10:35 +02001054 tok->cur = tok->buf + cur;
1055 tok->line_start = tok->cur;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001056 tok->inp = tok->buf + curvalid;
1057 tok->end = tok->buf + newsize;
1058 tok->start = curstart < 0 ? NULL :
1059 tok->buf + curstart;
1060 if (decoding_fgets(tok->inp,
1061 (int)(tok->end - tok->inp),
1062 tok) == NULL) {
1063 /* Break out early on decoding
1064 errors, as tok->buf will be NULL
1065 */
1066 if (tok->decoding_erred)
1067 return EOF;
1068 /* Last line does not end in \n,
1069 fake one */
1070 strcpy(tok->inp, "\n");
1071 }
1072 tok->inp = strchr(tok->inp, '\0');
1073 done = tok->inp[-1] == '\n';
1074 }
1075 if (tok->buf != NULL) {
1076 tok->cur = tok->buf + cur;
1077 tok->line_start = tok->cur;
1078 /* replace "\r\n" with "\n" */
1079 /* For Mac leave the \r, giving a syntax error */
1080 pt = tok->inp - 2;
1081 if (pt >= tok->buf && *pt == '\r') {
1082 *pt++ = '\n';
1083 *pt = '\0';
1084 tok->inp = pt;
1085 }
1086 }
1087 }
1088 if (tok->done != E_OK) {
1089 if (tok->prompt != NULL)
1090 PySys_WriteStderr("\n");
1091 tok->cur = tok->inp;
1092 return EOF;
1093 }
1094 }
1095 /*NOTREACHED*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001096}
1097
1098
1099/* Back-up one character */
1100
1101static void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001102tok_backup(struct tok_state *tok, int c)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001103{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001104 if (c != EOF) {
1105 if (--tok->cur < tok->buf)
1106 Py_FatalError("tok_backup: beginning of buffer");
1107 if (*tok->cur != c)
1108 *tok->cur = c;
1109 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001110}
1111
1112
1113/* Return the token corresponding to a single character */
1114
1115int
Thomas Wouters23c9e002000-07-22 19:20:54 +00001116PyToken_OneChar(int c)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001117{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001118 switch (c) {
1119 case '(': return LPAR;
1120 case ')': return RPAR;
1121 case '[': return LSQB;
1122 case ']': return RSQB;
1123 case ':': return COLON;
1124 case ',': return COMMA;
1125 case ';': return SEMI;
1126 case '+': return PLUS;
1127 case '-': return MINUS;
1128 case '*': return STAR;
1129 case '/': return SLASH;
1130 case '|': return VBAR;
1131 case '&': return AMPER;
1132 case '<': return LESS;
1133 case '>': return GREATER;
1134 case '=': return EQUAL;
1135 case '.': return DOT;
1136 case '%': return PERCENT;
1137 case '{': return LBRACE;
1138 case '}': return RBRACE;
1139 case '^': return CIRCUMFLEX;
1140 case '~': return TILDE;
Benjamin Petersond51374e2014-04-09 23:55:56 -04001141 case '@': return AT;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001142 default: return OP;
1143 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001144}
1145
1146
Guido van Rossumfbab9051991-10-20 20:25:03 +00001147int
Thomas Wouters23c9e002000-07-22 19:20:54 +00001148PyToken_TwoChars(int c1, int c2)
Guido van Rossumfbab9051991-10-20 20:25:03 +00001149{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001150 switch (c1) {
1151 case '=':
1152 switch (c2) {
1153 case '=': return EQEQUAL;
1154 }
1155 break;
1156 case '!':
1157 switch (c2) {
1158 case '=': return NOTEQUAL;
1159 }
1160 break;
1161 case '<':
1162 switch (c2) {
1163 case '>': return NOTEQUAL;
1164 case '=': return LESSEQUAL;
1165 case '<': return LEFTSHIFT;
1166 }
1167 break;
1168 case '>':
1169 switch (c2) {
1170 case '=': return GREATEREQUAL;
1171 case '>': return RIGHTSHIFT;
1172 }
1173 break;
1174 case '+':
1175 switch (c2) {
1176 case '=': return PLUSEQUAL;
1177 }
1178 break;
1179 case '-':
1180 switch (c2) {
1181 case '=': return MINEQUAL;
1182 case '>': return RARROW;
1183 }
1184 break;
1185 case '*':
1186 switch (c2) {
1187 case '*': return DOUBLESTAR;
1188 case '=': return STAREQUAL;
1189 }
1190 break;
1191 case '/':
1192 switch (c2) {
1193 case '/': return DOUBLESLASH;
1194 case '=': return SLASHEQUAL;
1195 }
1196 break;
1197 case '|':
1198 switch (c2) {
1199 case '=': return VBAREQUAL;
1200 }
1201 break;
1202 case '%':
1203 switch (c2) {
1204 case '=': return PERCENTEQUAL;
1205 }
1206 break;
1207 case '&':
1208 switch (c2) {
1209 case '=': return AMPEREQUAL;
1210 }
1211 break;
1212 case '^':
1213 switch (c2) {
1214 case '=': return CIRCUMFLEXEQUAL;
1215 }
1216 break;
Benjamin Petersond51374e2014-04-09 23:55:56 -04001217 case '@':
1218 switch (c2) {
1219 case '=': return ATEQUAL;
1220 }
1221 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001222 }
1223 return OP;
Guido van Rossumfbab9051991-10-20 20:25:03 +00001224}
1225
Thomas Wouters434d0822000-08-24 20:11:32 +00001226int
1227PyToken_ThreeChars(int c1, int c2, int c3)
1228{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001229 switch (c1) {
1230 case '<':
1231 switch (c2) {
1232 case '<':
1233 switch (c3) {
1234 case '=':
1235 return LEFTSHIFTEQUAL;
1236 }
1237 break;
1238 }
1239 break;
1240 case '>':
1241 switch (c2) {
1242 case '>':
1243 switch (c3) {
1244 case '=':
1245 return RIGHTSHIFTEQUAL;
1246 }
1247 break;
1248 }
1249 break;
1250 case '*':
1251 switch (c2) {
1252 case '*':
1253 switch (c3) {
1254 case '=':
1255 return DOUBLESTAREQUAL;
1256 }
1257 break;
1258 }
1259 break;
1260 case '/':
1261 switch (c2) {
1262 case '/':
1263 switch (c3) {
1264 case '=':
1265 return DOUBLESLASHEQUAL;
1266 }
1267 break;
1268 }
1269 break;
1270 case '.':
1271 switch (c2) {
Georg Brandldde00282007-03-18 19:01:53 +00001272 case '.':
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001273 switch (c3) {
1274 case '.':
1275 return ELLIPSIS;
1276 }
1277 break;
1278 }
1279 break;
1280 }
1281 return OP;
Thomas Wouters434d0822000-08-24 20:11:32 +00001282}
Guido van Rossumfbab9051991-10-20 20:25:03 +00001283
Guido van Rossum926f13a1998-04-09 21:38:06 +00001284static int
Thomas Wouters23c9e002000-07-22 19:20:54 +00001285indenterror(struct tok_state *tok)
Guido van Rossum926f13a1998-04-09 21:38:06 +00001286{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001287 if (tok->alterror) {
1288 tok->done = E_TABSPACE;
1289 tok->cur = tok->inp;
1290 return 1;
1291 }
1292 if (tok->altwarning) {
Victor Stinner7f2fee32011-04-05 00:39:01 +02001293#ifdef PGEN
1294 PySys_WriteStderr("inconsistent use of tabs and spaces "
1295 "in indentation\n");
1296#else
1297 PySys_FormatStderr("%U: inconsistent use of tabs and spaces "
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001298 "in indentation\n", tok->filename);
Victor Stinner7f2fee32011-04-05 00:39:01 +02001299#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001300 tok->altwarning = 0;
1301 }
1302 return 0;
Guido van Rossum926f13a1998-04-09 21:38:06 +00001303}
1304
Martin v. Löwis47383402007-08-15 07:32:56 +00001305#ifdef PGEN
Victor Stinner52f6dd72010-03-12 14:45:56 +00001306#define verify_identifier(tok) 1
Martin v. Löwis47383402007-08-15 07:32:56 +00001307#else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001308/* Verify that the identifier follows PEP 3131.
1309 All identifier strings are guaranteed to be "ready" unicode objects.
1310 */
Martin v. Löwis47383402007-08-15 07:32:56 +00001311static int
Victor Stinner52f6dd72010-03-12 14:45:56 +00001312verify_identifier(struct tok_state *tok)
Martin v. Löwis47383402007-08-15 07:32:56 +00001313{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001314 PyObject *s;
1315 int result;
Benjamin Petersond73aca72015-04-21 12:05:19 -04001316 if (tok->decoding_erred)
1317 return 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001318 s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001319 if (s == NULL || PyUnicode_READY(s) == -1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001320 if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
1321 PyErr_Clear();
1322 tok->done = E_IDENTIFIER;
1323 } else {
1324 tok->done = E_ERROR;
1325 }
1326 return 0;
1327 }
1328 result = PyUnicode_IsIdentifier(s);
1329 Py_DECREF(s);
1330 if (result == 0)
1331 tok->done = E_IDENTIFIER;
1332 return result;
Martin v. Löwis47383402007-08-15 07:32:56 +00001333}
1334#endif
Guido van Rossum926f13a1998-04-09 21:38:06 +00001335
Brett Cannona721aba2016-09-09 14:57:09 -07001336static int
1337tok_decimal_tail(struct tok_state *tok)
1338{
1339 int c;
1340
1341 while (1) {
1342 do {
1343 c = tok_nextc(tok);
1344 } while (isdigit(c));
1345 if (c != '_') {
1346 break;
1347 }
1348 c = tok_nextc(tok);
1349 if (!isdigit(c)) {
1350 tok->done = E_TOKEN;
1351 tok_backup(tok, c);
1352 return 0;
1353 }
1354 }
1355 return c;
1356}
1357
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001358/* Get next token, after space stripping etc. */
1359
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +00001360static int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001361tok_get(struct tok_state *tok, char **p_start, char **p_end)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001362{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001363 int c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001364 int blankline, nonascii;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +00001365
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001366 *p_start = *p_end = NULL;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +00001367 nextline:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001368 tok->start = NULL;
1369 blankline = 0;
Guido van Rossum8c11a5c1991-07-27 21:42:56 +00001370
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001371 /* Get indentation level */
1372 if (tok->atbol) {
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001373 int col = 0;
1374 int altcol = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001375 tok->atbol = 0;
1376 for (;;) {
1377 c = tok_nextc(tok);
Brett Cannona721aba2016-09-09 14:57:09 -07001378 if (c == ' ') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001379 col++, altcol++;
Brett Cannona721aba2016-09-09 14:57:09 -07001380 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001381 else if (c == '\t') {
1382 col = (col/tok->tabsize + 1) * tok->tabsize;
1383 altcol = (altcol/tok->alttabsize + 1)
1384 * tok->alttabsize;
1385 }
Brett Cannona721aba2016-09-09 14:57:09 -07001386 else if (c == '\014') {/* Control-L (formfeed) */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001387 col = altcol = 0; /* For Emacs users */
Brett Cannona721aba2016-09-09 14:57:09 -07001388 }
1389 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 break;
Brett Cannona721aba2016-09-09 14:57:09 -07001391 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001392 }
1393 tok_backup(tok, c);
1394 if (c == '#' || c == '\n') {
1395 /* Lines with only whitespace and/or comments
1396 shouldn't affect the indentation and are
1397 not passed to the parser as NEWLINE tokens,
1398 except *totally* empty lines in interactive
1399 mode, which signal the end of a command group. */
Brett Cannona721aba2016-09-09 14:57:09 -07001400 if (col == 0 && c == '\n' && tok->prompt != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001401 blankline = 0; /* Let it through */
Brett Cannona721aba2016-09-09 14:57:09 -07001402 }
1403 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001404 blankline = 1; /* Ignore completely */
Brett Cannona721aba2016-09-09 14:57:09 -07001405 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001406 /* We can't jump back right here since we still
1407 may need to skip to the end of a comment */
1408 }
1409 if (!blankline && tok->level == 0) {
1410 if (col == tok->indstack[tok->indent]) {
1411 /* No change */
1412 if (altcol != tok->altindstack[tok->indent]) {
Brett Cannona721aba2016-09-09 14:57:09 -07001413 if (indenterror(tok)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001414 return ERRORTOKEN;
Brett Cannona721aba2016-09-09 14:57:09 -07001415 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001416 }
1417 }
1418 else if (col > tok->indstack[tok->indent]) {
1419 /* Indent -- always one */
1420 if (tok->indent+1 >= MAXINDENT) {
1421 tok->done = E_TOODEEP;
1422 tok->cur = tok->inp;
1423 return ERRORTOKEN;
1424 }
1425 if (altcol <= tok->altindstack[tok->indent]) {
Brett Cannona721aba2016-09-09 14:57:09 -07001426 if (indenterror(tok)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001427 return ERRORTOKEN;
Brett Cannona721aba2016-09-09 14:57:09 -07001428 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001429 }
1430 tok->pendin++;
1431 tok->indstack[++tok->indent] = col;
1432 tok->altindstack[tok->indent] = altcol;
1433 }
1434 else /* col < tok->indstack[tok->indent] */ {
1435 /* Dedent -- any number, must be consistent */
1436 while (tok->indent > 0 &&
1437 col < tok->indstack[tok->indent]) {
1438 tok->pendin--;
1439 tok->indent--;
1440 }
1441 if (col != tok->indstack[tok->indent]) {
1442 tok->done = E_DEDENT;
1443 tok->cur = tok->inp;
1444 return ERRORTOKEN;
1445 }
1446 if (altcol != tok->altindstack[tok->indent]) {
Brett Cannona721aba2016-09-09 14:57:09 -07001447 if (indenterror(tok)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001448 return ERRORTOKEN;
Brett Cannona721aba2016-09-09 14:57:09 -07001449 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001450 }
1451 }
1452 }
1453 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001454
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001455 tok->start = tok->cur;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001456
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001457 /* Return pending indents/dedents */
1458 if (tok->pendin != 0) {
1459 if (tok->pendin < 0) {
1460 tok->pendin++;
1461 return DEDENT;
1462 }
1463 else {
1464 tok->pendin--;
1465 return INDENT;
1466 }
1467 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001468
Yury Selivanov96ec9342015-07-23 15:01:58 +03001469 if (tok->async_def
1470 && !blankline
1471 && tok->level == 0
1472 /* There was a NEWLINE after ASYNC DEF,
1473 so we're past the signature. */
1474 && tok->async_def_nl
1475 /* Current indentation level is less than where
1476 the async function was defined */
1477 && tok->async_def_indent >= tok->indent)
Yury Selivanov8fb307c2015-07-22 13:33:45 +03001478 {
Yury Selivanov96ec9342015-07-23 15:01:58 +03001479 tok->async_def = 0;
1480 tok->async_def_indent = 0;
1481 tok->async_def_nl = 0;
Yury Selivanov8fb307c2015-07-22 13:33:45 +03001482 }
1483
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001484 again:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001485 tok->start = NULL;
1486 /* Skip spaces */
1487 do {
1488 c = tok_nextc(tok);
1489 } while (c == ' ' || c == '\t' || c == '\014');
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001490
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001491 /* Set start of current token */
1492 tok->start = tok->cur - 1;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001493
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001494 /* Skip comment */
Brett Cannona721aba2016-09-09 14:57:09 -07001495 if (c == '#') {
1496 while (c != EOF && c != '\n') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001497 c = tok_nextc(tok);
Brett Cannona721aba2016-09-09 14:57:09 -07001498 }
1499 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001500
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001501 /* Check for EOF and errors now */
1502 if (c == EOF) {
1503 return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
1504 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001505
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001506 /* Identifier (most frequent token!) */
1507 nonascii = 0;
1508 if (is_potential_identifier_start(c)) {
Christian Heimes0b3847d2012-06-20 11:17:58 +02001509 /* Process b"", r"", u"", br"" and rb"" */
Eric V. Smith235a6f02015-09-19 14:51:32 -04001510 int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0;
Antoine Pitrou3a5d4cb2012-01-12 22:46:19 +01001511 while (1) {
Eric V. Smith235a6f02015-09-19 14:51:32 -04001512 if (!(saw_b || saw_u || saw_f) && (c == 'b' || c == 'B'))
Antoine Pitrou3a5d4cb2012-01-12 22:46:19 +01001513 saw_b = 1;
Armin Ronacher6ecf77b2012-03-04 12:04:06 +00001514 /* Since this is a backwards compatibility support literal we don't
1515 want to support it in arbitrary order like byte literals. */
Brett Cannona721aba2016-09-09 14:57:09 -07001516 else if (!(saw_b || saw_u || saw_r || saw_f)
1517 && (c == 'u'|| c == 'U')) {
Armin Ronacher6ecf77b2012-03-04 12:04:06 +00001518 saw_u = 1;
Brett Cannona721aba2016-09-09 14:57:09 -07001519 }
Christian Heimes0b3847d2012-06-20 11:17:58 +02001520 /* ur"" and ru"" are not supported */
Brett Cannona721aba2016-09-09 14:57:09 -07001521 else if (!(saw_r || saw_u) && (c == 'r' || c == 'R')) {
Antoine Pitrou3a5d4cb2012-01-12 22:46:19 +01001522 saw_r = 1;
Brett Cannona721aba2016-09-09 14:57:09 -07001523 }
1524 else if (!(saw_f || saw_b || saw_u) && (c == 'f' || c == 'F')) {
Eric V. Smith235a6f02015-09-19 14:51:32 -04001525 saw_f = 1;
Brett Cannona721aba2016-09-09 14:57:09 -07001526 }
1527 else {
Antoine Pitrou3a5d4cb2012-01-12 22:46:19 +01001528 break;
Brett Cannona721aba2016-09-09 14:57:09 -07001529 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001530 c = tok_nextc(tok);
Brett Cannona721aba2016-09-09 14:57:09 -07001531 if (c == '"' || c == '\'') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001532 goto letter_quote;
Brett Cannona721aba2016-09-09 14:57:09 -07001533 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001534 }
1535 while (is_potential_identifier_char(c)) {
Brett Cannona721aba2016-09-09 14:57:09 -07001536 if (c >= 128) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001537 nonascii = 1;
Brett Cannona721aba2016-09-09 14:57:09 -07001538 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001539 c = tok_nextc(tok);
1540 }
1541 tok_backup(tok, c);
Brett Cannona721aba2016-09-09 14:57:09 -07001542 if (nonascii && !verify_identifier(tok)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001543 return ERRORTOKEN;
Brett Cannona721aba2016-09-09 14:57:09 -07001544 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001545 *p_start = tok->start;
1546 *p_end = tok->cur;
Yury Selivanov75445082015-05-11 22:57:16 -04001547
Yury Selivanov96ec9342015-07-23 15:01:58 +03001548 /* async/await parsing block. */
1549 if (tok->cur - tok->start == 5) {
1550 /* Current token length is 5. */
1551 if (tok->async_def) {
1552 /* We're inside an 'async def' function. */
Brett Cannona721aba2016-09-09 14:57:09 -07001553 if (memcmp(tok->start, "async", 5) == 0) {
Yury Selivanov96ec9342015-07-23 15:01:58 +03001554 return ASYNC;
Brett Cannona721aba2016-09-09 14:57:09 -07001555 }
1556 if (memcmp(tok->start, "await", 5) == 0) {
Yury Selivanov96ec9342015-07-23 15:01:58 +03001557 return AWAIT;
Brett Cannona721aba2016-09-09 14:57:09 -07001558 }
Yury Selivanov75445082015-05-11 22:57:16 -04001559 }
Yury Selivanov96ec9342015-07-23 15:01:58 +03001560 else if (memcmp(tok->start, "async", 5) == 0) {
1561 /* The current token is 'async'.
1562 Look ahead one token.*/
Yury Selivanov8085b802015-05-18 12:50:52 -04001563
Yury Selivanov96ec9342015-07-23 15:01:58 +03001564 struct tok_state ahead_tok;
1565 char *ahead_tok_start = NULL, *ahead_tok_end = NULL;
1566 int ahead_tok_kind;
Yury Selivanov8085b802015-05-18 12:50:52 -04001567
Yury Selivanov75445082015-05-11 22:57:16 -04001568 memcpy(&ahead_tok, tok, sizeof(ahead_tok));
Yury Selivanov75445082015-05-11 22:57:16 -04001569 ahead_tok_kind = tok_get(&ahead_tok, &ahead_tok_start,
Yury Selivanov96ec9342015-07-23 15:01:58 +03001570 &ahead_tok_end);
Yury Selivanov75445082015-05-11 22:57:16 -04001571
Yury Selivanov8fb307c2015-07-22 13:33:45 +03001572 if (ahead_tok_kind == NAME
1573 && ahead_tok.cur - ahead_tok.start == 3
1574 && memcmp(ahead_tok.start, "def", 3) == 0)
1575 {
1576 /* The next token is going to be 'def', so instead of
1577 returning 'async' NAME token, we return ASYNC. */
Yury Selivanov96ec9342015-07-23 15:01:58 +03001578 tok->async_def_indent = tok->indent;
1579 tok->async_def = 1;
Yury Selivanov75445082015-05-11 22:57:16 -04001580 return ASYNC;
1581 }
Yury Selivanov75445082015-05-11 22:57:16 -04001582 }
1583 }
1584
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001585 return NAME;
1586 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001587
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001588 /* Newline */
1589 if (c == '\n') {
1590 tok->atbol = 1;
Brett Cannona721aba2016-09-09 14:57:09 -07001591 if (blankline || tok->level > 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001592 goto nextline;
Brett Cannona721aba2016-09-09 14:57:09 -07001593 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001594 *p_start = tok->start;
1595 *p_end = tok->cur - 1; /* Leave '\n' out of the string */
1596 tok->cont_line = 0;
Yury Selivanov96ec9342015-07-23 15:01:58 +03001597 if (tok->async_def) {
1598 /* We're somewhere inside an 'async def' function, and
1599 we've encountered a NEWLINE after its signature. */
1600 tok->async_def_nl = 1;
Yury Selivanov8fb307c2015-07-22 13:33:45 +03001601 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001602 return NEWLINE;
1603 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001604
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001605 /* Period or number starting with period? */
1606 if (c == '.') {
1607 c = tok_nextc(tok);
1608 if (isdigit(c)) {
1609 goto fraction;
1610 } else if (c == '.') {
1611 c = tok_nextc(tok);
1612 if (c == '.') {
1613 *p_start = tok->start;
1614 *p_end = tok->cur;
1615 return ELLIPSIS;
Brett Cannona721aba2016-09-09 14:57:09 -07001616 }
1617 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001618 tok_backup(tok, c);
1619 }
1620 tok_backup(tok, '.');
Brett Cannona721aba2016-09-09 14:57:09 -07001621 }
1622 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001623 tok_backup(tok, c);
1624 }
1625 *p_start = tok->start;
1626 *p_end = tok->cur;
1627 return DOT;
1628 }
Guido van Rossumf595fde1996-01-12 01:31:58 +00001629
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001630 /* Number */
1631 if (isdigit(c)) {
1632 if (c == '0') {
1633 /* Hex, octal or binary -- maybe. */
1634 c = tok_nextc(tok);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001635 if (c == 'x' || c == 'X') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001636 /* Hex */
1637 c = tok_nextc(tok);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001638 do {
Brett Cannona721aba2016-09-09 14:57:09 -07001639 if (c == '_') {
1640 c = tok_nextc(tok);
1641 }
1642 if (!isxdigit(c)) {
1643 tok->done = E_TOKEN;
1644 tok_backup(tok, c);
1645 return ERRORTOKEN;
1646 }
1647 do {
1648 c = tok_nextc(tok);
1649 } while (isxdigit(c));
1650 } while (c == '_');
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001651 }
1652 else if (c == 'o' || c == 'O') {
1653 /* Octal */
1654 c = tok_nextc(tok);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001655 do {
Brett Cannona721aba2016-09-09 14:57:09 -07001656 if (c == '_') {
1657 c = tok_nextc(tok);
1658 }
1659 if (c < '0' || c >= '8') {
1660 tok->done = E_TOKEN;
1661 tok_backup(tok, c);
1662 return ERRORTOKEN;
1663 }
1664 do {
1665 c = tok_nextc(tok);
1666 } while ('0' <= c && c < '8');
1667 } while (c == '_');
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001668 }
1669 else if (c == 'b' || c == 'B') {
1670 /* Binary */
1671 c = tok_nextc(tok);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001672 do {
Brett Cannona721aba2016-09-09 14:57:09 -07001673 if (c == '_') {
1674 c = tok_nextc(tok);
1675 }
1676 if (c != '0' && c != '1') {
1677 tok->done = E_TOKEN;
1678 tok_backup(tok, c);
1679 return ERRORTOKEN;
1680 }
1681 do {
1682 c = tok_nextc(tok);
1683 } while (c == '0' || c == '1');
1684 } while (c == '_');
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001685 }
1686 else {
1687 int nonzero = 0;
1688 /* maybe old-style octal; c is first char of it */
1689 /* in any case, allow '0' as a literal */
Brett Cannona721aba2016-09-09 14:57:09 -07001690 while (1) {
1691 if (c == '_') {
1692 c = tok_nextc(tok);
1693 if (!isdigit(c)) {
1694 tok->done = E_TOKEN;
1695 tok_backup(tok, c);
1696 return ERRORTOKEN;
1697 }
1698 }
1699 if (c != '0') {
1700 break;
1701 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001702 c = tok_nextc(tok);
1703 }
Brett Cannona721aba2016-09-09 14:57:09 -07001704 if (isdigit(c)) {
1705 nonzero = 1;
1706 c = tok_decimal_tail(tok);
1707 if (c == 0) {
1708 return ERRORTOKEN;
1709 }
1710 }
1711 if (c == '.') {
1712 c = tok_nextc(tok);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001713 goto fraction;
Brett Cannona721aba2016-09-09 14:57:09 -07001714 }
1715 else if (c == 'e' || c == 'E') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001716 goto exponent;
Brett Cannona721aba2016-09-09 14:57:09 -07001717 }
1718 else if (c == 'j' || c == 'J') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001719 goto imaginary;
Brett Cannona721aba2016-09-09 14:57:09 -07001720 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001721 else if (nonzero) {
Brett Cannona721aba2016-09-09 14:57:09 -07001722 /* Old-style octal: now disallowed. */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001723 tok->done = E_TOKEN;
1724 tok_backup(tok, c);
1725 return ERRORTOKEN;
1726 }
1727 }
1728 }
1729 else {
1730 /* Decimal */
Brett Cannona721aba2016-09-09 14:57:09 -07001731 c = tok_decimal_tail(tok);
1732 if (c == 0) {
1733 return ERRORTOKEN;
1734 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001735 {
1736 /* Accept floating point numbers. */
1737 if (c == '.') {
Brett Cannona721aba2016-09-09 14:57:09 -07001738 c = tok_nextc(tok);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001739 fraction:
1740 /* Fraction */
Brett Cannona721aba2016-09-09 14:57:09 -07001741 if (isdigit(c)) {
1742 c = tok_decimal_tail(tok);
1743 if (c == 0) {
1744 return ERRORTOKEN;
1745 }
1746 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001747 }
1748 if (c == 'e' || c == 'E') {
Benjamin Petersonc4161622014-06-07 12:36:39 -07001749 int e;
1750 exponent:
1751 e = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001752 /* Exponent part */
1753 c = tok_nextc(tok);
Benjamin Petersonc4161622014-06-07 12:36:39 -07001754 if (c == '+' || c == '-') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001755 c = tok_nextc(tok);
Benjamin Petersonc4161622014-06-07 12:36:39 -07001756 if (!isdigit(c)) {
1757 tok->done = E_TOKEN;
1758 tok_backup(tok, c);
1759 return ERRORTOKEN;
1760 }
1761 } else if (!isdigit(c)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001762 tok_backup(tok, c);
Benjamin Petersonc4161622014-06-07 12:36:39 -07001763 tok_backup(tok, e);
1764 *p_start = tok->start;
1765 *p_end = tok->cur;
1766 return NUMBER;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001767 }
Brett Cannona721aba2016-09-09 14:57:09 -07001768 c = tok_decimal_tail(tok);
1769 if (c == 0) {
1770 return ERRORTOKEN;
1771 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001772 }
Brett Cannona721aba2016-09-09 14:57:09 -07001773 if (c == 'j' || c == 'J') {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001774 /* Imaginary part */
1775 imaginary:
1776 c = tok_nextc(tok);
Brett Cannona721aba2016-09-09 14:57:09 -07001777 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001778 }
1779 }
1780 tok_backup(tok, c);
1781 *p_start = tok->start;
1782 *p_end = tok->cur;
1783 return NUMBER;
1784 }
Guido van Rossum24dacb31997-04-06 03:46:20 +00001785
1786 letter_quote:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001787 /* String */
1788 if (c == '\'' || c == '"') {
1789 int quote = c;
1790 int quote_size = 1; /* 1 or 3 */
1791 int end_quote_size = 0;
Guido van Rossumcf171a72007-11-16 00:51:45 +00001792
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001793 /* Find the quote size and start of string */
1794 c = tok_nextc(tok);
1795 if (c == quote) {
1796 c = tok_nextc(tok);
Brett Cannona721aba2016-09-09 14:57:09 -07001797 if (c == quote) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001798 quote_size = 3;
Brett Cannona721aba2016-09-09 14:57:09 -07001799 }
1800 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001801 end_quote_size = 1; /* empty string found */
Brett Cannona721aba2016-09-09 14:57:09 -07001802 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001803 }
Brett Cannona721aba2016-09-09 14:57:09 -07001804 if (c != quote) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001805 tok_backup(tok, c);
Brett Cannona721aba2016-09-09 14:57:09 -07001806 }
Guido van Rossumcf171a72007-11-16 00:51:45 +00001807
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001808 /* Get rest of string */
1809 while (end_quote_size != quote_size) {
1810 c = tok_nextc(tok);
1811 if (c == EOF) {
Brett Cannona721aba2016-09-09 14:57:09 -07001812 if (quote_size == 3) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001813 tok->done = E_EOFS;
Brett Cannona721aba2016-09-09 14:57:09 -07001814 }
1815 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001816 tok->done = E_EOLS;
Brett Cannona721aba2016-09-09 14:57:09 -07001817 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001818 tok->cur = tok->inp;
1819 return ERRORTOKEN;
1820 }
1821 if (quote_size == 1 && c == '\n') {
1822 tok->done = E_EOLS;
1823 tok->cur = tok->inp;
1824 return ERRORTOKEN;
1825 }
Brett Cannona721aba2016-09-09 14:57:09 -07001826 if (c == quote) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001827 end_quote_size += 1;
Brett Cannona721aba2016-09-09 14:57:09 -07001828 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001829 else {
1830 end_quote_size = 0;
Brett Cannona721aba2016-09-09 14:57:09 -07001831 if (c == '\\') {
Christian Heimesc6cc23d2016-09-09 00:09:45 +02001832 tok_nextc(tok); /* skip escaped char */
Brett Cannona721aba2016-09-09 14:57:09 -07001833 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001834 }
1835 }
Guido van Rossumcf171a72007-11-16 00:51:45 +00001836
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001837 *p_start = tok->start;
1838 *p_end = tok->cur;
1839 return STRING;
1840 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001841
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001842 /* Line continuation */
1843 if (c == '\\') {
1844 c = tok_nextc(tok);
1845 if (c != '\n') {
1846 tok->done = E_LINECONT;
1847 tok->cur = tok->inp;
1848 return ERRORTOKEN;
1849 }
1850 tok->cont_line = 1;
1851 goto again; /* Read next line */
1852 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001853
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001854 /* Check for two-character token */
1855 {
1856 int c2 = tok_nextc(tok);
1857 int token = PyToken_TwoChars(c, c2);
1858 if (token != OP) {
1859 int c3 = tok_nextc(tok);
1860 int token3 = PyToken_ThreeChars(c, c2, c3);
1861 if (token3 != OP) {
1862 token = token3;
Brett Cannona721aba2016-09-09 14:57:09 -07001863 }
1864 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001865 tok_backup(tok, c3);
1866 }
1867 *p_start = tok->start;
1868 *p_end = tok->cur;
1869 return token;
1870 }
1871 tok_backup(tok, c2);
1872 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001873
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001874 /* Keep track of parentheses nesting level */
1875 switch (c) {
1876 case '(':
1877 case '[':
1878 case '{':
1879 tok->level++;
1880 break;
1881 case ')':
1882 case ']':
1883 case '}':
1884 tok->level--;
1885 break;
1886 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001887
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001888 /* Punctuation character */
1889 *p_start = tok->start;
1890 *p_end = tok->cur;
1891 return PyToken_OneChar(c);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001892}
1893
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +00001894int
1895PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
1896{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001897 int result = tok_get(tok, p_start, p_end);
1898 if (tok->decoding_erred) {
1899 result = ERRORTOKEN;
1900 tok->done = E_DECODE;
1901 }
1902 return result;
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +00001903}
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001904
Victor Stinnerfe7c5b52011-04-05 01:48:03 +02001905/* Get the encoding of a Python file. Check for the coding cookie and check if
1906 the file starts with a BOM.
Guido van Rossumce3a72a2007-10-19 23:16:50 +00001907
Victor Stinnerfe7c5b52011-04-05 01:48:03 +02001908 PyTokenizer_FindEncodingFilename() returns NULL when it can't find the
1909 encoding in the first or second line of the file (in which case the encoding
1910 should be assumed to be UTF-8).
Brett Cannone4539892007-10-20 03:46:49 +00001911
Victor Stinnerfe7c5b52011-04-05 01:48:03 +02001912 The char* returned is malloc'ed via PyMem_MALLOC() and thus must be freed
1913 by the caller. */
1914
Guido van Rossumce3a72a2007-10-19 23:16:50 +00001915char *
Victor Stinnerfe7c5b52011-04-05 01:48:03 +02001916PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
Guido van Rossum40d20bc2007-10-22 00:09:51 +00001917{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001918 struct tok_state *tok;
1919 FILE *fp;
1920 char *p_start =NULL , *p_end =NULL , *encoding = NULL;
Guido van Rossumce3a72a2007-10-19 23:16:50 +00001921
Victor Stinnerdaf45552013-08-28 00:53:59 +02001922#ifndef PGEN
1923 fd = _Py_dup(fd);
1924#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001925 fd = dup(fd);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001926#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001927 if (fd < 0) {
1928 return NULL;
1929 }
Victor Stinnerdaf45552013-08-28 00:53:59 +02001930
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001931 fp = fdopen(fd, "r");
1932 if (fp == NULL) {
1933 return NULL;
1934 }
1935 tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL);
1936 if (tok == NULL) {
1937 fclose(fp);
1938 return NULL;
1939 }
Victor Stinner7f2fee32011-04-05 00:39:01 +02001940#ifndef PGEN
Victor Stinnerfe7c5b52011-04-05 01:48:03 +02001941 if (filename != NULL) {
1942 Py_INCREF(filename);
1943 tok->filename = filename;
1944 }
1945 else {
1946 tok->filename = PyUnicode_FromString("<string>");
1947 if (tok->filename == NULL) {
1948 fclose(fp);
1949 PyTokenizer_Free(tok);
1950 return encoding;
1951 }
1952 }
Victor Stinner7f2fee32011-04-05 00:39:01 +02001953#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001954 while (tok->lineno < 2 && tok->done == E_OK) {
1955 PyTokenizer_Get(tok, &p_start, &p_end);
1956 }
1957 fclose(fp);
1958 if (tok->encoding) {
1959 encoding = (char *)PyMem_MALLOC(strlen(tok->encoding) + 1);
1960 if (encoding)
1961 strcpy(encoding, tok->encoding);
1962 }
1963 PyTokenizer_Free(tok);
1964 return encoding;
Guido van Rossumce3a72a2007-10-19 23:16:50 +00001965}
Thomas Wouters89d996e2007-09-08 17:39:28 +00001966
Victor Stinnerfe7c5b52011-04-05 01:48:03 +02001967char *
1968PyTokenizer_FindEncoding(int fd)
1969{
1970 return PyTokenizer_FindEncodingFilename(fd, NULL);
1971}
1972
Guido van Rossum408027e1996-12-30 16:17:54 +00001973#ifdef Py_DEBUG
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001974
1975void
Thomas Wouters23c9e002000-07-22 19:20:54 +00001976tok_dump(int type, char *start, char *end)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001977{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001978 printf("%s", _PyParser_TokenNames[type]);
1979 if (type == NAME || type == NUMBER || type == STRING || type == OP)
1980 printf("(%.*s)", (int)(end - start), start);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001981}
1982
1983#endif