blob: d37e28a0a36cadf9e81dff47f96bf80b3ddb00d4 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* Parser-tokenizer link implementation */
3
Guido van Rossum3f5da241990-12-20 15:06:42 +00004#include "pgenheaders.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005#include "tokenizer.h"
6#include "node.h"
7#include "grammar.h"
8#include "parser.h"
Guido van Rossum3f5da241990-12-20 15:06:42 +00009#include "parsetok.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000010#include "errcode.h"
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +000011#include "graminit.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000012
Guido van Rossum3f5da241990-12-20 15:06:42 +000013
14/* Forward */
Christian Heimes4d6ec852008-03-26 22:34:47 +000015static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
Victor Stinner14e461d2013-08-26 22:28:21 +020016static int initerr(perrdetail *err_ret, PyObject * filename);
Guido van Rossum3f5da241990-12-20 15:06:42 +000017
18/* Parse input coming from a string. Return error code, print some errors. */
Guido van Rossumbd0389d1994-08-29 12:25:45 +000019node *
Martin v. Löwis95292d62002-12-11 14:04:59 +000020PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
Guido van Rossum3f5da241990-12-20 15:06:42 +000021{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000022 return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0);
Tim Petersfe2127d2001-07-16 05:37:24 +000023}
24
25node *
Martin v. Löwis95292d62002-12-11 14:04:59 +000026PyParser_ParseStringFlags(const char *s, grammar *g, int start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000027 perrdetail *err_ret, int flags)
Tim Petersfe2127d2001-07-16 05:37:24 +000028{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000029 return PyParser_ParseStringFlagsFilename(s, NULL,
30 g, start, err_ret, flags);
Thomas Heller6b17abf2002-07-09 09:23:27 +000031}
32
33node *
Martin v. Löwis95292d62002-12-11 14:04:59 +000034PyParser_ParseStringFlagsFilename(const char *s, const char *filename,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000035 grammar *g, int start,
36 perrdetail *err_ret, int flags)
Thomas Heller6b17abf2002-07-09 09:23:27 +000037{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000038 int iflags = flags;
39 return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start,
40 err_ret, &iflags);
Christian Heimes4d6ec852008-03-26 22:34:47 +000041}
42
43node *
Victor Stinner14e461d2013-08-26 22:28:21 +020044PyParser_ParseStringObject(const char *s, PyObject *filename,
45 grammar *g, int start,
46 perrdetail *err_ret, int *flags)
Christian Heimes4d6ec852008-03-26 22:34:47 +000047{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000048 struct tok_state *tok;
49 int exec_input = start == file_input;
Guido van Rossumbd0389d1994-08-29 12:25:45 +000050
Victor Stinner7f2fee32011-04-05 00:39:01 +020051 if (initerr(err_ret, filename) < 0)
52 return NULL;
Guido van Rossumbd0389d1994-08-29 12:25:45 +000053
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000054 if (*flags & PyPARSE_IGNORE_COOKIE)
55 tok = PyTokenizer_FromUTF8(s, exec_input);
56 else
57 tok = PyTokenizer_FromString(s, exec_input);
58 if (tok == NULL) {
59 err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
60 return NULL;
61 }
Guido van Rossumbd0389d1994-08-29 12:25:45 +000062
Victor Stinner7f2fee32011-04-05 00:39:01 +020063#ifndef PGEN
64 Py_INCREF(err_ret->filename);
65 tok->filename = err_ret->filename;
66#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 return parsetok(tok, g, start, err_ret, flags);
Guido van Rossum3f5da241990-12-20 15:06:42 +000068}
69
Victor Stinner14e461d2013-08-26 22:28:21 +020070node *
71PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename_str,
72 grammar *g, int start,
73 perrdetail *err_ret, int *flags)
74{
75 node *n;
76 PyObject *filename = NULL;
77#ifndef PGEN
78 if (filename_str != NULL) {
79 filename = PyUnicode_DecodeFSDefault(filename_str);
80 if (filename == NULL) {
81 err_ret->error = E_ERROR;
82 return NULL;
83 }
84 }
85#endif
86 n = PyParser_ParseStringObject(s, filename, g, start, err_ret, flags);
87#ifndef PGEN
88 Py_XDECREF(filename);
89#endif
90 return n;
91}
92
Guido van Rossum3f5da241990-12-20 15:06:42 +000093/* Parse input coming from a file. Return error code, print some errors. */
94
Guido van Rossumbd0389d1994-08-29 12:25:45 +000095node *
Martin v. Löwis95292d62002-12-11 14:04:59 +000096PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start,
Serhiy Storchakac6792272013-10-19 21:03:34 +030097 const char *ps1, const char *ps2,
98 perrdetail *err_ret)
Guido van Rossum3f5da241990-12-20 15:06:42 +000099{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000100 return PyParser_ParseFileFlags(fp, filename, NULL,
101 g, start, ps1, ps2, err_ret, 0);
Tim Petersfe2127d2001-07-16 05:37:24 +0000102}
103
104node *
Christian Heimes4d6ec852008-03-26 22:34:47 +0000105PyParser_ParseFileFlags(FILE *fp, const char *filename, const char *enc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000106 grammar *g, int start,
Serhiy Storchakac6792272013-10-19 21:03:34 +0300107 const char *ps1, const char *ps2,
108 perrdetail *err_ret, int flags)
Tim Petersfe2127d2001-07-16 05:37:24 +0000109{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000110 int iflags = flags;
111 return PyParser_ParseFileFlagsEx(fp, filename, enc, g, start, ps1,
112 ps2, err_ret, &iflags);
Christian Heimes4d6ec852008-03-26 22:34:47 +0000113}
114
115node *
Victor Stinner14e461d2013-08-26 22:28:21 +0200116PyParser_ParseFileObject(FILE *fp, PyObject *filename,
117 const char *enc, grammar *g, int start,
Serhiy Storchakac6792272013-10-19 21:03:34 +0300118 const char *ps1, const char *ps2,
119 perrdetail *err_ret, int *flags)
Christian Heimes4d6ec852008-03-26 22:34:47 +0000120{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000121 struct tok_state *tok;
Guido van Rossumbd0389d1994-08-29 12:25:45 +0000122
Victor Stinner7f2fee32011-04-05 00:39:01 +0200123 if (initerr(err_ret, filename) < 0)
124 return NULL;
Guido van Rossumbd0389d1994-08-29 12:25:45 +0000125
Serhiy Storchakac6792272013-10-19 21:03:34 +0300126 if ((tok = PyTokenizer_FromFile(fp, enc, ps1, ps2)) == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000127 err_ret->error = E_NOMEM;
128 return NULL;
129 }
Victor Stinner7f2fee32011-04-05 00:39:01 +0200130#ifndef PGEN
131 Py_INCREF(err_ret->filename);
132 tok->filename = err_ret->filename;
133#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 return parsetok(tok, g, start, err_ret, flags);
Guido van Rossumbd0389d1994-08-29 12:25:45 +0000135}
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000136
Victor Stinner14e461d2013-08-26 22:28:21 +0200137node *
138PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
139 const char *enc, grammar *g, int start,
Serhiy Storchakac6792272013-10-19 21:03:34 +0300140 const char *ps1, const char *ps2,
141 perrdetail *err_ret, int *flags)
Victor Stinner14e461d2013-08-26 22:28:21 +0200142{
143 node *n;
144 PyObject *fileobj = NULL;
145#ifndef PGEN
146 if (filename != NULL) {
147 fileobj = PyUnicode_DecodeFSDefault(filename);
148 if (fileobj == NULL) {
149 err_ret->error = E_ERROR;
150 return NULL;
151 }
152 }
153#endif
154 n = PyParser_ParseFileObject(fp, fileobj, enc, g,
155 start, ps1, ps2, err_ret, flags);
156#ifndef PGEN
157 Py_XDECREF(fileobj);
158#endif
159 return n;
160}
161
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#if 0
/* NOTE: everything down to the matching #endif is compiled out by the
   "#if 0" above. */

/* Format strings for the keyword warnings: file name, then line number. */
static const char with_msg[] =
"%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n";

static const char as_msg[] =
"%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n";

/* Write the warning msg (one of the format strings above) to stderr,
   substituting "<string>" when no filename is known. */
static void
warn(const char *msg, const char *filename, int lineno)
{
    const char *shown_name = (filename != NULL) ? filename : "<string>";

    PySys_WriteStderr(msg, shown_name, lineno);
}
#endif
#endif
Guido van Rossumda62ecc2001-07-17 16:53:11 +0000179
Thomas Wouters89f507f2006-12-13 04:49:30 +0000180/* Parse input coming from the given tokenizer structure.
181 Return error code. */
182
Guido van Rossumbd0389d1994-08-29 12:25:45 +0000183static node *
Tim Petersfe2127d2001-07-16 05:37:24 +0000184parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000185 int *flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000186{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 parser_state *ps;
188 node *n;
Brett Cannonb94767f2011-02-22 20:15:44 +0000189 int started = 0;
Ammar Askar025eb982018-09-24 17:12:49 -0400190 int col_offset;
Guido van Rossumbd0389d1994-08-29 12:25:45 +0000191
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000192 if ((ps = PyParser_New(g, start)) == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000193 err_ret->error = E_NOMEM;
194 PyTokenizer_Free(tok);
195 return NULL;
196 }
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000197#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000198 if (*flags & PyPARSE_BARRY_AS_BDFL)
199 ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
Neil Schemenauerc24ea082002-03-22 23:53:36 +0000200#endif
Guido van Rossumbd0389d1994-08-29 12:25:45 +0000201
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000202 for (;;) {
203 char *a, *b;
204 int type;
205 size_t len;
206 char *str;
Ammar Askar025eb982018-09-24 17:12:49 -0400207 col_offset = -1;
Anthony Sottile995d9b92019-01-12 20:05:13 -0800208 int lineno;
209 const char *line_start;
Guido van Rossumbd0389d1994-08-29 12:25:45 +0000210
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 type = PyTokenizer_Get(tok, &a, &b);
212 if (type == ERRORTOKEN) {
213 err_ret->error = tok->done;
214 break;
215 }
216 if (type == ENDMARKER && started) {
217 type = NEWLINE; /* Add an extra newline */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000218 started = 0;
219 /* Add the right number of dedent tokens,
220 except if a certain flag is given --
221 codeop.py uses this. */
222 if (tok->indent &&
223 !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
224 {
225 tok->pendin = -tok->indent;
226 tok->indent = 0;
227 }
228 }
229 else
230 started = 1;
Zackery Spytz7c4ab2a2018-08-15 00:27:26 -0600231 len = (a != NULL && b != NULL) ? b - a : 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000232 str = (char *) PyObject_MALLOC(len + 1);
233 if (str == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000234 err_ret->error = E_NOMEM;
235 break;
236 }
237 if (len > 0)
238 strncpy(str, a, len);
239 str[len] = '\0';
Guido van Rossumda62ecc2001-07-17 16:53:11 +0000240
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000241#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000242 if (type == NOTEQUAL) {
243 if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
244 strcmp(str, "!=")) {
Antoine Pitrou9ec25932011-11-13 01:01:23 +0100245 PyObject_FREE(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000246 err_ret->error = E_SYNTAX;
247 break;
248 }
249 else if ((ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
250 strcmp(str, "<>")) {
Antoine Pitrou9ec25932011-11-13 01:01:23 +0100251 PyObject_FREE(str);
Serhiy Storchakaaba24ff2018-07-23 23:41:11 +0300252 err_ret->expected = NOTEQUAL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000253 err_ret->error = E_SYNTAX;
254 break;
255 }
256 }
Neil Schemenauerc24ea082002-03-22 23:53:36 +0000257#endif
Anthony Sottile995d9b92019-01-12 20:05:13 -0800258
259 /* Nodes of type STRING, especially multi line strings
260 must be handled differently in order to get both
261 the starting line number and the column offset right.
262 (cf. issue 16806) */
263 lineno = type == STRING ? tok->first_lineno : tok->lineno;
264 line_start = type == STRING ? tok->multi_line_start : tok->line_start;
265 if (a != NULL && a >= line_start) {
266 col_offset = Py_SAFE_DOWNCAST(a - line_start,
Benjamin Petersonca470632016-09-06 13:47:26 -0700267 intptr_t, int);
Zackery Spytz3e26e422018-08-20 21:11:40 -0600268 }
269 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000270 col_offset = -1;
Zackery Spytz3e26e422018-08-20 21:11:40 -0600271 }
Guido van Rossumbd0389d1994-08-29 12:25:45 +0000272
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 if ((err_ret->error =
274 PyParser_AddToken(ps, (int)type, str,
Anthony Sottile995d9b92019-01-12 20:05:13 -0800275 lineno, col_offset,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000276 &(err_ret->expected))) != E_OK) {
277 if (err_ret->error != E_DONE) {
278 PyObject_FREE(str);
279 err_ret->token = type;
280 }
281 break;
282 }
283 }
284
285 if (err_ret->error == E_DONE) {
286 n = ps->p_tree;
287 ps->p_tree = NULL;
Meador Ingefa21bf02012-01-19 01:08:41 -0600288
Benjamin Peterson79c1f962012-01-19 08:48:11 -0500289#ifndef PGEN
Meador Ingefa21bf02012-01-19 01:08:41 -0600290 /* Check that the source for a single input statement really
291 is a single statement by looking at what is left in the
292 buffer after parsing. Trailing whitespace and comments
293 are OK. */
294 if (start == single_input) {
295 char *cur = tok->cur;
296 char c = *tok->cur;
297
Benjamin Petersoncff92372012-01-19 17:46:13 -0500298 for (;;) {
299 while (c == ' ' || c == '\t' || c == '\n' || c == '\014')
300 c = *++cur;
Meador Ingefa21bf02012-01-19 01:08:41 -0600301
Benjamin Petersoncff92372012-01-19 17:46:13 -0500302 if (!c)
303 break;
304
305 if (c != '#') {
306 err_ret->error = E_BADSINGLE;
307 PyNode_Free(n);
308 n = NULL;
309 break;
310 }
311
312 /* Suck up comment. */
313 while (c && c != '\n')
314 c = *++cur;
Meador Ingefa21bf02012-01-19 01:08:41 -0600315 }
316 }
Benjamin Peterson79c1f962012-01-19 08:48:11 -0500317#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000318 }
319 else
320 n = NULL;
Christian Heimesb1b3efc2008-03-26 23:24:27 +0000321
Christian Heimes4d6ec852008-03-26 22:34:47 +0000322#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000323 *flags = ps->p_flags;
Christian Heimes4d6ec852008-03-26 22:34:47 +0000324#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000325 PyParser_Delete(ps);
Guido van Rossumbd0389d1994-08-29 12:25:45 +0000326
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000327 if (n == NULL) {
Benjamin Peterson758888d2011-05-30 11:12:38 -0500328 if (tok->done == E_EOF)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000329 err_ret->error = E_EOF;
330 err_ret->lineno = tok->lineno;
331 if (tok->buf != NULL) {
332 size_t len;
333 assert(tok->cur - tok->buf < INT_MAX);
Ammar Askar025eb982018-09-24 17:12:49 -0400334 /* if we've managed to parse a token, point the offset to its start,
335 * else use the current reading position of the tokenizer
336 */
337 err_ret->offset = col_offset != -1 ? col_offset + 1 : ((int)(tok->cur - tok->buf));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000338 len = tok->inp - tok->buf;
339 err_ret->text = (char *) PyObject_MALLOC(len + 1);
340 if (err_ret->text != NULL) {
341 if (len > 0)
342 strncpy(err_ret->text, tok->buf, len);
343 err_ret->text[len] = '\0';
344 }
345 }
346 } else if (tok->encoding != NULL) {
347 /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
348 * allocated using PyMem_
349 */
350 node* r = PyNode_New(encoding_decl);
351 if (r)
352 r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
353 if (!r || !r->n_str) {
354 err_ret->error = E_NOMEM;
355 if (r)
356 PyObject_FREE(r);
357 n = NULL;
358 goto done;
359 }
360 strcpy(r->n_str, tok->encoding);
361 PyMem_FREE(tok->encoding);
362 tok->encoding = NULL;
363 r->n_nchildren = 1;
364 r->n_child = n;
365 n = r;
366 }
Guido van Rossumbd0389d1994-08-29 12:25:45 +0000367
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000368done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000369 PyTokenizer_Free(tok);
Guido van Rossumbd0389d1994-08-29 12:25:45 +0000370
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000371 return n;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000372}
Guido van Rossum0c156a52001-10-20 14:27:56 +0000373
Victor Stinner7f2fee32011-04-05 00:39:01 +0200374static int
Victor Stinner14e461d2013-08-26 22:28:21 +0200375initerr(perrdetail *err_ret, PyObject *filename)
Guido van Rossum0c156a52001-10-20 14:27:56 +0000376{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000377 err_ret->error = E_OK;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000378 err_ret->lineno = 0;
379 err_ret->offset = 0;
380 err_ret->text = NULL;
381 err_ret->token = -1;
382 err_ret->expected = -1;
Victor Stinner7f2fee32011-04-05 00:39:01 +0200383#ifndef PGEN
Victor Stinner14e461d2013-08-26 22:28:21 +0200384 if (filename) {
385 Py_INCREF(filename);
386 err_ret->filename = filename;
387 }
388 else {
Victor Stinner7f2fee32011-04-05 00:39:01 +0200389 err_ret->filename = PyUnicode_FromString("<string>");
Victor Stinner14e461d2013-08-26 22:28:21 +0200390 if (err_ret->filename == NULL) {
391 err_ret->error = E_ERROR;
392 return -1;
393 }
Victor Stinner7f2fee32011-04-05 00:39:01 +0200394 }
395#endif
396 return 0;
Guido van Rossum0c156a52001-10-20 14:27:56 +0000397}