blob: 2df91599268ef6175740f1bbaef902165ba65a14 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* Parser-tokenizer link implementation */
3
Guido van Rossum3f5da241990-12-20 15:06:42 +00004#include "pgenheaders.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005#include "tokenizer.h"
6#include "node.h"
7#include "grammar.h"
8#include "parser.h"
Guido van Rossum3f5da241990-12-20 15:06:42 +00009#include "parsetok.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000010#include "errcode.h"
Martin v. Löwis00f1e3f2002-08-04 17:29:52 +000011#include "graminit.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000012
Guido van Rossum3f5da241990-12-20 15:06:42 +000013
14/* Forward */
Christian Heimes4d6ec852008-03-26 22:34:47 +000015static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
Victor Stinner14e461d2013-08-26 22:28:21 +020016static int initerr(perrdetail *err_ret, PyObject * filename);
Guido van Rossum3f5da241990-12-20 15:06:42 +000017
18/* Parse input coming from a string. Return error code, print some errors. */
Guido van Rossumbd0389d1994-08-29 12:25:45 +000019node *
Martin v. Löwis95292d62002-12-11 14:04:59 +000020PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
Guido van Rossum3f5da241990-12-20 15:06:42 +000021{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000022 return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0);
Tim Petersfe2127d2001-07-16 05:37:24 +000023}
24
25node *
Martin v. Löwis95292d62002-12-11 14:04:59 +000026PyParser_ParseStringFlags(const char *s, grammar *g, int start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000027 perrdetail *err_ret, int flags)
Tim Petersfe2127d2001-07-16 05:37:24 +000028{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000029 return PyParser_ParseStringFlagsFilename(s, NULL,
30 g, start, err_ret, flags);
Thomas Heller6b17abf2002-07-09 09:23:27 +000031}
32
33node *
Martin v. Löwis95292d62002-12-11 14:04:59 +000034PyParser_ParseStringFlagsFilename(const char *s, const char *filename,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000035 grammar *g, int start,
36 perrdetail *err_ret, int flags)
Thomas Heller6b17abf2002-07-09 09:23:27 +000037{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000038 int iflags = flags;
39 return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start,
40 err_ret, &iflags);
Christian Heimes4d6ec852008-03-26 22:34:47 +000041}
42
43node *
Victor Stinner14e461d2013-08-26 22:28:21 +020044PyParser_ParseStringObject(const char *s, PyObject *filename,
45 grammar *g, int start,
46 perrdetail *err_ret, int *flags)
Christian Heimes4d6ec852008-03-26 22:34:47 +000047{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000048 struct tok_state *tok;
49 int exec_input = start == file_input;
Guido van Rossumbd0389d1994-08-29 12:25:45 +000050
Victor Stinner7f2fee32011-04-05 00:39:01 +020051 if (initerr(err_ret, filename) < 0)
52 return NULL;
Guido van Rossumbd0389d1994-08-29 12:25:45 +000053
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000054 if (*flags & PyPARSE_IGNORE_COOKIE)
55 tok = PyTokenizer_FromUTF8(s, exec_input);
56 else
57 tok = PyTokenizer_FromString(s, exec_input);
58 if (tok == NULL) {
59 err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
60 return NULL;
61 }
Guido van Rossumbd0389d1994-08-29 12:25:45 +000062
Victor Stinner7f2fee32011-04-05 00:39:01 +020063#ifndef PGEN
64 Py_INCREF(err_ret->filename);
65 tok->filename = err_ret->filename;
66#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 return parsetok(tok, g, start, err_ret, flags);
Guido van Rossum3f5da241990-12-20 15:06:42 +000068}
69
Victor Stinner14e461d2013-08-26 22:28:21 +020070node *
71PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename_str,
72 grammar *g, int start,
73 perrdetail *err_ret, int *flags)
74{
75 node *n;
76 PyObject *filename = NULL;
77#ifndef PGEN
78 if (filename_str != NULL) {
79 filename = PyUnicode_DecodeFSDefault(filename_str);
80 if (filename == NULL) {
81 err_ret->error = E_ERROR;
82 return NULL;
83 }
84 }
85#endif
86 n = PyParser_ParseStringObject(s, filename, g, start, err_ret, flags);
87#ifndef PGEN
88 Py_XDECREF(filename);
89#endif
90 return n;
91}
92
Guido van Rossum3f5da241990-12-20 15:06:42 +000093/* Parse input coming from a file. Return error code, print some errors. */
94
Guido van Rossumbd0389d1994-08-29 12:25:45 +000095node *
Martin v. Löwis95292d62002-12-11 14:04:59 +000096PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 char *ps1, char *ps2, perrdetail *err_ret)
Guido van Rossum3f5da241990-12-20 15:06:42 +000098{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000099 return PyParser_ParseFileFlags(fp, filename, NULL,
100 g, start, ps1, ps2, err_ret, 0);
Tim Petersfe2127d2001-07-16 05:37:24 +0000101}
102
103node *
Christian Heimes4d6ec852008-03-26 22:34:47 +0000104PyParser_ParseFileFlags(FILE *fp, const char *filename, const char *enc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000105 grammar *g, int start,
106 char *ps1, char *ps2, perrdetail *err_ret, int flags)
Tim Petersfe2127d2001-07-16 05:37:24 +0000107{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000108 int iflags = flags;
109 return PyParser_ParseFileFlagsEx(fp, filename, enc, g, start, ps1,
110 ps2, err_ret, &iflags);
Christian Heimes4d6ec852008-03-26 22:34:47 +0000111}
112
113node *
Victor Stinner14e461d2013-08-26 22:28:21 +0200114PyParser_ParseFileObject(FILE *fp, PyObject *filename,
115 const char *enc, grammar *g, int start,
116 char *ps1, char *ps2, perrdetail *err_ret,
117 int *flags)
Christian Heimes4d6ec852008-03-26 22:34:47 +0000118{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000119 struct tok_state *tok;
Guido van Rossumbd0389d1994-08-29 12:25:45 +0000120
Victor Stinner7f2fee32011-04-05 00:39:01 +0200121 if (initerr(err_ret, filename) < 0)
122 return NULL;
Guido van Rossumbd0389d1994-08-29 12:25:45 +0000123
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000124 if ((tok = PyTokenizer_FromFile(fp, (char *)enc, ps1, ps2)) == NULL) {
125 err_ret->error = E_NOMEM;
126 return NULL;
127 }
Victor Stinner7f2fee32011-04-05 00:39:01 +0200128#ifndef PGEN
129 Py_INCREF(err_ret->filename);
130 tok->filename = err_ret->filename;
131#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 return parsetok(tok, g, start, err_ret, flags);
Guido van Rossumbd0389d1994-08-29 12:25:45 +0000133}
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000134
Victor Stinner14e461d2013-08-26 22:28:21 +0200135node *
136PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
137 const char *enc, grammar *g, int start,
138 char *ps1, char *ps2, perrdetail *err_ret, int *flags)
139{
140 node *n;
141 PyObject *fileobj = NULL;
142#ifndef PGEN
143 if (filename != NULL) {
144 fileobj = PyUnicode_DecodeFSDefault(filename);
145 if (fileobj == NULL) {
146 err_ret->error = E_ERROR;
147 return NULL;
148 }
149 }
150#endif
151 n = PyParser_ParseFileObject(fp, fileobj, enc, g,
152 start, ps1, ps2, err_ret, flags);
153#ifndef PGEN
154 Py_XDECREF(fileobj);
155#endif
156 return n;
157}
158
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#if 0
/* Everything in this "#if 0" section is compiled out.  It holds the
   format strings for the reserved-keyword warnings and the helper that
   would print them. */
static char with_msg[] =
"%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n";

static char as_msg[] =
"%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n";

/* Write the warning 'msg' (a format taking a file name and a line number)
   to stderr; a NULL 'filename' is reported as "<string>". */
static void
warn(const char *msg, const char *filename, int lineno)
{
    const char *shown = (filename != NULL) ? filename : "<string>";

    PySys_WriteStderr(msg, shown, lineno);
}
#endif
#endif
Guido van Rossumda62ecc2001-07-17 16:53:11 +0000176
Thomas Wouters89f507f2006-12-13 04:49:30 +0000177/* Parse input coming from the given tokenizer structure.
178 Return error code. */
179
Guido van Rossumbd0389d1994-08-29 12:25:45 +0000180static node *
Tim Petersfe2127d2001-07-16 05:37:24 +0000181parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 int *flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000183{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 parser_state *ps;
185 node *n;
Brett Cannonb94767f2011-02-22 20:15:44 +0000186 int started = 0;
Guido van Rossumbd0389d1994-08-29 12:25:45 +0000187
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000188 if ((ps = PyParser_New(g, start)) == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 err_ret->error = E_NOMEM;
190 PyTokenizer_Free(tok);
191 return NULL;
192 }
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000193#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000194 if (*flags & PyPARSE_BARRY_AS_BDFL)
195 ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
Neil Schemenauerc24ea082002-03-22 23:53:36 +0000196#endif
Guido van Rossumbd0389d1994-08-29 12:25:45 +0000197
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000198 for (;;) {
199 char *a, *b;
200 int type;
201 size_t len;
202 char *str;
203 int col_offset;
Guido van Rossumbd0389d1994-08-29 12:25:45 +0000204
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000205 type = PyTokenizer_Get(tok, &a, &b);
206 if (type == ERRORTOKEN) {
207 err_ret->error = tok->done;
208 break;
209 }
210 if (type == ENDMARKER && started) {
211 type = NEWLINE; /* Add an extra newline */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000212 started = 0;
213 /* Add the right number of dedent tokens,
214 except if a certain flag is given --
215 codeop.py uses this. */
216 if (tok->indent &&
217 !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
218 {
219 tok->pendin = -tok->indent;
220 tok->indent = 0;
221 }
222 }
223 else
224 started = 1;
225 len = b - a; /* XXX this may compute NULL - NULL */
226 str = (char *) PyObject_MALLOC(len + 1);
227 if (str == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 err_ret->error = E_NOMEM;
229 break;
230 }
231 if (len > 0)
232 strncpy(str, a, len);
233 str[len] = '\0';
Guido van Rossumda62ecc2001-07-17 16:53:11 +0000234
Thomas Wouters34aa7ba2006-02-28 19:02:24 +0000235#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000236 if (type == NOTEQUAL) {
237 if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
238 strcmp(str, "!=")) {
Antoine Pitrou9ec25932011-11-13 01:01:23 +0100239 PyObject_FREE(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000240 err_ret->error = E_SYNTAX;
241 break;
242 }
243 else if ((ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
244 strcmp(str, "<>")) {
Antoine Pitrou9ec25932011-11-13 01:01:23 +0100245 PyObject_FREE(str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000246 err_ret->text = "with Barry as BDFL, use '<>' "
247 "instead of '!='";
248 err_ret->error = E_SYNTAX;
249 break;
250 }
251 }
Neil Schemenauerc24ea082002-03-22 23:53:36 +0000252#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000253 if (a >= tok->line_start)
254 col_offset = a - tok->line_start;
255 else
256 col_offset = -1;
Guido van Rossumbd0389d1994-08-29 12:25:45 +0000257
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000258 if ((err_ret->error =
259 PyParser_AddToken(ps, (int)type, str,
260 tok->lineno, col_offset,
261 &(err_ret->expected))) != E_OK) {
262 if (err_ret->error != E_DONE) {
263 PyObject_FREE(str);
264 err_ret->token = type;
265 }
266 break;
267 }
268 }
269
270 if (err_ret->error == E_DONE) {
271 n = ps->p_tree;
272 ps->p_tree = NULL;
Meador Ingefa21bf02012-01-19 01:08:41 -0600273
Benjamin Peterson79c1f962012-01-19 08:48:11 -0500274#ifndef PGEN
Meador Ingefa21bf02012-01-19 01:08:41 -0600275 /* Check that the source for a single input statement really
276 is a single statement by looking at what is left in the
277 buffer after parsing. Trailing whitespace and comments
278 are OK. */
279 if (start == single_input) {
280 char *cur = tok->cur;
281 char c = *tok->cur;
282
Benjamin Petersoncff92372012-01-19 17:46:13 -0500283 for (;;) {
284 while (c == ' ' || c == '\t' || c == '\n' || c == '\014')
285 c = *++cur;
Meador Ingefa21bf02012-01-19 01:08:41 -0600286
Benjamin Petersoncff92372012-01-19 17:46:13 -0500287 if (!c)
288 break;
289
290 if (c != '#') {
291 err_ret->error = E_BADSINGLE;
292 PyNode_Free(n);
293 n = NULL;
294 break;
295 }
296
297 /* Suck up comment. */
298 while (c && c != '\n')
299 c = *++cur;
Meador Ingefa21bf02012-01-19 01:08:41 -0600300 }
301 }
Benjamin Peterson79c1f962012-01-19 08:48:11 -0500302#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000303 }
304 else
305 n = NULL;
Christian Heimesb1b3efc2008-03-26 23:24:27 +0000306
Christian Heimes4d6ec852008-03-26 22:34:47 +0000307#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000308 *flags = ps->p_flags;
Christian Heimes4d6ec852008-03-26 22:34:47 +0000309#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000310 PyParser_Delete(ps);
Guido van Rossumbd0389d1994-08-29 12:25:45 +0000311
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000312 if (n == NULL) {
Benjamin Peterson758888d2011-05-30 11:12:38 -0500313 if (tok->done == E_EOF)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000314 err_ret->error = E_EOF;
315 err_ret->lineno = tok->lineno;
316 if (tok->buf != NULL) {
317 size_t len;
318 assert(tok->cur - tok->buf < INT_MAX);
319 err_ret->offset = (int)(tok->cur - tok->buf);
320 len = tok->inp - tok->buf;
321 err_ret->text = (char *) PyObject_MALLOC(len + 1);
322 if (err_ret->text != NULL) {
323 if (len > 0)
324 strncpy(err_ret->text, tok->buf, len);
325 err_ret->text[len] = '\0';
326 }
327 }
328 } else if (tok->encoding != NULL) {
329 /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
330 * allocated using PyMem_
331 */
332 node* r = PyNode_New(encoding_decl);
333 if (r)
334 r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
335 if (!r || !r->n_str) {
336 err_ret->error = E_NOMEM;
337 if (r)
338 PyObject_FREE(r);
339 n = NULL;
340 goto done;
341 }
342 strcpy(r->n_str, tok->encoding);
343 PyMem_FREE(tok->encoding);
344 tok->encoding = NULL;
345 r->n_nchildren = 1;
346 r->n_child = n;
347 n = r;
348 }
Guido van Rossumbd0389d1994-08-29 12:25:45 +0000349
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000350done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000351 PyTokenizer_Free(tok);
Guido van Rossumbd0389d1994-08-29 12:25:45 +0000352
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000353 return n;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000354}
Guido van Rossum0c156a52001-10-20 14:27:56 +0000355
Victor Stinner7f2fee32011-04-05 00:39:01 +0200356static int
Victor Stinner14e461d2013-08-26 22:28:21 +0200357initerr(perrdetail *err_ret, PyObject *filename)
Guido van Rossum0c156a52001-10-20 14:27:56 +0000358{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 err_ret->error = E_OK;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000360 err_ret->lineno = 0;
361 err_ret->offset = 0;
362 err_ret->text = NULL;
363 err_ret->token = -1;
364 err_ret->expected = -1;
Victor Stinner7f2fee32011-04-05 00:39:01 +0200365#ifndef PGEN
Victor Stinner14e461d2013-08-26 22:28:21 +0200366 if (filename) {
367 Py_INCREF(filename);
368 err_ret->filename = filename;
369 }
370 else {
Victor Stinner7f2fee32011-04-05 00:39:01 +0200371 err_ret->filename = PyUnicode_FromString("<string>");
Victor Stinner14e461d2013-08-26 22:28:21 +0200372 if (err_ret->filename == NULL) {
373 err_ret->error = E_ERROR;
374 return -1;
375 }
Victor Stinner7f2fee32011-04-05 00:39:01 +0200376 }
377#endif
378 return 0;
Guido van Rossum0c156a52001-10-20 14:27:56 +0000379}