Guido van Rossum | f70e43a | 1991-02-19 12:39:46 +0000 | [diff] [blame] | 1 | |
Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 2 | /* Parser-tokenizer link implementation */ |
| 3 | |
Guido van Rossum | 3f5da24 | 1990-12-20 15:06:42 +0000 | [diff] [blame] | 4 | #include "pgenheaders.h" |
Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 5 | #include "tokenizer.h" |
| 6 | #include "node.h" |
| 7 | #include "grammar.h" |
| 8 | #include "parser.h" |
Guido van Rossum | 3f5da24 | 1990-12-20 15:06:42 +0000 | [diff] [blame] | 9 | #include "parsetok.h" |
Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 10 | #include "errcode.h" |
Martin v. Löwis | 00f1e3f | 2002-08-04 17:29:52 +0000 | [diff] [blame] | 11 | #include "graminit.h" |
Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 12 | |
Guido van Rossum | 6135df6 | 1998-04-10 19:35:06 +0000 | [diff] [blame] | 13 | int Py_TabcheckFlag; |
| 14 | |
Guido van Rossum | 3f5da24 | 1990-12-20 15:06:42 +0000 | [diff] [blame] | 15 | |
| 16 | /* Forward */ |
Tim Peters | fe2127d | 2001-07-16 05:37:24 +0000 | [diff] [blame] | 17 | static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int); |
Martin v. Löwis | 95292d6 | 2002-12-11 14:04:59 +0000 | [diff] [blame] | 18 | static void initerr(perrdetail *err_ret, const char* filename); |
Guido van Rossum | 3f5da24 | 1990-12-20 15:06:42 +0000 | [diff] [blame] | 19 | |
| 20 | /* Parse input coming from a string. Return error code, print some errors. */ |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 21 | node * |
Martin v. Löwis | 95292d6 | 2002-12-11 14:04:59 +0000 | [diff] [blame] | 22 | PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret) |
Guido van Rossum | 3f5da24 | 1990-12-20 15:06:42 +0000 | [diff] [blame] | 23 | { |
Tim Peters | fe2127d | 2001-07-16 05:37:24 +0000 | [diff] [blame] | 24 | return PyParser_ParseStringFlags(s, g, start, err_ret, 0); |
| 25 | } |
| 26 | |
| 27 | node * |
Martin v. Löwis | 95292d6 | 2002-12-11 14:04:59 +0000 | [diff] [blame] | 28 | PyParser_ParseStringFlags(const char *s, grammar *g, int start, |
Tim Peters | fe2127d | 2001-07-16 05:37:24 +0000 | [diff] [blame] | 29 | perrdetail *err_ret, int flags) |
| 30 | { |
Thomas Heller | 6b17abf | 2002-07-09 09:23:27 +0000 | [diff] [blame] | 31 | return PyParser_ParseStringFlagsFilename(s, NULL, |
| 32 | g, start, err_ret, 0); |
| 33 | } |
| 34 | |
| 35 | node * |
Martin v. Löwis | 95292d6 | 2002-12-11 14:04:59 +0000 | [diff] [blame] | 36 | PyParser_ParseStringFlagsFilename(const char *s, const char *filename, |
Thomas Heller | 6b17abf | 2002-07-09 09:23:27 +0000 | [diff] [blame] | 37 | grammar *g, int start, |
| 38 | perrdetail *err_ret, int flags) |
| 39 | { |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 40 | struct tok_state *tok; |
| 41 | |
Thomas Heller | 6b17abf | 2002-07-09 09:23:27 +0000 | [diff] [blame] | 42 | initerr(err_ret, filename); |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 43 | |
Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 44 | if ((tok = PyTokenizer_FromString(s)) == NULL) { |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 45 | err_ret->error = E_NOMEM; |
| 46 | return NULL; |
Guido van Rossum | 3f5da24 | 1990-12-20 15:06:42 +0000 | [diff] [blame] | 47 | } |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 48 | |
Martin v. Löwis | 00f1e3f | 2002-08-04 17:29:52 +0000 | [diff] [blame] | 49 | tok->filename = filename ? filename : "<string>"; |
Guido van Rossum | 89ce454 | 1998-12-21 18:32:40 +0000 | [diff] [blame] | 50 | if (Py_TabcheckFlag || Py_VerboseFlag) { |
Guido van Rossum | 89ce454 | 1998-12-21 18:32:40 +0000 | [diff] [blame] | 51 | tok->altwarning = (tok->filename != NULL); |
| 52 | if (Py_TabcheckFlag >= 2) |
| 53 | tok->alterror++; |
| 54 | } |
| 55 | |
Tim Peters | fe2127d | 2001-07-16 05:37:24 +0000 | [diff] [blame] | 56 | return parsetok(tok, g, start, err_ret, flags); |
Guido van Rossum | 3f5da24 | 1990-12-20 15:06:42 +0000 | [diff] [blame] | 57 | } |
| 58 | |
| 59 | |
| 60 | /* Parse input coming from a file. Return error code, print some errors. */ |
| 61 | |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 62 | node * |
Martin v. Löwis | 95292d6 | 2002-12-11 14:04:59 +0000 | [diff] [blame] | 63 | PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start, |
Thomas Wouters | 23c9e00 | 2000-07-22 19:20:54 +0000 | [diff] [blame] | 64 | char *ps1, char *ps2, perrdetail *err_ret) |
Guido van Rossum | 3f5da24 | 1990-12-20 15:06:42 +0000 | [diff] [blame] | 65 | { |
Tim Peters | fe2127d | 2001-07-16 05:37:24 +0000 | [diff] [blame] | 66 | return PyParser_ParseFileFlags(fp, filename, g, start, ps1, ps2, |
| 67 | err_ret, 0); |
| 68 | } |
| 69 | |
| 70 | node * |
Martin v. Löwis | 95292d6 | 2002-12-11 14:04:59 +0000 | [diff] [blame] | 71 | PyParser_ParseFileFlags(FILE *fp, const char *filename, grammar *g, int start, |
Tim Peters | fe2127d | 2001-07-16 05:37:24 +0000 | [diff] [blame] | 72 | char *ps1, char *ps2, perrdetail *err_ret, int flags) |
| 73 | { |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 74 | struct tok_state *tok; |
| 75 | |
Martin v. Löwis | 16eff6f | 2002-01-05 21:40:08 +0000 | [diff] [blame] | 76 | initerr(err_ret, filename); |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 77 | |
Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 78 | if ((tok = PyTokenizer_FromFile(fp, ps1, ps2)) == NULL) { |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 79 | err_ret->error = E_NOMEM; |
| 80 | return NULL; |
Guido van Rossum | 3f5da24 | 1990-12-20 15:06:42 +0000 | [diff] [blame] | 81 | } |
Martin v. Löwis | 00f1e3f | 2002-08-04 17:29:52 +0000 | [diff] [blame] | 82 | tok->filename = filename; |
Guido van Rossum | 6135df6 | 1998-04-10 19:35:06 +0000 | [diff] [blame] | 83 | if (Py_TabcheckFlag || Py_VerboseFlag) { |
Guido van Rossum | 6135df6 | 1998-04-10 19:35:06 +0000 | [diff] [blame] | 84 | tok->altwarning = (filename != NULL); |
| 85 | if (Py_TabcheckFlag >= 2) |
| 86 | tok->alterror++; |
| 87 | } |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 88 | |
Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 89 | |
Tim Peters | fe2127d | 2001-07-16 05:37:24 +0000 | [diff] [blame] | 90 | return parsetok(tok, g, start, err_ret, flags); |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 91 | } |
Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 92 | |
| 93 | /* Parse input coming from the given tokenizer structure. |
| 94 | Return error code. */ |
| 95 | |
Neil Schemenauer | c24ea08 | 2002-03-22 23:53:36 +0000 | [diff] [blame] | 96 | #if 0 /* future keyword */ |
Guido van Rossum | da62ecc | 2001-07-17 16:53:11 +0000 | [diff] [blame] | 97 | static char yield_msg[] = |
| 98 | "%s:%d: Warning: 'yield' will become a reserved keyword in the future\n"; |
Neil Schemenauer | c24ea08 | 2002-03-22 23:53:36 +0000 | [diff] [blame] | 99 | #endif |
Guido van Rossum | da62ecc | 2001-07-17 16:53:11 +0000 | [diff] [blame] | 100 | |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 101 | static node * |
Tim Peters | fe2127d | 2001-07-16 05:37:24 +0000 | [diff] [blame] | 102 | parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, |
| 103 | int flags) |
Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 104 | { |
| 105 | parser_state *ps; |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 106 | node *n; |
Guido van Rossum | d8b1d37 | 1992-03-04 16:40:44 +0000 | [diff] [blame] | 107 | int started = 0; |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 108 | |
Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 109 | if ((ps = PyParser_New(g, start)) == NULL) { |
Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 110 | fprintf(stderr, "no mem for new parser\n"); |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 111 | err_ret->error = E_NOMEM; |
| 112 | return NULL; |
Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 113 | } |
Neil Schemenauer | c24ea08 | 2002-03-22 23:53:36 +0000 | [diff] [blame] | 114 | #if 0 /* future keyword */ |
Tim Peters | fe2127d | 2001-07-16 05:37:24 +0000 | [diff] [blame] | 115 | if (flags & PyPARSE_YIELD_IS_KEYWORD) |
| 116 | ps->p_generators = 1; |
Neil Schemenauer | c24ea08 | 2002-03-22 23:53:36 +0000 | [diff] [blame] | 117 | #endif |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 118 | |
Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 119 | for (;;) { |
| 120 | char *a, *b; |
| 121 | int type; |
Guido van Rossum | 6da3434 | 2000-06-28 22:00:02 +0000 | [diff] [blame] | 122 | size_t len; |
Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 123 | char *str; |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 124 | |
Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 125 | type = PyTokenizer_Get(tok, &a, &b); |
Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 126 | if (type == ERRORTOKEN) { |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 127 | err_ret->error = tok->done; |
Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 128 | break; |
| 129 | } |
Guido van Rossum | d8b1d37 | 1992-03-04 16:40:44 +0000 | [diff] [blame] | 130 | if (type == ENDMARKER && started) { |
| 131 | type = NEWLINE; /* Add an extra newline */ |
| 132 | started = 0; |
Guido van Rossum | 4b499dd3 | 2003-02-13 22:07:59 +0000 | [diff] [blame] | 133 | /* Add the right number of dedent tokens, |
| 134 | except if a certain flag is given -- |
| 135 | codeop.py uses this. */ |
| 136 | if (tok->indent && |
| 137 | !(flags & PyPARSE_DONT_IMPLY_DEDENT)) |
| 138 | { |
| 139 | tok->pendin = -tok->indent; |
| 140 | tok->indent = 0; |
| 141 | } |
Guido van Rossum | d8b1d37 | 1992-03-04 16:40:44 +0000 | [diff] [blame] | 142 | } |
| 143 | else |
| 144 | started = 1; |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 145 | len = b - a; /* XXX this may compute NULL - NULL */ |
Andrew MacIntyre | 80d4e2a | 2002-08-04 06:28:21 +0000 | [diff] [blame] | 146 | str = (char *) PyObject_MALLOC(len + 1); |
Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 147 | if (str == NULL) { |
| 148 | fprintf(stderr, "no mem for next token\n"); |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 149 | err_ret->error = E_NOMEM; |
Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 150 | break; |
| 151 | } |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 152 | if (len > 0) |
| 153 | strncpy(str, a, len); |
Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 154 | str[len] = '\0'; |
Guido van Rossum | da62ecc | 2001-07-17 16:53:11 +0000 | [diff] [blame] | 155 | |
Neil Schemenauer | c24ea08 | 2002-03-22 23:53:36 +0000 | [diff] [blame] | 156 | #if 0 /* future keyword */ |
Guido van Rossum | da62ecc | 2001-07-17 16:53:11 +0000 | [diff] [blame] | 157 | /* Warn about yield as NAME */ |
| 158 | if (type == NAME && !ps->p_generators && |
| 159 | len == 5 && str[0] == 'y' && strcmp(str, "yield") == 0) |
| 160 | PySys_WriteStderr(yield_msg, |
| 161 | err_ret->filename==NULL ? |
| 162 | "<string>" : err_ret->filename, |
| 163 | tok->lineno); |
Neil Schemenauer | c24ea08 | 2002-03-22 23:53:36 +0000 | [diff] [blame] | 164 | #endif |
Guido van Rossum | da62ecc | 2001-07-17 16:53:11 +0000 | [diff] [blame] | 165 | |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 166 | if ((err_ret->error = |
Fred Drake | 85f3639 | 2000-07-11 17:53:00 +0000 | [diff] [blame] | 167 | PyParser_AddToken(ps, (int)type, str, tok->lineno, |
| 168 | &(err_ret->expected))) != E_OK) { |
Guido van Rossum | ff0ec52 | 1997-07-27 01:52:50 +0000 | [diff] [blame] | 169 | if (err_ret->error != E_DONE) |
Andrew MacIntyre | 80d4e2a | 2002-08-04 06:28:21 +0000 | [diff] [blame] | 170 | PyObject_FREE(str); |
Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 171 | break; |
Guido van Rossum | ff0ec52 | 1997-07-27 01:52:50 +0000 | [diff] [blame] | 172 | } |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 173 | } |
| 174 | |
| 175 | if (err_ret->error == E_DONE) { |
| 176 | n = ps->p_tree; |
| 177 | ps->p_tree = NULL; |
| 178 | } |
| 179 | else |
| 180 | n = NULL; |
| 181 | |
Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 182 | PyParser_Delete(ps); |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 183 | |
| 184 | if (n == NULL) { |
| 185 | if (tok->lineno <= 1 && tok->done == E_EOF) |
| 186 | err_ret->error = E_EOF; |
| 187 | err_ret->lineno = tok->lineno; |
| 188 | err_ret->offset = tok->cur - tok->buf; |
| 189 | if (tok->buf != NULL) { |
Guido van Rossum | 6da3434 | 2000-06-28 22:00:02 +0000 | [diff] [blame] | 190 | size_t len = tok->inp - tok->buf; |
Andrew MacIntyre | 80d4e2a | 2002-08-04 06:28:21 +0000 | [diff] [blame] | 191 | err_ret->text = (char *) PyObject_MALLOC(len + 1); |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 192 | if (err_ret->text != NULL) { |
Guido van Rossum | ec49827 | 1995-01-20 16:59:12 +0000 | [diff] [blame] | 193 | if (len > 0) |
| 194 | strncpy(err_ret->text, tok->buf, len); |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 195 | err_ret->text[len] = '\0'; |
| 196 | } |
Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 197 | } |
Martin v. Löwis | 00f1e3f | 2002-08-04 17:29:52 +0000 | [diff] [blame] | 198 | } else if (tok->encoding != NULL) { |
| 199 | node* r = PyNode_New(encoding_decl); |
| 200 | r->n_str = tok->encoding; |
| 201 | r->n_nchildren = 1; |
| 202 | r->n_child = n; |
| 203 | tok->encoding = NULL; |
| 204 | n = r; |
Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 205 | } |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 206 | |
Guido van Rossum | 86bea46 | 1997-04-29 21:03:06 +0000 | [diff] [blame] | 207 | PyTokenizer_Free(tok); |
Guido van Rossum | bd0389d | 1994-08-29 12:25:45 +0000 | [diff] [blame] | 208 | |
| 209 | return n; |
Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 210 | } |
Guido van Rossum | 0c156a5 | 2001-10-20 14:27:56 +0000 | [diff] [blame] | 211 | |
| 212 | static void |
Martin v. Löwis | 95292d6 | 2002-12-11 14:04:59 +0000 | [diff] [blame] | 213 | initerr(perrdetail *err_ret, const char* filename) |
Guido van Rossum | 0c156a5 | 2001-10-20 14:27:56 +0000 | [diff] [blame] | 214 | { |
| 215 | err_ret->error = E_OK; |
Martin v. Löwis | 16eff6f | 2002-01-05 21:40:08 +0000 | [diff] [blame] | 216 | err_ret->filename = filename; |
Guido van Rossum | 0c156a5 | 2001-10-20 14:27:56 +0000 | [diff] [blame] | 217 | err_ret->lineno = 0; |
| 218 | err_ret->offset = 0; |
| 219 | err_ret->text = NULL; |
| 220 | err_ret->token = -1; |
| 221 | err_ret->expected = -1; |
| 222 | } |