| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 1 | #  Copyright (c) 1998-2002 John Aycock | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 2 | # | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 3 | #  Permission is hereby granted, free of charge, to any person obtaining | 
 | 4 | #  a copy of this software and associated documentation files (the | 
 | 5 | #  "Software"), to deal in the Software without restriction, including | 
 | 6 | #  without limitation the rights to use, copy, modify, merge, publish, | 
 | 7 | #  distribute, sublicense, and/or sell copies of the Software, and to | 
 | 8 | #  permit persons to whom the Software is furnished to do so, subject to | 
 | 9 | #  the following conditions: | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 10 | # | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 11 | #  The above copyright notice and this permission notice shall be | 
 | 12 | #  included in all copies or substantial portions of the Software. | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 13 | # | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 14 | #  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | 
 | 15 | #  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | 
 | 16 | #  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | 
 | 17 | #  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | 
 | 18 | #  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | 
 | 19 | #  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | 
 | 20 | #  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | 
 | 21 |  | 
 | 22 | __version__ = 'SPARK-0.7 (pre-alpha-5)' | 
 | 23 |  | 
 | 24 | import re | 
| Guido van Rossum | 805365e | 2007-05-07 22:24:25 +0000 | [diff] [blame] | 25 |  | 
| Ezio Melotti | 42da663 | 2011-03-15 05:18:48 +0200 | [diff] [blame] | 26 | # Compatibility with older pythons. | 
| Guido van Rossum | 805365e | 2007-05-07 22:24:25 +0000 | [diff] [blame] | 27 | def output(string='', end='\n'): | 
 | 28 |     sys.stdout.write(string + end) | 
 | 29 |  | 
 | 30 | try: | 
 | 31 |     sorted | 
 | 32 | except NameError: | 
 | 33 |     def sorted(seq): | 
 | 34 |         seq2 = seq[:] | 
 | 35 |         seq2.sort() | 
 | 36 |         return seq2 | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 37 |  | 
 | 38 | def _namelist(instance): | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 39 |     namelist, namedict, classlist = [], {}, [instance.__class__] | 
 | 40 |     for c in classlist: | 
 | 41 |         for b in c.__bases__: | 
 | 42 |             classlist.append(b) | 
 | 43 |         for name in c.__dict__.keys(): | 
| Fred Drake | 34e4f52 | 2006-12-29 04:42:48 +0000 | [diff] [blame] | 44 |             if name not in namedict: | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 45 |                 namelist.append(name) | 
 | 46 |                 namedict[name] = 1 | 
 | 47 |     return namelist | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 48 |  | 
 | 49 | class GenericScanner: | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 50 |     def __init__(self, flags=0): | 
 | 51 |         pattern = self.reflect() | 
 | 52 |         self.re = re.compile(pattern, re.VERBOSE|flags) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 53 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 54 |         self.index2func = {} | 
 | 55 |         for name, number in self.re.groupindex.items(): | 
 | 56 |             self.index2func[number-1] = getattr(self, 't_' + name) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 57 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 58 |     def makeRE(self, name): | 
 | 59 |         doc = getattr(self, name).__doc__ | 
 | 60 |         rv = '(?P<%s>%s)' % (name[2:], doc) | 
 | 61 |         return rv | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 62 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 63 |     def reflect(self): | 
 | 64 |         rv = [] | 
 | 65 |         for name in _namelist(self): | 
 | 66 |             if name[:2] == 't_' and name != 't_default': | 
 | 67 |                 rv.append(self.makeRE(name)) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 68 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 69 |         rv.append(self.makeRE('t_default')) | 
| Guido van Rossum | 805365e | 2007-05-07 22:24:25 +0000 | [diff] [blame] | 70 |         return '|'.join(rv) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 71 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 72 |     def error(self, s, pos): | 
| Guido van Rossum | 805365e | 2007-05-07 22:24:25 +0000 | [diff] [blame] | 73 |         output("Lexical error at position %s" % pos) | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 74 |         raise SystemExit | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 75 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 76 |     def tokenize(self, s): | 
 | 77 |         pos = 0 | 
 | 78 |         n = len(s) | 
 | 79 |         while pos < n: | 
 | 80 |             m = self.re.match(s, pos) | 
 | 81 |             if m is None: | 
 | 82 |                 self.error(s, pos) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 83 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 84 |             groups = m.groups() | 
 | 85 |             for i in range(len(groups)): | 
| Fred Drake | 34e4f52 | 2006-12-29 04:42:48 +0000 | [diff] [blame] | 86 |                 if groups[i] and i in self.index2func: | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 87 |                     self.index2func[i](groups[i]) | 
 | 88 |             pos = m.end() | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 89 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 90 |     def t_default(self, s): | 
 | 91 |         r'( . | \n )+' | 
| Guido van Rossum | 805365e | 2007-05-07 22:24:25 +0000 | [diff] [blame] | 92 |         output("Specification error: unmatched input") | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 93 |         raise SystemExit | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 94 |  | 
 | 95 | # | 
 | 96 | #  Extracted from GenericParser and made global so that [un]picking works. | 
 | 97 | # | 
 | 98 | class _State: | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 99 |     def __init__(self, stateno, items): | 
 | 100 |         self.T, self.complete, self.items = [], [], items | 
 | 101 |         self.stateno = stateno | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 102 |  | 
 | 103 | class GenericParser: | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 104 |     # | 
 | 105 |     #  An Earley parser, as per J. Earley, "An Efficient Context-Free | 
 | 106 |     #  Parsing Algorithm", CACM 13(2), pp. 94-102.  Also J. C. Earley, | 
 | 107 |     #  "An Efficient Context-Free Parsing Algorithm", Ph.D. thesis, | 
 | 108 |     #  Carnegie-Mellon University, August 1968.  New formulation of | 
 | 109 |     #  the parser according to J. Aycock, "Practical Earley Parsing | 
 | 110 |     #  and the SPARK Toolkit", Ph.D. thesis, University of Victoria, | 
 | 111 |     #  2001, and J. Aycock and R. N. Horspool, "Practical Earley | 
 | 112 |     #  Parsing", unpublished paper, 2001. | 
 | 113 |     # | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 114 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 115 |     def __init__(self, start): | 
 | 116 |         self.rules = {} | 
 | 117 |         self.rule2func = {} | 
 | 118 |         self.rule2name = {} | 
 | 119 |         self.collectRules() | 
 | 120 |         self.augment(start) | 
 | 121 |         self.ruleschanged = 1 | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 122 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 123 |     _NULLABLE = '\e_' | 
 | 124 |     _START = 'START' | 
 | 125 |     _BOF = '|-' | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 126 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 127 |     # | 
 | 128 |     #  When pickling, take the time to generate the full state machine; | 
 | 129 |     #  some information is then extraneous, too.  Unfortunately we | 
 | 130 |     #  can't save the rule2func map. | 
 | 131 |     # | 
 | 132 |     def __getstate__(self): | 
 | 133 |         if self.ruleschanged: | 
 | 134 |             # | 
 | 135 |             #  XXX - duplicated from parse() | 
 | 136 |             # | 
 | 137 |             self.computeNull() | 
 | 138 |             self.newrules = {} | 
 | 139 |             self.new2old = {} | 
 | 140 |             self.makeNewRules() | 
 | 141 |             self.ruleschanged = 0 | 
 | 142 |             self.edges, self.cores = {}, {} | 
 | 143 |             self.states = { 0: self.makeState0() } | 
 | 144 |             self.makeState(0, self._BOF) | 
 | 145 |         # | 
 | 146 |         #  XXX - should find a better way to do this.. | 
 | 147 |         # | 
 | 148 |         changes = 1 | 
 | 149 |         while changes: | 
 | 150 |             changes = 0 | 
 | 151 |             for k, v in self.edges.items(): | 
 | 152 |                 if v is None: | 
 | 153 |                     state, sym = k | 
| Fred Drake | 34e4f52 | 2006-12-29 04:42:48 +0000 | [diff] [blame] | 154 |                     if state in self.states: | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 155 |                         self.goto(state, sym) | 
 | 156 |                         changes = 1 | 
 | 157 |         rv = self.__dict__.copy() | 
 | 158 |         for s in self.states.values(): | 
 | 159 |             del s.items | 
 | 160 |         del rv['rule2func'] | 
 | 161 |         del rv['nullable'] | 
 | 162 |         del rv['cores'] | 
 | 163 |         return rv | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 164 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 165 |     def __setstate__(self, D): | 
 | 166 |         self.rules = {} | 
 | 167 |         self.rule2func = {} | 
 | 168 |         self.rule2name = {} | 
 | 169 |         self.collectRules() | 
 | 170 |         start = D['rules'][self._START][0][1][1]        # Blech. | 
 | 171 |         self.augment(start) | 
 | 172 |         D['rule2func'] = self.rule2func | 
 | 173 |         D['makeSet'] = self.makeSet_fast | 
 | 174 |         self.__dict__ = D | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 175 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 176 |     # | 
 | 177 |     #  A hook for GenericASTBuilder and GenericASTMatcher.  Mess | 
 | 178 |     #  thee not with this; nor shall thee toucheth the _preprocess | 
 | 179 |     #  argument to addRule. | 
 | 180 |     # | 
 | 181 |     def preprocess(self, rule, func):       return rule, func | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 182 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 183 |     def addRule(self, doc, func, _preprocess=1): | 
 | 184 |         fn = func | 
| Guido van Rossum | 805365e | 2007-05-07 22:24:25 +0000 | [diff] [blame] | 185 |         rules = doc.split() | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 186 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 187 |         index = [] | 
 | 188 |         for i in range(len(rules)): | 
 | 189 |             if rules[i] == '::=': | 
 | 190 |                 index.append(i-1) | 
 | 191 |         index.append(len(rules)) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 192 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 193 |         for i in range(len(index)-1): | 
 | 194 |             lhs = rules[index[i]] | 
 | 195 |             rhs = rules[index[i]+2:index[i+1]] | 
 | 196 |             rule = (lhs, tuple(rhs)) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 197 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 198 |             if _preprocess: | 
 | 199 |                 rule, fn = self.preprocess(rule, func) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 200 |  | 
| Fred Drake | 34e4f52 | 2006-12-29 04:42:48 +0000 | [diff] [blame] | 201 |             if lhs in self.rules: | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 202 |                 self.rules[lhs].append(rule) | 
 | 203 |             else: | 
 | 204 |                 self.rules[lhs] = [ rule ] | 
 | 205 |             self.rule2func[rule] = fn | 
 | 206 |             self.rule2name[rule] = func.__name__[2:] | 
 | 207 |         self.ruleschanged = 1 | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 208 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 209 |     def collectRules(self): | 
 | 210 |         for name in _namelist(self): | 
 | 211 |             if name[:2] == 'p_': | 
 | 212 |                 func = getattr(self, name) | 
 | 213 |                 doc = func.__doc__ | 
 | 214 |                 self.addRule(doc, func) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 215 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 216 |     def augment(self, start): | 
 | 217 |         rule = '%s ::= %s %s' % (self._START, self._BOF, start) | 
 | 218 |         self.addRule(rule, lambda args: args[1], 0) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 219 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 220 |     def computeNull(self): | 
 | 221 |         self.nullable = {} | 
 | 222 |         tbd = [] | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 223 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 224 |         for rulelist in self.rules.values(): | 
 | 225 |             lhs = rulelist[0][0] | 
 | 226 |             self.nullable[lhs] = 0 | 
 | 227 |             for rule in rulelist: | 
 | 228 |                 rhs = rule[1] | 
 | 229 |                 if len(rhs) == 0: | 
 | 230 |                     self.nullable[lhs] = 1 | 
 | 231 |                     continue | 
 | 232 |                 # | 
 | 233 |                 #  We only need to consider rules which | 
 | 234 |                 #  consist entirely of nonterminal symbols. | 
 | 235 |                 #  This should be a savings on typical | 
 | 236 |                 #  grammars. | 
 | 237 |                 # | 
 | 238 |                 for sym in rhs: | 
| Fred Drake | 34e4f52 | 2006-12-29 04:42:48 +0000 | [diff] [blame] | 239 |                     if sym not in self.rules: | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 240 |                         break | 
 | 241 |                 else: | 
 | 242 |                     tbd.append(rule) | 
 | 243 |         changes = 1 | 
 | 244 |         while changes: | 
 | 245 |             changes = 0 | 
 | 246 |             for lhs, rhs in tbd: | 
 | 247 |                 if self.nullable[lhs]: | 
 | 248 |                     continue | 
 | 249 |                 for sym in rhs: | 
 | 250 |                     if not self.nullable[sym]: | 
 | 251 |                         break | 
 | 252 |                 else: | 
 | 253 |                     self.nullable[lhs] = 1 | 
 | 254 |                     changes = 1 | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 255 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 256 |     def makeState0(self): | 
 | 257 |         s0 = _State(0, []) | 
 | 258 |         for rule in self.newrules[self._START]: | 
 | 259 |             s0.items.append((rule, 0)) | 
 | 260 |         return s0 | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 261 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 262 |     def finalState(self, tokens): | 
 | 263 |         # | 
 | 264 |         #  Yuck. | 
 | 265 |         # | 
 | 266 |         if len(self.newrules[self._START]) == 2 and len(tokens) == 0: | 
 | 267 |             return 1 | 
 | 268 |         start = self.rules[self._START][0][1][1] | 
 | 269 |         return self.goto(1, start) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 270 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 271 |     def makeNewRules(self): | 
 | 272 |         worklist = [] | 
 | 273 |         for rulelist in self.rules.values(): | 
 | 274 |             for rule in rulelist: | 
 | 275 |                 worklist.append((rule, 0, 1, rule)) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 276 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 277 |         for rule, i, candidate, oldrule in worklist: | 
 | 278 |             lhs, rhs = rule | 
 | 279 |             n = len(rhs) | 
 | 280 |             while i < n: | 
 | 281 |                 sym = rhs[i] | 
| Fred Drake | 34e4f52 | 2006-12-29 04:42:48 +0000 | [diff] [blame] | 282 |                 if sym not in self.rules or \ | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 283 |                    not self.nullable[sym]: | 
 | 284 |                     candidate = 0 | 
 | 285 |                     i = i + 1 | 
 | 286 |                     continue | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 287 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 288 |                 newrhs = list(rhs) | 
 | 289 |                 newrhs[i] = self._NULLABLE+sym | 
 | 290 |                 newrule = (lhs, tuple(newrhs)) | 
 | 291 |                 worklist.append((newrule, i+1, | 
 | 292 |                                  candidate, oldrule)) | 
 | 293 |                 candidate = 0 | 
 | 294 |                 i = i + 1 | 
 | 295 |             else: | 
 | 296 |                 if candidate: | 
 | 297 |                     lhs = self._NULLABLE+lhs | 
 | 298 |                     rule = (lhs, rhs) | 
| Fred Drake | 34e4f52 | 2006-12-29 04:42:48 +0000 | [diff] [blame] | 299 |                 if lhs in self.newrules: | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 300 |                     self.newrules[lhs].append(rule) | 
 | 301 |                 else: | 
 | 302 |                     self.newrules[lhs] = [ rule ] | 
 | 303 |                 self.new2old[rule] = oldrule | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 304 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 305 |     def typestring(self, token): | 
 | 306 |         return None | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 307 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 308 |     def error(self, token): | 
| Guido van Rossum | 805365e | 2007-05-07 22:24:25 +0000 | [diff] [blame] | 309 |         output("Syntax error at or near `%s' token" % token) | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 310 |         raise SystemExit | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 311 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 312 |     def parse(self, tokens): | 
 | 313 |         sets = [ [(1,0), (2,0)] ] | 
 | 314 |         self.links = {} | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 315 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 316 |         if self.ruleschanged: | 
 | 317 |             self.computeNull() | 
 | 318 |             self.newrules = {} | 
 | 319 |             self.new2old = {} | 
 | 320 |             self.makeNewRules() | 
 | 321 |             self.ruleschanged = 0 | 
 | 322 |             self.edges, self.cores = {}, {} | 
 | 323 |             self.states = { 0: self.makeState0() } | 
 | 324 |             self.makeState(0, self._BOF) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 325 |  | 
| Guido van Rossum | 805365e | 2007-05-07 22:24:25 +0000 | [diff] [blame] | 326 |         for i in range(len(tokens)): | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 327 |             sets.append([]) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 328 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 329 |             if sets[i] == []: | 
 | 330 |                 break | 
 | 331 |             self.makeSet(tokens[i], sets, i) | 
 | 332 |         else: | 
 | 333 |             sets.append([]) | 
 | 334 |             self.makeSet(None, sets, len(tokens)) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 335 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 336 |         #_dump(tokens, sets, self.states) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 337 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 338 |         finalitem = (self.finalState(tokens), 0) | 
 | 339 |         if finalitem not in sets[-2]: | 
 | 340 |             if len(tokens) > 0: | 
 | 341 |                 self.error(tokens[i-1]) | 
 | 342 |             else: | 
 | 343 |                 self.error(None) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 344 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 345 |         return self.buildTree(self._START, finalitem, | 
 | 346 |                               tokens, len(sets)-2) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 347 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 348 |     def isnullable(self, sym): | 
 | 349 |         # | 
 | 350 |         #  For symbols in G_e only.  If we weren't supporting 1.5, | 
 | 351 |         #  could just use sym.startswith(). | 
 | 352 |         # | 
 | 353 |         return self._NULLABLE == sym[0:len(self._NULLABLE)] | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 354 |  | 
| Guido van Rossum | 1bc535d | 2007-05-15 18:46:22 +0000 | [diff] [blame] | 355 |     def skip(self, hs, pos=0): | 
 | 356 |         n = len(hs[1]) | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 357 |         while pos < n: | 
| Guido van Rossum | 1bc535d | 2007-05-15 18:46:22 +0000 | [diff] [blame] | 358 |             if not self.isnullable(hs[1][pos]): | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 359 |                 break | 
 | 360 |             pos = pos + 1 | 
 | 361 |         return pos | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 362 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 363 |     def makeState(self, state, sym): | 
 | 364 |         assert sym is not None | 
 | 365 |         # | 
 | 366 |         #  Compute \epsilon-kernel state's core and see if | 
 | 367 |         #  it exists already. | 
 | 368 |         # | 
 | 369 |         kitems = [] | 
 | 370 |         for rule, pos in self.states[state].items: | 
 | 371 |             lhs, rhs = rule | 
 | 372 |             if rhs[pos:pos+1] == (sym,): | 
 | 373 |                 kitems.append((rule, self.skip(rule, pos+1))) | 
 | 374 |         core = kitems | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 375 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 376 |         core.sort() | 
 | 377 |         tcore = tuple(core) | 
| Fred Drake | 34e4f52 | 2006-12-29 04:42:48 +0000 | [diff] [blame] | 378 |         if tcore in self.cores: | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 379 |             return self.cores[tcore] | 
 | 380 |         # | 
 | 381 |         #  Nope, doesn't exist.  Compute it and the associated | 
 | 382 |         #  \epsilon-nonkernel state together; we'll need it right away. | 
 | 383 |         # | 
 | 384 |         k = self.cores[tcore] = len(self.states) | 
 | 385 |         K, NK = _State(k, kitems), _State(k+1, []) | 
 | 386 |         self.states[k] = K | 
 | 387 |         predicted = {} | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 388 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 389 |         edges = self.edges | 
 | 390 |         rules = self.newrules | 
 | 391 |         for X in K, NK: | 
 | 392 |             worklist = X.items | 
 | 393 |             for item in worklist: | 
 | 394 |                 rule, pos = item | 
 | 395 |                 lhs, rhs = rule | 
 | 396 |                 if pos == len(rhs): | 
 | 397 |                     X.complete.append(rule) | 
 | 398 |                     continue | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 399 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 400 |                 nextSym = rhs[pos] | 
 | 401 |                 key = (X.stateno, nextSym) | 
| Fred Drake | 34e4f52 | 2006-12-29 04:42:48 +0000 | [diff] [blame] | 402 |                 if nextSym not in rules: | 
 | 403 |                     if key not in edges: | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 404 |                         edges[key] = None | 
 | 405 |                         X.T.append(nextSym) | 
 | 406 |                 else: | 
 | 407 |                     edges[key] = None | 
| Fred Drake | 34e4f52 | 2006-12-29 04:42:48 +0000 | [diff] [blame] | 408 |                     if nextSym not in predicted: | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 409 |                         predicted[nextSym] = 1 | 
 | 410 |                         for prule in rules[nextSym]: | 
 | 411 |                             ppos = self.skip(prule) | 
 | 412 |                             new = (prule, ppos) | 
 | 413 |                             NK.items.append(new) | 
 | 414 |             # | 
 | 415 |             #  Problem: we know K needs generating, but we | 
 | 416 |             #  don't yet know about NK.  Can't commit anything | 
 | 417 |             #  regarding NK to self.edges until we're sure.  Should | 
 | 418 |             #  we delay committing on both K and NK to avoid this | 
 | 419 |             #  hacky code?  This creates other problems.. | 
 | 420 |             # | 
 | 421 |             if X is K: | 
 | 422 |                 edges = {} | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 423 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 424 |         if NK.items == []: | 
 | 425 |             return k | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 426 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 427 |         # | 
 | 428 |         #  Check for \epsilon-nonkernel's core.  Unfortunately we | 
 | 429 |         #  need to know the entire set of predicted nonterminals | 
 | 430 |         #  to do this without accidentally duplicating states. | 
 | 431 |         # | 
| Guido van Rossum | 805365e | 2007-05-07 22:24:25 +0000 | [diff] [blame] | 432 |         core = sorted(predicted.keys()) | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 433 |         tcore = tuple(core) | 
| Fred Drake | 34e4f52 | 2006-12-29 04:42:48 +0000 | [diff] [blame] | 434 |         if tcore in self.cores: | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 435 |             self.edges[(k, None)] = self.cores[tcore] | 
 | 436 |             return k | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 437 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 438 |         nk = self.cores[tcore] = self.edges[(k, None)] = NK.stateno | 
 | 439 |         self.edges.update(edges) | 
 | 440 |         self.states[nk] = NK | 
 | 441 |         return k | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 442 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 443 |     def goto(self, state, sym): | 
 | 444 |         key = (state, sym) | 
| Fred Drake | 34e4f52 | 2006-12-29 04:42:48 +0000 | [diff] [blame] | 445 |         if key not in self.edges: | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 446 |             # | 
 | 447 |             #  No transitions from state on sym. | 
 | 448 |             # | 
 | 449 |             return None | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 450 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 451 |         rv = self.edges[key] | 
 | 452 |         if rv is None: | 
 | 453 |             # | 
 | 454 |             #  Target state isn't generated yet.  Remedy this. | 
 | 455 |             # | 
 | 456 |             rv = self.makeState(state, sym) | 
 | 457 |             self.edges[key] = rv | 
 | 458 |         return rv | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 459 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 460 |     def gotoT(self, state, t): | 
 | 461 |         return [self.goto(state, t)] | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 462 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 463 |     def gotoST(self, state, st): | 
 | 464 |         rv = [] | 
 | 465 |         for t in self.states[state].T: | 
 | 466 |             if st == t: | 
 | 467 |                 rv.append(self.goto(state, t)) | 
 | 468 |         return rv | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 469 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 470 |     def add(self, set, item, i=None, predecessor=None, causal=None): | 
 | 471 |         if predecessor is None: | 
 | 472 |             if item not in set: | 
 | 473 |                 set.append(item) | 
 | 474 |         else: | 
 | 475 |             key = (item, i) | 
 | 476 |             if item not in set: | 
 | 477 |                 self.links[key] = [] | 
 | 478 |                 set.append(item) | 
 | 479 |             self.links[key].append((predecessor, causal)) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 480 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 481 |     def makeSet(self, token, sets, i): | 
 | 482 |         cur, next = sets[i], sets[i+1] | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 483 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 484 |         ttype = token is not None and self.typestring(token) or None | 
 | 485 |         if ttype is not None: | 
 | 486 |             fn, arg = self.gotoT, ttype | 
 | 487 |         else: | 
 | 488 |             fn, arg = self.gotoST, token | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 489 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 490 |         for item in cur: | 
 | 491 |             ptr = (item, i) | 
 | 492 |             state, parent = item | 
 | 493 |             add = fn(state, arg) | 
 | 494 |             for k in add: | 
 | 495 |                 if k is not None: | 
 | 496 |                     self.add(next, (k, parent), i+1, ptr) | 
 | 497 |                     nk = self.goto(k, None) | 
 | 498 |                     if nk is not None: | 
 | 499 |                         self.add(next, (nk, i+1)) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 500 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 501 |             if parent == i: | 
 | 502 |                 continue | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 503 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 504 |             for rule in self.states[state].complete: | 
 | 505 |                 lhs, rhs = rule | 
 | 506 |                 for pitem in sets[parent]: | 
 | 507 |                     pstate, pparent = pitem | 
 | 508 |                     k = self.goto(pstate, lhs) | 
 | 509 |                     if k is not None: | 
 | 510 |                         why = (item, i, rule) | 
 | 511 |                         pptr = (pitem, parent) | 
 | 512 |                         self.add(cur, (k, pparent), | 
 | 513 |                                  i, pptr, why) | 
 | 514 |                         nk = self.goto(k, None) | 
 | 515 |                         if nk is not None: | 
 | 516 |                             self.add(cur, (nk, i)) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 517 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 518 |     def makeSet_fast(self, token, sets, i): | 
 | 519 |         # | 
 | 520 |         #  Call *only* when the entire state machine has been built! | 
 | 521 |         #  It relies on self.edges being filled in completely, and | 
 | 522 |         #  then duplicates and inlines code to boost speed at the | 
 | 523 |         #  cost of extreme ugliness. | 
 | 524 |         # | 
 | 525 |         cur, next = sets[i], sets[i+1] | 
 | 526 |         ttype = token is not None and self.typestring(token) or None | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 527 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 528 |         for item in cur: | 
 | 529 |             ptr = (item, i) | 
 | 530 |             state, parent = item | 
 | 531 |             if ttype is not None: | 
 | 532 |                 k = self.edges.get((state, ttype), None) | 
 | 533 |                 if k is not None: | 
 | 534 |                     #self.add(next, (k, parent), i+1, ptr) | 
 | 535 |                     #INLINED --v | 
 | 536 |                     new = (k, parent) | 
 | 537 |                     key = (new, i+1) | 
 | 538 |                     if new not in next: | 
 | 539 |                         self.links[key] = [] | 
 | 540 |                         next.append(new) | 
 | 541 |                     self.links[key].append((ptr, None)) | 
 | 542 |                     #INLINED --^ | 
 | 543 |                     #nk = self.goto(k, None) | 
 | 544 |                     nk = self.edges.get((k, None), None) | 
 | 545 |                     if nk is not None: | 
 | 546 |                         #self.add(next, (nk, i+1)) | 
 | 547 |                         #INLINED --v | 
 | 548 |                         new = (nk, i+1) | 
 | 549 |                         if new not in next: | 
 | 550 |                             next.append(new) | 
 | 551 |                         #INLINED --^ | 
 | 552 |             else: | 
 | 553 |                 add = self.gotoST(state, token) | 
 | 554 |                 for k in add: | 
 | 555 |                     if k is not None: | 
 | 556 |                         self.add(next, (k, parent), i+1, ptr) | 
 | 557 |                         #nk = self.goto(k, None) | 
 | 558 |                         nk = self.edges.get((k, None), None) | 
 | 559 |                         if nk is not None: | 
 | 560 |                             self.add(next, (nk, i+1)) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 561 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 562 |             if parent == i: | 
 | 563 |                 continue | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 564 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 565 |             for rule in self.states[state].complete: | 
 | 566 |                 lhs, rhs = rule | 
 | 567 |                 for pitem in sets[parent]: | 
 | 568 |                     pstate, pparent = pitem | 
 | 569 |                     #k = self.goto(pstate, lhs) | 
 | 570 |                     k = self.edges.get((pstate, lhs), None) | 
 | 571 |                     if k is not None: | 
 | 572 |                         why = (item, i, rule) | 
 | 573 |                         pptr = (pitem, parent) | 
 | 574 |                         #self.add(cur, (k, pparent), | 
 | 575 |                         #        i, pptr, why) | 
 | 576 |                         #INLINED --v | 
 | 577 |                         new = (k, pparent) | 
 | 578 |                         key = (new, i) | 
 | 579 |                         if new not in cur: | 
 | 580 |                             self.links[key] = [] | 
 | 581 |                             cur.append(new) | 
 | 582 |                         self.links[key].append((pptr, why)) | 
 | 583 |                         #INLINED --^ | 
 | 584 |                         #nk = self.goto(k, None) | 
 | 585 |                         nk = self.edges.get((k, None), None) | 
 | 586 |                         if nk is not None: | 
 | 587 |                             #self.add(cur, (nk, i)) | 
 | 588 |                             #INLINED --v | 
 | 589 |                             new = (nk, i) | 
 | 590 |                             if new not in cur: | 
 | 591 |                                 cur.append(new) | 
 | 592 |                             #INLINED --^ | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 593 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 594 |     def predecessor(self, key, causal): | 
 | 595 |         for p, c in self.links[key]: | 
 | 596 |             if c == causal: | 
 | 597 |                 return p | 
 | 598 |         assert 0 | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 599 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 600 |     def causal(self, key): | 
 | 601 |         links = self.links[key] | 
 | 602 |         if len(links) == 1: | 
 | 603 |             return links[0][1] | 
 | 604 |         choices = [] | 
 | 605 |         rule2cause = {} | 
 | 606 |         for p, c in links: | 
 | 607 |             rule = c[2] | 
 | 608 |             choices.append(rule) | 
 | 609 |             rule2cause[rule] = c | 
 | 610 |         return rule2cause[self.ambiguity(choices)] | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 611 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 612 |     def deriveEpsilon(self, nt): | 
 | 613 |         if len(self.newrules[nt]) > 1: | 
 | 614 |             rule = self.ambiguity(self.newrules[nt]) | 
 | 615 |         else: | 
 | 616 |             rule = self.newrules[nt][0] | 
| Guido van Rossum | 805365e | 2007-05-07 22:24:25 +0000 | [diff] [blame] | 617 |         #output(rule) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 618 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 619 |         rhs = rule[1] | 
 | 620 |         attr = [None] * len(rhs) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 621 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 622 |         for i in range(len(rhs)-1, -1, -1): | 
 | 623 |             attr[i] = self.deriveEpsilon(rhs[i]) | 
 | 624 |         return self.rule2func[self.new2old[rule]](attr) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 625 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 626 |     def buildTree(self, nt, item, tokens, k): | 
 | 627 |         state, parent = item | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 628 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 629 |         choices = [] | 
 | 630 |         for rule in self.states[state].complete: | 
 | 631 |             if rule[0] == nt: | 
 | 632 |                 choices.append(rule) | 
 | 633 |         rule = choices[0] | 
 | 634 |         if len(choices) > 1: | 
 | 635 |             rule = self.ambiguity(choices) | 
| Guido van Rossum | 805365e | 2007-05-07 22:24:25 +0000 | [diff] [blame] | 636 |         #output(rule) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 637 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 638 |         rhs = rule[1] | 
 | 639 |         attr = [None] * len(rhs) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 640 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 641 |         for i in range(len(rhs)-1, -1, -1): | 
 | 642 |             sym = rhs[i] | 
| Fred Drake | 34e4f52 | 2006-12-29 04:42:48 +0000 | [diff] [blame] | 643 |             if sym not in self.newrules: | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 644 |                 if sym != self._BOF: | 
 | 645 |                     attr[i] = tokens[k-1] | 
 | 646 |                     key = (item, k) | 
 | 647 |                     item, k = self.predecessor(key, None) | 
 | 648 |             #elif self.isnullable(sym): | 
 | 649 |             elif self._NULLABLE == sym[0:len(self._NULLABLE)]: | 
 | 650 |                 attr[i] = self.deriveEpsilon(sym) | 
 | 651 |             else: | 
 | 652 |                 key = (item, k) | 
 | 653 |                 why = self.causal(key) | 
 | 654 |                 attr[i] = self.buildTree(sym, why[0], | 
 | 655 |                                          tokens, why[1]) | 
 | 656 |                 item, k = self.predecessor(key, why) | 
 | 657 |         return self.rule2func[self.new2old[rule]](attr) | 
 | 658 |  | 
 | 659 |     def ambiguity(self, rules): | 
 | 660 |         # | 
 | 661 |         #  XXX - problem here and in collectRules() if the same rule | 
 | 662 |         #        appears in >1 method.  Also undefined results if rules | 
 | 663 |         #        causing the ambiguity appear in the same method. | 
 | 664 |         # | 
 | 665 |         sortlist = [] | 
 | 666 |         name2index = {} | 
 | 667 |         for i in range(len(rules)): | 
 | 668 |             lhs, rhs = rule = rules[i] | 
 | 669 |             name = self.rule2name[self.new2old[rule]] | 
 | 670 |             sortlist.append((len(rhs), name)) | 
 | 671 |             name2index[name] = i | 
 | 672 |         sortlist.sort() | 
| Guido van Rossum | 1bc535d | 2007-05-15 18:46:22 +0000 | [diff] [blame] | 673 |         list = [b for a, b in sortlist] | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 674 |         return rules[name2index[self.resolve(list)]] | 
 | 675 |  | 
 | 676 |     def resolve(self, list): | 
 | 677 |         # | 
 | 678 |         #  Resolve ambiguity in favor of the shortest RHS. | 
 | 679 |         #  Since we walk the tree from the top down, this | 
 | 680 |         #  should effectively resolve in favor of a "shift". | 
 | 681 |         # | 
 | 682 |         return list[0] | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 683 |  | 
 | 684 | # | 
 | 685 | #  GenericASTBuilder automagically constructs a concrete/abstract syntax tree | 
 | 686 | #  for a given input.  The extra argument is a class (not an instance!) | 
 | 687 | #  which supports the "__setslice__" and "__len__" methods. | 
 | 688 | # | 
 | 689 | #  XXX - silently overrides any user code in methods. | 
 | 690 | # | 
 | 691 |  | 
 | 692 | class GenericASTBuilder(GenericParser): | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 693 |     def __init__(self, AST, start): | 
 | 694 |         GenericParser.__init__(self, start) | 
 | 695 |         self.AST = AST | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 696 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 697 |     def preprocess(self, rule, func): | 
 | 698 |         rebind = lambda lhs, self=self: \ | 
 | 699 |                         lambda args, lhs=lhs, self=self: \ | 
 | 700 |                                 self.buildASTNode(args, lhs) | 
 | 701 |         lhs, rhs = rule | 
 | 702 |         return rule, rebind(lhs) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 703 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 704 |     def buildASTNode(self, args, lhs): | 
 | 705 |         children = [] | 
 | 706 |         for arg in args: | 
 | 707 |             if isinstance(arg, self.AST): | 
 | 708 |                 children.append(arg) | 
 | 709 |             else: | 
 | 710 |                 children.append(self.terminal(arg)) | 
 | 711 |         return self.nonterminal(lhs, children) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 712 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 713 |     def terminal(self, token):      return token | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 714 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 715 |     def nonterminal(self, type, args): | 
 | 716 |         rv = self.AST(type) | 
 | 717 |         rv[:len(args)] = args | 
 | 718 |         return rv | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 719 |  | 
 | 720 | # | 
 | 721 | #  GenericASTTraversal is a Visitor pattern according to Design Patterns.  For | 
 | 722 | #  each node it attempts to invoke the method n_<node type>, falling | 
 | 723 | #  back onto the default() method if the n_* can't be found.  The preorder | 
 | 724 | #  traversal also looks for an exit hook named n_<node type>_exit (no default | 
 | 725 | #  routine is called if it's not found).  To prematurely halt traversal | 
 | 726 | #  of a subtree, call the prune() method -- this only makes sense for a | 
 | 727 | #  preorder traversal.  Node type is determined via the typestring() method. | 
 | 728 | # | 
 | 729 |  | 
 | 730 | class GenericASTTraversalPruningException: | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 731 |     pass | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 732 |  | 
 | 733 | class GenericASTTraversal: | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 734 |     def __init__(self, ast): | 
 | 735 |         self.ast = ast | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 736 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 737 |     def typestring(self, node): | 
 | 738 |         return node.type | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 739 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 740 |     def prune(self): | 
 | 741 |         raise GenericASTTraversalPruningException | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 742 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 743 |     def preorder(self, node=None): | 
 | 744 |         if node is None: | 
 | 745 |             node = self.ast | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 746 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 747 |         try: | 
 | 748 |             name = 'n_' + self.typestring(node) | 
 | 749 |             if hasattr(self, name): | 
 | 750 |                 func = getattr(self, name) | 
 | 751 |                 func(node) | 
 | 752 |             else: | 
 | 753 |                 self.default(node) | 
 | 754 |         except GenericASTTraversalPruningException: | 
 | 755 |             return | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 756 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 757 |         for kid in node: | 
 | 758 |             self.preorder(kid) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 759 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 760 |         name = name + '_exit' | 
 | 761 |         if hasattr(self, name): | 
 | 762 |             func = getattr(self, name) | 
 | 763 |             func(node) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 764 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 765 |     def postorder(self, node=None): | 
 | 766 |         if node is None: | 
 | 767 |             node = self.ast | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 768 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 769 |         for kid in node: | 
 | 770 |             self.postorder(kid) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 771 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 772 |         name = 'n_' + self.typestring(node) | 
 | 773 |         if hasattr(self, name): | 
 | 774 |             func = getattr(self, name) | 
 | 775 |             func(node) | 
 | 776 |         else: | 
 | 777 |             self.default(node) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 778 |  | 
 | 779 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 780 |     def default(self, node): | 
 | 781 |         pass | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 782 |  | 
 | 783 | # | 
 | 784 | #  GenericASTMatcher.  AST nodes must have "__getitem__" and "__cmp__" | 
 | 785 | #  implemented. | 
 | 786 | # | 
 | 787 | #  XXX - makes assumptions about how GenericParser walks the parse tree. | 
 | 788 | # | 
 | 789 |  | 
 | 790 | class GenericASTMatcher(GenericParser): | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 791 |     def __init__(self, start, ast): | 
 | 792 |         GenericParser.__init__(self, start) | 
 | 793 |         self.ast = ast | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 794 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 795 |     def preprocess(self, rule, func): | 
 | 796 |         rebind = lambda func, self=self: \ | 
 | 797 |                         lambda args, func=func, self=self: \ | 
 | 798 |                                 self.foundMatch(args, func) | 
 | 799 |         lhs, rhs = rule | 
 | 800 |         rhslist = list(rhs) | 
 | 801 |         rhslist.reverse() | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 802 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 803 |         return (lhs, tuple(rhslist)), rebind(func) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 804 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 805 |     def foundMatch(self, args, func): | 
 | 806 |         func(args[-1]) | 
 | 807 |         return args[-1] | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 808 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 809 |     def match_r(self, node): | 
 | 810 |         self.input.insert(0, node) | 
 | 811 |         children = 0 | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 812 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 813 |         for child in node: | 
 | 814 |             if children == 0: | 
 | 815 |                 self.input.insert(0, '(') | 
 | 816 |             children = children + 1 | 
 | 817 |             self.match_r(child) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 818 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 819 |         if children > 0: | 
 | 820 |             self.input.insert(0, ')') | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 821 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 822 |     def match(self, ast=None): | 
 | 823 |         if ast is None: | 
 | 824 |             ast = self.ast | 
 | 825 |         self.input = [] | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 826 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 827 |         self.match_r(ast) | 
 | 828 |         self.parse(self.input) | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 829 |  | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 830 |     def resolve(self, list): | 
 | 831 |         # | 
 | 832 |         #  Resolve ambiguity in favor of the longest RHS. | 
 | 833 |         # | 
 | 834 |         return list[-1] | 
| Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 835 |  | 
 | 836 | def _dump(tokens, sets, states): | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 837 |     for i in range(len(sets)): | 
| Guido van Rossum | 805365e | 2007-05-07 22:24:25 +0000 | [diff] [blame] | 838 |         output('set %d' % i) | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 839 |         for item in sets[i]: | 
| Guido van Rossum | 805365e | 2007-05-07 22:24:25 +0000 | [diff] [blame] | 840 |             output('\t', item) | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 841 |             for (lhs, rhs), pos in states[item[0]].items: | 
| Guido van Rossum | 805365e | 2007-05-07 22:24:25 +0000 | [diff] [blame] | 842 |                 output('\t\t', lhs, '::=', end='') | 
 | 843 |                 output(' '.join(rhs[:pos]), end='') | 
 | 844 |                 output('.', end='') | 
 | 845 |                 output(' '.join(rhs[pos:])) | 
| Tim Peters | 536cf99 | 2005-12-25 23:18:31 +0000 | [diff] [blame] | 846 |         if i < len(tokens): | 
| Guido van Rossum | 805365e | 2007-05-07 22:24:25 +0000 | [diff] [blame] | 847 |             output() | 
 | 848 |             output('token %s' % str(tokens[i])) | 
 | 849 |             output() |