"""An implementation of the Zephyr Abstract Syntax Definition Language.

See http://asdl.sourceforge.net/ and
http://www.cs.princeton.edu/~danwang/Papers/dsl97/dsl97-abstract.html.

Only supports the top-level module declaration, not view.  I'm guessing
that view is intended to support the browser and I'm not interested in
the browser.

Changes for Python: Add support for module versions.
"""
| 12 | |
| 13 | #__metaclass__ = type |
| 14 | |
| 15 | import os |
| 16 | import traceback |
| 17 | |
| 18 | import spark |
| 19 | |
class Token:
    """A lexical token: a ``type`` string plus the line it was found on.

    spark seems to dispatch in the parser based on a token's ``type``
    attribute, so every token exposes one.
    """

    def __init__(self, type, lineno):
        self.lineno = lineno
        self.type = type

    def __repr__(self):
        # The repr is deliberately the same text as str().
        return str(self)

    def __str__(self):
        # A plain token is displayed as its type.
        return self.type
| 32 | |
class Id(Token):
    """Identifier token; ``value`` holds the matched text."""

    def __init__(self, value, lineno):
        # Token.__init__ is not called; the same attributes are set here
        # with the type fixed to 'Id'.
        self.type, self.value, self.lineno = 'Id', value, lineno

    def __str__(self):
        # Unlike a plain Token, an Id is displayed as its text.
        return self.value
Tim Peters | 710ab3b | 2006-02-28 18:30:36 +0000 | [diff] [blame] | 41 | |
class String(Token):
    """String-literal token; ``value`` holds the matched text."""

    def __init__(self, value, lineno):
        # Token.__init__ is not called; the same attributes are set here
        # with the type fixed to 'String'.
        self.type, self.value, self.lineno = 'String', value, lineno
Jeremy Hylton | 3e0055f | 2005-10-20 19:59:25 +0000 | [diff] [blame] | 47 | |
class ASDLSyntaxError(Exception):
    """Syntax error found while parsing an ASDL description.

    Derives from Exception so that it can be raised on every Python
    version and is seen by generic ``except Exception`` handlers.

    Attributes:
        lineno -- line number the error is reported for
        token  -- offending token, used when no explicit message is given
        msg    -- explicit message; takes precedence over ``token``
    """

    def __init__(self, lineno, token=None, msg=None):
        super(ASDLSyntaxError, self).__init__(lineno, token, msg)
        self.lineno = lineno
        self.token = token
        self.msg = msg

    def __str__(self):
        if self.msg is not None:
            return "%s, line %d" % (self.msg, self.lineno)
        # No explicit message: fall back to naming the offending token.
        return "Error at '%s', line %d" % (self.token, self.lineno)
| 60 | |
class ASDLScanner(spark.GenericScanner, object):
    """Scanner that turns ASDL source text into a list of tokens.

    The docstring of each ``t_*`` method is the regular expression for
    the input that method handles, so those strings are data, not
    documentation, and must not be edited casually.
    """

    def tokenize(self, input):
        # Reset the per-run state, then let the base class drive the
        # t_* callbacks, which fill self.rv and advance self.lineno.
        self.lineno = 1
        self.rv = []
        super(ASDLScanner, self).tokenize(input)
        return self.rv

    def t_id(self, s):
        r"[\w\.]+"
        # XXX doesn't distinguish upper vs. lower, which is
        # significant for ASDL.
        token = Id(s, self.lineno)
        self.rv.append(token)

    def t_string(self, s):
        r'"[^"]*"'
        token = String(s, self.lineno)
        self.rv.append(token)

    def t_xxx(self, s): # not sure what this production means
        r"<="
        token = Token(s, self.lineno)
        self.rv.append(token)

    def t_punctuation(self, s):
        r"[\{\}\*\=\|\(\)\,\?\:]"
        # Punctuation tokens use the matched character as their type.
        token = Token(s, self.lineno)
        self.rv.append(token)

    def t_comment(self, s):
        r"\-\-[^\n]*"
        # Comments produce no token.
        pass

    def t_newline(self, s):
        r"\n"
        # Newlines produce no token; they only advance the line counter.
        self.lineno = self.lineno + 1

    def t_whitespace(self, s):
        r"[ \t]+"
        # Blanks and tabs produce no token.
        pass

    def t_default(self, s):
        r" . +"
        # Anything no other pattern claimed is reported as an error.
        raise ValueError("unmatched input: %s" % repr(s))
| 102 | |
class ASDLParser(spark.GenericParser, object):
    """Parser that builds the ASDL syntax tree from scanner tokens.

    The docstring of every ``p_*`` method is the grammar rule that the
    method handles, so those strings are data, not documentation.  Each
    method receives the values matched by the rule's right-hand side as
    a single sequence and returns the value for the left-hand side.

    Field lists are accumulated in reverse (see p_fields_1) and put back
    in source order by the rules that consume them.
    """

    def __init__(self):
        # "module" is handed to the base class; presumably it names the
        # start symbol of the grammar.
        super(ASDLParser, self).__init__("module")

    def typestring(self, tok):
        return tok.type

    def error(self, tok):
        raise ASDLSyntaxError(tok.lineno, tok)

    def p_module_0(self, args):
        " module ::= Id Id version { } "
        module, name, version, _0, _1 = args
        # The scanner has no keyword tokens, so the leading Id is
        # checked by hand.
        if module.value != "module":
            raise ASDLSyntaxError(module.lineno,
                                  msg="expected 'module', found %s" % module)
        return Module(name, None, version)

    def p_module(self, args):
        " module ::= Id Id version { definitions } "
        module, name, version, _0, definitions, _1 = args
        if module.value != "module":
            raise ASDLSyntaxError(module.lineno,
                                  msg="expected 'module', found %s" % module)
        return Module(name, definitions, version)

    def p_version(self, args):
        "version ::= Id String"
        version, V = args
        if version.value != "version":
            # The original format string ended in a bare "%", which
            # raised ValueError instead of the intended syntax error.
            raise ASDLSyntaxError(version.lineno,
                                  msg="expected 'version', found %s" % version)
        return V

    def p_definition_0(self, args):
        " definitions ::= definition "
        (definition,) = args
        # p_definition already returns a one-element list.
        return definition

    def p_definition_1(self, args):
        " definitions ::= definition definitions "
        # Both values are lists; the single definition comes first.
        definition, definitions = args
        return definition + definitions

    def p_definition(self, args):
        " definition ::= Id = type "
        name, _, value = args
        return [Type(name, value)]

    def p_type_0(self, args):
        " type ::= product "
        (product,) = args
        return product

    def p_type_1(self, args):
        " type ::= sum "
        (constructors,) = args
        return Sum(constructors)

    def p_type_2(self, args):
        " type ::= sum Id ( fields ) "
        constructors, keyword, _0, attributes, _1 = args
        if keyword.value != "attributes":
            raise ASDLSyntaxError(keyword.lineno,
                                  msg="expected attributes, found %s" % keyword)
        if attributes:
            # Fields arrive in reverse order; restore source order.
            attributes.reverse()
        return Sum(constructors, attributes)

    def p_product(self, args):
        " product ::= ( fields ) "
        _0, fields, _1 = args
        # XXX can't I just construct things in the right order?
        fields.reverse()
        return Product(fields)

    def p_sum_0(self, args):
        " sum ::= constructor "
        (constructor,) = args
        return [constructor]

    def p_sum_1(self, args):
        " sum ::= constructor | sum "
        constructor, _, rest = args
        return [constructor] + rest

    def p_sum_2(self, args):
        " sum ::= constructor | sum "
        # Same rule and action as p_sum_1; kept so that the set of
        # registered rules is unchanged.
        constructor, _, rest = args
        return [constructor] + rest

    def p_constructor_0(self, args):
        " constructor ::= Id "
        (name,) = args
        return Constructor(name)

    def p_constructor_1(self, args):
        " constructor ::= Id ( fields ) "
        name, _0, fields, _1 = args
        # XXX can't I just construct things in the right order?
        fields.reverse()
        return Constructor(name, fields)

    def p_fields_0(self, args):
        " fields ::= field "
        (field,) = args
        return [field]

    def p_fields_1(self, args):
        " fields ::= field , fields "
        field, _, fields = args
        # Appending the leading field last yields a reversed list.
        return fields + [field]

    def p_field_0(self, args):
        " field ::= Id "
        (field_type,) = args
        return Field(field_type)

    def p_field_1(self, args):
        " field ::= Id Id "
        field_type, name = args
        return Field(field_type, name)

    def p_field_2(self, args):
        " field ::= Id * Id "
        field_type, _, name = args
        return Field(field_type, name, seq=1)

    def p_field_3(self, args):
        " field ::= Id ? Id "
        field_type, _, name = args
        return Field(field_type, name, opt=1)

    def p_field_4(self, args):
        " field ::= Id * "
        field_type, _ = args
        return Field(field_type, seq=1)

    def p_field_5(self, args):
        " field ::= Id ? "
        field_type, _ = args
        return Field(field_type, opt=1)
| 222 | |
# Type names that check() accepts without a definition in the module.
builtin_types = (
    "identifier",
    "string",
    "int",
    "bool",
    "object",
)
| 224 | |
| 225 | # below is a collection of classes to capture the AST of an AST :-) |
| 226 | # not sure if any of the methods are useful yet, but I'm adding them |
| 227 | # piecemeal as they seem helpful |
| 228 | |
class AST:
    """Marker base class for the syntax-tree node classes."""
| 231 | |
class Module(AST):
    """Root node: a named, versioned collection of type definitions.

    Attributes:
        name    -- module name as passed in
        dfns    -- list of definitions (empty for an empty module body)
        version -- version value as passed in
        types   -- maps each definition's ``name.value`` to its ``value``
    """

    def __init__(self, name, dfns, version):
        self.name = name
        # The parser passes None for a module with an empty body;
        # normalise it so iteration and len() work on self.dfns.
        self.dfns = dfns or []
        self.version = version
        self.types = dict((dfn.name.value, dfn.value) for dfn in self.dfns)

    def __repr__(self):
        return "Module(%s, %s)" % (self.name, self.dfns)
| 243 | |
class Type(AST):
    """One definition: a ``name`` bound to a ``value`` node."""

    def __init__(self, name, value):
        self.name, self.value = name, value

    def __repr__(self):
        shown = (self.name, self.value)
        return "Type(%s, %s)" % shown
| 251 | |
class Constructor(AST):
    """A constructor: a ``name`` and a list of ``fields``."""

    def __init__(self, name, fields=None):
        self.name = name
        # A missing (or empty) field list is stored as a fresh list.
        self.fields = fields or []

    def __repr__(self):
        shown = (self.name, self.fields)
        return "Constructor(%s, %s)" % shown
| 259 | |
class Field(AST):
    """A field: a ``type``, an optional ``name`` and seq/opt flags."""

    def __init__(self, type, name=None, seq=0, opt=0):
        self.type, self.name = type, name
        self.seq, self.opt = seq, opt

    def __repr__(self):
        # seq takes precedence over opt when both are set.
        if self.seq:
            flag = ", seq=1"
        elif self.opt:
            flag = ", opt=1"
        else:
            flag = ""
        if self.name is not None:
            return "Field(%s, %s%s)" % (self.type, self.name, flag)
        return "Field(%s%s)" % (self.type, flag)
| 278 | |
class Sum(AST):
    """A sum type: a list of ``types`` plus optional ``attributes``."""

    def __init__(self, types, attributes=None):
        self.types = types
        # A missing (or empty) attribute list is stored as a fresh list,
        # so self.attributes is never None.
        self.attributes = attributes or []

    def __repr__(self):
        # Test for emptiness: the attribute is never None, so the former
        # "is None" check could not select the short form.
        if not self.attributes:
            return "Sum(%s)" % self.types
        return "Sum(%s, %s)" % (self.types, self.attributes)
| 289 | |
class Product(AST):
    """A product type: a list of ``fields``."""

    def __init__(self, fields):
        self.fields = fields

    def __repr__(self):
        return "Product(%s)" % (self.fields,)
| 296 | |
class VisitorBase(object):
    """Base class for visitors over AST nodes.

    A node of class ``Foo`` is handled by the method ``visitFoo``.  With
    a true ``skip``, node classes without such a method are ignored;
    otherwise a missing method raises AttributeError.
    """

    def __init__(self, skip=0):
        self.skip = skip
        self.cache = {}  # maps node class -> handler found by _dispatch

    def visit(self, object, *args):
        handler = self._dispatch(object)
        if handler is not None:
            try:
                handler(object, *args)
            except Exception as err:
                print("Error visiting %s" % repr(object))
                print(err)
                traceback.print_exc()
                # XXX hack
                if hasattr(self, 'file'):
                    self.file.flush()
                # Terminate the whole process immediately.
                os._exit(1)

    def _dispatch(self, object):
        assert isinstance(object, AST), repr(object)
        node_class = object.__class__
        handler = self.cache.get(node_class)
        if handler is not None:
            return handler
        method_name = "visit" + node_class.__name__
        if self.skip:
            # Missing handlers are tolerated and yield None.
            handler = getattr(self, method_name, None)
        else:
            handler = getattr(self, method_name)
        self.cache[node_class] = handler
        return handler
| 330 | |
class Check(VisitorBase):
    """Visitor that records constructor and type usage in a module.

    Attributes:
        cons   -- maps constructor name to the type that defined it
        errors -- number of problems found so far
        types  -- maps a field's type name to the names using it
    """

    def __init__(self):
        # skip=1: node classes without a visit method are ignored.
        super(Check, self).__init__(skip=1)
        self.cons = {}
        self.errors = 0
        self.types = {}

    def visitModule(self, mod):
        for definition in mod.dfns:
            self.visit(definition)

    def visitType(self, type):
        type_name = str(type.name)
        self.visit(type.value, type_name)

    def visitSum(self, sum, name):
        for alternative in sum.types:
            self.visit(alternative, name)

    def visitConstructor(self, cons, name):
        key = str(cons.name)
        previous = self.cons.get(key)
        if previous is not None:
            # The same constructor name was already seen.
            print("Redefinition of constructor %s" % key)
            print("Defined in %s and %s" % (previous, name))
            self.errors += 1
        else:
            self.cons[key] = name
        for field in cons.fields:
            self.visit(field, key)

    def visitField(self, field, name):
        # Remember which name uses this field type.
        users = self.types.setdefault(str(field.type), [])
        users.append(name)

    def visitProduct(self, prod, name):
        for field in prod.fields:
            self.visit(field, name)
| 370 | |
def check(mod):
    """Run Check over *mod* and report undefined types.

    Returns true when no errors were counted, false otherwise.
    """
    checker = Check()
    checker.visit(mod)

    for type_name in checker.types:
        # A used type is fine if the module defines it or it is builtin.
        if type_name in mod.types or type_name in builtin_types:
            continue
        checker.errors += 1
        uses = ", ".join(checker.types[type_name])
        print("Undefined type %s, used in %s" % (type_name, uses))

    return not checker.errors
| 382 | |
def parse(file):
    """Parse the ASDL description stored in the file named *file*.

    Returns the value produced by the parser, or None when an
    ASDLSyntaxError occurred; in that case the error and the offending
    source line are printed.
    """
    scanner = ASDLScanner()
    parser = ASDLParser()

    # Close the file explicitly instead of relying on garbage collection.
    source = open(file)
    try:
        buf = source.read()
    finally:
        source.close()

    tokens = scanner.tokenize(buf)
    try:
        return parser.parse(tokens)
    except ASDLSyntaxError as err:
        print(err)
        lines = buf.split("\n")
        print(lines[err.lineno - 1]) # lines starts at 0, files at 1
    return None
| 395 | |
if __name__ == "__main__":
    # Script entry point: parse and check each file named on the command
    # line, or every tests/*.asdl file when no argument is given.
    import glob
    import sys

    if len(sys.argv) > 1:
        files = sys.argv[1:]
    else:
        testdir = "tests"
        files = glob.glob(testdir + "/*.asdl")

    for file in files:
        print(file)
        mod = parse(file)
        if mod is None:
            # parse() has already reported the syntax error.
            continue
        print("module %s" % mod.name)
        print("%d definitions" % len(mod.dfns))
        if not check(mod):
            print("Check failed")
        else:
            for dfn in mod.dfns:
                # Type nodes have name and value attributes, no "type".
                print("%s %s" % (dfn.name, dfn.value))