Georg Brandl | 32855b6 | 2009-10-11 15:06:44 +0000 | [diff] [blame] | 1 | """ |
| 2 | A simple demo that reads in an XML document and displays the number of |
| 3 | elements and attributes as well as a tally of elements and attributes by name. |
| 4 | """ |
| 5 | |
Fred Drake | ac5f748 | 2000-10-16 15:27:05 +0000 | [diff] [blame] | 6 | import sys |
Georg Brandl | 32855b6 | 2009-10-11 15:06:44 +0000 | [diff] [blame] | 7 | from collections import defaultdict |
Fred Drake | ac5f748 | 2000-10-16 15:27:05 +0000 | [diff] [blame] | 8 | |
| 9 | from xml.sax import make_parser, handler |
| 10 | |
class FancyCounter(handler.ContentHandler):
    """SAX content handler that tallies elements and attributes.

    Totals and per-name counts are accumulated as start tags are reported,
    and a summary is printed to standard output when the document ends.
    """

    def __init__(self):
        """Initialise the base handler and zero all counters."""
        # Explicit base-class call (instead of super()) keeps this working on
        # both Python 2 and Python 3.
        handler.ContentHandler.__init__(self)
        self._elems = 0                      # total number of start tags seen
        self._attrs = 0                      # total number of attributes seen
        self._elem_types = defaultdict(int)  # element name -> occurrences
        self._attr_types = defaultdict(int)  # attribute name -> occurrences

    def startElement(self, name, attrs):
        """Record one element called *name* and each attribute in *attrs*.

        *attrs* only needs to support ``len()`` and ``keys()``.
        """
        self._elems += 1
        self._attrs += len(attrs)
        self._elem_types[name] += 1

        # A distinct loop variable avoids shadowing the element name.
        for attr_name in attrs.keys():
            self._attr_types[attr_name] += 1

    def endDocument(self):
        """Print the totals followed by the per-name tallies.

        Each tally is listed in sorted name order so the output is
        deterministic.
        """
        # Single-argument print(...) calls produce identical text whether
        # print is a statement (Python 2) or a function (Python 3).
        print("There were %s elements." % self._elems)
        print("There were %s attributes." % self._attrs)

        print("---ELEMENT TYPES")
        for pair in sorted(self._elem_types.items()):
            print("%20s %d" % pair)

        print("---ATTRIBUTE TYPES")
        for pair in sorted(self._attr_types.items()):
            print("%20s %d" % pair)
| 38 | |
if __name__ == '__main__':
    # Exit with a usage message rather than an IndexError traceback when the
    # XML file argument is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: %s <xml-file>" % sys.argv[0])

    # Parse the file named on the command line; FancyCounter prints its
    # summary when the parser reports the end of the document.
    parser = make_parser()
    parser.setContentHandler(FancyCounter())
    parser.parse(sys.argv[1])