Georg Brandl | bc470d5 | 2009-10-11 15:56:06 +0000 | [diff] [blame] | 1 | """ |
| 2 | A simple demo that reads in an XML document and displays the number of |
| 3 | elements and attributes as well as a tally of elements and attributes by name. |
| 4 | """ |
| 5 | |
Fred Drake | ac5f748 | 2000-10-16 15:27:05 +0000 | [diff] [blame] | 6 | import sys |
Georg Brandl | bc470d5 | 2009-10-11 15:56:06 +0000 | [diff] [blame] | 7 | from collections import defaultdict |
Fred Drake | ac5f748 | 2000-10-16 15:27:05 +0000 | [diff] [blame] | 8 | |
| 9 | from xml.sax import make_parser, handler |
| 10 | |
class FancyCounter(handler.ContentHandler):
    """SAX content handler that tallies elements and attributes.

    Totals and per-name counts are accumulated as start tags are reported
    and printed to standard output when the end of the document is reached.
    """

    def __init__(self):
        # Initialise the base handler's own state before adding ours.
        super().__init__()
        self._elems = 0                      # total start tags seen
        self._attrs = 0                      # total attributes seen
        self._elem_types = defaultdict(int)  # element name -> occurrences
        self._attr_types = defaultdict(int)  # attribute name -> occurrences

    def startElement(self, name, attrs):
        """Record one element called *name* and every attribute in *attrs*."""
        self._elems += 1
        self._attrs += len(attrs)
        self._elem_types[name] += 1

        # A distinct loop variable keeps the element-name parameter intact.
        for attr_name in attrs.keys():
            self._attr_types[attr_name] += 1

    def endDocument(self):
        """Print the overall totals followed by the per-name tallies."""
        print("There were", self._elems, "elements.")
        print("There were", self._attrs, "attributes.")

        print("---ELEMENT TYPES")
        for elem_name, count in self._elem_types.items():
            print("%20s %d" % (elem_name, count))

        print("---ATTRIBUTE TYPES")
        for attr_name, count in self._attr_types.items():
            print("%20s %d" % (attr_name, count))
Fred Drake | ac5f748 | 2000-10-16 15:27:05 +0000 | [diff] [blame] | 38 | |
if __name__ == '__main__':
    # Fail with a usage message instead of an IndexError traceback when the
    # document path is missing from the command line.
    if len(sys.argv) < 2:
        sys.exit("usage: %s XMLFILE" % sys.argv[0])

    # Parse the document named by the first argument; FancyCounter prints
    # its tallies when the parser reports the end of the document.
    parser = make_parser()
    parser.setContentHandler(FancyCounter())
    parser.parse(sys.argv[1])