"""
A demo that reads in an RSS XML document and emits an HTML file containing
a list of the individual items in the feed.
"""

import codecs
import sys

from xml.sax import make_parser, handler
from xml.sax.saxutils import escape

# --- Templates

# Page header template.  It has two %s slots, one inside <title> and one
# inside <h1>; RSSHandler fills both with the channel title.
top = (
    '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">\n'
    '<html>\n'
    '<head>\n'
    '<title>%s</title>\n'
    '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n'
    '</head>\n'
    '\n'
    '<body>\n'
    '<h1>%s</h1>\n'
)

# Page footer template.  It closes the <ul> item list, then adds the
# attribution block and the closing body/html tags.  The empty first and
# last entries give the text its leading and trailing newline.
bottom = "\n".join([
    "",
    "</ul>",
    "",
    "<hr>",
    "<address>",
    "Converted to HTML by rss2html.py.",
    "</address>",
    "",
    "</body>",
    "</html>",
    "",
])

# --- The ContentHandler

class RSSHandler(handler.ContentHandler):
    """SAX content handler that renders an RSS feed as an HTML page.

    The channel title is written through the ``top`` template, the channel
    description as a paragraph, and every item as one ``<li>`` entry linking
    to the item.  The ``bottom`` template is written when the ``rss``
    element closes.  All output is encoded as UTF-8, matching the charset
    declared in ``top``.
    """

    def __init__(self, out=sys.stdout):
        """Set up the handler to write the generated HTML to *out*.

        *out* is either a binary stream, or a text stream that exposes its
        binary layer as ``buffer`` (as ``sys.stdout`` does on Python 3).
        """
        handler.ContentHandler.__init__(self)
        # The codecs writer hands encoded bytes to the stream it wraps, and
        # a text stream rejects bytes, so target its binary layer when it
        # has one.
        self._out = codecs.getwriter('utf-8')(getattr(out, 'buffer', out))

        self._text = ""               # character data of the current element
        self._open_elements = []      # names of the elements currently open
        self._list_started = False    # True once "<ul>" has been written
        self._title = None            # fields of the item being collected
        self._link = None
        self._descr = ""

    # ContentHandler methods

    def startElement(self, name, attrs):
        """Record *name* as open and start collecting its text afresh."""
        self._open_elements.append(name)
        self._text = ""

    def endElement(self, name):
        """Act on the element *name* that has just been closed."""
        if self._open_elements:
            self._open_elements.pop()
        # Dispatch on the element directly enclosing *name*, so that e.g. an
        # image's <title> is never mistaken for the channel's, and channel
        # metadata following an <image> or <item> is still recognised.
        parent = self._open_elements[-1] if self._open_elements else None

        if parent == "channel":
            if name == "title":
                heading = escape(self._text)
                self._out.write(top % (heading, heading))
            elif name == "description":
                # NOTE: written unescaped, as before -- RSS descriptions may
                # carry HTML markup.  An untrusted feed can therefore inject
                # arbitrary markup into the page.
                self._out.write("<p>%s</p>\n" % self._text)

        elif parent == "item":
            if name == "title":
                self._title = self._text
            elif name == "link":
                self._link = self._text
            elif name == "description":
                self._descr = self._text

        if name == "item":
            self._write_item()
        elif name == "rss":
            if not self._list_started:
                # ``bottom`` closes the list unconditionally; open it here
                # so a feed without items still yields balanced tags.
                self._out.write("<ul>\n")
                self._list_started = True
            self._out.write(bottom)

    def characters(self, content):
        """Append *content* to the text of the element being read."""
        self._text += content

    # Internal helpers

    def _write_item(self):
        """Write the collected item as a list entry and reset its fields."""
        if not self._list_started:
            self._out.write("<ul>\n")
            self._list_started = True

        title = escape(self._title or "")
        if self._link:
            # The link lands inside a double-quoted attribute, so quotes
            # have to be escaped as well as &, < and >.
            href = escape(self._link, {'"': "&quot;"})
            self._out.write(' <li><a href="%s">%s</a> %s\n' %
                            (href, title or href, self._descr))
        else:
            # No <link> in the item: emit a plain entry instead of a bogus
            # href="None" anchor.
            self._out.write(' <li>%s %s\n' % (title, self._descr))

        self._title = None
        self._link = None
        self._descr = ""

# --- Main program

def main(argv=None):
    """Convert the RSS document named by the first argument to HTML.

    *argv* is the list of command-line arguments without the program name;
    it defaults to ``sys.argv[1:]``.  The first argument is passed to the
    SAX parser as the input source; any further arguments are ignored.

    Returns the process exit status: 0 on success, 2 when no input source
    was given.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        # Report the missing argument instead of dying with an IndexError.
        sys.stderr.write("usage: rss2html.py RSS_FILE_OR_URL\n")
        return 2

    parser = make_parser()
    parser.setContentHandler(RSSHandler())
    parser.parse(args[0])
    return 0


if __name__ == '__main__':
    sys.exit(main())