Catch I/O errors when parsing robots.txt file.
Add version number, printed at startup in non-quiet mode.
diff --git a/Tools/webchecker/webchecker.py b/Tools/webchecker/webchecker.py
index 6b6fbf5..2ec9b61 100755
--- a/Tools/webchecker/webchecker.py
+++ b/Tools/webchecker/webchecker.py
@@ -93,6 +93,8 @@
"""
+__version__ = "0.1"
+
import sys
import os
@@ -135,7 +137,6 @@
except getopt.error, msg:
sys.stdout = sys.stderr
print msg
- print __doc__ % globals()
sys.exit(2)
for o, a in opts:
if o == '-R':
@@ -151,6 +152,9 @@
if o == '-v':
verbose = verbose + 1
+ if verbose:
+ print AGENTNAME, "version", __version__
+
if restart:
if verbose > 0:
print "Loading checkpoint from %s ..." % dumpfile
@@ -234,13 +238,17 @@
self.addrobot(root)
def addrobot(self, root):
- self.robots[root] = rp = robotparser.RobotFileParser()
- if verbose > 3:
- print "Parsing robots.txt file"
- rp.debug = 1
url = urlparse.urljoin(root, "/robots.txt")
+ self.robots[root] = rp = robotparser.RobotFileParser()
+ if verbose > 2:
+ print "Parsing", url
+ rp.debug = 1
rp.set_url(url)
- rp.read()
+ try:
+ rp.read()
+ except IOError, msg:
+ if verbose > 1:
+ print "I/O error parsing", url, ":", msg
def run(self):
while self.todo: