Issue #16099: RobotFileParser now supports the Crawl-delay and Request-rate
extensions to robots.txt, exposed through the new crawl_delay() and
request_rate() methods.

Patch by Nikolay Bogoychev.
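
A minimal usage sketch of the new methods (the robots.txt URL below is only
a placeholder): crawl_delay() returns the delay in seconds as an int, and
request_rate() returns a named tuple with requests and seconds fields; both
return None when no matching entry specifies a value.

    import urllib.robotparser

    rp = urllib.robotparser.RobotFileParser()
    rp.set_url("http://www.example.com/robots.txt")   # placeholder URL
    rp.read()

    # Crawl-delay for the given user agent, or None
    print(rp.crawl_delay("*"))

    # Request-rate for the given user agent, or None
    rate = rp.request_rate("*")
    if rate is not None:
        print(rate.requests, "requests per", rate.seconds, "seconds")
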
diff --git a/Lib/urllib/robotparser.py b/Lib/urllib/robotparser.py
index 4fbb0cb..4ac553a 100644
--- a/Lib/urllib/robotparser.py
+++ b/Lib/urllib/robotparser.py
@@ -10,7 +10,9 @@
     http://www.robotstxt.org/norobots-rfc.txt
 """
 
-import urllib.parse, urllib.request
+import collections
+import urllib.parse
+import urllib.request
 
 __all__ = ["RobotFileParser"]
 
@@ -120,10 +122,29 @@
                     if state != 0:
                         entry.rulelines.append(RuleLine(line[1], True))
                         state = 2
+                elif line[0] == "crawl-delay":
+                    if state != 0:
+                        # verify that the value is a well-formed integer
+                        # before converting it, so that a malformed
+                        # robots.txt does not raise an exception
+                        if line[1].strip().isdigit():
+                            entry.delay = int(line[1])
+                        state = 2
+                elif line[0] == "request-rate":
+                    if state != 0:
+                        numbers = line[1].split('/')
+                        # accept the rate only if both the request count
+                        # and the time window are well-formed integers
+                        if (len(numbers) == 2 and numbers[0].strip().isdigit()
+                            and numbers[1].strip().isdigit()):
+                            req_rate = collections.namedtuple('req_rate',
+                                                              'requests seconds')
+                            entry.req_rate = req_rate(int(numbers[0]),
+                                                      int(numbers[1]))
+                        state = 2
         if state == 2:
             self._add_entry(entry)
 
-
     def can_fetch(self, useragent, url):
         """using the parsed robots.txt decide if useragent can fetch url"""
         if self.disallow_all:
@@ -153,6 +174,18 @@
         # agent not found ==> access granted
         return True
 
+    def crawl_delay(self, useragent):
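+        """return the crawl delay specified for useragent, or None"""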
+        for entry in self.entries:
+            if entry.applies_to(useragent):
+                return entry.delay
+        return None
+
+    def request_rate(self, useragent):
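+        """return the request rate specified for useragent, or None"""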
+        for entry in self.entries:
+            if entry.applies_to(useragent):
+                return entry.req_rate
+        return None
+
     def __str__(self):
         return ''.join([str(entry) + "\n" for entry in self.entries])
 
@@ -180,6 +213,8 @@
     def __init__(self):
         self.useragents = []
         self.rulelines = []
+        self.delay = None       # Crawl-delay in seconds, or None
+        self.req_rate = None    # Request-rate as (requests, seconds), or None
 
     def __str__(self):
         ret = []