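Issue #1285086: Get rid of the refcounting hack and speed up urllib.unquote().

The "refcounting hack" is the old loop that built the result with repeated
`s += ...`, which is only fast when CPython can resize the string in place
because nothing else references it. The pieces are now collected in a list
and joined once with ''.join(). Unicode input is handled up front in its own
branch (ASCII runs are unquoted as str and decoded as latin1) instead of via
the former UnicodeDecodeError fallback.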
diff --git a/Lib/urllib.py b/Lib/urllib.py
index 33641a5..f9655f9 100644
--- a/Lib/urllib.py
+++ b/Lib/urllib.py
@@ -28,6 +28,7 @@
import time
import sys
import base64
+import re
from urlparse import urljoin as basejoin
@@ -1198,22 +1199,35 @@
_hexdig = '0123456789ABCDEFabcdef'
_hextochr = dict((a + b, chr(int(a + b, 16)))
for a in _hexdig for b in _hexdig)
+_asciire = re.compile('([\x00-\x7f]+)')
def unquote(s):
"""unquote('abc%20def') -> 'abc def'."""
- res = s.split('%')
+ if _is_unicode(s):
+ if '%' not in s:
+ return s
+ bits = _asciire.split(s)
+ res = [bits[0]]
+ append = res.append
+ for i in range(1, len(bits), 2):
+ append(unquote(str(bits[i])).decode('latin1'))
+ append(bits[i + 1])
+ return ''.join(res)
+
+ bits = s.split('%')
# fastpath
- if len(res) == 1:
+ if len(bits) == 1:
return s
- s = res[0]
- for item in res[1:]:
+ res = [bits[0]]
+ append = res.append
+ for item in bits[1:]:
try:
- s += _hextochr[item[:2]] + item[2:]
+ append(_hextochr[item[:2]])
+ append(item[2:])
except KeyError:
- s += '%' + item
- except UnicodeDecodeError:
- s += unichr(int(item[:2], 16)) + item[2:]
- return s
+ append('%')
+ append(item)
+ return ''.join(res)
def unquote_plus(s):
"""unquote('%7e/abc+def') -> '~/abc def'"""