"""PyPI and direct package downloading"""

import sys, os.path, re, urlparse, urllib2, shutil, random, socket
from pkg_resources import *
from distutils import log
from distutils.errors import DistutilsError
from md5 import md5
from fnmatch import translate

EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.]+)$')
HREF = re.compile("""href\\s*=\\s*['"]?([^'"> ]+)""", re.I)
# this is here to fix emacs' cruddy broken syntax highlighting
PYPI_MD5 = re.compile(
    '<a href="([^"#]+)">([^<]+)</a>\n\s+\\(<a href="[^?]+\?:action=show_md5'
    '&digest=([0-9a-f]{32})">md5</a>\\)'
)
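
# Illustrative sketch of the rewrite done with PYPI_MD5 (see process_index()
# below); the HTML shape shown here is inferred from the pattern itself, not
# taken from a live PyPI page:
#
#   <a href="foo-1.0.tar.gz">foo-1.0.tar.gz</a>
#       (<a href="/pypi?:action=show_md5&digest=0123456789abcdef0123456789abcdef">md5</a>)
#
# is collapsed into a single link carrying the digest as a fragment:
#
#   <a href="foo-1.0.tar.gz#md5=0123456789abcdef0123456789abcdef">foo-1.0.tar.gz</a>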

URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):', re.I).match
EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()

__all__ = [
    'PackageIndex', 'distros_for_url', 'parse_bdist_wininst',
    'interpret_distro_name',
]


def parse_bdist_wininst(name):
    """Return (base,pyversion) or (None,None) for possible .exe name"""

    lower = name.lower()
    base, py_ver = None, None

    if lower.endswith('.exe'):
        if lower.endswith('.win32.exe'):
            base = name[:-10]
        elif lower.startswith('.win32-py', -16):
            py_ver = name[-7:-4]
            base = name[:-16]

    return base, py_ver
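
# Illustrative results (assumed filenames, comments only):
#
#   >>> parse_bdist_wininst('foo-1.0.win32.exe')
#   ('foo-1.0', None)
#   >>> parse_bdist_wininst('foo-1.0.win32-py2.4.exe')
#   ('foo-1.0', '2.4')
#   >>> parse_bdist_wininst('foo-1.0.tar.gz')
#   (None, None)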

def egg_info_for_url(url):
    scheme, server, path, parameters, query, fragment = urlparse.urlparse(url)
    base = urllib2.unquote(path.split('/')[-1])
    if '#' in base: base, fragment = base.split('#',1)
    return base, fragment
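
# Illustrative result (assumed URL, comments only):
#
#   >>> egg_info_for_url('http://example.com/dist/foo-1.0.tar.gz#egg=foo-1.0')
#   ('foo-1.0.tar.gz', 'egg=foo-1.0')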

def distros_for_url(url, metadata=None):
    """Yield egg or source distribution objects that might be found at a URL"""
    base, fragment = egg_info_for_url(url)
    dists = distros_for_location(url, base, metadata)
    if fragment and not dists:
        match = EGG_FRAGMENT.match(fragment)
        if match:
            return interpret_distro_name(
                url, match.group(1), metadata, precedence = CHECKOUT_DIST
            )
    return dists
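
# Illustrative sketch (assumed URL, comments only): a checkout URL whose only
# name/version clue is an '#egg=' fragment, e.g.
#
#   svn://svn.example.org/trunk#egg=foo-dev
#
# has no recognized archive extension, so the fragment is interpreted via
# interpret_distro_name() with precedence CHECKOUT_DIST.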

def distros_for_location(location, basename, metadata=None):
    """Yield egg or source distribution objects based on basename"""
    if basename.endswith('.egg.zip'):
        basename = basename[:-4]    # strip the .zip
    if basename.endswith('.egg'):   # only one, unambiguous interpretation
        return [Distribution.from_location(location, basename, metadata)]

    if basename.endswith('.exe'):
        win_base, py_ver = parse_bdist_wininst(basename)
        if win_base is not None:
            return interpret_distro_name(
                location, win_base, metadata, py_ver, BINARY_DIST, "win32"
            )

    # Try source distro extensions (.zip, .tgz, etc.)
    #
    for ext in EXTENSIONS:
        if basename.endswith(ext):
            basename = basename[:-len(ext)]
            return interpret_distro_name(location, basename, metadata)
    return []   # no extension matched
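
# Illustrative dispatch (assumed basenames, comments only):
#
#   'foo-1.0-py2.4.egg'  -> one unambiguous Distribution
#   'foo-1.0.win32.exe'  -> binary-dist interpretations via parse_bdist_wininst()
#   'foo-1.0.tar.gz'     -> source-dist interpretations of 'foo-1.0'
#   'README.txt'         -> []  (no recognized extension)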


def distros_for_filename(filename, metadata=None):
    """Yield possible egg or source distribution objects based on a filename"""
    return distros_for_location(
        normalize_path(filename), os.path.basename(filename), metadata
    )


def interpret_distro_name(location, basename, metadata,
    py_version=None, precedence=SOURCE_DIST, platform=None
):
    """Generate alternative interpretations of a source distro name

    Note: if `location` is a filesystem filename, you should call
    ``pkg_resources.normalize_path()`` on it before passing it to this
    routine!
    """

    # Some package names are ambiguous as to where the name ends and the
    # version begins, e.g. "adns-python-1.1.0" or "egenix-mx-commercial".
    # So, we generate each possible interpretation (e.g. "adns, python-1.1.0",
    # "adns-python, 1.1.0", and "adns-python-1.1.0, no version").  In practice,
    # the spurious interpretations should be ignored, because in the event
    # there's also an "adns" package, the spurious "python-1.1.0" version will
    # compare lower than any numeric version number, and is therefore unlikely
    # to match a request for it.  It's still a potential problem, though, and
    # in the long run PyPI and the distutils should go for "safe" names and
    # versions in distribution archive names (sdist and bdist).

    parts = basename.split('-')
    for p in range(1, len(parts)+1):
        yield Distribution(
            location, metadata, '-'.join(parts[:p]), '-'.join(parts[p:]),
            py_version=py_version, precedence=precedence,
            platform=platform
        )


class PackageIndex(Environment):
    """A distribution index that scans web pages for download URLs"""

    def __init__(self, index_url="http://www.python.org/pypi",
        hosts=('*',), *args, **kw
    ):
        Environment.__init__(self, *args, **kw)
        # ensure the index URL ends with exactly one trailing slash
        self.index_url = index_url + "/"[:not index_url.endswith('/')]
        self.scanned_urls = {}
        self.fetched_urls = {}
        self.package_pages = {}
        # translate fnmatch-style host patterns into one OR-ed regex
        self.allows = re.compile('|'.join(map(translate, hosts))).match
        self.to_scan = []
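
    # Illustrative construction (assumed values, comments only):
    #
    #   pi = PackageIndex(hosts=('*.python.org', 'localhost'))
    #   # links on any other host will be rejected by url_ok()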

    def process_url(self, url, retrieve=False):
        """Evaluate a URL as a possible download, and maybe retrieve it"""
        url = fix_sf_url(url)
        if url in self.scanned_urls and not retrieve:
            return
        self.scanned_urls[url] = True
        if not URL_SCHEME(url):
            self.process_filename(url)
            return
        else:
            dists = list(distros_for_url(url))
            if dists:
                if not self.url_ok(url):
                    return
                self.debug("Found link: %s", url)

        if dists or not retrieve or url in self.fetched_urls:
            map(self.add, dists)
            return  # don't need the actual page

        if not self.url_ok(url):
            self.fetched_urls[url] = True
            return

        self.info("Reading %s", url)
        f = self.open_url(url)
        self.fetched_urls[url] = self.fetched_urls[f.url] = True

        # use .get() in case the server omitted the Content-Type header
        if 'html' not in f.headers.get('content-type', '').lower():
            f.close()   # not html, we can't process it
            return

        base = f.url    # handle redirects
        page = f.read()
        f.close()
        if url.startswith(self.index_url):
            page = self.process_index(url, page)

        for match in HREF.finditer(page):
            link = urlparse.urljoin(base, match.group(1))
            self.process_url(link)

    def process_filename(self, fn, nested=False):
        # process filenames or directories
        if not os.path.exists(fn):
            self.warn("Not found: %s", fn)
            return

        if os.path.isdir(fn) and not nested:
            path = os.path.realpath(fn)
            for item in os.listdir(path):
                self.process_filename(os.path.join(path, item), True)

        dists = distros_for_filename(fn)
        if dists:
            self.debug("Found: %s", fn)
            map(self.add, dists)

    def url_ok(self, url, fatal=False):
        if self.allows(urlparse.urlparse(url)[1]):
            return True
        msg = "\nLink to %s ***BLOCKED*** by --allow-hosts\n"
        if fatal:
            raise DistutilsError(msg % url)
        else:
            self.warn(msg, url)

    def process_index(self, url, page):
        """Process the contents of a PyPI page"""
        def scan(link):
            # Process a URL to see if it's for a package page
            if link.startswith(self.index_url):
                parts = map(
                    urllib2.unquote, link[len(self.index_url):].split('/')
                )
                if len(parts)==2:
                    # it's a package page, sanitize and index it
                    pkg = safe_name(parts[0])
                    ver = safe_version(parts[1])
                    self.package_pages.setdefault(pkg.lower(),{})[link] = True
                    return to_filename(pkg), to_filename(ver)
            return None, None

        if url==self.index_url or 'Index of Packages</title>' in page:
            # process an index page into the package-page index
            for match in HREF.finditer(page):
                scan( urlparse.urljoin(url, match.group(1)) )
        else:
            pkg, ver = scan(url)    # ensure this page is in the page index
            # process individual package page
            for tag in ("<th>Home Page", "<th>Download URL"):
                pos = page.find(tag)
                if pos!=-1:
                    match = HREF.search(page, pos)
                    if match:
                        # Process the found URL
                        new_url = urlparse.urljoin(url, match.group(1))
                        base, frag = egg_info_for_url(new_url)
                        if base.endswith('.py') and not frag:
                            if pkg and ver:
                                new_url += '#egg=%s-%s' % (pkg, ver)
                            else:
                                self.need_version_info(url)
                        self.scan_url(new_url)

        return PYPI_MD5.sub(
            lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1,3,2), page
        )

    def need_version_info(self, url):
        self.scan_all(
            "Page at %s links to .py file(s) without version info; an index "
            "scan is required.", url
        )

    def scan_all(self, msg=None, *args):
        if self.index_url not in self.fetched_urls:
            if msg: self.warn(msg, *args)
            self.warn(
                "Scanning index of all packages (this may take a while)"
            )
        self.scan_url(self.index_url)

    def find_packages(self, requirement):
        self.scan_url(self.index_url + requirement.unsafe_name + '/')

        if not self.package_pages.get(requirement.key):
            # Fall back to safe version of the name
            self.scan_url(self.index_url + requirement.project_name + '/')

        if not self.package_pages.get(requirement.key):
            # We couldn't find the target package, so search the index page too
            self.warn(
                "Couldn't find index page for %r (maybe misspelled?)",
                requirement.unsafe_name
            )
            self.scan_all()

        for url in self.package_pages.get(requirement.key, ()):
            # scan each page that might be related to the desired package
            self.scan_url(url)

    def obtain(self, requirement, installer=None):
        self.prescan()
        self.find_packages(requirement)
        for dist in self[requirement.key]:
            if dist in requirement:
                return dist
            self.debug("%s does not match %s", requirement, dist)
        return super(PackageIndex, self).obtain(requirement, installer)

    def check_md5(self, cs, info, filename, tfp):
        if re.match('md5=[0-9a-f]{32}$', info):
            self.debug("Validating md5 checksum for %s", filename)
            if cs.hexdigest() != info[4:]:
                tfp.close()
                os.unlink(filename)
                raise DistutilsError(
                    "MD5 validation failed for " + os.path.basename(filename) +
                    "; possible download problem?"
                )

    def add_find_links(self, urls):
        """Add `urls` to the list that will be prescanned for searches"""
        for url in urls:
            if (
                self.to_scan is None        # if we have already "gone online"
                or not URL_SCHEME(url)      # or it's a local file/directory
                or url.startswith('file:')
                or list(distros_for_url(url))   # or a direct package link
            ):
                # then go ahead and process it now
                self.scan_url(url)
            else:
                # otherwise, defer retrieval till later
                self.to_scan.append(url)

    def prescan(self):
        """Scan urls scheduled for prescanning (e.g. --find-links)"""
        if self.to_scan:
            map(self.scan_url, self.to_scan)
        self.to_scan = None     # from now on, go ahead and process immediately

    def download(self, spec, tmpdir):
        """Locate and/or download `spec` to `tmpdir`, returning a local path

        `spec` may be a ``Requirement`` object, or a string containing a URL,
        an existing local filename, or a project/version requirement spec
        (i.e. the string form of a ``Requirement`` object).  If it is the URL
        of a .py file with an unambiguous ``#egg=name-version`` tag (i.e., one
        that escapes ``-`` as ``_`` throughout), a trivial ``setup.py`` is
        automatically created alongside the downloaded file.

        If `spec` is a ``Requirement`` object or a string containing a
        project/version requirement spec, this method returns the location of
        a matching distribution (possibly after downloading it to `tmpdir`).
        If `spec` is a locally existing file or directory name, it is simply
        returned unchanged.  If `spec` is a URL, it is downloaded to a subpath
        of `tmpdir`, and the local filename is returned.  Various errors may be
        raised if a problem occurs during downloading.
        """
        if not isinstance(spec, Requirement):
            scheme = URL_SCHEME(spec)
            if scheme:
                # It's a url, download it to tmpdir
                found = self._download_url(scheme.group(1), spec, tmpdir)
                base, fragment = egg_info_for_url(spec)
                if base.endswith('.py'):
                    found = self.gen_setup(found, fragment, tmpdir)
                return found
            elif os.path.exists(spec):
                # Existing file or directory, just return it
                return spec
            else:
                try:
                    spec = Requirement.parse(spec)
                except ValueError:
                    raise DistutilsError(
                        "Not a URL, existing file, or requirement spec: %r" %
                        (spec,)
                    )
        return getattr(self.fetch_distribution(spec, tmpdir), 'location', None)
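
    # Illustrative usage (assumed values, comments only):
    #
    #   pi = PackageIndex()
    #   pi.download('http://example.com/foo-1.0.tar.gz', '/tmp/build')
    #       -> '/tmp/build/foo-1.0.tar.gz'
    #   pi.download('FooBar>=1.2', '/tmp/build')
    #       -> local path of a matching distribution, or None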

    def fetch_distribution(self,
        requirement, tmpdir, force_scan=False, source=False, develop_ok=False
    ):
        """Obtain a distribution suitable for fulfilling `requirement`

        `requirement` must be a ``pkg_resources.Requirement`` instance.
        If necessary, or if the `force_scan` flag is set, the requirement is
        searched for in the (online) package index as well as the locally
        installed packages.  If a distribution matching `requirement` is found,
        the returned distribution's ``location`` is the value you would have
        gotten from calling the ``download()`` method with the matching
        distribution's URL or filename.  If no matching distribution is found,
        ``None`` is returned.

        If the `source` flag is set, only source distributions and source
        checkout links will be considered.  Unless the `develop_ok` flag is
        set, development and system eggs (i.e., those using the ``.egg-info``
        format) will be ignored.
        """

        # process a Requirement
        self.info("Searching for %s", requirement)
        skipped = {}

        def find(req):
            # Find a matching distribution; may be called more than once

            for dist in self[req.key]:

                if dist.precedence==DEVELOP_DIST and not develop_ok:
                    if dist not in skipped:
                        self.warn("Skipping development or system egg: %s", dist)
                        skipped[dist] = 1
                    continue

                if dist in req and (dist.precedence<=SOURCE_DIST or not source):
                    self.info("Best match: %s", dist)
                    return dist.clone(
                        location=self.download(dist.location, tmpdir)
                    )

        if force_scan:
            self.prescan()
            self.find_packages(requirement)

        dist = find(requirement)
        if dist is None and self.to_scan is not None:
            self.prescan()
            dist = find(requirement)

        if dist is None and not force_scan:
            self.find_packages(requirement)
            dist = find(requirement)

        if dist is None:
            self.warn(
                "No local packages or download links found for %s%s",
                (source and "a source distribution of " or ""),
                requirement,
            )
        return dist
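
    # Illustrative usage (assumed values, comments only):
    #
    #   from pkg_resources import Requirement
    #   dist = pi.fetch_distribution(Requirement.parse("FooBar>=1.2"),
    #                                "/tmp/build", source=True)
    #   # dist.location is a local path, or dist is None if nothing matched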

    def fetch(self, requirement, tmpdir, force_scan=False, source=False):
        """Obtain a file suitable for fulfilling `requirement`

        DEPRECATED; use the ``fetch_distribution()`` method now instead.  For
        backward compatibility, this routine is identical but returns the
        ``location`` of the downloaded distribution instead of a distribution
        object.
        """
        dist = self.fetch_distribution(requirement, tmpdir, force_scan, source)
        if dist is not None:
            return dist.location
        return None

    def gen_setup(self, filename, fragment, tmpdir):
        match = EGG_FRAGMENT.match(fragment)
        dists = match and [
            d for d in
            interpret_distro_name(filename, match.group(1), None) if d.version
        ] or []

        if len(dists)==1:   # unambiguous ``#egg`` fragment
            basename = os.path.basename(filename)

            # Make sure the file has been downloaded to the temp dir.
            if os.path.dirname(filename) != tmpdir:
                dst = os.path.join(tmpdir, basename)
                from setuptools.command.easy_install import samefile
                if not samefile(filename, dst):
                    shutil.copy2(filename, dst)
                    filename = dst

            file = open(os.path.join(tmpdir, 'setup.py'), 'w')
            file.write(
                "from setuptools import setup\n"
                "setup(name=%r, version=%r, py_modules=[%r])\n"
                % (
                    dists[0].project_name, dists[0].version,
                    os.path.splitext(basename)[0]
                )
            )
            file.close()
            return filename

        elif match:
            raise DistutilsError(
                "Can't unambiguously interpret project/version identifier %r; "
                "any dashes in the name or version should be escaped using "
                "underscores. %r" % (fragment, dists)
            )
        else:
            raise DistutilsError(
                "Can't process plain .py files without an '#egg=name-version'"
                " suffix to enable automatic setup script generation."
            )

    dl_blocksize = 8192

    def _download_to(self, url, filename):
        self.url_ok(url, True)   # raises error if not allowed
        self.info("Downloading %s", url)
        # Download the file
        fp, tfp, info = None, None, None
        try:
            if '#' in url:
                url, info = url.split('#', 1)
            fp = self.open_url(url)
            if isinstance(fp, urllib2.HTTPError):
                raise DistutilsError(
                    "Can't download %s: %s %s" % (url, fp.code, fp.msg)
                )
            cs = md5()
            headers = fp.info()
            blocknum = 0
            bs = self.dl_blocksize
            size = -1
            if "content-length" in headers:
                size = int(headers["Content-Length"])
            self.reporthook(url, filename, blocknum, bs, size)
            tfp = open(filename, 'wb')
            while True:
                block = fp.read(bs)
                if block:
                    cs.update(block)
                    tfp.write(block)
                    blocknum += 1
                    self.reporthook(url, filename, blocknum, bs, size)
                else:
                    break
            if info: self.check_md5(cs, info, filename, tfp)
            return headers
        finally:
            if fp: fp.close()
            if tfp: tfp.close()

    def reporthook(self, url, filename, blocknum, blksize, size):
        pass    # no-op
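
    # Illustrative subclass hook (assumed, comments only): reporthook() is
    # called once before the first block and once per block thereafter.
    #
    #   class VerbosePackageIndex(PackageIndex):
    #       def reporthook(self, url, filename, blocknum, blksize, size):
    #           if size > 0:
    #               pct = min(100, blocknum * blksize * 100 // size)
    #               log.info("%s: %d%%", filename, pct)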

    def retry_sf_download(self, url, filename):
        try:
            return self._download_to(url, filename)
        except:
            scheme, server, path, param, query, frag = urlparse.urlparse(url)
            if server!='dl.sourceforge.net':
                raise

        mirror = get_sf_ip()

        while _sf_mirrors:
            self.warn("Download failed: %s", sys.exc_info()[1])
            url = urlparse.urlunparse((scheme, mirror, path, param, '', frag))
            try:
                return self._download_to(url, filename)
            except:
                _sf_mirrors.remove(mirror)   # don't retry the same mirror
                mirror = get_sf_ip()

        raise   # fail if no mirror works

    def open_url(self, url):
        try:
            return urllib2.urlopen(url)
        except urllib2.HTTPError, v:
            return v
        except urllib2.URLError, v:
            raise DistutilsError("Download error: %s" % v.reason)

    def _download_url(self, scheme, url, tmpdir):

        # Determine download filename
        #
        name = filter(None, urlparse.urlparse(url)[2].split('/'))
        if name:
            name = name[-1]
            while '..' in name:
                name = name.replace('..', '.').replace('\\', '_')
        else:
            name = "__downloaded__"     # default if URL has no path contents

        if name.endswith('.egg.zip'):
            name = name[:-4]    # strip the extra .zip before download

        filename = os.path.join(tmpdir, name)

        # Download the file
        #
        if scheme=='svn' or scheme.startswith('svn+'):
            return self._download_svn(url, filename)
        else:
            headers = self.retry_sf_download(url, filename)
            # use .get() in case the server omitted the Content-Type header
            if 'html' in headers.get('content-type', '').lower():
                return self._download_html(url, headers, filename, tmpdir)
            else:
                return filename

    def scan_url(self, url):
        self.process_url(url, True)

    def _download_html(self, url, headers, filename, tmpdir):
        file = open(filename)
        for line in file:
            if line.strip():
                # Check for a subversion index page
                if re.search(r'<title>Revision \d+:', line):
                    # it's a subversion index page:
                    file.close()
                    os.unlink(filename)
                    return self._download_svn(url, filename)
                break   # not an index page
        file.close()
        os.unlink(filename)
        raise DistutilsError("Unexpected HTML page found at " + url)

    def _download_svn(self, url, filename):
        url = url.split('#', 1)[0]   # remove any fragment for svn's sake
        self.info("Doing subversion checkout from %s to %s", url, filename)
        os.system("svn checkout -q %s %s" % (url, filename))
        return filename

    def debug(self, msg, *args):
        log.debug(msg, *args)

    def info(self, msg, *args):
        log.info(msg, *args)

    def warn(self, msg, *args):
        log.warn(msg, *args)


def fix_sf_url(url):
    scheme, server, path, param, query, frag = urlparse.urlparse(url)
    if server!='prdownloads.sourceforge.net':
        return url
    return urlparse.urlunparse(
        (scheme, 'dl.sourceforge.net', 'sourceforge' + path, param, '', frag)
    )
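
# Illustrative rewrite (assumed URL, comments only):
#
#   >>> fix_sf_url('http://prdownloads.sourceforge.net/foo/foo-1.0.tar.gz')
#   'http://dl.sourceforge.net/sourceforge/foo/foo-1.0.tar.gz'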

_sf_mirrors = []

def get_sf_ip():
    if not _sf_mirrors:
        try:
            _sf_mirrors[:] = socket.gethostbyname_ex('dl.sourceforge.net')[-1]
        except socket.error:
            # DNS lookup failed (e.g. blocked by a firewall); fall back to
            # the hostname itself
            _sf_mirrors[:] = ['dl.sourceforge.net']
    return random.choice(_sf_mirrors)