blob: 16510dffd843c0eed8d5d17bbda4e538ba90c670 [file] [log] [blame]
"""Classes representing releases and distributions retrieved from indexes.

A project (= unique name) can have several releases (= versions) and
each release can have several distributions (= sdist and bdists).

Release objects contain metadata-related information (see PEP 376);
distribution objects contain download-related information.
"""
9
10import sys
11import mimetypes
12import re
13import tempfile
14import urllib.request
15import urllib.parse
16import urllib.error
17import urllib.parse
18import hashlib
19from shutil import unpack_archive
20
21from packaging.errors import IrrationalVersionError
22from packaging.version import (suggest_normalized_version, NormalizedVersion,
23 get_version_predicate)
24from packaging.metadata import Metadata
25from packaging.pypi.errors import (HashDoesNotMatch, UnsupportedHashName,
26 CantParseArchiveName)
27
28
# Names exported by ``from <this module> import *``.
__all__ = [
    'ReleaseInfo',
    'DistInfo',
    'ReleasesList',
    'get_infos_from_url',
]
30
# Archive suffixes that get_infos_from_url() strips from a file name.
EXTENSIONS = [".tar.gz", ".tar.bz2", ".tar", ".zip", ".tgz", ".egg"]
# Captures the hexadecimal digest of a trailing "#md5=..." URL fragment.
MD5_HASH = re.compile(r'^.*#md5=([a-f0-9]+)$')
# Distribution types accepted by ReleaseInfo.add_distribution().
DIST_TYPES = ['bdist', 'sdist']
34
35
class IndexReference:
    """Mixin that keeps a reference to an index on the instance."""

    def set_index(self, index=None):
        """Store *index* (None by default) in the ``_index`` attribute."""
        self._index = index
40
41
class ReleaseInfo(IndexReference):
    """Represent a release of a project (a project with a specific version).

    The release holds the metadata related to this specific version, and is
    also a container for distribution-related information: distributions
    are stored in the ``dists`` dict, keyed by distribution type.

    See the DistInfo class for more information about distributions.
    """

    def __init__(self, name, version, metadata=None, hidden=False,
                 index=None, **kwargs):
        """
        :param name: the name of the distribution
        :param version: the version of the distribution
        :param metadata: the metadata fields of the release.
        :type metadata: dict
        :param hidden: stored as-is in the ``hidden`` attribute.
        :param index: the index reference, stored through set_index().
        :param kwargs: optional arguments for a new distribution; they are
                       only used when they contain a 'dist_type' key.
        """
        self.set_index(index)
        self.name = name
        self._version = None
        # goes through the "version" property, hence through set_version()
        self.version = version
        if metadata:
            self.metadata = Metadata(mapping=metadata)
        else:
            self.metadata = None
        self.dists = {}
        self.hidden = hidden

        if 'dist_type' in kwargs:
            dist_type = kwargs.pop('dist_type')
            self.add_distribution(dist_type, **kwargs)

    def set_version(self, version):
        """Store *version* as a NormalizedVersion.

        When the version string is rejected, retry with the value returned
        by suggest_normalized_version(); raise IrrationalVersionError if no
        suggestion is available.
        """
        try:
            self._version = NormalizedVersion(version)
        except IrrationalVersionError:
            suggestion = suggest_normalized_version(version)
            if suggestion:
                # re-enter this setter with the suggested version string
                self.version = suggestion
            else:
                raise IrrationalVersionError(version)

    def get_version(self):
        """Return the version stored by set_version()."""
        return self._version

    version = property(get_version, set_version)

    def fetch_metadata(self):
        """If the metadata is not set, use the indexes to get it"""
        if not self.metadata:
            # The return value is ignored: presumably the index fills in
            # self.metadata itself -- confirm against the index classes.
            self._index.get_metadata(self.name, str(self.version))
        return self.metadata

    @property
    def is_final(self):
        """proxy to version.is_final"""
        return self.version.is_final

    def fetch_distributions(self):
        """Return the distributions, asking the index when ``dists`` is None.
        """
        if self.dists is None:
            self._index.get_distributions(self.name, str(self.version))
            if self.dists is None:
                self.dists = {}
        return self.dists

    def add_distribution(self, dist_type='sdist', python_version=None,
                         **params):
        """Add distribution informations to this release.
        If distribution information is already set for this distribution type,
        add the given url paths to the distribution. This can be useful while
        some of them fails to download.

        :param dist_type: the distribution type (eg. "sdist", "bdist", etc.)
        :param python_version: when true, stored on the distribution.
        :param params: the fields to be passed to the distribution object
                       (see the :class:DistInfo constructor).
        :raises ValueError: if dist_type is not listed in DIST_TYPES.
        """
        if dist_type not in DIST_TYPES:
            raise ValueError(dist_type)
        if dist_type in self.dists:
            self.dists[dist_type].add_url(**params)
        else:
            self.dists[dist_type] = DistInfo(self, dist_type,
                                             index=self._index, **params)
        if python_version:
            self.dists[dist_type].python_version = python_version

    def get_distribution(self, dist_type=None, prefer_source=True):
        """Return a distribution.

        If dist_type is set, find first for this distribution type, and just
        act as an alias of __getitem__.

        If prefer_source is True, search first for a source distribution;
        when there is none, or when prefer_source is False, return one of
        the existing distributions.

        :raises LookupError: if the release has no distribution at all.
        """
        if not self.dists:
            raise LookupError()
        if dist_type:
            return self[dist_type]
        if prefer_source and "sdist" in self.dists:
            return self["sdist"]
        # dict views are iterable but are not iterators: wrap with iter()
        return next(iter(self.dists.values()))

    def unpack(self, path=None, prefer_source=True):
        """Unpack the distribution to the given path.

        If not destination is given, creates a temporary location.

        Returns the location of the extracted files (root).
        """
        dist = self.get_distribution(prefer_source=prefer_source)
        return dist.unpack(path=path)

    def download(self, temp_path=None, prefer_source=True):
        """Download the distribution selected by get_distribution().

        The archive is put in temp_path. If no temp_path is given, a
        temporary location is created.

        Returns the complete absolute path to the downloaded archive.
        """
        dist = self.get_distribution(prefer_source=prefer_source)
        return dist.download(path=temp_path)

    def set_metadata(self, metadata):
        """Update the release metadata, creating it first when unset."""
        if not self.metadata:
            self.metadata = Metadata()
        self.metadata.update(metadata)

    def __getitem__(self, item):
        """distributions are available using release["sdist"]"""
        return self.dists[item]

    def _check_is_comparable(self, other):
        """Raise TypeError unless *other* is a ReleaseInfo of the same name.
        """
        if not isinstance(other, ReleaseInfo):
            raise TypeError("cannot compare %s and %s"
                % (type(self).__name__, type(other).__name__))
        elif self.name != other.name:
            raise TypeError("cannot compare %s and %s"
                % (self.name, other.name))

    def __repr__(self):
        return "<%s %s>" % (self.name, self.version)

    # Releases of the same project are ordered by their version.

    def __eq__(self, other):
        self._check_is_comparable(other)
        return self.version == other.version

    def __lt__(self, other):
        self._check_is_comparable(other)
        return self.version < other.version

    def __ne__(self, other):
        return not self.__eq__(other)

    def __gt__(self, other):
        return not (self.__lt__(other) or self.__eq__(other))

    def __le__(self, other):
        return self.__eq__(other) or self.__lt__(other)

    def __ge__(self, other):
        return self.__eq__(other) or self.__gt__(other)

    # See http://docs.python.org/reference/datamodel#object.__hash__
    __hash__ = object.__hash__
213
214
class DistInfo(IndexReference):
    """Represents a distribution retrieved from an index (sdist, bdist, ...)
    """

    def __init__(self, release, dist_type=None, url=None, hashname=None,
                 hashval=None, is_external=True, python_version=None,
                 index=None):
        """Create a new instance of DistInfo.

        :param release: a DistInfo class is relative to a release.
        :param dist_type: the type of the dist (eg. source, bin-*, etc.)
        :param url: URL where we found this distribution
        :param hashname: the name of the hash we want to use. Refer to the
                         hashlib.new documentation for more information.
        :param hashval: the hash value.
        :param is_external: we need to know if the provided url comes from
                            an index browsing, or from an external resource.
        :param python_version: stored as-is in the ``python_version``
                               attribute.
        :param index: the index reference, stored through set_index().
        :raises UnsupportedHashName: if hashlib does not know hashname.
        """
        self.set_index(index)
        self.release = release
        self.dist_type = dist_type
        self.python_version = python_version
        self._unpacked_dir = None
        # set the downloaded path to None by default. The goal here
        # is to not download distributions multiple times
        self.downloaded_location = None
        # We store urls in dicts, because we need a bit more information
        # than the simple URL. It is used later to find the right url to
        # use.
        # There are two url attributes: urls and _url. urls contains a list
        # of dicts for the different urls, and _url contains the chosen one,
        # so that the selection process is not run multiple times.
        self.urls = []
        self._url = None
        self.add_url(url, hashname, hashval, is_external)

    def add_url(self, url=None, hashname=None, hashval=None, is_external=True):
        """Add a new url to the list of urls.

        A url that is already registered is ignored.

        :raises UnsupportedHashName: if hashlib does not know hashname.
        """
        if hashname is not None:
            try:
                hashlib.new(hashname)
            except ValueError:
                raise UnsupportedHashName(hashname)
        if url not in [u['url'] for u in self.urls]:
            self.urls.append({
                'url': url,
                'hashname': hashname,
                'hashval': hashval,
                'is_external': is_external,
            })
            # reset the url selection process
            self._url = None

    @property
    def url(self):
        """Pick up the right url for the list of urls in self.urls"""
        # We return internal urls over externals.
        # If there is more than one internal or external, return the first
        # one.
        if self._url is None:
            if len(self.urls) > 1:
                # explicit comparison kept on purpose: only a false-valued
                # flag equal to False marks a url as internal (None doesn't)
                internals_urls = [u for u in self.urls
                                  if u['is_external'] == False]
                if internals_urls:
                    self._url = internals_urls[0]
            if self._url is None:
                self._url = self.urls[0]
        return self._url

    @property
    def is_source(self):
        """return if the distribution is a source one or not"""
        return self.dist_type == 'sdist'

    def download(self, path=None):
        """Download the distribution to a path, and return it.

        If the path is given in path, use this, otherwise, generates a new one
        Return the download location.

        :raises HashDoesNotMatch: if the downloaded file does not match the
                                  expected hash of the selected url.
        """
        # if we do not have downloaded it yet, do it.
        if self.downloaded_location is None:
            # only create a temporary directory when a download is needed
            if path is None:
                path = tempfile.mkdtemp()
            url = self.url['url']
            archive_name = urllib.parse.urlparse(url)[2].split('/')[-1]
            filename, headers = urllib.request.urlretrieve(url,
                                       path + "/" + archive_name)
            # verify before caching the location, so that a file failing the
            # check is never handed back by a later call
            self._check_md5(filename)
            self.downloaded_location = filename
        return self.downloaded_location

    def unpack(self, path=None):
        """Unpack the distribution to the given path.

        If not destination is given, creates a temporary location.

        Once the distribution has been unpacked, later calls return the
        location used the first time, whatever path they are given.

        Returns the location of the extracted files (root).
        """
        if not self._unpacked_dir:
            if path is None:
                path = tempfile.mkdtemp()

            filename = self.download(path)
            unpack_archive(filename, path)
            self._unpacked_dir = path

        return self._unpacked_dir

    def _check_md5(self, filename):
        """Check that the checksum of the given file matches the one stored
        with the selected url.

        Despite its name, the method uses the hash named by the url's
        'hashname' entry. Nothing is checked when the hash name or the
        expected value is missing.

        :raises HashDoesNotMatch: if the digests differ.
        """
        hashname = self.url['hashname']
        expected_hashval = self.url['hashval']
        if None not in (expected_hashval, hashname):
            with open(filename, 'rb') as f:
                hashval = hashlib.new(hashname)
                hashval.update(f.read())

            if hashval.hexdigest() != expected_hashval:
                raise HashDoesNotMatch("got %s instead of %s"
                    % (hashval.hexdigest(), expected_hashval))

    def __repr__(self):
        if self.release is None:
            return "<? ? %s>" % self.dist_type

        return "<%s %s %s>" % (
            self.release.name, self.release.version, self.dist_type or "")
347
348
class ReleasesList(IndexReference):
    """A container of Release.

    Provides useful methods and facilities to sort and filter releases.
    """
    def __init__(self, name, releases=None, contains_hidden=False, index=None):
        """
        :param name: the name of the project the releases belong to.
        :param releases: optional iterable of ReleaseInfo objects to add.
        :param contains_hidden: stored as-is in ``contains_hidden``.
        :param index: the index reference, stored through set_index().
        """
        self.set_index(index)
        self.releases = []
        self.name = name
        self.contains_hidden = contains_hidden
        if releases:
            self.add_releases(releases)

    def fetch_releases(self):
        """Ask the index for the releases of this project and return them."""
        # The return value is ignored: presumably the index fills in
        # self.releases itself -- confirm against the index classes.
        self._index.get_releases(self.name)
        return self.releases

    def filter(self, predicate):
        """Filter and return a subset of releases matching the given predicate.
        """
        return ReleasesList(self.name, [release for release in self.releases
                                        if predicate.match(release.version)],
                            index=self._index)

    def get_last(self, requirements, prefer_final=None):
        """Return the "last" release, that satisfy the given predicates.

        "last" is defined by the version number of the releases, you also could
        set prefer_final parameter to True or False to change the order results

        Return None when no release matches.
        """
        predicate = get_version_predicate(requirements)
        releases = self.filter(predicate)
        if len(releases) == 0:
            return None
        releases.sort_releases(prefer_final, reverse=True)
        return releases[0]

    def add_releases(self, releases):
        """Add releases in the release list.

        :param: releases is a list of ReleaseInfo objects.
        """
        for r in releases:
            self.add_release(release=r)

    def add_release(self, version=None, dist_type='sdist', release=None,
                    **dist_args):
        """Add a release to the list.

        The release can be passed in the `release` parameter, and in this case,
        it will be crawled to extract the useful informations if necessary, or
        the release informations can be directly passed in the `version` and
        `dist_type` arguments.

        Other keywords arguments can be provided, and will be forwarded to the
        distribution creation (eg. the arguments of the DistInfo constructor).

        :raises ValueError: if `release` belongs to another project (names
                            are compared case-insensitively).
        """
        if release:
            if release.name.lower() != self.name.lower():
                raise ValueError("%s is not the same project as %s" %
                                 (release.name, self.name))
            version = str(release.version)

            if version not in self.get_versions():
                # append only if not already exists
                self.releases.append(release)
            # register every url of the given release on the release that
            # is stored in the list for this version
            for dist in release.dists.values():
                for url in dist.urls:
                    self.add_release(version, dist.dist_type, **url)
        else:
            # Names are matched case-insensitively, like in the check above;
            # otherwise a release whose name differs only by case would be
            # duplicated here.
            project = self.name.lower()
            matches = [r for r in self.releases
                       if str(r.version) == version
                       and r.name.lower() == project]
            if not matches:
                release = ReleaseInfo(self.name, version, index=self._index)
                self.releases.append(release)
            else:
                release = matches[0]

            release.add_distribution(dist_type=dist_type, **dist_args)

    def sort_releases(self, prefer_final=False, reverse=True, *args, **kwargs):
        """Sort the results with the given properties.

        The `prefer_final` argument can be used to specify if final
        distributions (eg. not dev, beta or alpha) would be preferred or not.

        Results can be inverted by using `reverse`.

        Any other parameter provided will be forwarded to the list sort
        call. You cannot redefine the key argument here, as it is used
        internally to sort the releases.
        """

        sort_by = []
        if prefer_final:
            sort_by.append("is_final")
        sort_by.append("version")

        self.releases.sort(
            key=lambda i: tuple(getattr(i, arg) for arg in sort_by),
            reverse=reverse, *args, **kwargs)

    def get_release(self, version):
        """Return a release from its version.

        :raises KeyError: unless exactly one release has this version.
        """
        matches = [r for r in self.releases if str(r.version) == version]
        if len(matches) != 1:
            raise KeyError(version)
        return matches[0]

    def get_versions(self):
        """Return a list of releases versions contained"""
        return [str(r.version) for r in self.releases]

    def __getitem__(self, key):
        return self.releases[key]

    def __len__(self):
        return len(self.releases)

    def __repr__(self):
        string = 'Project "%s"' % self.name
        versions = self.get_versions()
        if versions:
            string += ' versions: %s' % ', '.join(versions)
        return '<%s>' % string
473
474
def get_infos_from_url(url, probable_dist_name=None, is_external=True):
    """Extract release information from a distribution URL.

    Return a dict with the keys name, version, url, hashname, hashval,
    is_external and dist_type when the archive name ends with one of the
    suffixes listed in EXTENSIONS, and None otherwise. The archive name is
    parsed in both cases, so split_archive_name() may raise.

    :param url: complete url of the distribution
    :param probable_dist_name: A probable name of the project (not used by
                               this function at the moment).
    :param is_external: Tell if the url comes from an index or from
                        an external URL.
    """
    # pull the md5 digest out of the url fragment, when there is one
    md5_hash = None
    found = MD5_HASH.match(url)
    if found is not None:
        md5_hash = found.group(1)
        # drop the fragment from the url
        url = url.replace("#md5=%s" % md5_hash, "")

    # the archive name is the last segment of the url path
    archive_name = urllib.parse.urlparse(url).path.rsplit('/', 1)[-1]

    # Strip the known suffixes. Every suffix is tried, in order, against
    # the name as shortened so far.
    extension_matched = False
    for suffix in EXTENSIONS:
        if not archive_name.endswith(suffix):
            continue
        archive_name = archive_name[:-len(suffix)]
        extension_matched = True

    # find out the project name and the version
    name, version = split_archive_name(archive_name)
    if not extension_matched:
        return None
    return {'name': name,
            'version': version,
            'url': url,
            'hashname': "md5",
            'hashval': md5_hash,
            'is_external': is_external,
            'dist_type': 'sdist'}
511
512
def split_archive_name(archive_name, probable_name=None):
    """Split an archive name into two parts: name and version.

    Return the tuple (name, version), where name is lowercased and version
    is the value returned by suggest_normalized_version().

    :param archive_name: the archive name, without its extension.
    :param probable_name: a probable name of the project. It is used as
                          the name when archive_name starts with it
                          (ignoring case) followed by a "-"; otherwise the
                          name is guessed from the "-" separators.
    :raises CantParseArchiveName: if no usable version or name is found.
    """
    # Try to determine which part is the name and which is the version using
    # the "-" separator. Take the larger part to be the version number then
    # reduce if this does not work.
    def eager_split(text, maxsplit=2):
        while True:
            # split using the "-" separator, starting from the right
            pieces = text.rsplit("-", maxsplit)
            name = pieces[0]
            version = "-".join(pieces[1:])
            if version.startswith("-"):
                version = version[1:]
            if suggest_normalized_version(version) is None and maxsplit >= 0:
                # we don't get a good version number: try again with one
                # split less (the last try, -1, puts no limit on the
                # number of splits)
                maxsplit -= 1
                continue
            return name, version

    if probable_name is not None:
        probable_name = probable_name.lower()
    if probable_name and archive_name.lower().startswith(probable_name + "-"):
        # We get the name from probable_name, if given. The version is what
        # follows the "name-" prefix; str.lstrip() must not be used for
        # this, as it strips a set of characters rather than a prefix.
        name = probable_name
        version = archive_name[len(probable_name) + 1:]
    else:
        name, version = eager_split(archive_name)

    version = suggest_normalized_version(version)
    if version is not None and name != "":
        return name.lower(), version
    else:
        raise CantParseArchiveName(archive_name)
547 raise CantParseArchiveName(archive_name)