blob: dbf64592730eb55a31e4f603fa8d6ea3507c586b [file] [log] [blame]
"""Classes representing releases and distributions retrieved from indexes.

A project (= unique name) can have several releases (= versions) and
each release can have several distributions (= sdist and bdists).

Release objects contain metadata-related information (see PEP 376);
distribution objects contain download-related information.
"""
9
import hashlib
import os
import re
import tempfile
import urllib.error
import urllib.parse
import urllib.request
from shutil import unpack_archive

from packaging.errors import IrrationalVersionError
from packaging.version import (suggest_normalized_version, NormalizedVersion,
                               get_version_predicate)
from packaging.metadata import Metadata
from packaging.pypi.errors import (HashDoesNotMatch, UnsupportedHashName,
                                   CantParseArchiveName)
25
26
# Names exported by ``from <this module> import *``.
__all__ = [
    'ReleaseInfo',
    'DistInfo',
    'ReleasesList',
    'get_infos_from_url',
]
28
# Archive suffixes recognised when parsing a distribution file name.  The
# list is scanned in this order by get_infos_from_url.
EXTENSIONS = ['.tar.gz', '.tar.bz2', '.tar', '.zip', '.tgz', '.egg']
# Matches a URL ending with a "#md5=<hex digest>" fragment and captures the
# digest.
MD5_HASH = re.compile(r'^.*#md5=([a-f0-9]+)$')
# Distribution types accepted by ReleaseInfo.add_distribution.
DIST_TYPES = ['bdist', 'sdist']
32
33
class IndexReference:
    """Mixin used to store the index reference."""

    def set_index(self, index=None):
        """Remember *index* on the instance as ``_index``.

        :param index: the index object to keep; ``None`` when omitted.
        """
        self._index = index
38
39
class ReleaseInfo(IndexReference):
    """Represent a release of a project (a project with a specific version).

    The release holds the metadata related to this specific version, and is
    also a container for distribution-related information.

    See the DistInfo class for more information about distributions.
    """

    def __init__(self, name, version, metadata=None, hidden=False,
                 index=None, **kwargs):
        """
        :param name: the name of the distribution
        :param version: the version of the distribution
        :param metadata: the metadata fields of the release.
        :type metadata: dict
        :param hidden: flag stored unchanged on the ``hidden`` attribute
        :param index: the index object this release is attached to
        :param kwargs: optional arguments for a new distribution; they are
                       only used when they contain a ``dist_type`` key.
        """
        self.set_index(index)
        self.name = name
        self._version = None
        # goes through the ``version`` property, i.e. set_version()
        self.version = version
        if metadata:
            self.metadata = Metadata(mapping=metadata)
        else:
            self.metadata = None
        self.dists = {}
        self.hidden = hidden

        if 'dist_type' in kwargs:
            dist_type = kwargs.pop('dist_type')
            self.add_distribution(dist_type, **kwargs)

    def set_version(self, version):
        """Store *version* as a NormalizedVersion.

        When *version* is not a valid normalized version, the value proposed
        by suggest_normalized_version is used instead.

        :raises IrrationalVersionError: if no suggestion is available.
        """
        try:
            self._version = NormalizedVersion(version)
        except IrrationalVersionError:
            suggestion = suggest_normalized_version(version)
            if suggestion:
                # re-enter the setter with the suggested spelling
                self.version = suggestion
            else:
                raise IrrationalVersionError(version)

    def get_version(self):
        """Return the version object stored by set_version."""
        return self._version

    version = property(get_version, set_version)

    def fetch_metadata(self):
        """If the metadata is not set, use the index to get it.

        Returns ``self.metadata``.
        """
        if not self.metadata:
            # The return value of the index call is not used here;
            # presumably the index fills self.metadata in -- confirm
            # against the index implementation.
            self._index.get_metadata(self.name, str(self.version))
        return self.metadata

    @property
    def is_final(self):
        """proxy to version.is_final"""
        return self.version.is_final

    def fetch_distributions(self):
        """Ask the index for distributions when ``self.dists`` is None.

        Returns ``self.dists``, which is replaced by an empty dict if it is
        still None after the index call.
        """
        if self.dists is None:
            self._index.get_distributions(self.name, str(self.version))
            if self.dists is None:
                self.dists = {}
        return self.dists

    def add_distribution(self, dist_type='sdist', python_version=None,
                         **params):
        """Add distribution information to this release.

        If distribution information is already set for this distribution
        type, add the given URL to that distribution.  This can be useful
        when some of the URLs fail to download.

        :param dist_type: the distribution type; must be one of DIST_TYPES
        :param python_version: if true, stored on the distribution object
        :param params: the fields to be passed to the distribution object
                       (see the :class:DistInfo constructor).
        :raises ValueError: if *dist_type* is not in DIST_TYPES.
        """
        if dist_type not in DIST_TYPES:
            raise ValueError(dist_type)
        if dist_type in self.dists:
            self.dists[dist_type].add_url(**params)
        else:
            self.dists[dist_type] = DistInfo(self, dist_type,
                                             index=self._index, **params)
        if python_version:
            self.dists[dist_type].python_version = python_version

    def get_distribution(self, dist_type=None, prefer_source=True):
        """Return a distribution.

        If dist_type is set, this is just an alias of __getitem__ for that
        distribution type.

        If prefer_source is true, the source distribution is returned when
        there is one.  In every other case the first distribution that was
        registered is returned.

        :raises LookupError: if the release has no distribution at all.
        :raises KeyError: if *dist_type* is given but not registered.
        """
        if not self.dists:
            raise LookupError
        if dist_type:
            return self[dist_type]
        if prefer_source and "sdist" in self.dists:
            return self["sdist"]
        # dict.values() is a view, not an iterator: it has to go through
        # iter() before next() can be used on it.
        return next(iter(self.dists.values()))

    def unpack(self, path=None, prefer_source=True):
        """Unpack the distribution to the given path.

        If no destination is given, creates a temporary location.

        Returns the location of the extracted files (root).
        """
        dist = self.get_distribution(prefer_source=prefer_source)
        return dist.unpack(path=path)

    def download(self, temp_path=None, prefer_source=True):
        """Download the distribution selected by get_distribution.

        The archive is put in temp_path.  If no temp_path is given, one is
        created.

        Returns the path of the downloaded archive.
        """
        dist = self.get_distribution(prefer_source=prefer_source)
        return dist.download(path=temp_path)

    def set_metadata(self, metadata):
        """Update the release metadata with *metadata*.

        A new Metadata object is created first when none is set yet.
        """
        if not self.metadata:
            self.metadata = Metadata()
        self.metadata.update(metadata)

    def __getitem__(self, item):
        """distributions are available using release["sdist"]"""
        return self.dists[item]

    def _check_is_comparable(self, other):
        """Raise TypeError unless *other* is a ReleaseInfo with the same name.
        """
        if not isinstance(other, ReleaseInfo):
            raise TypeError("cannot compare %s and %s"
                % (type(self).__name__, type(other).__name__))
        elif self.name != other.name:
            raise TypeError("cannot compare %s and %s"
                % (self.name, other.name))

    def __repr__(self):
        return "<%s %s>" % (self.name, self.version)

    # Comparisons are made on the version only; all of them raise TypeError
    # (through _check_is_comparable) for objects that are not releases of the
    # same project.
    def __eq__(self, other):
        self._check_is_comparable(other)
        return self.version == other.version

    def __lt__(self, other):
        self._check_is_comparable(other)
        return self.version < other.version

    def __ne__(self, other):
        return not self.__eq__(other)

    def __gt__(self, other):
        return not (self.__lt__(other) or self.__eq__(other))

    def __le__(self, other):
        return self.__eq__(other) or self.__lt__(other)

    def __ge__(self, other):
        return self.__eq__(other) or self.__gt__(other)

    # See http://docs.python.org/reference/datamodel#object.__hash__
    # (defining __eq__ would otherwise make instances unhashable)
    __hash__ = object.__hash__
211
212
class DistInfo(IndexReference):
    """Represents a distribution retrieved from an index (sdist, bdist, ...)
    """

    def __init__(self, release, dist_type=None, url=None, hashname=None,
                 hashval=None, is_external=True, python_version=None,
                 index=None):
        """Create a new instance of DistInfo.

        :param release: a DistInfo class is relative to a release.
        :param dist_type: the type of the dist (eg. source, bin-*, etc.)
        :param url: URL where we found this distribution
        :param hashname: the name of the hash we want to use. Refer to the
                         hashlib.new documentation for more information.
        :param hashval: the hash value.
        :param is_external: we need to know if the provided url comes from
                            an index browsing, or from an external resource.
        :param python_version: stored unchanged on ``python_version``
        :param index: the index object this distribution is attached to
        :raises UnsupportedHashName: if hashlib rejects *hashname*.
        """
        self.set_index(index)
        self.release = release
        self.dist_type = dist_type
        self.python_version = python_version
        self._unpacked_dir = None
        # set the downloaded path to None by default. The goal here
        # is to not download distributions multiple times
        self.downloaded_location = None
        # We store urls in dicts, because we need a bit more information
        # than the simple URL. It is used later to find the right url to
        # use.
        # There are two url attributes: urls and _url. urls contains a list
        # of dicts for the different urls, and _url caches the chosen one so
        # that the selection process is not run multiple times.
        self.urls = []
        self._url = None
        # note: this registers an entry even when url is None
        self.add_url(url, hashname, hashval, is_external)

    def add_url(self, url=None, hashname=None, hashval=None, is_external=True):
        """Add a new url to the list of urls.

        Nothing is added when an entry with the same url already exists.

        :raises UnsupportedHashName: if hashlib rejects *hashname*.
        """
        if hashname is not None:
            try:
                hashlib.new(hashname)
            except ValueError:
                raise UnsupportedHashName(hashname)
        if not any(known['url'] == url for known in self.urls):
            self.urls.append({
                'url': url,
                'hashname': hashname,
                'hashval': hashval,
                'is_external': is_external,
            })
            # reset the url selection process
            self._url = None

    @property
    def url(self):
        """Pick up the right url from the list of urls in self.urls.

        The value is the whole url dict (keys 'url', 'hashname', 'hashval'
        and 'is_external'), not only the URL string.
        """
        # We return internal urls over externals.
        # If there is more than one internal or external, return the first
        # one.
        if self._url is None:
            if len(self.urls) > 1:
                # "== False" is kept on purpose: a None flag must not be
                # treated as internal.
                internals_urls = [u for u in self.urls
                                  if u['is_external'] == False]
                if internals_urls:
                    self._url = internals_urls[0]
            if self._url is None:
                self._url = self.urls[0]
        return self._url

    @property
    def is_source(self):
        """return if the distribution is a source one or not"""
        return self.dist_type == 'sdist'

    def download(self, path=None):
        """Download the distribution to a path, and return it.

        If a directory is given in path, use it; otherwise create a
        temporary one.  Nothing is downloaded (and no directory is created)
        when a previous call already succeeded.

        Return the download location.

        :raises HashDoesNotMatch: if the downloaded file does not match the
                                  hash registered for the url.
        """
        # if we do not have downloaded it yet, do it.
        if self.downloaded_location is None:
            if path is None:
                path = tempfile.mkdtemp()
            url = self.url['url']
            archive_name = urllib.parse.urlparse(url)[2].split('/')[-1]
            filename, headers = urllib.request.urlretrieve(
                url, os.path.join(path, archive_name))
            # Verify before caching the location, so that a corrupted
            # download is not handed out by a later call.
            self._check_md5(filename)
            self.downloaded_location = filename
        return self.downloaded_location

    def unpack(self, path=None):
        """Unpack the distribution to the given path.

        If no destination is given, creates a temporary location.  The
        archive is unpacked only once: later calls return the directory used
        by the first one, whatever *path* they are given.

        Returns the location of the extracted files (root).
        """
        if not self._unpacked_dir:
            if path is None:
                path = tempfile.mkdtemp()

            filename = self.download(path)
            unpack_archive(filename, path)
            self._unpacked_dir = path

        return self._unpacked_dir

    def _check_md5(self, filename):
        """Check that the checksum of the given file matches the one of the
        selected url.

        Despite the name, the algorithm is the url's 'hashname'.  Nothing is
        checked when the hash name or the hash value is None.

        :raises HashDoesNotMatch: if the digests differ.
        """
        hashname = self.url['hashname']
        expected_hashval = self.url['hashval']
        if None in (expected_hashval, hashname):
            return

        hashval = hashlib.new(hashname)
        with open(filename, 'rb') as f:
            # feed the file by chunks instead of loading it whole in memory
            for chunk in iter(lambda: f.read(65536), b''):
                hashval.update(chunk)

        if hashval.hexdigest() != expected_hashval:
            raise HashDoesNotMatch("got %s instead of %s"
                % (hashval.hexdigest(), expected_hashval))

    def __repr__(self):
        if self.release is None:
            return "<? ? %s>" % self.dist_type

        return "<%s %s %s>" % (
            self.release.name, self.release.version, self.dist_type or "")
344
345
class ReleasesList(IndexReference):
    """A container of ReleaseInfo objects for one project.

    Provides useful methods and facilities to sort and filter releases.
    """
    def __init__(self, name, releases=None, contains_hidden=False, index=None):
        """
        :param name: the name of the project
        :param releases: optional iterable of ReleaseInfo objects to add
        :param contains_hidden: flag stored unchanged on ``contains_hidden``
        :param index: the index object this list is attached to
        """
        self.set_index(index)
        self.releases = []
        self.name = name
        self.contains_hidden = contains_hidden
        if releases:
            self.add_releases(releases)

    def fetch_releases(self):
        """Ask the index for the releases of the project.

        Returns ``self.releases``.
        """
        # The return value of the index call is not used here; presumably
        # the index adds what it finds to this list -- confirm against the
        # index implementation.
        self._index.get_releases(self.name)
        return self.releases

    def filter(self, predicate):
        """Filter and return a subset of releases matching the given predicate.

        The result is a new ReleasesList attached to the same index.
        """
        return ReleasesList(self.name, [release for release in self.releases
                                        if predicate.match(release.version)],
                            index=self._index)

    def get_last(self, requirements, prefer_final=None):
        """Return the "last" release that satisfies the given requirements.

        "last" is defined by the version number of the releases; the
        prefer_final parameter can be set to True or False to change the
        order of the results.

        Returns None if no release matches.
        """
        predicate = get_version_predicate(requirements)
        releases = self.filter(predicate)
        if len(releases) == 0:
            return None
        releases.sort_releases(prefer_final, reverse=True)
        return releases[0]

    def add_releases(self, releases):
        """Add releases in the release list.

        :param: releases is a list of ReleaseInfo objects.
        """
        for r in releases:
            self.add_release(release=r)

    def add_release(self, version=None, dist_type='sdist', release=None,
                    **dist_args):
        """Add a release to the list.

        The release can be passed in the `release` parameter, in which case
        its distributions are merged into the release of this list that has
        the same version; or the release information can be passed directly
        in the `version` and `dist_type` arguments.

        Other keyword arguments can be provided, and will be forwarded to the
        distribution creation (eg. the arguments of the DistInfo constructor).

        :raises ValueError: if `release` belongs to another project (names
                            are compared case-insensitively).
        """
        if release:
            if release.name.lower() != self.name.lower():
                raise ValueError("%s is not the same project as %s" %
                                 (release.name, self.name))
            version = str(release.version)

            if version not in self.get_versions():
                # append only if not already exists
                self.releases.append(release)
            for dist in release.dists.values():
                for url in dist.urls:
                    self.add_release(version, dist.dist_type, **url)
        else:
            # Names are compared case-insensitively, as in the branch above;
            # otherwise a release accepted there under a different spelling
            # would not be found here and would be added a second time.
            wanted_name = self.name.lower()
            matches = [r for r in self.releases
                       if str(r.version) == version
                       and r.name.lower() == wanted_name]
            if not matches:
                release = ReleaseInfo(self.name, version, index=self._index)
                self.releases.append(release)
            else:
                release = matches[0]

            release.add_distribution(dist_type=dist_type, **dist_args)

    def sort_releases(self, prefer_final=False, reverse=True, *args, **kwargs):
        """Sort the releases in place with the given properties.

        The `prefer_final` argument can be used to specify if final
        distributions (eg. not dev, beta or alpha) would be preferred or not.

        Results can be inverted by using `reverse` (which defaults to True).

        Any other parameter provided is forwarded to the list.sort call. You
        cannot redefine the key argument here, as it is used internally to
        sort the releases.
        """
        sort_by = []
        if prefer_final:
            sort_by.append("is_final")
        sort_by.append("version")

        self.releases.sort(
            key=lambda i: tuple(getattr(i, arg) for arg in sort_by),
            reverse=reverse, *args, **kwargs)

    def get_release(self, version):
        """Return a release from its version.

        :raises KeyError: unless exactly one release has this version.
        """
        matches = [r for r in self.releases if str(r.version) == version]
        if len(matches) != 1:
            raise KeyError(version)
        return matches[0]

    def get_versions(self):
        """Return the list of the versions of the contained releases, as
        strings."""
        return [str(r.version) for r in self.releases]

    def __getitem__(self, key):
        return self.releases[key]

    def __len__(self):
        return len(self.releases)

    def __repr__(self):
        string = 'Project "%s"' % self.name
        versions = self.get_versions()
        if versions:
            string += ' versions: %s' % ', '.join(versions)
        return '<%s>' % string
470
471
def get_infos_from_url(url, probable_dist_name=None, is_external=True):
    """Get useful information from a URL.

    Return a dict with the keys name, version, url, hashname, hashval,
    is_external and dist_type, or None if the file name does not end with
    one of the EXTENSIONS.

    The file name is handed to split_archive_name before the extension
    result is looked at, so CantParseArchiveName can be raised even for a
    URL that would otherwise give None.

    :param url: complete url of the distribution
    :param probable_dist_name: A probable name of the project (accepted but
                               not used by this function).
    :param is_external: Tell if the url comes from an index or from
                        an external URL.
    """
    # pull the md5 digest out of the url, when it carries one
    hash_match = MD5_HASH.match(url)
    if hash_match is None:
        md5_hash = None
    else:
        md5_hash = hash_match.group(1)
        url = url.replace("#md5=%s" % md5_hash, "")

    # the archive name is the last segment of the url path
    archive_name = urllib.parse.urlparse(url).path.rpartition('/')[2]

    # drop every known extension found at the end of the name, in the
    # order of EXTENSIONS
    extension_matched = False
    for suffix in EXTENSIONS:
        if not archive_name.endswith(suffix):
            continue
        archive_name = archive_name[:-len(suffix)]
        extension_matched = True

    name, version = split_archive_name(archive_name)
    if not extension_matched:
        return None
    return {
        'name': name,
        'version': version,
        'url': url,
        'hashname': "md5",
        'hashval': md5_hash,
        'is_external': is_external,
        'dist_type': 'sdist',
    }
508
509
def split_archive_name(archive_name, probable_name=None):
    """Split an archive name into two parts: name and version.

    :param archive_name: the archive file name, without its extension
    :param probable_name: a probable name of the project.  It is used as the
                          name when the archive name starts with it
                          (ignoring case), followed by "-" and something
                          that can be read as a version; otherwise the name
                          is guessed from the "-" separators.
    :raises CantParseArchiveName: if no name or no version can be found.

    Return the tuple (name, version), with the name in lower case and the
    version as given by suggest_normalized_version.
    """
    # Try to determine which part is the name and which is the version using
    # the "-" separator. Take the larger part to be the version number then
    # reduce it if this does not work.
    def eager_split(text, maxsplit=2):
        # split using the "-" separator
        splits = text.rsplit("-", maxsplit)
        name = splits[0]
        version = "-".join(splits[1:])
        if version.startswith("-"):
            version = version[1:]
        if suggest_normalized_version(version) is None and maxsplit >= 0:
            # we don't get a good version number: recurse!
            return eager_split(text, maxsplit - 1)
        else:
            return name, version

    name = version = None
    if probable_name:
        prefix = probable_name.lower()
        # Cut the name off as a prefix.  str.lstrip() cannot be used for
        # that: it removes any leading characters found in its argument, and
        # so can also eat the beginning of the version.
        if archive_name.lower().startswith(prefix + "-"):
            candidate = archive_name[len(prefix) + 1:]
            if suggest_normalized_version(candidate) is not None:
                name, version = prefix, candidate
    if name is None:
        # no usable hint: guess from the separators
        name, version = eager_split(archive_name)

    version = suggest_normalized_version(version)
    if version is not None and name != "":
        return name.lower(), version
    raise CantParseArchiveName(archive_name)
544 raise CantParseArchiveName(archive_name)