blob: b2fcb97c37b243d41598a5e37d246b6ef99f2c00 [file] [log] [blame]
Tarek Ziade1231a4e2011-05-19 13:07:25 +02001"""PEP 376 implementation."""
2
Tarek Ziade1231a4e2011-05-19 13:07:25 +02003import os
4import re
5import csv
6import sys
7import zipimport
Éric Araujo229011d2011-09-18 20:11:48 +02008from io import StringIO
Tarek Ziade1231a4e2011-05-19 13:07:25 +02009from hashlib import md5
Éric Araujo229011d2011-09-18 20:11:48 +020010
Tarek Ziade1231a4e2011-05-19 13:07:25 +020011from packaging import logger
12from packaging.errors import PackagingError
13from packaging.version import suggest_normalized_version, VersionPredicate
14from packaging.metadata import Metadata
15
16
# Public API of this module.
__all__ = [
    'Distribution', 'EggInfoDistribution', 'distinfo_dirname',
    'get_distributions', 'get_distribution', 'get_file_users',
    'provides_distribution', 'obsoletes_distribution',
    'enable_cache', 'disable_cache', 'clear_cache',
    'get_file_path', 'get_file']


# TODO update docs

# The only file names Distribution.get_distinfo_file() agrees to open
DIST_FILES = ('INSTALLER', 'METADATA', 'RECORD', 'REQUESTED', 'RESOURCES')

# Cache
_cache_name = {}  # maps names to lists of Distribution instances
_cache_name_egg = {}  # maps names to lists of EggInfoDistribution instances
_cache_path = {}  # maps paths to Distribution instances
_cache_path_egg = {}  # maps paths to EggInfoDistribution instances
_cache_generated = False  # indicates if .dist-info distributions are cached
_cache_generated_egg = False  # indicates if .dist-info and .egg are cached
_cache_enabled = True  # toggled by enable_cache() / disable_cache()
37
38
def enable_cache():
    """Turn the internal distribution cache on.

    Entries that are already stored are left untouched; use
    :func:`clear_cache` to empty the cache.
    """
    global _cache_enabled
    _cache_enabled = True
49
50
def disable_cache():
    """Turn the internal distribution cache off.

    Entries that are already stored are left untouched; use
    :func:`clear_cache` to empty the cache.
    """
    global _cache_enabled
    _cache_enabled = False
61
62
def clear_cache():
    """Empty every internal cache and mark both caches as not generated."""
    global _cache_generated, _cache_generated_egg

    for cache in (_cache_name, _cache_name_egg,
                  _cache_path, _cache_path_egg):
        cache.clear()
    _cache_generated = _cache_generated_egg = False
73
74
Éric Araujo6f677652011-06-16 23:43:15 +020075def _yield_distributions(include_dist, include_egg, paths):
Tarek Ziade1231a4e2011-05-19 13:07:25 +020076 """
77 Yield .dist-info and .egg(-info) distributions, based on the arguments
78
79 :parameter include_dist: yield .dist-info distributions
80 :parameter include_egg: yield .egg(-info) distributions
81 """
82 for path in paths:
83 realpath = os.path.realpath(path)
84 if not os.path.isdir(realpath):
85 continue
86 for dir in os.listdir(realpath):
87 dist_path = os.path.join(realpath, dir)
88 if include_dist and dir.endswith('.dist-info'):
89 yield Distribution(dist_path)
90 elif include_egg and (dir.endswith('.egg-info') or
91 dir.endswith('.egg')):
92 yield EggInfoDistribution(dist_path)
93
94
def _generate_cache(use_egg_info, paths):
    """Fill the module caches from *paths* unless they are already filled.

    ``.dist-info`` distributions are scanned only if they were not cached
    before; ``.egg``/``.egg-info`` ones are scanned when *use_egg_info* is
    true and they were not cached before.
    """
    global _cache_generated, _cache_generated_egg

    if _cache_generated_egg or (_cache_generated and not use_egg_info):
        # everything that was asked for is already cached
        return

    gen_dist = not _cache_generated
    gen_egg = use_egg_info

    for dist in _yield_distributions(gen_dist, gen_egg, paths):
        if isinstance(dist, Distribution):
            by_path, by_name = _cache_path, _cache_name
        else:
            by_path, by_name = _cache_path_egg, _cache_name_egg
        by_path[dist.path] = dist
        # several distributions may share one name, hence the list
        by_name.setdefault(dist.name, []).append(dist)

    if gen_dist:
        _cache_generated = True
    if gen_egg:
        _cache_generated_egg = True
120
121
class Distribution:
    """Created with the *path* of the ``.dist-info`` directory provided to the
    constructor. It reads the metadata contained in ``METADATA`` when it is
    instantiated."""

    name = ''
    """The name of the distribution."""

    version = ''
    """The version of the distribution."""

    metadata = None
    """A :class:`packaging.metadata.Metadata` instance loaded with
    the distribution's ``METADATA`` file."""

    requested = False
    """A boolean that indicates whether the ``REQUESTED`` metadata file is
    present (in other words, whether the package was installed by user
    request or it was installed as a dependency)."""

    def __init__(self, path):
        """Load the distribution found in the ``.dist-info`` directory *path*.

        When the cache is enabled and already holds *path*, the cached
        metadata object is reused instead of parsing ``METADATA`` again.
        """
        if _cache_enabled and path in _cache_path:
            self.metadata = _cache_path[path].metadata
        else:
            metadata_path = os.path.join(path, 'METADATA')
            self.metadata = Metadata(path=metadata_path)

        self.name = self.metadata['Name']
        self.version = self.metadata['Version']
        self.path = path
        # PEP 376: the mere presence of the REQUESTED file is the marker
        self.requested = os.path.isfile(os.path.join(path, 'REQUESTED'))

        if _cache_enabled and path not in _cache_path:
            _cache_path[path] = self

    def __repr__(self):
        return '<Distribution %r %s at %r>' % (
            self.name, self.version, self.path)

    def _get_records(self, local=False):
        """Return the ``RECORD`` entries as a list of (path, checksum, size).

        Missing trailing columns are filled with ``None``; empty lines are
        ignored.  If *local* is true, each path has ``'/'`` replaced by
        ``os.sep`` and is joined to ``sys.prefix``.
        """
        results = []
        with self.get_distinfo_file('RECORD') as record:
            record_reader = csv.reader(record, delimiter=',',
                                       lineterminator='\n')
            for row in record_reader:
                if not row:
                    # csv yields [] for a blank line; it carries no path
                    continue
                missing = [None] * (3 - len(row))
                path, checksum, size = row + missing
                if local:
                    path = path.replace('/', os.sep)
                    path = os.path.join(sys.prefix, path)
                results.append((path, checksum, size))
        return results

    def get_resource_path(self, relative_path):
        """Return the destination recorded in ``RESOURCES`` for
        *relative_path*.

        :raises KeyError: if no line of ``RESOURCES`` has *relative_path* as
                          its first column
        """
        with self.get_distinfo_file('RESOURCES') as resources_file:
            resources_reader = csv.reader(resources_file, delimiter=',',
                                          lineterminator='\n')
            for row in resources_reader:
                if not row:
                    # tolerate blank lines instead of failing to unpack
                    continue
                relative, destination = row
                if relative == relative_path:
                    return destination
        raise KeyError(
            'no resource file with relative path %r is installed' %
            relative_path)

    def list_installed_files(self, local=False):
        """
        Iterates over the ``RECORD`` entries and returns a tuple
        ``(path, md5, size)`` for each line. If *local* is ``True``,
        the returned path is transformed into a local absolute path.
        Otherwise the raw value from RECORD is returned.

        A local absolute path is an absolute path in which occurrences of
        ``'/'`` have been replaced by the system separator given by ``os.sep``.

        :parameter local: flag to say if the path should be returned as a local
                          absolute path

        :type local: boolean
        :returns: iterator of (path, md5, size)
        """
        for result in self._get_records(local):
            yield result

    def uses(self, path):
        """
        Returns ``True`` if path is listed in ``RECORD``. *path* can be a local
        absolute path or a relative ``'/'``-separated path.

        :rtype: boolean
        """
        for p, checksum, size in self._get_records():
            if path == p:
                return True
            # Accept the historical form (prefix joined to the raw value) as
            # well as the real local form with '/' turned into os.sep; the
            # two only differ on platforms where os.sep is not '/'.
            if path in (os.path.join(sys.prefix, p),
                        os.path.join(sys.prefix, p.replace('/', os.sep))):
                return True
        return False

    def get_distinfo_file(self, path, binary=False):
        """
        Returns a file located under the ``.dist-info`` directory. Returns a
        ``file`` instance for the file pointed by *path*.

        :parameter path: a ``'/'``-separated path relative to the
                         ``.dist-info`` directory or an absolute path;
                         If *path* is an absolute path and doesn't start
                         with the ``.dist-info`` directory path,
                         a :class:`PackagingError` is raised
        :type path: string
        :parameter binary: If *binary* is ``True``, opens the file in read-only
                           binary mode (``rb``), otherwise opens it in
                           read-only mode (``r``).
        :rtype: file object
        """
        open_flags = 'r'
        if binary:
            open_flags += 'b'

        # Check if it is an absolute path  # XXX use relpath, add tests
        if path.find(os.sep) >= 0:
            # it's an absolute path?
            parent_dirname, path = path.split(os.sep)[-2:]
            if parent_dirname != self.path.split(os.sep)[-1]:
                raise PackagingError(
                    'dist-info file %r does not belong to the %r %s '
                    'distribution' % (path, self.name, self.version))

        # The file must be relative
        if path not in DIST_FILES:
            raise PackagingError('invalid path for a dist-info file: %r' %
                                 path)

        path = os.path.join(self.path, path)
        return open(path, open_flags)

    def list_distinfo_files(self, local=False):
        """
        Iterates over the ``RECORD`` entries and returns paths for each line if
        the path is pointing to a file located in the ``.dist-info`` directory
        or one of its subdirectories.

        :parameter local: If *local* is ``True``, each returned path is
                          transformed into a local absolute path. Otherwise the
                          raw value from ``RECORD`` is returned.
        :type local: boolean
        :returns: iterator of paths
        """
        base = self.path
        separators = ('/', os.sep)
        for path, checksum, size in self._get_records(local):
            if not path.startswith(base):
                continue
            # A bare prefix test would also accept siblings such as
            # "<base>-extra/file"; require a separator right after the
            # directory name (or an exact match).
            remainder = path[len(base):]
            if (not remainder or remainder[0] in separators or
                    base.endswith(separators)):
                yield path

    def __eq__(self, other):
        return isinstance(other, Distribution) and self.path == other.path

    # See http://docs.python.org/reference/datamodel#object.__hash__
    __hash__ = object.__hash__
276
277
class EggInfoDistribution:
    """Created with the *path* of the ``.egg-info`` directory or file provided
    to the constructor. It reads the metadata contained in the file itself, or
    if the given path happens to be a directory, the metadata is read from the
    file ``PKG-INFO`` under that directory.  A path ending with ``.egg``
    (directory or zip file) is read through its ``EGG-INFO`` directory."""

    name = ''
    """The name of the distribution."""

    version = ''
    """The version of the distribution."""

    metadata = None
    """A :class:`packaging.metadata.Metadata` instance loaded with
    the distribution's ``PKG-INFO`` file."""

    _REQUIREMENT = re.compile(
        r'(?P<name>[-A-Za-z0-9_.]+)\s*'
        r'(?P<first>(?:<|<=|!=|==|>=|>)[-A-Za-z0-9_.]+)?\s*'
        r'(?P<rest>(?:\s*,\s*(?:<|<=|!=|==|>=|>)[-A-Za-z0-9_.]+)*)\s*'
        r'(?P<extras>\[.*\])?')

    def __init__(self, path):
        """Load the distribution found at *path*.

        :raises ValueError: if *path* ends with neither ``.egg-info`` nor
                            ``.egg``
        """
        self.path = path
        if _cache_enabled and path in _cache_path_egg:
            self.metadata = _cache_path_egg[path].metadata
            self.name = self.metadata['Name']
            self.version = self.metadata['Version']
            return

        if path.endswith('.egg'):
            if os.path.isdir(path):
                meta_path = os.path.join(path, 'EGG-INFO', 'PKG-INFO')
                self.metadata = Metadata(path=meta_path)
            else:
                # FIXME handle the case where zipfile is not available
                zipf = zipimport.zipimporter(path)
                fileobj = StringIO(
                    zipf.get_data('EGG-INFO/PKG-INFO').decode('utf8'))
                self.metadata = Metadata(fileobj=fileobj)
        elif path.endswith('.egg-info'):
            if os.path.isdir(path):
                meta_path = os.path.join(path, 'PKG-INFO')
            else:
                meta_path = path
            self.metadata = Metadata(path=meta_path)
        else:
            raise ValueError('path must end with .egg-info or .egg, got %r' %
                             path)

        self.name = self.metadata['Name']
        self.version = self.metadata['Version']

        requires = self._read_egg_requires(path)
        if requires is not None:
            if self.metadata['Metadata-Version'] == '1.1':
                # we can't have 1.1 metadata *and* Setuptools requires
                for field in ('Obsoletes', 'Requires', 'Provides'):
                    del self.metadata[field]

            reqs = self._parse_requires(requires)
            if len(reqs) > 0:
                self.metadata['Requires-Dist'] += reqs

        if _cache_enabled:
            _cache_path_egg[self.path] = self

    @staticmethod
    def _read_egg_requires(path):
        """Return the text of ``requires.txt`` for the egg at *path*.

        ``None`` is returned when the file does not exist, or when *path* is
        a single-file ``.egg-info`` (which cannot contain one).
        """
        if path.endswith('.egg'):
            if not os.path.isdir(path):
                # zipped egg: get_data() returns bytes, decode them the same
                # way PKG-INFO is decoded
                zipf = zipimport.zipimporter(path)
                try:
                    data = zipf.get_data('EGG-INFO/requires.txt')
                except IOError:
                    return None
                return data.decode('utf8')
            req_path = os.path.join(path, 'EGG-INFO', 'requires.txt')
        elif os.path.isdir(path):
            # requires.txt sits next to PKG-INFO, not underneath it
            req_path = os.path.join(path, 'requires.txt')
        else:
            return None

        try:
            with open(req_path, 'r') as fp:
                return fp.read()
        except IOError:
            return None

    def _parse_requires(self, requires):
        """Convert the text of a ``requires.txt`` into ``Requires-Dist``
        values.

        Blank lines and ``#`` comments are skipped.  Parsing stops at the
        first ``[section]`` header (extras sections are not supported) or at
        the first line that is not a requirement.
        """
        reqs = []
        for line in requires.splitlines():
            line = line.strip()
            # skip blank lines/comments
            if not line or line.startswith('#'):
                continue
            if line.startswith('['):
                logger.warning(
                    'extensions in requires.txt are not supported '
                    '(used by %r %s)', self.name, self.version)
                break

            match = self._REQUIREMENT.match(line)
            if not match:
                # this happens when we encounter extras; since they
                # are written at the end of the file we just exit
                break

            if match.group('extras'):
                logger.warning(
                    'extra requirements are not supported '
                    '(used by %r %s)', self.name, self.version)

            name = match.group('name')
            version = match.group('first')
            if version is None:
                reqs.append(name)
            else:
                if match.group('rest'):
                    version += match.group('rest')
                version = version.replace(' ', '')  # trim spaces
                reqs.append('%s (%s)' % (name, version))
        return reqs

    def __repr__(self):
        return '<EggInfoDistribution %r %s at %r>' % (
            self.name, self.version, self.path)

    def list_installed_files(self, local=False):
        """Return a list of ``(path, md5, size)`` for the egg's own files.

        If the distribution path is a file, the list has that single entry;
        otherwise every file found below the path is listed.  If *local* is
        true, ``'/'`` in the distribution path is replaced by ``os.sep``
        first.
        """

        def _md5(path):
            with open(path, 'rb') as f:
                content = f.read()
            return md5(content).hexdigest()

        def _size(path):
            return os.stat(path).st_size

        path = self.path
        if local:
            path = path.replace('/', os.sep)

        # XXX What about scripts and data files ?
        if os.path.isfile(path):
            return [(path, _md5(path), _size(path))]

        files = []
        for root, dirnames, filenames in os.walk(path):
            for item in filenames:
                item = os.path.join(root, item)
                files.append((item, _md5(item), _size(item)))
        return files

    def uses(self, path):
        """Always return ``False``: no per-file record is consulted here."""
        return False

    def __eq__(self, other):
        return (isinstance(other, EggInfoDistribution) and
                self.path == other.path)

    # See http://docs.python.org/reference/datamodel#object.__hash__
    __hash__ = object.__hash__
448
449
def distinfo_dirname(name, version):
    """
    Build the name of the ``.dist-info`` directory for *name* and *version*.

    Every ``'-'`` in *name* is replaced with ``'_'``, so that the only
    dashes left are the one separating the name from the version and the
    one in ``'dist-info'``.  *version* is passed through
    :func:`suggest_normalized_version`; if that returns ``None`` the version
    is used unchanged.

    :parameter name: the distribution name
    :type name: string
    :parameter version: the distribution version
    :type version: string
    :returns: directory name
    :rtype: string"""
    escaped_name = name.replace('-', '_')
    version_part = suggest_normalized_version(version)
    if version_part is None:
        # This is a lookup procedure: still return something for a version
        # that cannot be normalized.
        version_part = version
    return escaped_name + '-' + version_part + '.dist-info'
477
478
def get_distributions(use_egg_info=False, paths=None):
    """
    Provides an iterator over the :class:`Distribution` instances found for
    the ``.dist-info`` directories of *paths* (``sys.path`` when *paths* is
    ``None``).  If *use_egg_info* is ``True``, the ``.egg-info`` files and
    directories (and ``.egg`` entries) are iterated as well.

    :rtype: iterator of :class:`Distribution` and :class:`EggInfoDistribution`
            instances
    """
    if paths is None:
        paths = sys.path

    if _cache_enabled:
        _generate_cache(use_egg_info, paths)

        for cached in _cache_path.values():
            yield cached

        if use_egg_info:
            for cached in _cache_path_egg.values():
                yield cached
    else:
        # no cache: scan the directories on every call
        for found in _yield_distributions(True, use_egg_info, paths):
            yield found
504
505
def get_distribution(name, use_egg_info=False, paths=None):
    """
    Return the installed distribution whose ``Name`` metadata field equals
    *name*, or ``None`` if there is none.

    *paths* (``sys.path`` when ``None``) is scanned for ``.dist-info``
    directories, and also for ``.egg-info``/``.egg`` entries if
    *use_egg_info* is ``True``.  Only the first match is returned, as no
    more than one is expected; with the cache enabled, a ``.dist-info``
    match takes precedence over an egg one.

    :rtype: :class:`Distribution` or :class:`EggInfoDistribution` or None
    """
    if paths is None:
        paths = sys.path

    if _cache_enabled:
        _generate_cache(use_egg_info, paths)

        if name in _cache_name:
            return _cache_name[name][0]
        if use_egg_info and name in _cache_name_egg:
            return _cache_name_egg[name][0]
        return None

    for candidate in _yield_distributions(True, use_egg_info, paths):
        if candidate.name == name:
            return candidate
    return None
539
540
def obsoletes_distribution(name, version=None, use_egg_info=False):
    """
    Iterates over all distributions and yields those that obsolete *name*.

    If a *version* is provided, an obsoletes entry that carries a version
    specification only counts when *version* matches it.  If the argument
    *use_egg_info* is set to ``True``, then ``.egg-info`` distributions are
    considered as well.  Each distribution is yielded at most once.

    :type name: string
    :type version: string
    :raises PackagingError: if an obsoletes entry cannot be parsed as a
                            version predicate
    """
    for dist in get_distributions(use_egg_info):
        metadata = dist.metadata
        entries = metadata['Obsoletes-Dist'] + metadata['Obsoletes']
        for entry in entries:
            parts = entry.split(' ', 1)
            if len(parts) == 1 or version is None:
                # no version to compare: the name alone decides
                matched = name == parts[0]
            else:
                try:
                    predicate = VersionPredicate(entry)
                except ValueError:
                    raise PackagingError(
                        'distribution %r has ill-formed obsoletes field: '
                        '%r' % (dist.name, entry))
                matched = name == parts[0] and predicate.match(version)
            if matched:
                yield dist
                break
573
574
def provides_distribution(name, version=None, use_egg_info=False):
    """
    Iterates over all distributions and yields those that provide *name*.

    The ``Provides-Dist`` and ``Provides`` metadata fields of every
    distribution are examined.  If a *version* is provided, a provides entry
    of the form ``name (version)`` only counts when its version matches
    *version*.  If the argument *use_egg_info* is set to ``True``, then
    ``.egg-info`` distributions are considered as well.  Each distribution
    is yielded at most once.

    :parameter version: a version specifier that indicates the version
                        required, conforming to the format in ``PEP-345``

    :type name: string
    :type version: string
    :raises PackagingError: if *name* and *version* do not form a valid
                            predicate, or if a provides entry is malformed
    """
    predicate = None
    if version is not None:
        try:
            predicate = VersionPredicate(name + ' (' + version + ')')
        except ValueError:
            raise PackagingError('invalid name or version: %r, %r' %
                                 (name, version))

    for dist in get_distributions(use_egg_info):
        metadata = dist.metadata
        entries = metadata['Provides-Dist'] + metadata['Provides']

        for entry in entries:
            parts = entry.rsplit(' ', 1)
            if len(parts) == 1 or predicate is None:
                # no version to compare: the name alone decides
                matched = name == parts[0]
            else:
                provided_name, provided_version = parts
                if (len(provided_version) < 2 or
                        provided_version[0] != '(' or
                        provided_version[-1] != ')'):
                    raise PackagingError(
                        'distribution %r has invalid Provides field: %r' %
                        (dist.name, entry))
                provided_version = provided_version[1:-1]  # drop parentheses
                matched = (provided_name == name and
                           predicate.match(provided_version))
            if matched:
                yield dist
                break
622
623
def get_file_users(path):
    """
    Iterates over all distributions and yields those that use *path*,
    i.e. those whose ``uses(path)`` is true.

    :parameter path: can be a local absolute path or a relative
                     ``'/'``-separated path.
    :type path: string
    :rtype: iterator of :class:`Distribution` instances
    """
    for candidate in get_distributions():
        if not candidate.uses(path):
            continue
        yield candidate
Tarek Ziadea17d8882011-05-30 10:57:44 +0200637
638
def get_file_path(distribution_name, relative_path):
    """Return the path to a resource file.

    :raises LookupError: if no distribution named *distribution_name* is
                         found
    """
    dist = get_distribution(distribution_name)
    if dist is None:
        raise LookupError('no distribution named %r found' %
                          distribution_name)
    return dist.get_resource_path(relative_path)
645
646
def get_file(distribution_name, relative_path, *args, **kwargs):
    """Open and return a resource file.

    Extra positional and keyword arguments are handed to :func:`open`.
    """
    resource_path = get_file_path(distribution_name, relative_path)
    return open(resource_path, *args, **kwargs)