blob: 67946a249b744ed4aa75d244e63babcbef32c946 [file] [log] [blame]
Tarek Ziade1231a4e2011-05-19 13:07:25 +02001"""PEP 376 implementation."""
2
3import io
4import os
5import re
6import csv
7import sys
8import zipimport
9from hashlib import md5
10from packaging import logger
11from packaging.errors import PackagingError
12from packaging.version import suggest_normalized_version, VersionPredicate
13from packaging.metadata import Metadata
14
15
__all__ = [
    'Distribution',
    'EggInfoDistribution',
    'distinfo_dirname',
    'get_distributions',
    'get_distribution',
    'get_file_users',
    'provides_distribution',
    'obsoletes_distribution',
    'enable_cache',
    'disable_cache',
    'clear_cache',
    'get_file_path',
    'get_file',
]


# TODO update docs

# File names accepted by Distribution.get_distinfo_file().
DIST_FILES = ('INSTALLER', 'METADATA', 'RECORD', 'REQUESTED', 'RESOURCES')

# Module-level cache state.
# name -> list of Distribution instances
_cache_name = {}
# name -> list of EggInfoDistribution instances
_cache_name_egg = {}
# path -> Distribution instance
_cache_path = {}
# path -> EggInfoDistribution instance
_cache_path_egg = {}
# True once the .dist-info distributions have been cached
_cache_generated = False
# True once the .egg/.egg-info distributions have been cached as well
_cache_generated_egg = False
# Global switch consulted by every cache-aware function in this module
_cache_enabled = True
36
37
def enable_cache():
    """Turn the internal distribution cache on.

    Only the on/off switch is touched; whatever is already cached stays
    there.  Use :func:`clear_cache` to drop cached entries.
    """
    global _cache_enabled
    _cache_enabled = True
48
49
def disable_cache():
    """Turn the internal distribution cache off.

    Only the on/off switch is touched; whatever is already cached stays
    there.  Use :func:`clear_cache` to drop cached entries.
    """
    global _cache_enabled
    _cache_enabled = False
60
61
def clear_cache():
    """Reset the internal cache to its initial, empty state.

    Every cache mapping is rebound to a brand new dictionary (the old
    dictionaries are not emptied in place) and both "generated" flags are
    set back to ``False``.
    """
    global _cache_name, _cache_name_egg, _cache_path, _cache_path_egg
    global _cache_generated, _cache_generated_egg

    _cache_name, _cache_name_egg = {}, {}
    _cache_path, _cache_path_egg = {}, {}
    _cache_generated = False
    _cache_generated_egg = False
73
74
Éric Araujo6f677652011-06-16 23:43:15 +020075def _yield_distributions(include_dist, include_egg, paths):
Tarek Ziade1231a4e2011-05-19 13:07:25 +020076 """
77 Yield .dist-info and .egg(-info) distributions, based on the arguments
78
79 :parameter include_dist: yield .dist-info distributions
80 :parameter include_egg: yield .egg(-info) distributions
81 """
82 for path in paths:
83 realpath = os.path.realpath(path)
84 if not os.path.isdir(realpath):
85 continue
86 for dir in os.listdir(realpath):
87 dist_path = os.path.join(realpath, dir)
88 if include_dist and dir.endswith('.dist-info'):
89 yield Distribution(dist_path)
90 elif include_egg and (dir.endswith('.egg-info') or
91 dir.endswith('.egg')):
92 yield EggInfoDistribution(dist_path)
93
94
def _generate_cache(use_egg_info, paths):
    """Fill the module-level caches by scanning *paths*.

    Nothing is done when the requested kinds of distributions have already
    been cached.  ``.dist-info`` distributions are scanned only if they have
    not been cached yet; egg distributions are scanned when *use_egg_info*
    is true.
    """
    global _cache_generated, _cache_generated_egg

    if _cache_generated_egg or (_cache_generated and not use_egg_info):
        return

    scan_dist = not _cache_generated
    scan_egg = use_egg_info

    for found in _yield_distributions(scan_dist, scan_egg, paths):
        # Pick the pair of caches matching the kind of distribution.
        if isinstance(found, Distribution):
            by_path, by_name = _cache_path, _cache_name
        else:
            by_path, by_name = _cache_path_egg, _cache_name_egg
        by_path[found.path] = found
        by_name.setdefault(found.name, []).append(found)

    if scan_dist:
        _cache_generated = True
    if scan_egg:
        _cache_generated_egg = True
120
121
class Distribution:
    """Created with the *path* of the ``.dist-info`` directory provided to the
    constructor. It reads the metadata contained in ``METADATA`` when it is
    instantiated (or reuses the metadata of the cached instance for the same
    path when the cache is enabled)."""

    name = ''
    """The name of the distribution."""

    version = ''
    """The version of the distribution."""

    metadata = None
    """A :class:`packaging.metadata.Metadata` instance loaded with
    the distribution's ``METADATA`` file."""

    requested = False
    """A boolean that indicates whether the ``REQUESTED`` metadata file is
    present (in other words, whether the package was installed by user
    request or it was installed as a dependency)."""

    def __init__(self, path):
        """Load the distribution stored in the ``.dist-info`` directory
        *path* and register it in the path cache if caching is enabled."""
        if _cache_enabled and path in _cache_path:
            self.metadata = _cache_path[path].metadata
        else:
            metadata_path = os.path.join(path, 'METADATA')
            self.metadata = Metadata(path=metadata_path)

        self.name = self.metadata['Name']
        self.version = self.metadata['Version']
        self.path = path
        # The attribute is documented as reflecting the presence of the
        # REQUESTED marker file, so actually look for it.
        self.requested = os.path.isfile(os.path.join(path, 'REQUESTED'))

        if _cache_enabled and path not in _cache_path:
            _cache_path[path] = self

    def __repr__(self):
        return '<Distribution %r %s at %r>' % (
            self.name, self.version, self.path)

    def _get_records(self, local=False):
        """Yield a ``(path, checksum, size)`` tuple per ``RECORD`` row.

        If *local* is true, ``'/'`` in the path is replaced by ``os.sep``
        and the result is joined to ``sys.prefix``.
        """
        with self.get_distinfo_file('RECORD') as record:
            record_reader = csv.reader(record, delimiter=',',
                                       lineterminator='\n')
            for row in record_reader:
                if not row:
                    # blank line in RECORD: there is no entry to report
                    continue
                # The checksum and size columns may be missing; pad the row
                # with None so that it always unpacks into three values.
                path, checksum, size = row + [None] * (3 - len(row))
                if local:
                    path = path.replace('/', os.sep)
                    path = os.path.join(sys.prefix, path)
                yield path, checksum, size

    def get_resource_path(self, relative_path):
        """Return the destination recorded in ``RESOURCES`` for
        *relative_path*.

        :raises KeyError: if no row of ``RESOURCES`` has *relative_path* as
                          its first column
        """
        with self.get_distinfo_file('RESOURCES') as resources_file:
            resources_reader = csv.reader(resources_file, delimiter=',',
                                          lineterminator='\n')
            for relative, destination in resources_reader:
                if relative == relative_path:
                    return destination
        raise KeyError(
            'no resource file with relative path %r is installed' %
            relative_path)

    def list_installed_files(self, local=False):
        """
        Iterates over the ``RECORD`` entries and returns a tuple
        ``(path, md5, size)`` for each line. If *local* is ``True``,
        the returned path is transformed into a local absolute path.
        Otherwise the raw value from RECORD is returned.

        A local absolute path is an absolute path in which occurrences of
        ``'/'`` have been replaced by the system separator given by ``os.sep``.

        :parameter local: flag to say if the path should be returned a local
                          absolute path

        :type local: boolean
        :returns: iterator of (path, md5, size)
        """
        return self._get_records(local)

    def uses(self, path):
        """
        Returns ``True`` if path is listed in ``RECORD``. *path* can be a local
        absolute path or a relative ``'/'``-separated path.

        :rtype: boolean
        """
        for p, checksum, size in self._get_records():
            # Build the local absolute form the same way _get_records does
            # for local=True, so that both spellings compare equal on
            # platforms where os.sep is not '/'.
            local_absolute = os.path.join(sys.prefix, p.replace('/', os.sep))
            if path == p or path == local_absolute:
                return True
        return False

    def get_distinfo_file(self, path, binary=False):
        """
        Returns a file located under the ``.dist-info`` directory. Returns a
        ``file`` instance for the file pointed by *path*.

        :parameter path: the name of a file of the ``.dist-info`` directory
                         (one of ``DIST_FILES``), or a path containing
                         ``os.sep`` whose last two components are the
                         ``.dist-info`` directory name and the file name;
                         in the latter case a :class:`PackagingError` is
                         raised if the directory name is not the one of
                         this distribution
        :type path: string
        :parameter binary: If *binary* is ``True``, opens the file in read-only
                           binary mode (``rb``), otherwise opens it in
                           read-only mode (``r``).
        :raises PackagingError: if the file does not belong to this
                                distribution or is not one of ``DIST_FILES``
        :rtype: file object
        """
        open_flags = 'r'
        if binary:
            open_flags += 'b'

        # Check if it is an absolute path  # XXX use relpath, add tests
        if os.sep in path:
            # Only the last two components are looked at: the parent
            # directory name and the file name.
            distinfo_dirname, path = path.split(os.sep)[-2:]
            if distinfo_dirname != self.path.split(os.sep)[-1]:
                raise PackagingError(
                    'dist-info file %r does not belong to the %r %s '
                    'distribution' % (path, self.name, self.version))

        # The file must be relative
        if path not in DIST_FILES:
            raise PackagingError('invalid path for a dist-info file: %r' %
                                 path)

        path = os.path.join(self.path, path)
        return open(path, open_flags)

    def list_distinfo_files(self, local=False):
        """
        Iterates over the ``RECORD`` entries and returns the path of each
        line.

        NOTE(review): this method used to be documented as returning only
        the paths located in the ``.dist-info`` directory, but no such
        filtering is implemented; every ``RECORD`` path is returned.

        :parameter local: If *local* is ``True``, each returned path is
                          transformed into a local absolute path. Otherwise the
                          raw value from ``RECORD`` is returned.
        :type local: boolean
        :returns: iterator of paths
        """
        for path, checksum, size in self._get_records(local):
            yield path

    def __eq__(self, other):
        return isinstance(other, Distribution) and self.path == other.path

    # See http://docs.python.org/reference/datamodel#object.__hash__
    # Defining __eq__ alone would make the class unhashable, so the default
    # identity-based hash is restored explicitly.
    # NOTE(review): two instances that compare equal can hash differently.
    __hash__ = object.__hash__
272
273
class EggInfoDistribution:
    """Created with the *path* of the ``.egg-info`` directory or file, or of
    the ``.egg`` directory or zip file, provided to the constructor. For an
    ``.egg-info`` file the metadata is read from the file itself; for an
    ``.egg-info`` directory it is read from the ``PKG-INFO`` file under that
    directory; for an ``.egg`` it is read from ``EGG-INFO/PKG-INFO``."""

    name = ''
    """The name of the distribution."""

    version = ''
    """The version of the distribution."""

    metadata = None
    """A :class:`packaging.metadata.Metadata` instance loaded with
    the distribution's ``PKG-INFO`` file."""

    # One line of a Setuptools requires.txt: a project name, optional
    # comma-separated version constraints and optional trailing [extras].
    _REQUIREMENT = re.compile(
        r'(?P<name>[-A-Za-z0-9_.]+)\s*'
        r'(?P<first>(?:<|<=|!=|==|>=|>)[-A-Za-z0-9_.]+)?\s*'
        r'(?P<rest>(?:\s*,\s*(?:<|<=|!=|==|>=|>)[-A-Za-z0-9_.]+)*)\s*'
        r'(?P<extras>\[.*\])?')

    def __init__(self, path):
        """Load the distribution found at *path*.

        :raises ValueError: if *path* ends neither with ``.egg-info`` nor
                            with ``.egg``
        """
        self.path = path
        if _cache_enabled and path in _cache_path_egg:
            self.metadata = _cache_path_egg[path].metadata
            self.name = self.metadata['Name']
            self.version = self.metadata['Version']
            return

        requires = None

        if path.endswith('.egg'):
            if os.path.isdir(path):
                meta_path = os.path.join(path, 'EGG-INFO', 'PKG-INFO')
                self.metadata = Metadata(path=meta_path)
                requires = self._read_requires(
                    os.path.join(path, 'EGG-INFO', 'requires.txt'))
            else:
                # FIXME handle the case where zipfile is not available
                zipf = zipimport.zipimporter(path)
                fileobj = io.StringIO(
                    zipf.get_data('EGG-INFO/PKG-INFO').decode('utf8'))
                self.metadata = Metadata(fileobj=fileobj)
                try:
                    # get_data() returns bytes; the parsing code works on
                    # text, so decode like it is done for PKG-INFO.
                    requires = zipf.get_data(
                        'EGG-INFO/requires.txt').decode('utf8')
                except IOError:
                    requires = None

        elif path.endswith('.egg-info'):
            if os.path.isdir(path):
                # requires.txt lives next to PKG-INFO, so it has to be
                # located before *path* is narrowed down to PKG-INFO.
                requires = self._read_requires(
                    os.path.join(path, 'requires.txt'))
                path = os.path.join(path, 'PKG-INFO')
            self.metadata = Metadata(path=path)

        else:
            raise ValueError('path must end with .egg-info or .egg, got %r' %
                             path)

        self.name = self.metadata['Name']
        self.version = self.metadata['Version']

        if requires is not None:
            if self.metadata['Metadata-Version'] == '1.1':
                # we can't have 1.1 metadata *and* Setuptools requires
                for field in ('Obsoletes', 'Requires', 'Provides'):
                    try:
                        del self.metadata[field]
                    except KeyError:
                        # presumably the field was never set; nothing to
                        # remove -- TODO confirm against Metadata
                        pass

            reqs = self._parse_requirements(requires)
            if reqs:
                self.metadata['Requires-Dist'] += reqs

        if _cache_enabled:
            _cache_path_egg[self.path] = self

    @staticmethod
    def _read_requires(req_path):
        """Return the text of the file *req_path*, or None if it cannot be
        opened or read."""
        try:
            with open(req_path, 'r') as fp:
                return fp.read()
        except IOError:
            return None

    def _parse_requirements(self, requires):
        """Convert the text of a ``requires.txt`` file into a list of
        ``'name'`` / ``'name (constraints)'`` requirement strings.

        Blank lines and lines starting with ``#`` are ignored.  Parsing
        stops at the first ``[section]`` line or at the first line that
        does not look like a requirement.
        """
        reqs = []
        for raw_line in requires.splitlines():
            line = raw_line.strip()
            # skip blank lines/comments
            if not line or line.startswith('#'):
                continue
            if line.startswith('['):
                logger.warning(
                    'extensions in requires.txt are not supported '
                    '(used by %r %s)', self.name, self.version)
                break
            match = self._REQUIREMENT.match(line)
            if not match:
                # this happens when we encounter extras; since they
                # are written at the end of the file we just exit
                break
            if match.group('extras'):
                logger.warning(
                    'extra requirements are not supported '
                    '(used by %r %s)', self.name, self.version)
            name = match.group('name')
            first = match.group('first')
            if first:
                version = first + (match.group('rest') or '')
                version = version.replace(' ', '')  # trim spaces
                reqs.append('%s (%s)' % (name, version))
            else:
                reqs.append(name)
        return reqs

    def __repr__(self):
        return '<EggInfoDistribution %r %s at %r>' % (
            self.name, self.version, self.path)

    def list_installed_files(self, local=False):
        """Return a list of ``(path, md5, size)`` tuples.

        If the distribution path is a file, the list has a single entry for
        that file; otherwise there is one entry per file found by walking
        the directory tree rooted at the distribution path.

        :parameter local: if true, ``'/'`` in the distribution path is
                          replaced by ``os.sep`` first
        :type local: boolean
        """

        def _md5(path):
            with open(path, 'rb') as f:
                content = f.read()
            return md5(content).hexdigest()

        def _size(path):
            return os.stat(path).st_size

        path = self.path
        if local:
            path = path.replace('/', os.sep)

        # XXX What about scripts and data files ?
        if os.path.isfile(path):
            return [(path, _md5(path), _size(path))]

        files = []
        for root, _dirnames, filenames in os.walk(path):
            for item in filenames:
                item = os.path.join(root, item)
                files.append((item, _md5(item), _size(item)))
        return files

    def uses(self, path):
        """Always return ``False``: no file is reported as used by an egg
        distribution."""
        return False

    def __eq__(self, other):
        return (isinstance(other, EggInfoDistribution) and
                self.path == other.path)

    # See http://docs.python.org/reference/datamodel#object.__hash__
    # Defining __eq__ alone would make the class unhashable, so the default
    # identity-based hash is restored explicitly.
    __hash__ = object.__hash__
444
445
def distinfo_dirname(name, version):
    """
    Build the name of the ``.dist-info`` directory for a distribution.

    :parameter name: distribution name; every ``'-'`` it contains is
                     replaced with ``'_'``
    :type name: string
    :parameter version: version string; it is passed through
                        ``suggest_normalized_version`` and used unchanged
                        when no normalized form is suggested
    :type version: string
    :returns: directory name, ``<name>-<version>.dist-info``
    :rtype: string"""
    escaped_name = name.replace('-', '_')
    # Because this is a lookup procedure, something will be returned even if
    # it is a version that cannot be normalized
    suggested = suggest_normalized_version(version)
    final_version = version if suggested is None else suggested
    return escaped_name + '-' + final_version + '.dist-info'
473
474
def get_distributions(use_egg_info=False, paths=None):
    """
    Provides an iterator over the distributions found in *paths*
    (``sys.path`` when *paths* is ``None``).

    A :class:`Distribution` instance is produced for each ``.dist-info``
    directory. If the parameter *use_egg_info* is ``True``, then the egg
    distributions (``.egg-info`` and ``.egg``) are iterated as well.

    :rtype: iterator of :class:`Distribution` and :class:`EggInfoDistribution`
            instances
    """
    search_paths = sys.path if paths is None else paths

    if _cache_enabled:
        _generate_cache(use_egg_info, search_paths)

        for cached in _cache_path.values():
            yield cached

        if use_egg_info:
            for cached in _cache_path_egg.values():
                yield cached
        return

    # Cache disabled: scan the file system on every call.
    for found in _yield_distributions(True, use_egg_info, search_paths):
        yield found
500
501
def get_distribution(name, use_egg_info=False, paths=None):
    """
    Looks in *paths* (``sys.path`` when *paths* is ``None``) for a
    distribution whose *name* metadata field is *name*.

    ``.dist-info`` directories are always considered and give
    :class:`Distribution` instances. Egg distributions are considered only
    if *use_egg_info* is ``True`` and give :class:`EggInfoDistribution`
    instances.

    This function only returns the first result found, as no more than one
    value is expected. If no distribution matches, ``None`` is returned.

    :rtype: :class:`Distribution` or :class:`EggInfoDistribution` or None
    """
    search_paths = sys.path if paths is None else paths

    if _cache_enabled:
        _generate_cache(use_egg_info, search_paths)

        # .dist-info distributions take precedence over egg ones.
        if name in _cache_name:
            return _cache_name[name][0]
        if use_egg_info and name in _cache_name_egg:
            return _cache_name_egg[name][0]
        return None

    for candidate in _yield_distributions(True, use_egg_info, search_paths):
        if candidate.name == name:
            return candidate
    return None
535
536
def obsoletes_distribution(name, version=None, use_egg_info=False):
    """
    Iterates over all distributions and yields the ones that obsolete
    *name*, according to their ``Obsoletes-Dist`` and ``Obsoletes`` metadata
    fields.

    If a *version* is provided, entries that carry a version predicate are
    matched against it; otherwise only names are compared.
    If the argument *use_egg_info* is set to ``True``, then egg
    distributions will be considered as well.

    :type name: string
    :type version: string
    :raises PackagingError: if an obsoletes entry cannot be parsed as a
                            version predicate
    """
    for candidate in get_distributions(use_egg_info):
        entries = (candidate.metadata['Obsoletes-Dist'] +
                   candidate.metadata['Obsoletes'])
        for entry in entries:
            pieces = entry.split(' ', 1)
            if version is not None and len(pieces) != 1:
                # Both sides have a version: the predicate must accept it.
                try:
                    predicate = VersionPredicate(entry)
                except ValueError:
                    raise PackagingError(
                        'distribution %r has ill-formed obsoletes field: '
                        '%r' % (candidate.name, entry))
                hit = name == pieces[0] and predicate.match(version)
            else:
                hit = name == pieces[0]
            if hit:
                # one matching entry is enough for this distribution
                yield candidate
                break
569
570
def provides_distribution(name, version=None, use_egg_info=False):
    """
    Iterates over all distributions and yields the ones that provide
    *name*, according to their ``Provides-Dist`` and ``Provides`` metadata
    fields. Each distribution is yielded at most once.

    If a *version* is provided, entries of the form ``name (version)`` are
    matched against it; otherwise only names are compared. If the argument
    *use_egg_info* is set to ``True``, then egg distributions are considered
    as well and yielded as :class:`EggInfoDistribution` instances.

    :parameter version: a version specifier that indicates the version
                        required, conforming to the format in ``PEP-345``

    :type name: string
    :type version: string
    :raises PackagingError: if *name* and *version* do not form a valid
                            predicate, or if a provides entry has a
                            malformed version part
    """
    predicate = None
    if version is not None:
        try:
            predicate = VersionPredicate(name + ' (' + version + ')')
        except ValueError:
            raise PackagingError('invalid name or version: %r, %r' %
                                 (name, version))

    for candidate in get_distributions(use_egg_info):
        entries = (candidate.metadata['Provides-Dist'] +
                   candidate.metadata['Provides'])

        for entry in entries:
            pieces = entry.rsplit(' ', 1)
            if predicate is None or len(pieces) == 1:
                hit = name == pieces[0]
            else:
                provided_name, provided_version = pieces
                well_formed = (len(provided_version) >= 2 and
                               provided_version[0] == '(' and
                               provided_version[-1] == ')')
                if not well_formed:
                    raise PackagingError(
                        'distribution %r has invalid Provides field: %r' %
                        (candidate.name, entry))
                # trim off the parenthesis
                bare_version = provided_version[1:-1]
                hit = provided_name == name and predicate.match(bare_version)
            if hit:
                # one matching entry is enough for this distribution
                yield candidate
                break
618
619
def get_file_users(path):
    """
    Iterates over the distributions returned by :func:`get_distributions`
    and yields the ones whose ``uses`` method accepts *path*.

    :parameter path: can be a local absolute path or a relative
                     ``'/'``-separated path.
    :type path: string
    :rtype: iterator of :class:`Distribution` instances
    """
    for candidate in get_distributions():
        if not candidate.uses(path):
            continue
        yield candidate
Tarek Ziadea17d8882011-05-30 10:57:44 +0200633
634
def get_file_path(distribution_name, relative_path):
    """Return the path to a resource file.

    The distribution is looked up with :func:`get_distribution`; a
    :class:`LookupError` is raised when it is not found.
    """
    owner = get_distribution(distribution_name)
    if owner is None:
        raise LookupError('no distribution named %r found' % distribution_name)
    return owner.get_resource_path(relative_path)
641
642
def get_file(distribution_name, relative_path, *args, **kwargs):
    """Open and return a resource file.

    Extra positional and keyword arguments are passed on to :func:`open`.
    """
    resource_path = get_file_path(distribution_name, relative_path)
    return open(resource_path, *args, **kwargs)