blob: c71d608491a91934616c7b60f122fc9bc45e2cd7 [file] [log] [blame]
"""PEP 376 implementation."""
2
3import io
4import os
5import re
6import csv
7import sys
8import zipimport
9from hashlib import md5
10from packaging import logger
11from packaging.errors import PackagingError
12from packaging.version import suggest_normalized_version, VersionPredicate
13from packaging.metadata import Metadata
14
15
# Public API of this module.
__all__ = [
    'Distribution',
    'EggInfoDistribution',
    'distinfo_dirname',
    'get_distributions',
    'get_distribution',
    'get_file_users',
    'provides_distribution',
    'obsoletes_distribution',
    'enable_cache',
    'disable_cache',
    'clear_cache',
    'get_file_path',
    'get_file',
]
Tarek Ziade1231a4e2011-05-19 13:07:25 +020022
23
24# TODO update docs
25
# File names that Distribution.get_distinfo_file() accepts inside a
# .dist-info directory.
DIST_FILES = ('INSTALLER', 'METADATA', 'RECORD', 'REQUESTED', 'RESOURCES')

# Cache
# maps names to lists of Distribution instances
_cache_name = {}
# maps names to lists of EggInfoDistribution instances
_cache_name_egg = {}
# maps paths to Distribution instances
_cache_path = {}
# maps paths to EggInfoDistribution instances
_cache_path_egg = {}
# indicates if .dist-info distributions are cached
_cache_generated = False
# indicates if .egg(-info) distributions are cached
_cache_generated_egg = False
_cache_enabled = True
36
37
def enable_cache():
    """
    Turn the internal cache on.

    Entries already stored in the cache are left untouched; use
    :func:`clear_cache` to empty it.
    """
    global _cache_enabled
    _cache_enabled = True
48
49
def disable_cache():
    """
    Turn the internal cache off.

    Entries already stored in the cache are left untouched; use
    :func:`clear_cache` to empty it.
    """
    global _cache_enabled
    _cache_enabled = False
60
61
def clear_cache():
    """Empty every cache mapping and mark the cache as not generated."""
    global _cache_name, _cache_name_egg, _cache_path, _cache_path_egg
    global _cache_generated, _cache_generated_egg

    # Rebind to fresh dicts (rather than clearing in place) so that any
    # reference to a previous mapping is left as it was.
    _cache_name, _cache_name_egg = {}, {}
    _cache_path, _cache_path_egg = {}, {}
    _cache_generated = _cache_generated_egg = False
73
74
Éric Araujo6f677652011-06-16 23:43:15 +020075def _yield_distributions(include_dist, include_egg, paths):
Tarek Ziade1231a4e2011-05-19 13:07:25 +020076 """
77 Yield .dist-info and .egg(-info) distributions, based on the arguments
78
79 :parameter include_dist: yield .dist-info distributions
80 :parameter include_egg: yield .egg(-info) distributions
81 """
82 for path in paths:
83 realpath = os.path.realpath(path)
84 if not os.path.isdir(realpath):
85 continue
86 for dir in os.listdir(realpath):
87 dist_path = os.path.join(realpath, dir)
88 if include_dist and dir.endswith('.dist-info'):
89 yield Distribution(dist_path)
90 elif include_egg and (dir.endswith('.egg-info') or
91 dir.endswith('.egg')):
92 yield EggInfoDistribution(dist_path)
93
94
def _generate_cache(use_egg_info, paths):
    """
    Fill the module-level caches with the distributions found in *paths*.

    Nothing is done when the requested kinds of distributions have already
    been cached.

    :parameter use_egg_info: also cache .egg(-info) distributions
    :parameter paths: locations handed to :func:`_yield_distributions`
    """
    global _cache_generated, _cache_generated_egg

    if _cache_generated_egg:
        return
    if _cache_generated and not use_egg_info:
        return

    want_dist = not _cache_generated
    want_egg = use_egg_info

    for found in _yield_distributions(want_dist, want_egg, paths):
        if isinstance(found, Distribution):
            by_path, by_name = _cache_path, _cache_name
        else:
            by_path, by_name = _cache_path_egg, _cache_name_egg
        by_path[found.path] = found
        by_name.setdefault(found.name, []).append(found)

    if want_dist:
        _cache_generated = True
    if want_egg:
        _cache_generated_egg = True
120
121
class Distribution:
    """Created with the *path* of the ``.dist-info`` directory provided to the
    constructor. It reads the metadata contained in ``METADATA`` when it is
    instantiated."""

    name = ''
    """The name of the distribution."""

    version = ''
    """The version of the distribution."""

    metadata = None
    """A :class:`packaging.metadata.Metadata` instance loaded with
    the distribution's ``METADATA`` file."""

    requested = False
    """A boolean that indicates whether the ``REQUESTED`` metadata file is
    present (in other words, whether the package was installed by user
    request or it was installed as a dependency)."""

    def __init__(self, path):
        """Load the metadata of the ``.dist-info`` directory at *path*.

        When the cache is enabled, metadata already loaded for *path* is
        reused and the new instance is registered in the cache if no
        instance is stored for *path* yet.
        """
        if _cache_enabled and path in _cache_path:
            self.metadata = _cache_path[path].metadata
        else:
            metadata_path = os.path.join(path, 'METADATA')
            self.metadata = Metadata(path=metadata_path)

        self.name = self.metadata['Name']
        self.version = self.metadata['Version']
        self.path = path

        if _cache_enabled and path not in _cache_path:
            _cache_path[path] = self

    def __repr__(self):
        return '<Distribution %r %s at %r>' % (
            self.name, self.version, self.path)

    def _get_records(self, local=False):
        """Return the ``RECORD`` entries as ``(path, checksum, size)`` tuples.

        Rows with fewer than three fields are padded with ``None``; blank
        rows are ignored.  If *local* is true, each path is converted to
        the OS separator and joined to ``sys.prefix``.
        """
        results = []
        with self.get_distinfo_file('RECORD') as record:
            record_reader = csv.reader(record, delimiter=',',
                                       lineterminator='\n')
            for row in record_reader:
                if not row:
                    # csv yields an empty list for a blank line; there is
                    # no path to report for it
                    continue
                missing = [None] * (3 - len(row))
                path, checksum, size = row + missing
                if local:
                    path = path.replace('/', os.sep)
                    path = os.path.join(sys.prefix, path)
                results.append((path, checksum, size))
        return results

    def get_resource_path(self, relative_path):
        """Return the destination recorded in ``RESOURCES`` for
        *relative_path*.

        :raises KeyError: if no entry matches *relative_path*
        """
        with self.get_distinfo_file('RESOURCES') as resources_file:
            resources_reader = csv.reader(resources_file, delimiter=',',
                                          lineterminator='\n')
            for row in resources_reader:
                if not row:
                    # blank line
                    continue
                relative, destination = row
                if relative == relative_path:
                    return destination
        raise KeyError(
            'no resource file with relative path %r is installed' %
            relative_path)

    def list_installed_files(self, local=False):
        """
        Iterates over the ``RECORD`` entries and returns a tuple
        ``(path, md5, size)`` for each line. If *local* is ``True``,
        the returned path is transformed into a local absolute path.
        Otherwise the raw value from RECORD is returned.

        A local absolute path is an absolute path in which occurrences of
        ``'/'`` have been replaced by the system separator given by ``os.sep``.

        :parameter local: flag to say if the path should be returned a local
                          absolute path

        :type local: boolean
        :returns: iterator of (path, md5, size)
        """
        for result in self._get_records(local):
            yield result

    def uses(self, path):
        """
        Returns ``True`` if path is listed in ``RECORD``. *path* can be a local
        absolute path or a relative ``'/'``-separated path.

        :rtype: boolean
        """
        for record_path, checksum, size in self._get_records():
            if path == record_path:
                return True
            # RECORD paths are '/'-separated: accept both the raw spelling
            # and the one using the system separator under sys.prefix
            candidates = (
                os.path.join(sys.prefix, record_path),
                os.path.join(sys.prefix, record_path.replace('/', os.sep)))
            if path in candidates:
                return True
        return False

    def get_distinfo_file(self, path, binary=False):
        """
        Returns a file located under the ``.dist-info`` directory. Returns a
        ``file`` instance for the file pointed by *path*.

        :parameter path: a ``'/'``-separated path relative to the
                         ``.dist-info`` directory or an absolute path;
                         If *path* is an absolute path and doesn't start
                         with the ``.dist-info`` directory path,
                         a :class:`PackagingError` is raised
        :type path: string
        :parameter binary: If *binary* is ``True``, opens the file in read-only
                           binary mode (``rb``), otherwise opens it in
                           read-only mode (``r``).
        :rtype: file object
        """
        open_flags = 'rb' if binary else 'r'

        # Check if it is an absolute path  # XXX use relpath, add tests
        if os.sep in path:
            # it's an absolute path?  Only the last directory component is
            # compared with the name of this .dist-info directory.
            parent_dirname, path = path.split(os.sep)[-2:]
            if parent_dirname != self.path.split(os.sep)[-1]:
                raise PackagingError(
                    'dist-info file %r does not belong to the %r %s '
                    'distribution' % (path, self.name, self.version))

        # The file must be relative
        if path not in DIST_FILES:
            raise PackagingError('invalid path for a dist-info file: %r' %
                                 path)

        path = os.path.join(self.path, path)
        return open(path, open_flags)

    def list_distinfo_files(self, local=False):
        """
        Iterates over the ``RECORD`` entries and returns paths for each line if
        the path is pointing to a file located in the ``.dist-info`` directory
        or one of its subdirectories.

        :parameter local: If *local* is ``True``, each returned path is
                          transformed into a local absolute path. Otherwise the
                          raw value from ``RECORD`` is returned.
        :type local: boolean
        :returns: iterator of paths
        """
        distinfo_name = os.path.basename(os.path.normpath(self.path))
        for path, checksum, size in self._get_records(local):
            # Compare directory components so that both raw (relative,
            # '/'-separated) and local absolute paths are recognized.
            parent_dirs = path.replace(os.sep, '/').split('/')[:-1]
            if distinfo_name in parent_dirs:
                yield path

    def __eq__(self, other):
        return isinstance(other, Distribution) and self.path == other.path

    def __hash__(self):
        # Equality is based on the path, so the hash must be too: objects
        # that compare equal are required to have the same hash.
        # See http://docs.python.org/reference/datamodel#object.__hash__
        return hash(self.path)
274
275
class EggInfoDistribution:
    """Created with the *path* of the ``.egg-info`` directory or file provided
    to the constructor. It reads the metadata contained in the file itself, or
    if the given path happens to be a directory, the metadata is read from the
    file ``PKG-INFO`` under that directory."""

    name = ''
    """The name of the distribution."""

    version = ''
    """The version of the distribution."""

    metadata = None
    """A :class:`packaging.metadata.Metadata` instance loaded with
    the distribution's ``PKG-INFO`` file."""

    _REQUIREMENT = re.compile(
        r'(?P<name>[-A-Za-z0-9_.]+)\s*'
        r'(?P<first>(?:<|<=|!=|==|>=|>)[-A-Za-z0-9_.]+)?\s*'
        r'(?P<rest>(?:\s*,\s*(?:<|<=|!=|==|>=|>)[-A-Za-z0-9_.]+)*)\s*'
        r'(?P<extras>\[.*\])?')

    def __init__(self, path):
        """Load the metadata of the ``.egg`` or ``.egg-info`` at *path*.

        When a ``requires.txt`` file is found next to the metadata, its
        requirements are appended to the ``Requires-Dist`` field.

        :raises ValueError: if *path* ends with neither ``.egg`` nor
                            ``.egg-info``
        """
        self.path = path
        if _cache_enabled and path in _cache_path_egg:
            self.metadata = _cache_path_egg[path].metadata
            self.name = self.metadata['Name']
            self.version = self.metadata['Version']
            return

        requires = None

        if path.endswith('.egg'):
            if os.path.isdir(path):
                meta_path = os.path.join(path, 'EGG-INFO', 'PKG-INFO')
                self.metadata = Metadata(path=meta_path)
                requires = self._read_requires_file(
                    os.path.join(path, 'EGG-INFO', 'requires.txt'))
            else:
                # FIXME handle the case where zipfile is not available
                zipf = zipimport.zipimporter(path)
                fileobj = io.StringIO(
                    zipf.get_data('EGG-INFO/PKG-INFO').decode('utf8'))
                self.metadata = Metadata(fileobj=fileobj)
                try:
                    # get_data() returns bytes; the parser works on text
                    requires = zipf.get_data(
                        'EGG-INFO/requires.txt').decode('utf8')
                except IOError:
                    requires = None

        elif path.endswith('.egg-info'):
            if os.path.isdir(path):
                # requires.txt lives in the directory, next to PKG-INFO, so
                # it has to be located before *path* is pointed at PKG-INFO
                requires = self._read_requires_file(
                    os.path.join(path, 'requires.txt'))
                path = os.path.join(path, 'PKG-INFO')
            self.metadata = Metadata(path=path)

        else:
            raise ValueError('path must end with .egg-info or .egg, got %r' %
                             path)

        self.name = self.metadata['Name']
        self.version = self.metadata['Version']

        if requires is not None:
            if self.metadata['Metadata-Version'] == '1.1':
                # we can't have 1.1 metadata *and* Setuptools requires
                for field in ('Obsoletes', 'Requires', 'Provides'):
                    del self.metadata[field]

            reqs = self._parse_requires(requires)
            if reqs:
                self.metadata['Requires-Dist'] += reqs

        if _cache_enabled:
            _cache_path_egg[self.path] = self

    @staticmethod
    def _read_requires_file(req_path):
        """Return the text of *req_path*, or ``None`` if it can't be read."""
        try:
            with open(req_path, 'r') as fp:
                return fp.read()
        except IOError:
            return None

    def _parse_requires(self, requires):
        """Convert the text of a ``requires.txt`` file into a list of
        ``'name'`` or ``'name (versions)'`` requirement strings.

        Blank lines and ``#`` comments are skipped.  Parsing stops at the
        first ``[section]`` line or at the first line that does not look
        like a requirement.
        """
        reqs = []
        for line in requires.splitlines():
            line = line.strip()
            # skip blank lines/comments
            if not line or line.startswith('#'):
                continue
            if line.startswith('['):
                logger.warning(
                    'extensions in requires.txt are not supported '
                    '(used by %r %s)', self.name, self.version)
                break
            match = self._REQUIREMENT.match(line)
            if not match:
                # this happens when we encounter extras; since they
                # are written at the end of the file we just exit
                break
            if match.group('extras'):
                logger.warning(
                    'extra requirements are not supported '
                    '(used by %r %s)', self.name, self.version)
            name = match.group('name')
            version = None
            if match.group('first'):
                version = match.group('first')
                if match.group('rest'):
                    version += match.group('rest')
                version = version.replace(' ', '')  # trim spaces
            if version is None:
                reqs.append(name)
            else:
                reqs.append('%s (%s)' % (name, version))
        return reqs

    def __repr__(self):
        return '<EggInfoDistribution %r %s at %r>' % (
            self.name, self.version, self.path)

    def list_installed_files(self, local=False):
        """Return a list of ``(path, md5, size)`` tuples for the files that
        make up this distribution.

        If the distribution path is a file, the list holds that single
        file; otherwise it holds every file found by walking the path.

        :parameter local: if true, ``'/'`` in the distribution path is
                          replaced by ``os.sep`` before it is used
        :type local: boolean
        """

        def _md5(path):
            digest = md5()
            with open(path, 'rb') as f:
                # read by chunks so that big files are not loaded at once
                for chunk in iter(lambda: f.read(65536), b''):
                    digest.update(chunk)
            return digest.hexdigest()

        def _size(path):
            return os.stat(path).st_size

        path = self.path
        if local:
            path = path.replace('/', os.sep)

        # XXX What about scripts and data files ?
        if os.path.isfile(path):
            return [(path, _md5(path), _size(path))]

        files = []
        for root, dirnames, filenames in os.walk(path):
            for item in filenames:
                item = os.path.join(root, item)
                files.append((item, _md5(item), _size(item)))
        return files

    def uses(self, path):
        """Always return ``False``: no record of installed files is
        consulted for this kind of distribution."""
        return False

    def __eq__(self, other):
        return (isinstance(other, EggInfoDistribution) and
                self.path == other.path)

    def __hash__(self):
        # Equality is based on the path, so the hash must be too: objects
        # that compare equal are required to have the same hash.
        # See http://docs.python.org/reference/datamodel#object.__hash__
        return hash(self.path)
446
447
def distinfo_dirname(name, version):
    """
    Build the name of the ``.dist-info`` directory for a distribution.

    :parameter name: distribution name; every ``'-'`` it contains is
                     replaced with ``'_'``
    :type name: string
    :parameter version: version string; it is passed through
                        ``suggest_normalized_version`` and used unchanged
                        when no normalized form is suggested
    :type version: string
    :returns: directory name, of the form ``<name>-<version>.dist-info``
    :rtype: string"""
    escaped_name = name.replace('-', '_')
    version_part = suggest_normalized_version(version)
    if version_part is None:
        # This is a lookup procedure: still return something for a version
        # that cannot be normalized.
        version_part = version
    return escaped_name + '-' + version_part + '.dist-info'
475
476
def get_distributions(use_egg_info=False, paths=None):
    """
    Provides an iterator over the installed distributions: a
    :class:`Distribution` instance for each ``.dist-info`` directory and,
    if *use_egg_info* is ``True``, an :class:`EggInfoDistribution` instance
    for each ``.egg-info`` or ``.egg`` entry as well.

    :parameter use_egg_info: also iterate over ``.egg(-info)`` distributions
    :parameter paths: locations to scan; defaults to ``sys.path``
    :rtype: iterator of :class:`Distribution` and :class:`EggInfoDistribution`
            instances
    """
    search_paths = sys.path if paths is None else paths

    if _cache_enabled:
        _generate_cache(use_egg_info, search_paths)

        for cached in _cache_path.values():
            yield cached

        if use_egg_info:
            for cached in _cache_path_egg.values():
                yield cached
    else:
        for found in _yield_distributions(True, use_egg_info, search_paths):
            yield found
502
503
def get_distribution(name, use_egg_info=False, paths=None):
    """
    Looks for an installed distribution whose *name* metadata field equals
    *name* and returns it.

    ``.dist-info`` distributions are considered first. If *use_egg_info* is
    ``True``, ``.egg-info`` and ``.egg`` distributions are considered too and
    an :class:`EggInfoDistribution` instance may be returned.

    Only the first result found is returned, as no more than one value is
    expected. If no distribution matches, ``None`` is returned.

    :parameter paths: locations to scan; defaults to ``sys.path``
    :rtype: :class:`Distribution` or :class:`EggInfoDistribution` or None
    """
    search_paths = sys.path if paths is None else paths

    if not _cache_enabled:
        candidates = _yield_distributions(True, use_egg_info, search_paths)
        return next((dist for dist in candidates if dist.name == name), None)

    _generate_cache(use_egg_info, search_paths)

    matches = _cache_name.get(name)
    if matches is None and use_egg_info:
        matches = _cache_name_egg.get(name)
    if matches is None:
        return None
    return matches[0]
537
538
def obsoletes_distribution(name, version=None, use_egg_info=False):
    """
    Iterates over all distributions to find which distributions obsolete
    *name*.

    If a *version* is provided, it will be used to filter the results.
    If the argument *use_egg_info* is set to ``True``, then ``.egg-info``
    distributions will be considered as well.

    Each matching distribution is yielded once.

    :parameter name: name of the distribution that may be obsoleted
    :type name: string
    :parameter version: version checked against the version predicate of
                        each obsoletes entry
    :type version: string
    """
    for dist in get_distributions(use_egg_info):
        declared = (dist.metadata['Obsoletes-Dist'] +
                    dist.metadata['Obsoletes'])
        for entry in declared:
            obsoleted_name = entry.split(' ', 1)[0]
            has_version_part = ' ' in entry
            if version is None or not has_version_part:
                matched = (name == obsoleted_name)
            else:
                try:
                    predicate = VersionPredicate(entry)
                except ValueError:
                    raise PackagingError(
                        'distribution %r has ill-formed obsoletes field: '
                        '%r' % (dist.name, entry))
                matched = (name == obsoleted_name and
                           predicate.match(version))
            if matched:
                yield dist
                break
571
572
def provides_distribution(name, version=None, use_egg_info=False):
    """
    Iterates over all distributions to find which distributions provide *name*.
    If a *version* is provided, it will be used to filter the results.
    If the argument *use_egg_info* is set to ``True``, then ``.egg-info``
    distributions are considered as well.

    Each matching distribution is yielded once.

    :parameter version: a version specifier that indicates the version
                        required, conforming to the format in ``PEP-345``

    :type name: string
    :type version: string
    """
    predicate = None
    if version is not None:
        try:
            predicate = VersionPredicate(name + ' (' + version + ')')
        except ValueError:
            raise PackagingError('invalid name or version: %r, %r' %
                                 (name, version))

    for dist in get_distributions(use_egg_info):
        declared = dist.metadata['Provides-Dist'] + dist.metadata['Provides']

        for entry in declared:
            parts = entry.rsplit(' ', 1)
            if predicate is None or len(parts) == 1:
                matched = (name == parts[0])
            else:
                provided_name, provided_version = parts
                well_formed = (len(provided_version) >= 2 and
                               provided_version[0] == '(' and
                               provided_version[-1] == ')')
                if not well_formed:
                    raise PackagingError(
                        'distribution %r has invalid Provides field: %r' %
                        (dist.name, entry))
                # trim off the parenthesis
                provided_version = provided_version[1:-1]
                matched = (provided_name == name and
                           predicate.match(provided_version))
            if matched:
                yield dist
                break
620
621
def get_file_users(path):
    """
    Iterates over all distributions and yields the ones that use *path*.

    :parameter path: can be a local absolute path or a relative
                     ``'/'``-separated path.
    :type path: string
    :rtype: iterator of :class:`Distribution` instances
    """
    users = (dist for dist in get_distributions() if dist.uses(path))
    for user in users:
        yield user
Tarek Ziadea17d8882011-05-30 10:57:44 +0200635
636
def get_file_path(distribution_name, relative_path):
    """Return the path to a resource file.

    :raises LookupError: if no distribution named *distribution_name* is
                         found
    """
    dist = get_distribution(distribution_name)
    if dist is None:
        raise LookupError('no distribution named %r found' % distribution_name)
    return dist.get_resource_path(relative_path)
643
644
def get_file(distribution_name, relative_path, *args, **kwargs):
    """Open and return a resource file.

    Extra positional and keyword arguments are handed to ``open``.
    """
    resource_path = get_file_path(distribution_name, relative_path)
    return open(resource_path, *args, **kwargs)