"""PEP 376 implementation."""
2
3import io
4import os
5import re
6import csv
7import sys
8import zipimport
9from hashlib import md5
10from packaging import logger
11from packaging.errors import PackagingError
12from packaging.version import suggest_normalized_version, VersionPredicate
13from packaging.metadata import Metadata
14
15
# Public API of this module.
__all__ = [
    'Distribution',
    'EggInfoDistribution',
    'distinfo_dirname',
    'get_distributions',
    'get_distribution',
    'get_file_users',
    'provides_distribution',
    'obsoletes_distribution',
    'enable_cache',
    'disable_cache',
    'clear_cache',
    'get_file_path',
    'get_file',
]
Tarek Ziade1231a4e2011-05-19 13:07:25 +020022
23
24# TODO update docs
25
# File names accepted by Distribution.get_distinfo_file().
DIST_FILES = (
    'INSTALLER',
    'METADATA',
    'RECORD',
    'REQUESTED',
    'RESOURCES',
)

# Cache
# The *_name caches hold a list per name; the *_path caches hold one
# instance per path.
_cache_name = dict()  # name -> list of Distribution instances
_cache_name_egg = dict()  # name -> list of EggInfoDistribution instances
_cache_path = dict()  # path -> Distribution instance
_cache_path_egg = dict()  # path -> EggInfoDistribution instance
_cache_generated = False  # True once .dist-info distributions are cached
_cache_generated_egg = False  # True once .dist-info and .egg are cached
_cache_enabled = True
36
37
def enable_cache():
    """
    Turn the internal cache on.

    Whatever is already cached is left untouched; use :func:`clear_cache`
    to empty the cache.
    """
    global _cache_enabled
    _cache_enabled = True
48
49
def disable_cache():
    """
    Turn the internal cache off.

    Whatever is already cached is left untouched; use :func:`clear_cache`
    to empty the cache.
    """
    global _cache_enabled
    _cache_enabled = False
60
61
def clear_cache():
    """Empty every internal cache and mark both as not generated."""
    global _cache_generated, _cache_generated_egg

    all_caches = (_cache_name, _cache_name_egg,
                  _cache_path, _cache_path_egg)
    for cache in all_caches:
        cache.clear()

    _cache_generated = _cache_generated_egg = False
72
73
def _yield_distributions(include_dist, include_egg, paths):
    """
    Generate the distributions found directly inside each entry of *paths*.

    :parameter include_dist: yield .dist-info distributions
    :parameter include_egg: yield .egg(-info) distributions
    :parameter paths: iterable of locations to scan; a location whose
                      real path is not a directory is skipped
    """
    egg_suffixes = ('.egg-info', '.egg')
    for location in paths:
        resolved = os.path.realpath(location)
        if os.path.isdir(resolved):
            for entry in os.listdir(resolved):
                candidate = os.path.join(resolved, entry)
                if include_dist and entry.endswith('.dist-info'):
                    yield Distribution(candidate)
                elif include_egg and entry.endswith(egg_suffixes):
                    yield EggInfoDistribution(candidate)
92
93
def _generate_cache(use_egg_info, paths):
    """
    Fill the module-level caches by scanning *paths*.

    Nothing is done when the requested kinds are already flagged as
    generated.  .dist-info distributions are scanned only if they are not
    cached yet; egg distributions are scanned when *use_egg_info* is true.
    """
    global _cache_generated, _cache_generated_egg

    if _cache_generated_egg:
        return
    if _cache_generated and not use_egg_info:
        return

    scan_dist = not _cache_generated
    scan_egg = use_egg_info

    for found in _yield_distributions(scan_dist, scan_egg, paths):
        if isinstance(found, Distribution):
            by_path, by_name = _cache_path, _cache_name
        else:
            by_path, by_name = _cache_path_egg, _cache_name_egg
        by_path[found.path] = found
        # several distributions may share one name, hence the list
        by_name.setdefault(found.name, []).append(found)

    if scan_dist:
        _cache_generated = True
    if scan_egg:
        _cache_generated_egg = True
119
120
class Distribution:
    """Created with the *path* of the ``.dist-info`` directory provided to the
    constructor. It reads the metadata contained in ``METADATA`` when it is
    instantiated."""

    name = ''
    """The name of the distribution."""

    version = ''
    """The version of the distribution."""

    metadata = None
    """A :class:`packaging.metadata.Metadata` instance loaded with
    the distribution's ``METADATA`` file."""

    # NOTE(review): nothing in this class sets this attribute from the
    # ``REQUESTED`` file; it keeps the class-level default unless a caller
    # assigns it.
    requested = False
    """A boolean that indicates whether the ``REQUESTED`` metadata file is
    present (in other words, whether the package was installed by user
    request or it was installed as a dependency)."""

    def __init__(self, path):
        """Load the metadata for the ``.dist-info`` directory at *path*.

        When the cache is enabled, the metadata of an already cached
        distribution with the same path is reused, and a new instance is
        registered in the path cache if that path is not cached yet.
        """
        if _cache_enabled and path in _cache_path:
            self.metadata = _cache_path[path].metadata
        else:
            metadata_path = os.path.join(path, 'METADATA')
            self.metadata = Metadata(path=metadata_path)

        self.name = self.metadata['Name']
        self.version = self.metadata['Version']
        self.path = path

        if _cache_enabled and path not in _cache_path:
            _cache_path[path] = self

    def __repr__(self):
        return '<Distribution %r %s at %r>' % (
            self.name, self.version, self.path)

    @staticmethod
    def _local_path(record_path):
        """Turn a ``'/'``-separated RECORD path into a local path joined
        onto ``sys.prefix``."""
        return os.path.join(sys.prefix, record_path.replace('/', os.sep))

    def _get_records(self, local=False):
        """Return the ``RECORD`` entries as a list of
        ``(path, checksum, size)`` tuples.

        Fields missing from a row are returned as ``None``.  Blank lines
        are skipped.  If *local* is true, each path is converted with
        :meth:`_local_path`.
        """
        results = []
        with self.get_distinfo_file('RECORD') as record:
            record_reader = csv.reader(record, delimiter=',',
                                       lineterminator='\n')
            for row in record_reader:
                if not row:
                    # csv.reader yields an empty list for a blank line;
                    # there is no path to report for it
                    continue
                # pad short rows; rows longer than three fields still fail
                # to unpack, as before
                path, checksum, size = row + [None] * (3 - len(row))
                if local:
                    path = self._local_path(path)
                results.append((path, checksum, size))
        return results

    def get_resource_path(self, relative_path):
        """Return the destination recorded in ``RESOURCES`` for
        *relative_path*.

        :raises KeyError: if no row of ``RESOURCES`` has *relative_path*
                          as its first field
        """
        with self.get_distinfo_file('RESOURCES') as resources_file:
            resources_reader = csv.reader(resources_file, delimiter=',',
                                          lineterminator='\n')
            for row in resources_reader:
                if not row:
                    # blank line, nothing to compare
                    continue
                relative, destination = row
                if relative == relative_path:
                    return destination
        raise KeyError(
            'no resource file with relative path %r is installed' %
            relative_path)

    def list_installed_files(self, local=False):
        """
        Iterates over the ``RECORD`` entries and returns a tuple
        ``(path, md5, size)`` for each line. If *local* is ``True``,
        the returned path is transformed into a local absolute path.
        Otherwise the raw value from RECORD is returned.

        A local absolute path is an absolute path in which occurrences of
        ``'/'`` have been replaced by the system separator given by ``os.sep``.

        :parameter local: flag to say if the path should be returned as a local
                          absolute path

        :type local: boolean
        :returns: iterator of (path, md5, size)
        """
        for result in self._get_records(local):
            yield result

    def uses(self, path):
        """
        Returns ``True`` if path is listed in ``RECORD``. *path* can be a local
        absolute path or a relative ``'/'``-separated path.

        :rtype: boolean
        """
        for p, checksum, size in self._get_records():
            if path == p:
                return True
            # Accept the plain join onto sys.prefix as well as the form
            # with '/' converted to os.sep, which is what
            # list_installed_files(local=True) reports.
            if path in (os.path.join(sys.prefix, p), self._local_path(p)):
                return True
        return False

    def get_distinfo_file(self, path, binary=False):
        """
        Returns a file located under the ``.dist-info`` directory. Returns a
        ``file`` instance for the file pointed by *path*.

        :parameter path: a ``'/'``-separated path relative to the
                         ``.dist-info`` directory or an absolute path;
                         If *path* is an absolute path and doesn't start
                         with the ``.dist-info`` directory path,
                         a :class:`PackagingError` is raised
        :type path: string
        :parameter binary: If *binary* is ``True``, opens the file in read-only
                           binary mode (``rb``), otherwise opens it in
                           read-only mode (``r``).
        :rtype: file object
        """
        open_flags = 'rb' if binary else 'r'

        # Check if it is an absolute path  # XXX use relpath, add tests
        if os.sep in path:
            # only the last two components are looked at: the parent
            # directory name and the file name
            parent_dir, path = path.split(os.sep)[-2:]
            if parent_dir != self.path.split(os.sep)[-1]:
                raise PackagingError(
                    'dist-info file %r does not belong to the %r %s '
                    'distribution' % (path, self.name, self.version))

        # The file must be relative
        if path not in DIST_FILES:
            raise PackagingError('invalid path for a dist-info file: %r' %
                                 path)

        return open(os.path.join(self.path, path), open_flags)

    def list_distinfo_files(self, local=False):
        """
        Iterates over the ``RECORD`` entries and returns paths for each line if
        the path is pointing to a file located in the ``.dist-info`` directory
        or one of its subdirectories.

        :parameter local: If *local* is ``True``, each returned path is
                          transformed into a local absolute path. Otherwise the
                          raw value from ``RECORD`` is returned.
        :type local: boolean
        :returns: iterator of paths
        """
        base = self.path.rstrip('/' + os.sep)
        # Require a separator after the directory name so that a sibling
        # such as ``foo.dist-info-extra`` is not mistaken for a child.
        # Both separators are accepted because raw RECORD paths use '/'.
        prefixes = (base + os.sep, base + '/')
        for path, checksum, size in self._get_records(local):
            if path in (base, self.path) or path.startswith(prefixes):
                yield path

    def __eq__(self, other):
        return isinstance(other, Distribution) and self.path == other.path

    def __hash__(self):
        # Keep hashing consistent with __eq__, which compares paths.
        return hash(self.path)
275
276
class EggInfoDistribution:
    """Created with the *path* of the ``.egg-info`` directory or file provided
    to the constructor. It reads the metadata contained in the file itself, or
    if the given path happens to be a directory, the metadata is read from the
    file ``PKG-INFO`` under that directory."""

    name = ''
    """The name of the distribution."""

    version = ''
    """The version of the distribution."""

    metadata = None
    """A :class:`packaging.metadata.Metadata` instance loaded with
    the distribution's ``PKG-INFO`` data."""

    _REQUIREMENT = re.compile(
        r'(?P<name>[-A-Za-z0-9_.]+)\s*'
        r'(?P<first>(?:<|<=|!=|==|>=|>)[-A-Za-z0-9_.]+)?\s*'
        r'(?P<rest>(?:\s*,\s*(?:<|<=|!=|==|>=|>)[-A-Za-z0-9_.]+)*)\s*'
        r'(?P<extras>\[.*\])?')

    def __init__(self, path):
        """Load metadata (and ``requires.txt`` when present) for *path*.

        *path* must end with ``.egg`` (directory or zip file) or
        ``.egg-info`` (directory or file).

        :raises ValueError: if *path* has neither suffix
        """
        self.path = path
        if _cache_enabled and path in _cache_path_egg:
            self.metadata = _cache_path_egg[path].metadata
            self.name = self.metadata['Name']
            self.version = self.metadata['Version']
            return

        requires = None

        if path.endswith('.egg'):
            if os.path.isdir(path):
                meta_path = os.path.join(path, 'EGG-INFO', 'PKG-INFO')
                self.metadata = Metadata(path=meta_path)
                requires = self._read_requires(
                    os.path.join(path, 'EGG-INFO', 'requires.txt'))
            else:
                # FIXME handle the case where zipfile is not available
                zipf = zipimport.zipimporter(path)
                fileobj = io.StringIO(
                    zipf.get_data('EGG-INFO/PKG-INFO').decode('utf8'))
                self.metadata = Metadata(fileobj=fileobj)
                try:
                    # get_data() returns bytes; the parser below works on
                    # text, so decode like PKG-INFO above
                    requires = zipf.get_data(
                        'EGG-INFO/requires.txt').decode('utf8')
                except IOError:
                    requires = None

        elif path.endswith('.egg-info'):
            if os.path.isdir(path):
                # requires.txt sits next to PKG-INFO, so look it up before
                # *path* is redirected to the PKG-INFO file
                requires = self._read_requires(
                    os.path.join(path, 'requires.txt'))
                path = os.path.join(path, 'PKG-INFO')
            self.metadata = Metadata(path=path)

        else:
            raise ValueError('path must end with .egg-info or .egg, got %r' %
                             path)

        self.name = self.metadata['Name']
        self.version = self.metadata['Version']

        if requires is not None:
            if self.metadata['Metadata-Version'] == '1.1':
                # we can't have 1.1 metadata *and* Setuptools requires
                for field in ('Obsoletes', 'Requires', 'Provides'):
                    try:
                        del self.metadata[field]
                    except KeyError:
                        # presumably raised when the field is absent
                        # (Metadata is defined elsewhere); nothing to drop
                        pass

            reqs = self._parse_requires(requires)
            if reqs:
                self.metadata['Requires-Dist'] += reqs

        if _cache_enabled:
            _cache_path_egg[self.path] = self

    @staticmethod
    def _read_requires(req_path):
        """Return the text of *req_path*, or ``None`` if it cannot be
        opened."""
        try:
            with open(req_path, 'r') as fp:
                return fp.read()
        except IOError:
            return None

    # reused from Distribute's pkg_resources
    @staticmethod
    def _yield_lines(strs):
        """Yield non-empty/non-comment lines of a string or of a (nested)
        sequence of strings."""
        if isinstance(strs, str):
            for s in strs.splitlines():
                s = s.strip()
                # skip blank lines/comments
                if s and not s.startswith('#'):
                    yield s
        else:
            for ss in strs:
                for s in EggInfoDistribution._yield_lines(ss):
                    yield s

    def _parse_requires(self, requires):
        """Convert ``requires.txt`` content into a list of requirement
        strings of the form ``name`` or ``name (version-spec)``.

        Parsing stops at the first ``[section]`` header or at the first
        line that does not match ``_REQUIREMENT``.
        """
        reqs = []
        for line in self._yield_lines(requires):
            if line.startswith('['):
                logger.warning(
                    'extensions in requires.txt are not supported '
                    '(used by %r %s)', self.name, self.version)
                break

            match = self._REQUIREMENT.match(line.strip())
            if not match:
                # this happens when we encounter extras; since they
                # are written at the end of the file we just exit
                break

            if match.group('extras'):
                logger.warning(
                    'extra requirements are not supported '
                    '(used by %r %s)', self.name, self.version)

            name = match.group('name')
            version = match.group('first')
            if version:
                # 'rest' is '' when there are no further clauses
                version = (version + match.group('rest')).replace(' ', '')
                reqs.append('%s (%s)' % (name, version))
            else:
                reqs.append(name)
        return reqs

    def __repr__(self):
        return '<EggInfoDistribution %r %s at %r>' % (
            self.name, self.version, self.path)

    def list_installed_files(self, local=False):
        """Return a list of ``(path, md5, size)`` tuples for the egg.

        If the egg path is a file, the list has that single file; otherwise
        it has every file found by walking the path.  If *local* is true,
        ``'/'`` in the egg path is replaced by ``os.sep`` first.
        """

        def _md5(path):
            digest = md5()
            with open(path, 'rb') as f:
                # read in chunks so large files are not loaded whole
                for chunk in iter(lambda: f.read(65536), b''):
                    digest.update(chunk)
            return digest.hexdigest()

        def _size(path):
            return os.stat(path).st_size

        path = self.path
        if local:
            path = path.replace('/', os.sep)

        # XXX What about scripts and data files ?
        if os.path.isfile(path):
            return [(path, _md5(path), _size(path))]

        files = []
        for root, dirnames, filenames in os.walk(path):
            for item in filenames:
                item = os.path.join(root, item)
                files.append((item, _md5(item), _size(item)))
        return files

    def uses(self, path):
        """Always return ``False``."""
        return False

    def __eq__(self, other):
        return (isinstance(other, EggInfoDistribution) and
                self.path == other.path)

    def __hash__(self):
        # Keep hashing consistent with __eq__, which compares paths.
        return hash(self.path)
447
448
def distinfo_dirname(name, version):
    """
    Build the ``.dist-info`` directory name for *name* and *version*.

    Every ``'-'`` in *name* is replaced with ``'_'``.  *version* is passed
    through :func:`suggest_normalized_version`; when that returns ``None``
    the version is used as given, because this is a lookup procedure and
    something must be returned either way.

    :parameter name: the distribution name
    :type name: string
    :parameter version: the distribution version
    :type version: string
    :returns: directory name of the form ``<name>-<version>.dist-info``
    :rtype: string"""
    escaped_name = name.replace('-', '_')

    version_part = suggest_normalized_version(version)
    if version_part is None:
        # the version cannot be normalized; fall back to the raw value
        version_part = version

    stem = '-'.join((escaped_name, version_part))
    return stem + '.dist-info'
476
477
def get_distributions(use_egg_info=False, paths=None):
    """
    Provides an iterator that looks for ``.dist-info`` directories in
    *paths* (``sys.path`` when *paths* is ``None``) and yields a
    :class:`Distribution` instance for each one of them.  If *use_egg_info*
    is ``True``, the ``.egg-info`` files and directories are iterated as
    well.

    With the cache enabled, the cache is generated if needed and the cached
    instances are yielded.

    :rtype: iterator of :class:`Distribution` and :class:`EggInfoDistribution`
            instances
    """
    search_paths = sys.path if paths is None else paths

    if _cache_enabled:
        _generate_cache(use_egg_info, search_paths)

        for cached in _cache_path.values():
            yield cached

        if use_egg_info:
            for cached_egg in _cache_path_egg.values():
                yield cached_egg
    else:
        for found in _yield_distributions(True, use_egg_info, search_paths):
            yield found
503
504
def get_distribution(name, use_egg_info=False, paths=None):
    """
    Return the first distribution whose name is *name*, or ``None``.

    *paths* (``sys.path`` when ``None``) is scanned for ``.dist-info``
    directories and, if *use_egg_info* is ``True``, for ``.egg`` and
    ``.egg-info`` entries too.  With the cache enabled, a
    :class:`Distribution` with that name is preferred over an
    :class:`EggInfoDistribution`.

    Only the first result found is returned, as no more than one value is
    expected.

    :rtype: :class:`Distribution` or :class:`EggInfoDistribution` or None
    """
    search_paths = sys.path if paths is None else paths

    if not _cache_enabled:
        scanned = _yield_distributions(True, use_egg_info, search_paths)
        for candidate in scanned:
            if candidate.name == name:
                return candidate
        return None

    _generate_cache(use_egg_info, search_paths)

    if name in _cache_name:
        return _cache_name[name][0]
    if use_egg_info and name in _cache_name_egg:
        return _cache_name_egg[name][0]
    return None
538
539
def obsoletes_distribution(name, version=None, use_egg_info=False):
    """
    Yield every distribution that obsoletes *name*.

    A distribution is yielded at most once.  If *version* is given, an
    obsoletes entry that carries a version specification must also match
    it.  If *use_egg_info* is ``True``, egg distributions are considered
    as well.

    :type name: string
    :type version: string
    :raises PackagingError: if an obsoletes entry cannot be parsed as a
                            version predicate
    """
    for dist in get_distributions(use_egg_info):
        entries = (dist.metadata['Obsoletes-Dist'] +
                   dist.metadata['Obsoletes'])
        for entry in entries:
            parts = entry.split(' ', 1)
            obsoleted_name = parts[0]

            if len(parts) > 1 and version is not None:
                try:
                    predicate = VersionPredicate(entry)
                except ValueError:
                    raise PackagingError(
                        'distribution %r has ill-formed obsoletes field: '
                        '%r' % (dist.name, entry))
                hit = name == obsoleted_name and predicate.match(version)
            else:
                # no version to check, the name alone decides
                hit = name == obsoleted_name

            if hit:
                yield dist
                break
572
573
def provides_distribution(name, version=None, use_egg_info=False):
    """
    Yield every distribution that provides *name*.

    A distribution is yielded at most once.  If *version* is given, a
    provides entry that carries a version must satisfy it.  If
    *use_egg_info* is ``True``, egg distributions are considered as well.

    :parameter version: a version specifier that indicates the version
                        required, conforming to the format in ``PEP-345``

    :type name: string
    :type version: string
    :raises PackagingError: if *name* and *version* do not form a valid
                            predicate, or if a provides entry has a
                            version part that is not parenthesized
    """
    predicate = None
    if version is not None:
        try:
            predicate = VersionPredicate(name + ' (' + version + ')')
        except ValueError:
            raise PackagingError('invalid name or version: %r, %r' %
                                 (name, version))

    for dist in get_distributions(use_egg_info):
        entries = dist.metadata['Provides-Dist'] + dist.metadata['Provides']

        for entry in entries:
            parts = entry.rsplit(' ', 1)

            if predicate is None or len(parts) == 1:
                # no version to check, the name alone decides
                if name == parts[0]:
                    yield dist
                    break
                continue

            provided_name, provided_version = parts
            well_formed = (len(provided_version) >= 2 and
                           provided_version[0] == '(' and
                           provided_version[-1] == ')')
            if not well_formed:
                raise PackagingError(
                    'distribution %r has invalid Provides field: %r' %
                    (dist.name, entry))
            provided_version = provided_version[1:-1]  # drop the parentheses
            if provided_name == name and predicate.match(provided_version):
                yield dist
                break
621
622
def get_file_users(path):
    """
    Yield each distribution returned by :func:`get_distributions` whose
    ``uses(path)`` is true.

    :parameter path: can be a local absolute path or a relative
                     ``'/'``-separated path.
    :type path: string
    :rtype: iterator of :class:`Distribution` instances
    """
    for candidate in get_distributions():
        if not candidate.uses(path):
            continue
        yield candidate
Tarek Ziadea17d8882011-05-30 10:57:44 +0200636
637
def get_file_path(distribution_name, relative_path):
    """Return the path to a resource file.

    :raises LookupError: if no distribution named *distribution_name* is
                         found
    """
    dist = get_distribution(distribution_name)
    if dist is None:
        raise LookupError(
            'no distribution named %r found' % distribution_name)
    return dist.get_resource_path(relative_path)
644
645
def get_file(distribution_name, relative_path, *args, **kwargs):
    """Open and return a resource file.

    Extra positional and keyword arguments are passed on to ``open``.
    """
    resource_path = get_file_path(distribution_name, relative_path)
    return open(resource_path, *args, **kwargs)