blob: 0aaa0cdc91f37e797c6522ae24fd88fab738bac9 [file] [log] [blame]
Tarek Ziade1231a4e2011-05-19 13:07:25 +02001"""PEP 376 implementation."""
2
3import io
4import os
5import re
6import csv
7import sys
8import zipimport
9from hashlib import md5
10from packaging import logger
11from packaging.errors import PackagingError
12from packaging.version import suggest_normalized_version, VersionPredicate
13from packaging.metadata import Metadata
14
15
# Public API of this module.
__all__ = [
    'Distribution',
    'EggInfoDistribution',
    'distinfo_dirname',
    'get_distributions',
    'get_distribution',
    'get_file_users',
    'provides_distribution',
    'obsoletes_distribution',
    'enable_cache',
    'disable_cache',
    'clear_cache',
    'get_file_path',
    'get_file',
]
Tarek Ziade1231a4e2011-05-19 13:07:25 +020022
23
24# TODO update docs
25
# Names of the files that may be opened inside a .dist-info directory.
DIST_FILES = ('INSTALLER', 'METADATA', 'RECORD', 'REQUESTED', 'RESOURCES')

# Cache
# maps names to lists of Distribution instances
_cache_name = {}
# maps names to lists of EggInfoDistribution instances
_cache_name_egg = {}
# maps paths to Distribution instances
_cache_path = {}
# maps paths to EggInfoDistribution instances
_cache_path_egg = {}
# set once the .dist-info distributions have been scanned into the cache
_cache_generated = False
# set once the .egg/.egg-info distributions have been scanned into the cache
_cache_generated_egg = False
# global switch toggled by enable_cache() / disable_cache()
_cache_enabled = True
36
37
def enable_cache():
    """
    Turn the internal cache on.

    The content of the cache is left untouched by this call; use
    :func:`clear_cache` to empty it.
    """
    global _cache_enabled
    _cache_enabled = True
48
49
def disable_cache():
    """
    Turn the internal cache off.

    The content of the cache is left untouched by this call; use
    :func:`clear_cache` to empty it.
    """
    global _cache_enabled
    _cache_enabled = False
60
61
def clear_cache():
    """Empty every internal cache and mark both caches as not generated."""
    global _cache_generated, _cache_generated_egg

    # The dictionaries are emptied in place so that existing references to
    # them stay valid.
    for cache in (_cache_name, _cache_name_egg, _cache_path, _cache_path_egg):
        cache.clear()
    _cache_generated = _cache_generated_egg = False
72
73
def _yield_distributions(include_dist, include_egg, paths):
    """
    Scan *paths* and yield a distribution object for every matching entry.

    Each element of *paths* is resolved with :func:`os.path.realpath`;
    elements that are not directories are skipped.

    :parameter include_dist: yield a :class:`Distribution` for each entry
                             whose name ends with ``.dist-info``
    :parameter include_egg: yield an :class:`EggInfoDistribution` for each
                            entry whose name ends with ``.egg-info`` or
                            ``.egg``
    :parameter paths: iterable of directories to scan
    """
    egg_suffixes = ('.egg-info', '.egg')
    for entry in paths:
        location = os.path.realpath(entry)
        if os.path.isdir(location):
            for filename in os.listdir(location):
                full_path = os.path.join(location, filename)
                if include_dist and filename.endswith('.dist-info'):
                    yield Distribution(full_path)
                elif include_egg and filename.endswith(egg_suffixes):
                    yield EggInfoDistribution(full_path)
92
93
def _generate_cache(use_egg_info, paths):
    """
    Fill the internal caches with the distributions found in *paths*.

    Nothing is done when everything requested has already been scanned.
    ``.dist-info`` distributions are scanned only if they were not scanned
    before; ``.egg``/``.egg-info`` distributions are scanned when
    *use_egg_info* is true.

    :parameter use_egg_info: also cache ``.egg``/``.egg-info`` distributions
    :parameter paths: iterable of directories to scan
    """
    global _cache_generated, _cache_generated_egg

    if _cache_generated_egg:
        return
    if _cache_generated and not use_egg_info:
        return

    scan_dist = not _cache_generated
    scan_egg = use_egg_info

    for found in _yield_distributions(scan_dist, scan_egg, paths):
        if isinstance(found, Distribution):
            by_path, by_name = _cache_path, _cache_name
        else:
            by_path, by_name = _cache_path_egg, _cache_name_egg
        by_path[found.path] = found
        # several distributions may share one name, hence the lists
        by_name.setdefault(found.name, []).append(found)

    if scan_dist:
        _cache_generated = True
    if scan_egg:
        _cache_generated_egg = True
119
120
class Distribution:
    """Created with the *path* of the ``.dist-info`` directory provided to the
    constructor. It reads the metadata contained in ``METADATA`` when it is
    instantiated, unless the cache is enabled and already holds an entry for
    *path*, in which case the cached metadata object is reused."""

    name = ''
    """The name of the distribution."""

    version = ''
    """The version of the distribution."""

    metadata = None
    """A :class:`packaging.metadata.Metadata` instance loaded with
    the distribution's ``METADATA`` file."""

    requested = False
    """A boolean that indicates whether the ``REQUESTED`` metadata file is
    present (in other words, whether the package was installed by user
    request or it was installed as a dependency)."""

    def __init__(self, path):
        """
        :parameter path: path of the ``.dist-info`` directory
        :type path: string
        """
        if _cache_enabled and path in _cache_path:
            self.metadata = _cache_path[path].metadata
        else:
            metadata_path = os.path.join(path, 'METADATA')
            self.metadata = Metadata(path=metadata_path)

        self.name = self.metadata['Name']
        self.version = self.metadata['Version']
        self.path = path
        # Populate the documented flag from the marker file on disk.
        self.requested = os.path.isfile(os.path.join(path, 'REQUESTED'))

        if _cache_enabled and path not in _cache_path:
            _cache_path[path] = self

    def __repr__(self):
        return '<Distribution %r %s at %r>' % (
            self.name, self.version, self.path)

    @staticmethod
    def _localize(record_path):
        """Turn a raw ``RECORD`` path into a local absolute path.

        ``'/'`` is replaced by ``os.sep`` and the result is joined to
        ``sys.prefix`` (a no-op for paths that are already absolute).
        """
        return os.path.join(sys.prefix, record_path.replace('/', os.sep))

    def _get_records(self, local=False):
        """Return the ``RECORD`` entries as a list of
        ``(path, checksum, size)`` tuples.

        Missing trailing fields are returned as ``None``.  Blank lines are
        ignored.  If *local* is true, each path is converted with
        :meth:`_localize`.
        """
        results = []
        with self.get_distinfo_file('RECORD') as record:
            record_reader = csv.reader(record, delimiter=',',
                                       lineterminator='\n')
            for row in record_reader:
                if not row:
                    # csv yields an empty list for a blank line; there is no
                    # path to report for it
                    continue
                missing = [None] * (3 - len(row))
                path, checksum, size = row + missing
                if local:
                    path = self._localize(path)
                results.append((path, checksum, size))
        return results

    def get_resource_path(self, relative_path):
        """
        Return the destination recorded in ``RESOURCES`` for *relative_path*.

        :parameter relative_path: relative path of the resource, as written
                                  in the first column of ``RESOURCES``
        :raises KeyError: if no entry matches *relative_path*
        """
        with self.get_distinfo_file('RESOURCES') as resources_file:
            resources_reader = csv.reader(resources_file, delimiter=',',
                                          lineterminator='\n')
            for relative, destination in resources_reader:
                if relative == relative_path:
                    return destination
        raise KeyError(
            'no resource file with relative path %r is installed' %
            relative_path)

    def list_installed_files(self, local=False):
        """
        Iterates over the ``RECORD`` entries and returns a tuple
        ``(path, md5, size)`` for each line. If *local* is ``True``,
        the returned path is transformed into a local absolute path.
        Otherwise the raw value from RECORD is returned.

        A local absolute path is an absolute path in which occurrences of
        ``'/'`` have been replaced by the system separator given by ``os.sep``.

        :parameter local: flag to say if the path should be returned a local
                          absolute path

        :type local: boolean
        :returns: iterator of (path, md5, size)
        """
        for result in self._get_records(local):
            yield result

    def uses(self, path):
        """
        Returns ``True`` if path is listed in ``RECORD``. *path* can be a local
        absolute path or a relative ``'/'``-separated path.

        :rtype: boolean
        """
        for raw, checksum, size in self._get_records():
            # The plain join is kept for backward compatibility; the
            # localized form additionally converts '/' to os.sep so that the
            # comparison also works where the two differ.
            candidates = (raw, os.path.join(sys.prefix, raw),
                          self._localize(raw))
            if path in candidates:
                return True
        return False

    def get_distinfo_file(self, path, binary=False):
        """
        Returns a file located under the ``.dist-info`` directory. Returns a
        ``file`` instance for the file pointed by *path*.

        :parameter path: a ``'/'``-separated path relative to the
                         ``.dist-info`` directory or an absolute path;
                         If *path* is an absolute path and doesn't start
                         with the ``.dist-info`` directory path,
                         a :class:`PackagingError` is raised
        :type path: string
        :parameter binary: If *binary* is ``True``, opens the file in read-only
                           binary mode (``rb``), otherwise opens it in
                           read-only mode (``r``).
        :rtype: file object
        """
        open_flags = 'rb' if binary else 'r'

        # Check if it is an absolute path  # XXX use relpath, add tests
        if os.sep in path:
            # it's an absolute path?
            parent_dir, path = path.split(os.sep)[-2:]
            if parent_dir != self.path.split(os.sep)[-1]:
                raise PackagingError(
                    'dist-info file %r does not belong to the %r %s '
                    'distribution' % (path, self.name, self.version))

        # The file must be relative
        if path not in DIST_FILES:
            raise PackagingError('invalid path for a dist-info file: %r' %
                                 path)

        return open(os.path.join(self.path, path), open_flags)

    def list_distinfo_files(self, local=False):
        """
        Iterates over the ``RECORD`` entries and returns paths for each line if
        the path is pointing to a file located in the ``.dist-info`` directory
        or one of its subdirectories.

        :parameter local: If *local* is ``True``, each returned path is
                          transformed into a local absolute path. Otherwise the
                          raw value from ``RECORD`` is returned.
        :type local: boolean
        :returns: iterator of paths
        """
        # Both sides are resolved with realpath so that a symlinked prefix
        # does not hide files that really live in the .dist-info directory.
        base = os.path.realpath(self.path)
        for raw, checksum, size in self._get_records():
            absolute = self._localize(raw)
            resolved = os.path.realpath(absolute)
            if resolved == base or resolved.startswith(base + os.sep):
                yield absolute if local else raw

    def __eq__(self, other):
        return isinstance(other, Distribution) and self.path == other.path

    # See http://docs.python.org/reference/datamodel#object.__hash__
    # NOTE(review): equality is path-based while hashing is identity-based, so
    # two equal instances may hash differently; kept as is for compatibility.
    __hash__ = object.__hash__
273
274
class EggInfoDistribution:
    """Created with the *path* of the ``.egg-info`` directory or file provided
    to the constructor. It reads the metadata contained in the file itself, or
    if the given path happens to be a directory, the metadata is read from the
    file ``PKG-INFO`` under that directory.

    Paths ending with ``.egg`` (directory or zipped egg) are supported as
    well; their metadata is read from ``EGG-INFO/PKG-INFO``."""

    name = ''
    """The name of the distribution."""

    version = ''
    """The version of the distribution."""

    metadata = None
    """A :class:`packaging.metadata.Metadata` instance loaded with
    the distribution's ``METADATA`` file."""

    _REQUIREMENT = re.compile(
        r'(?P<name>[-A-Za-z0-9_.]+)\s*'
        r'(?P<first>(?:<|<=|!=|==|>=|>)[-A-Za-z0-9_.]+)?\s*'
        r'(?P<rest>(?:\s*,\s*(?:<|<=|!=|==|>=|>)[-A-Za-z0-9_.]+)*)\s*'
        r'(?P<extras>\[.*\])?')

    def __init__(self, path):
        """
        :parameter path: path of a ``.egg-info`` file or directory, or of a
                         ``.egg`` directory or zip file
        :type path: string
        :raises ValueError: if *path* ends with neither ``.egg-info`` nor
                            ``.egg``
        """
        self.path = path
        if _cache_enabled and path in _cache_path_egg:
            self.metadata = _cache_path_egg[path].metadata
            self.name = self.metadata['Name']
            self.version = self.metadata['Version']
            return

        requires = None

        if path.endswith('.egg'):
            if os.path.isdir(path):
                egg_info = os.path.join(path, 'EGG-INFO')
                self.metadata = Metadata(
                    path=os.path.join(egg_info, 'PKG-INFO'))
                requires = self._read_requires(
                    os.path.join(egg_info, 'requires.txt'))
            else:
                # FIXME handle the case where zipfile is not available
                zipf = zipimport.zipimporter(path)
                fileobj = io.StringIO(
                    zipf.get_data('EGG-INFO/PKG-INFO').decode('utf8'))
                self.metadata = Metadata(fileobj=fileobj)
                try:
                    # get_data returns bytes; the parser below needs text
                    requires = zipf.get_data(
                        'EGG-INFO/requires.txt').decode('utf8')
                except IOError:
                    requires = None

        elif path.endswith('.egg-info'):
            if os.path.isdir(path):
                # requires.txt lives next to PKG-INFO, so it has to be
                # located before *path* is pointed at PKG-INFO
                requires = self._read_requires(
                    os.path.join(path, 'requires.txt'))
                path = os.path.join(path, 'PKG-INFO')
            self.metadata = Metadata(path=path)

        else:
            raise ValueError('path must end with .egg-info or .egg, got %r' %
                             path)

        self.name = self.metadata['Name']
        self.version = self.metadata['Version']

        if requires is not None:
            if self.metadata['Metadata-Version'] == '1.1':
                # we can't have 1.1 metadata *and* Setuptools requires
                for field in ('Obsoletes', 'Requires', 'Provides'):
                    del self.metadata[field]

            reqs = self._parse_requires(requires)
            if reqs:
                self.metadata['Requires-Dist'] += reqs

        if _cache_enabled:
            _cache_path_egg[self.path] = self

    @staticmethod
    def _read_requires(req_path):
        """Return the text of the file *req_path*, or ``None`` if it cannot
        be read."""
        try:
            with open(req_path, 'r') as fp:
                return fp.read()
        except IOError:
            return None

    # reused from Distribute's pkg_resources
    @staticmethod
    def _yield_lines(strs):
        """Yield non-empty/non-comment lines of a string or of a (possibly
        nested) sequence of strings."""
        if isinstance(strs, str):
            for s in strs.splitlines():
                s = s.strip()
                # skip blank lines/comments
                if s and not s.startswith('#'):
                    yield s
        else:
            for ss in strs:
                for s in EggInfoDistribution._yield_lines(ss):
                    yield s

    def _parse_requires(self, requires):
        """Convert the content of a ``requires.txt`` file into a list of
        ``Requires-Dist`` values (``'name'`` or ``'name (versions)'``).

        Parsing stops at the first section header (a line starting with
        ``'['``) or at the first line that is not a requirement.
        """
        reqs = []
        for line in self._yield_lines(requires):
            if line.startswith('['):
                logger.warning(
                    'extensions in requires.txt are not supported '
                    '(used by %r %s)', self.name, self.version)
                break

            match = self._REQUIREMENT.match(line.strip())
            if not match:
                # this happens when we encounter extras; since they
                # are written at the end of the file we just exit
                break

            if match.group('extras'):
                logger.warning(
                    'extra requirements are not supported '
                    '(used by %r %s)', self.name, self.version)

            name = match.group('name')
            version = None
            if match.group('first'):
                version = match.group('first')
                if match.group('rest'):
                    version += match.group('rest')
                version = version.replace(' ', '')  # trim spaces

            if version is None:
                reqs.append(name)
            else:
                reqs.append('%s (%s)' % (name, version))
        return reqs

    def __repr__(self):
        return '<EggInfoDistribution %r %s at %r>' % (
            self.name, self.version, self.path)

    def list_installed_files(self, local=False):
        """
        Return a list of ``(path, md5, size)`` tuples for the metadata path
        itself if it is a file, or for every file found under it otherwise.

        :parameter local: if true, ``'/'`` in the distribution path is
                          replaced by ``os.sep`` before it is examined
        :type local: boolean
        :rtype: list of (path, md5, size)
        """

        def _md5(path):
            with open(path, 'rb') as f:
                content = f.read()
            return md5(content).hexdigest()

        def _size(path):
            return os.stat(path).st_size

        path = self.path
        if local:
            path = path.replace('/', os.sep)

        # XXX What about scripts and data files ?
        if os.path.isfile(path):
            return [(path, _md5(path), _size(path))]

        files = []
        for root, _dirnames, filenames in os.walk(path):
            for item in filenames:
                item = os.path.join(root, item)
                files.append((item, _md5(item), _size(item)))
        return files

    def uses(self, path):
        """Always return ``False``."""
        return False

    def __eq__(self, other):
        return (isinstance(other, EggInfoDistribution) and
                self.path == other.path)

    # See http://docs.python.org/reference/datamodel#object.__hash__
    # NOTE(review): equality is path-based while hashing is identity-based, so
    # two equal instances may hash differently; kept as is for compatibility.
    __hash__ = object.__hash__
445
446
def distinfo_dirname(name, version):
    """
    Build the name of the ``.dist-info`` directory for *name* and *version*.

    :parameter name: distribution name; every ``'-'`` it contains is
                     replaced with ``'_'``
    :type name: string
    :parameter version: version string; it is passed through
                        ``suggest_normalized_version`` and used unchanged
                        when no normalized form can be suggested
    :type version: string
    :returns: directory name of the form ``<name>-<version>.dist-info``
    :rtype: string"""
    escaped_name = name.replace('-', '_')
    # Because this is a lookup procedure, something is returned even for a
    # version that cannot be normalized.
    suggested = suggest_normalized_version(version)
    version_part = version if suggested is None else suggested
    return '-'.join((escaped_name, version_part)) + '.dist-info'
474
475
def get_distributions(use_egg_info=False, paths=None):
    """
    Provides an iterator that looks for ``.dist-info`` directories in
    *paths* and returns :class:`Distribution` instances for each one of
    them. If the parameter *use_egg_info* is ``True``, then the ``.egg-info``
    files and directories (and ``.egg`` entries) are iterated as well.

    When the cache is enabled, the distributions are served from the cache,
    which is generated on first use.

    :parameter use_egg_info: also yield :class:`EggInfoDistribution`
                             instances
    :type use_egg_info: boolean
    :parameter paths: directories to scan; defaults to ``sys.path``
    :rtype: iterator of :class:`Distribution` and :class:`EggInfoDistribution`
            instances
    """
    if paths is None:
        paths = sys.path

    if not _cache_enabled:
        for dist in _yield_distributions(True, use_egg_info, paths):
            yield dist
    else:
        _generate_cache(use_egg_info, paths)

        # Iterate over snapshots: this is a lazy generator, and a distribution
        # object created by the consumer in the meantime registers itself in
        # the cache, which would otherwise break the iteration with
        # "dictionary changed size during iteration".
        for dist in list(_cache_path.values()):
            yield dist

        if use_egg_info:
            for dist in list(_cache_path_egg.values()):
                yield dist
501
502
def get_distribution(name, use_egg_info=False, paths=None):
    """
    Look up the installed distribution whose ``Name`` metadata is *name*.

    ``.dist-info`` distributions found in *paths* (``sys.path`` by default)
    are considered.  When *use_egg_info* is true, ``.egg-info``/``.egg``
    distributions are considered as well; with the cache enabled they are
    only used if no ``.dist-info`` distribution matches.

    Only the first match is returned, as no more than one is expected.

    :rtype: :class:`Distribution` or :class:`EggInfoDistribution`, or
            ``None`` when nothing matches
    """
    search_paths = sys.path if paths is None else paths

    if _cache_enabled:
        _generate_cache(use_egg_info, search_paths)
        if name in _cache_name:
            return _cache_name[name][0]
        if use_egg_info and name in _cache_name_egg:
            return _cache_name_egg[name][0]
        return None

    for candidate in _yield_distributions(True, use_egg_info, search_paths):
        if candidate.name == name:
            return candidate
    return None
536
537
def obsoletes_distribution(name, version=None, use_egg_info=False):
    """
    Yield every installed distribution that declares *name* as obsoleted.

    Both the ``Obsoletes-Dist`` and ``Obsoletes`` metadata fields are
    examined.  When *version* is given, entries carrying a version
    predicate only match if the predicate accepts *version*.  Each
    distribution is yielded at most once.

    :parameter name: name of the obsoleted distribution
    :type name: string
    :parameter version: optional version used to filter the results
    :type version: string
    :parameter use_egg_info: also consider ``.egg-info`` distributions
    :raises PackagingError: if an examined entry is not a valid version
                            predicate
    """
    for dist in get_distributions(use_egg_info):
        meta = dist.metadata
        for entry in meta['Obsoletes-Dist'] + meta['Obsoletes']:
            parts = entry.split(' ', 1)
            obsoleted_name = parts[0]
            if version is None or len(parts) == 1:
                # nothing to compare versions against: names decide alone
                matched = name == obsoleted_name
            else:
                try:
                    predicate = VersionPredicate(entry)
                except ValueError:
                    raise PackagingError(
                        'distribution %r has ill-formed obsoletes field: '
                        '%r' % (dist.name, entry))
                matched = (name == obsoleted_name and
                           predicate.match(version))
            if matched:
                yield dist
                break
570
571
def provides_distribution(name, version=None, use_egg_info=False):
    """
    Yield every installed distribution that declares it provides *name*.

    Both the ``Provides-Dist`` and ``Provides`` metadata fields are
    examined.  When *version* is given, entries of the form
    ``'name (version)'`` only match if the provided version satisfies it.
    Each distribution is yielded at most once.

    :parameter name: name of the provided distribution
    :type name: string
    :parameter version: a version specifier that indicates the version
                        required, conforming to the format in ``PEP-345``
    :type version: string
    :parameter use_egg_info: also consider ``.egg-info`` distributions
    :raises PackagingError: if *name* and *version* do not form a valid
                            predicate, or if an examined entry has a
                            malformed version part
    """
    if version is None:
        predicate = None
    else:
        try:
            predicate = VersionPredicate(name + ' (' + version + ')')
        except ValueError:
            raise PackagingError('invalid name or version: %r, %r' %
                                 (name, version))

    for dist in get_distributions(use_egg_info):
        meta = dist.metadata
        for entry in meta['Provides-Dist'] + meta['Provides']:
            parts = entry.rsplit(' ', 1)
            if predicate is None or len(parts) == 1:
                # no version to compare: the name decides alone
                if name == parts[0]:
                    yield dist
                    break
                continue

            provided_name, provided_version = parts
            if (len(provided_version) < 2 or provided_version[0] != '(' or
                    provided_version[-1] != ')'):
                raise PackagingError(
                    'distribution %r has invalid Provides field: %r' %
                    (dist.name, entry))
            provided_version = provided_version[1:-1]  # drop parentheses
            if provided_name == name and predicate.match(provided_version):
                yield dist
                break
619
620
def get_file_users(path):
    """
    Yield the distributions returned by :func:`get_distributions` whose
    ``uses`` method accepts *path*.

    :parameter path: can be a local absolute path or a relative
                     ``'/'``-separated path.
    :type path: string
    :rtype: iterator of :class:`Distribution` instances
    """
    for candidate in get_distributions():
        if not candidate.uses(path):
            continue
        yield candidate
Tarek Ziadea17d8882011-05-30 10:57:44 +0200634
635
def get_file_path(distribution_name, relative_path):
    """Return the path to a resource file.

    :raises LookupError: if no distribution named *distribution_name* is
                         found
    """
    dist = get_distribution(distribution_name)
    if dist is None:
        raise LookupError(
            'no distribution named %r found' % distribution_name)
    return dist.get_resource_path(relative_path)
642
643
def get_file(distribution_name, relative_path, *args, **kwargs):
    """Open and return a resource file.

    Extra positional and keyword arguments are passed on to :func:`open`.
    """
    resource_path = get_file_path(distribution_name, relative_path)
    return open(resource_path, *args, **kwargs)