blob: e028dc55fb1da771890d08dd9c2cef35f56f73e4 [file] [log] [blame]
Tarek Ziade1231a4e2011-05-19 13:07:25 +02001"""PEP 376 implementation."""
2
Tarek Ziade1231a4e2011-05-19 13:07:25 +02003import os
4import re
5import csv
6import sys
7import zipimport
Éric Araujo229011d2011-09-18 20:11:48 +02008from io import StringIO
Tarek Ziade1231a4e2011-05-19 13:07:25 +02009from hashlib import md5
Éric Araujo229011d2011-09-18 20:11:48 +020010
Tarek Ziade1231a4e2011-05-19 13:07:25 +020011from packaging import logger
12from packaging.errors import PackagingError
13from packaging.version import suggest_normalized_version, VersionPredicate
14from packaging.metadata import Metadata
15
16
# Names exported by ``from <module> import *``.
__all__ = [
    'Distribution',
    'EggInfoDistribution',
    'distinfo_dirname',
    'get_distributions',
    'get_distribution',
    'get_file_users',
    'provides_distribution',
    'obsoletes_distribution',
    'enable_cache',
    'disable_cache',
    'clear_cache',
    # XXX these functions' names look like get_file_users but are not related
    'get_file_path',
    'get_file',
]
Tarek Ziade1231a4e2011-05-19 13:07:25 +020024
25
26# TODO update docs
27
# The only file names Distribution.get_distinfo_file() agrees to open inside
# a ``.dist-info`` directory.
DIST_FILES = ('INSTALLER', 'METADATA', 'RECORD', 'REQUESTED', 'RESOURCES')

# Cache
# maps names to lists of Distribution instances
_cache_name = {}
# maps names to lists of EggInfoDistribution instances
_cache_name_egg = {}
# maps paths to Distribution instances
_cache_path = {}
# maps paths to EggInfoDistribution instances
_cache_path_egg = {}
# indicates if .dist-info distributions are cached
_cache_generated = False
# indicates if .dist-info and .egg are cached
_cache_generated_egg = False
# global on/off switch, see enable_cache() and disable_cache()
_cache_enabled = True
38
39
def enable_cache():
    """Switch the internal cache on.

    Only the module-level ``_cache_enabled`` flag is changed; whatever the
    cache already holds is left alone.  To empty it, call
    :func:`clear_cache`.
    """
    global _cache_enabled
    _cache_enabled = True
50
51
def disable_cache():
    """Switch the internal cache off.

    Only the module-level ``_cache_enabled`` flag is changed; whatever the
    cache already holds is left alone.  To empty it, call
    :func:`clear_cache`.
    """
    global _cache_enabled
    _cache_enabled = False
62
63
def clear_cache():
    """Empty all four cache mappings and mark both caches as not generated.

    The mappings are cleared in place, and the enabled/disabled state of the
    cache is not touched.
    """
    global _cache_generated, _cache_generated_egg

    for mapping in (_cache_name, _cache_name_egg,
                    _cache_path, _cache_path_egg):
        mapping.clear()

    _cache_generated = _cache_generated_egg = False
74
75
def _yield_distributions(include_dist, include_egg, paths):
    """
    Yield .dist-info and .egg(-info) distributions, based on the arguments

    Every entry of *paths* is resolved with ``os.path.realpath``; entries that
    are not directories are skipped.  Only the direct children of each
    directory are inspected.

    :parameter include_dist: yield .dist-info distributions
    :parameter include_egg: yield .egg(-info) distributions
    :parameter paths: iterable of directories to scan
    """
    for location in paths:
        base = os.path.realpath(location)
        if not os.path.isdir(base):
            continue
        for entry in os.listdir(base):
            candidate = os.path.join(base, entry)
            # The suffixes are mutually exclusive, so testing the suffix
            # before the flag selects the same entries as testing both at once.
            if entry.endswith('.dist-info'):
                if include_dist:
                    yield Distribution(candidate)
            elif entry.endswith(('.egg-info', '.egg')):
                if include_egg:
                    yield EggInfoDistribution(candidate)
94
95
def _generate_cache(use_egg_info, paths):
    """Fill the module-level caches by scanning *paths*, if still needed.

    Nothing happens when the egg cache has already been generated, or when
    the .dist-info cache has been generated and eggs are not requested.
    Otherwise only the missing kinds of distributions are scanned.  Each one
    is stored by path and appended to the list kept under its name.

    :parameter use_egg_info: also scan and cache .egg(-info) distributions
    :parameter paths: iterable of directories handed to the scanner
    """
    global _cache_generated, _cache_generated_egg

    if _cache_generated_egg:
        return
    if _cache_generated and not use_egg_info:
        return

    scan_dist = not _cache_generated
    scan_egg = use_egg_info

    for found in _yield_distributions(scan_dist, scan_egg, paths):
        if isinstance(found, Distribution):
            by_path, by_name = _cache_path, _cache_name
        else:
            by_path, by_name = _cache_path_egg, _cache_name_egg
        by_path[found.path] = found
        by_name.setdefault(found.name, []).append(found)

    # The flags are only raised once the whole scan completed.
    if scan_dist:
        _cache_generated = True
    if scan_egg:
        _cache_generated_egg = True
121
122
class Distribution:
    """Created with the *path* of the ``.dist-info`` directory provided to the
    constructor. It reads the metadata contained in ``METADATA`` when it is
    instantiated."""

    name = ''
    """The name of the distribution."""

    version = ''
    """The version of the distribution."""

    metadata = None
    """A :class:`packaging.metadata.Metadata` instance loaded with
    the distribution's ``METADATA`` file."""

    requested = False
    """A boolean that indicates whether the ``REQUESTED`` metadata file is
    present (in other words, whether the package was installed by user
    request or it was installed as a dependency)."""

    def __init__(self, path):
        """Load the distribution found in the ``.dist-info`` directory *path*.

        When the cache is enabled and already knows *path*, the cached
        metadata object is reused; otherwise ``METADATA`` is read from disk.
        With the cache enabled, the new instance is registered under *path*
        unless an entry already exists.
        """
        if _cache_enabled and path in _cache_path:
            self.metadata = _cache_path[path].metadata
        else:
            metadata_path = os.path.join(path, 'METADATA')
            self.metadata = Metadata(path=metadata_path)

        self.name = self.metadata['Name']
        self.version = self.metadata['Version']
        self.path = path

        if _cache_enabled and path not in _cache_path:
            _cache_path[path] = self

    def __repr__(self):
        return '<Distribution %r %s at %r>' % (
            self.name, self.version, self.path)

    def _get_records(self, local=False):
        """Return the ``RECORD`` entries as a list of
        ``(path, checksum, size)`` tuples.

        Rows with fewer than three fields are padded with ``None``.  Blank
        lines are skipped.  If *local* is true, ``'/'`` is replaced by
        ``os.sep`` in each path, which is then joined to ``sys.prefix``.
        """
        results = []
        with self.get_distinfo_file('RECORD') as record:
            record_reader = csv.reader(record, delimiter=',',
                                       lineterminator='\n')
            for row in record_reader:
                if not row:
                    # csv.reader yields [] for a blank line; padding it would
                    # produce a (None, None, None) record that breaks every
                    # consumer doing string operations on the path.
                    continue
                padding = [None] * (3 - len(row))
                path, checksum, size = row + padding
                if local:
                    path = path.replace('/', os.sep)
                    path = os.path.join(sys.prefix, path)
                results.append((path, checksum, size))
        return results

    def get_resource_path(self, relative_path):
        """Return the destination recorded in ``RESOURCES`` for
        *relative_path*.

        :raises KeyError: if no row of ``RESOURCES`` has *relative_path* as
                          its first field
        """
        with self.get_distinfo_file('RESOURCES') as resources_file:
            resources_reader = csv.reader(resources_file, delimiter=',',
                                          lineterminator='\n')
            for relative, destination in resources_reader:
                if relative == relative_path:
                    return destination
        raise KeyError(
            'no resource file with relative path %r is installed' %
            relative_path)

    def list_installed_files(self, local=False):
        """
        Iterates over the ``RECORD`` entries and returns a tuple
        ``(path, md5, size)`` for each line. If *local* is ``True``,
        the returned path is transformed into a local absolute path.
        Otherwise the raw value from RECORD is returned.

        A local absolute path is an absolute path in which occurrences of
        ``'/'`` have been replaced by the system separator given by ``os.sep``.

        :parameter local: flag to say if the path should be returned as a local
                          absolute path

        :type local: boolean
        :returns: iterator of (path, md5, size)
        """
        for result in self._get_records(local):
            yield result

    def uses(self, path):
        """
        Returns ``True`` if path is listed in ``RECORD``. *path* can be a local
        absolute path or a relative ``'/'``-separated path.

        :rtype: boolean
        """
        for recorded, _checksum, _size in self._get_records():
            local_absolute = os.path.join(sys.prefix, recorded)
            if path == recorded or path == local_absolute:
                return True
        return False

    def get_distinfo_file(self, path, binary=False):
        """
        Returns a file located under the ``.dist-info`` directory. Returns a
        ``file`` instance for the file pointed by *path*.

        :parameter path: a ``'/'``-separated path relative to the
                         ``.dist-info`` directory or an absolute path;
                         If *path* is an absolute path and doesn't start
                         with the ``.dist-info`` directory path,
                         a :class:`PackagingError` is raised
        :type path: string
        :parameter binary: If *binary* is ``True``, opens the file in read-only
                           binary mode (``rb``), otherwise opens it in
                           read-only mode (``r``).
        :rtype: file object
        """
        open_flags = 'r'
        if binary:
            open_flags += 'b'

        # Check if it is an absolute path  # XXX use relpath, add tests
        if path.find(os.sep) >= 0:
            # it's an absolute path?
            # Only the last two components are examined: the parent
            # directory name and the file name.
            parent_dir, path = path.split(os.sep)[-2:]
            if parent_dir != self.path.split(os.sep)[-1]:
                raise PackagingError(
                    'dist-info file %r does not belong to the %r %s '
                    'distribution' % (path, self.name, self.version))

        # The file must be relative
        if path not in DIST_FILES:
            raise PackagingError('invalid path for a dist-info file: %r' %
                                 path)

        path = os.path.join(self.path, path)
        return open(path, open_flags)

    def list_distinfo_files(self, local=False):
        """
        Iterates over the ``RECORD`` entries and returns paths for each line if
        the path is pointing to a file located in the ``.dist-info`` directory
        or one of its subdirectories.

        :parameter local: If *local* is ``True``, each returned path is
                          transformed into a local absolute path. Otherwise the
                          raw value from ``RECORD`` is returned.
        :type local: boolean
        :returns: iterator of paths
        """
        for path, _checksum, _size in self._get_records(local):
            # XXX add separator or use real relpath algo
            if path.startswith(self.path):
                yield path

    def __eq__(self, other):
        return isinstance(other, Distribution) and self.path == other.path

    # See http://docs.python.org/reference/datamodel#object.__hash__
    __hash__ = object.__hash__
277
278
class EggInfoDistribution:
    """Created with the *path* of the ``.egg-info`` directory or file provided
    to the constructor. It reads the metadata contained in the file itself, or
    if the given path happens to be a directory, the metadata is read from the
    file ``PKG-INFO`` under that directory."""

    name = ''
    """The name of the distribution."""

    version = ''
    """The version of the distribution."""

    metadata = None
    """A :class:`packaging.metadata.Metadata` instance loaded with
    the distribution's ``PKG-INFO`` data."""

    _REQUIREMENT = re.compile(
        r'(?P<name>[-A-Za-z0-9_.]+)\s*'
        r'(?P<first>(?:<|<=|!=|==|>=|>)[-A-Za-z0-9_.]+)?\s*'
        r'(?P<rest>(?:\s*,\s*(?:<|<=|!=|==|>=|>)[-A-Za-z0-9_.]+)*)\s*'
        r'(?P<extras>\[.*\])?')

    def __init__(self, path):
        """Load the distribution stored at *path*.

        *path* must end with ``.egg`` (a directory or a zipped egg) or
        ``.egg-info`` (a directory or a single file).  If a ``requires.txt``
        file is found, its requirements are appended to the ``Requires-Dist``
        metadata field.

        :raises ValueError: if *path* has neither suffix
        """
        self.path = path
        if _cache_enabled and path in _cache_path_egg:
            self.metadata = _cache_path_egg[path].metadata
            self.name = self.metadata['Name']
            self.version = self.metadata['Version']
            return

        requires = None

        if path.endswith('.egg'):
            if os.path.isdir(path):
                meta_path = os.path.join(path, 'EGG-INFO', 'PKG-INFO')
                self.metadata = Metadata(path=meta_path)
                try:
                    req_path = os.path.join(path, 'EGG-INFO', 'requires.txt')
                    with open(req_path, 'r') as fp:
                        requires = fp.read()
                except IOError:
                    requires = None
            else:
                # FIXME handle the case where zipfile is not available
                zipf = zipimport.zipimporter(path)
                fileobj = StringIO(
                    zipf.get_data('EGG-INFO/PKG-INFO').decode('utf8'))
                self.metadata = Metadata(fileobj=fileobj)
                try:
                    # get_data() returns bytes; decode so the requirement
                    # parser receives text, as in the other branches
                    requires = zipf.get_data(
                        'EGG-INFO/requires.txt').decode('utf8')
                except IOError:
                    requires = None
            self.name = self.metadata['Name']
            self.version = self.metadata['Version']

        elif path.endswith('.egg-info'):
            if os.path.isdir(path):
                # Build the requires.txt location from the directory *before*
                # path is re-pointed at PKG-INFO; joining onto the PKG-INFO
                # file could never find it.
                req_path = os.path.join(path, 'requires.txt')
                path = os.path.join(path, 'PKG-INFO')
                try:
                    with open(req_path, 'r') as fp:
                        requires = fp.read()
                except IOError:
                    requires = None
            self.metadata = Metadata(path=path)
            self.name = self.metadata['Name']
            self.version = self.metadata['Version']

        else:
            raise ValueError('path must end with .egg-info or .egg, got %r' %
                             path)

        if requires is not None:
            if self.metadata['Metadata-Version'] == '1.1':
                # we can't have 1.1 metadata *and* Setuptools requires
                for field in ('Obsoletes', 'Requires', 'Provides'):
                    del self.metadata[field]

            reqs = self._parse_requires(requires)
            if reqs:
                self.metadata['Requires-Dist'] += reqs

        if _cache_enabled:
            _cache_path_egg[self.path] = self

    # reused from Distribute's pkg_resources
    @staticmethod
    def _yield_lines(strs):
        """Yield the stripped non-empty, non-comment lines of a string or of
        a (possibly nested) sequence of strings."""
        if isinstance(strs, str):
            for line in strs.splitlines():
                line = line.strip()
                # skip blank lines/comments
                if line and not line.startswith('#'):
                    yield line
        else:
            for item in strs:
                for line in EggInfoDistribution._yield_lines(item):
                    yield line

    def _parse_requires(self, requires):
        """Convert the text of a ``requires.txt`` file into a list of
        requirement strings, ``'name'`` or ``'name (versions)'``.

        Parsing stops at the first ``[section]`` line or at the first line
        the requirement pattern does not match.
        """
        reqs = []
        for line in self._yield_lines(requires):
            if line.startswith('['):
                logger.warning(
                    'extensions in requires.txt are not supported '
                    '(used by %r %s)', self.name, self.version)
                break

            match = self._REQUIREMENT.match(line.strip())
            if not match:
                # this happens when we encounter extras; since they
                # are written at the end of the file we just exit
                break

            if match.group('extras'):
                # The format string and its arguments are passed separately;
                # a tuple used as the message cannot be formatted by logging.
                logger.warning(
                    'extra requirements are not supported '
                    '(used by %r %s)', self.name, self.version)

            name = match.group('name')
            version = None
            if match.group('first'):
                version = match.group('first')
                if match.group('rest'):
                    version += match.group('rest')
                version = version.replace(' ', '')  # trim spaces

            if version is None:
                reqs.append(name)
            else:
                reqs.append('%s (%s)' % (name, version))
        return reqs

    def __repr__(self):
        return '<EggInfoDistribution %r %s at %r>' % (
            self.name, self.version, self.path)

    def list_installed_files(self, local=False):
        """Return a list of ``(path, md5, size)`` tuples for this egg.

        If the distribution path is a regular file, the list holds that one
        file; otherwise it holds every file found by walking the path as a
        directory tree.

        :parameter local: if true, ``'/'`` in the distribution path is
                          replaced by ``os.sep`` before it is used
        :type local: boolean
        """

        def _md5(path):
            with open(path, 'rb') as f:
                content = f.read()
            return md5(content).hexdigest()

        def _size(path):
            return os.stat(path).st_size

        path = self.path
        if local:
            path = path.replace('/', os.sep)

        # XXX What about scripts and data files ?
        if os.path.isfile(path):
            return [(path, _md5(path), _size(path))]

        files = []
        for root, _dirnames, filenames in os.walk(path):
            for item in filenames:
                item = os.path.join(root, item)
                files.append((item, _md5(item), _size(item)))
        return files

    def uses(self, path):
        """Always return ``False``."""
        return False

    def __eq__(self, other):
        return (isinstance(other, EggInfoDistribution) and
                self.path == other.path)

    # See http://docs.python.org/reference/datamodel#object.__hash__
    __hash__ = object.__hash__
449
450
def distinfo_dirname(name, version):
    """
    Build the ``.dist-info`` directory name for *name* and *version*, in the
    form ``<name>-<version>.dist-info``.

    :parameter name: distribution name; every ``'-'`` it contains is replaced
                     with ``'_'`` so that the only dash left before the
                     version is the separator.
    :type name: string
    :parameter version: version string; it is passed through
                        ``suggest_normalized_version`` and the suggestion is
                        used when there is one, otherwise *version* is used
                        unchanged.
    :type version: string
    :returns: directory name
    :rtype: string"""
    escaped_name = name.replace('-', '_')
    suggestion = suggest_normalized_version(version)
    # Because this is a lookup procedure, something will be returned even if
    # it is a version that cannot be normalized
    version_part = version if suggestion is None else suggestion
    return '-'.join((escaped_name, version_part)) + '.dist-info'
478
479
def get_distributions(use_egg_info=False, paths=None):
    """
    Provides an iterator that looks for ``.dist-info`` directories in
    *paths* (``sys.path`` when *paths* is ``None``) and returns
    :class:`Distribution` instances for each one of them. If the parameter
    *use_egg_info* is ``True``, then the ``.egg-info`` files and directories
    are iterated as well.

    With the cache enabled, the cache is generated if needed and the
    distributions are served from it; otherwise *paths* is scanned anew.

    :rtype: iterator of :class:`Distribution` and :class:`EggInfoDistribution`
            instances
    """
    search_paths = sys.path if paths is None else paths

    if _cache_enabled:
        _generate_cache(use_egg_info, search_paths)

        for cached in _cache_path.values():
            yield cached

        if use_egg_info:
            for cached_egg in _cache_path_egg.values():
                yield cached_egg
    else:
        for found in _yield_distributions(True, use_egg_info, search_paths):
            yield found
505
506
def get_distribution(name, use_egg_info=False, paths=None):
    """
    Looks in *paths* (``sys.path`` when *paths* is ``None``) for a
    ``.dist-info`` directory whose ``METADATA`` *name* field matches *name*
    and returns the corresponding :class:`Distribution`.
    If the parameter *use_egg_info* is set to ``True``, files and directories
    ending with ``.egg-info`` or ``.egg`` are considered as well, and an
    :class:`EggInfoDistribution` instance may be returned.

    This function only returns the first result found, as no more than one
    value is expected. If no distribution matches, ``None`` is returned.

    :rtype: :class:`Distribution` or :class:`EggInfoDistribution` or None
    """
    search_paths = sys.path if paths is None else paths

    if _cache_enabled:
        _generate_cache(use_egg_info, search_paths)

        # .dist-info distributions take precedence over eggs
        if name in _cache_name:
            return _cache_name[name][0]
        if use_egg_info and name in _cache_name_egg:
            return _cache_name_egg[name][0]
        return None

    for candidate in _yield_distributions(True, use_egg_info, search_paths):
        if candidate.name == name:
            return candidate
    return None
540
541
def obsoletes_distribution(name, version=None, use_egg_info=False):
    """
    Iterates over all distributions to find which distributions obsolete
    *name*.

    If a *version* is provided, it will be used to filter the results.
    If the argument *use_egg_info* is set to ``True``, then ``.egg-info``
    distributions will be considered as well.

    Each matching distribution is yielded once.  A :class:`PackagingError`
    is raised when an obsoletes entry cannot be parsed as a version
    predicate.

    :type name: string
    :type version: string
    :parameter name: name of the distribution that may be obsoleted
    """
    for dist in get_distributions(use_egg_info):
        declared = (dist.metadata['Obsoletes-Dist'] +
                    dist.metadata['Obsoletes'])
        for entry in declared:
            parts = entry.split(' ', 1)
            if version is None or len(parts) == 1:
                # nothing to compare versions against: the name decides
                matched = name == parts[0]
            else:
                try:
                    predicate = VersionPredicate(entry)
                except ValueError:
                    raise PackagingError(
                        'distribution %r has ill-formed obsoletes field: '
                        '%r' % (dist.name, entry))
                matched = name == parts[0] and predicate.match(version)

            if matched:
                yield dist
                break
574
575
def provides_distribution(name, version=None, use_egg_info=False):
    """
    Iterates over all distributions to find which distributions provide *name*.
    If a *version* is provided, it will be used to filter the results.  If the
    argument *use_egg_info* is set to ``True``, then ``.egg-info``
    distributions are considered as well.

    Each matching distribution is yielded once.  A :class:`PackagingError`
    is raised if *name* and *version* do not form a valid version predicate,
    or if a provides entry that has to be compared by version is not of the
    form ``name (version)``.

    :parameter version: a version specifier that indicates the version
                        required, conforming to the format in ``PEP-345``

    :type name: string
    :type version: string
    """
    predicate = None
    if version is not None:
        try:
            predicate = VersionPredicate(name + ' (' + version + ')')
        except ValueError:
            raise PackagingError('invalid name or version: %r, %r' %
                                 (name, version))

    for dist in get_distributions(use_egg_info):
        declared = dist.metadata['Provides-Dist'] + dist.metadata['Provides']

        for entry in declared:
            parts = entry.rsplit(' ', 1)
            if predicate is None or len(parts) == 1:
                # nothing to compare versions against: the name decides
                matched = name == parts[0]
            else:
                provided_name, provided_version = parts
                if (len(provided_version) < 2 or
                        provided_version[0] != '(' or
                        provided_version[-1] != ')'):
                    raise PackagingError(
                        'distribution %r has invalid Provides field: %r' %
                        (dist.name, entry))
                provided_version = provided_version[1:-1]  # trim off the parenthesis
                matched = (provided_name == name and
                           predicate.match(provided_version))

            if matched:
                yield dist
                break
623
624
def get_file_users(path):
    """
    Iterates over all distributions to find out which distributions use
    *path*, as reported by each distribution's ``uses`` method.

    :parameter path: can be a local absolute path or a relative
                     ``'/'``-separated path.
    :type path: string
    :rtype: iterator of :class:`Distribution` instances
    """
    users = (dist for dist in get_distributions() if dist.uses(path))
    for user in users:
        yield user
Tarek Ziadea17d8882011-05-30 10:57:44 +0200638
639
def get_file_path(distribution_name, relative_path):
    """Return the path to a resource file.

    The distribution is looked up by name and asked for the location of
    *relative_path*.  A ``LookupError`` is raised when no distribution named
    *distribution_name* is found.
    """
    dist = get_distribution(distribution_name)
    if dist is None:
        raise LookupError(
            'no distribution named %r found' % distribution_name)
    return dist.get_resource_path(relative_path)
646
647
def get_file(distribution_name, relative_path, *args, **kwargs):
    """Open and return a resource file.

    The path comes from :func:`get_file_path`; any extra positional and
    keyword arguments are passed on to ``open``.
    """
    resource_path = get_file_path(distribution_name, relative_path)
    return open(resource_path, *args, **kwargs)