blob: 010c4eb572c93470a6228e8b5a3d6805c1159f1b [file] [log] [blame]
Tarek Ziade1231a4e2011-05-19 13:07:25 +02001"""PEP 376 implementation."""
2
3import io
4import os
5import re
6import csv
7import sys
8import zipimport
9from hashlib import md5
10from packaging import logger
11from packaging.errors import PackagingError
12from packaging.version import suggest_normalized_version, VersionPredicate
13from packaging.metadata import Metadata
14
15
# Names exported by ``from <this module> import *``.
__all__ = [
    'Distribution',
    'EggInfoDistribution',
    'distinfo_dirname',
    'get_distributions',
    'get_distribution',
    'get_file_users',
    'provides_distribution',
    'obsoletes_distribution',
    'enable_cache',
    'disable_cache',
    'clear_cache',
    'get_file_path',
    'get_file',
]
23
24
25# TODO update docs
26
# The only file names that may be opened inside a ``.dist-info`` directory.
DIST_FILES = ('INSTALLER', 'METADATA', 'RECORD', 'REQUESTED', 'RESOURCES')

# Module-level cache state.
# name -> list of Distribution instances
_cache_name = {}
# name -> list of EggInfoDistribution instances
_cache_name_egg = {}
# path -> Distribution instance
_cache_path = {}
# path -> EggInfoDistribution instance
_cache_path_egg = {}
# True once the .dist-info distributions have been scanned into the cache
_cache_generated = False
# True once the .egg/.egg-info distributions have been scanned as well
_cache_generated_egg = False
# Master switch consulted by every cache-aware function in this module
_cache_enabled = True
37
38
def enable_cache():
    """
    Switch the internal cache on.

    Only the ``_cache_enabled`` flag is changed; entries already stored in
    the cache are left untouched (see :func:`clear_cache` to empty it).
    """
    global _cache_enabled
    _cache_enabled = True
49
50
def disable_cache():
    """
    Switch the internal cache off.

    Only the ``_cache_enabled`` flag is changed; entries already stored in
    the cache are left untouched (see :func:`clear_cache` to empty it).
    """
    global _cache_enabled
    _cache_enabled = False
61
62
def clear_cache():
    """Reset the internal cache to its initial, empty state."""
    global _cache_name, _cache_name_egg, _cache_path, _cache_path_egg
    global _cache_generated, _cache_generated_egg

    # Fresh dictionaries are bound to the module names; the previous
    # dictionary objects themselves are not emptied.
    _cache_name, _cache_name_egg = {}, {}
    _cache_path, _cache_path_egg = {}, {}

    # Force the next cached lookup to rescan the paths.
    _cache_generated = _cache_generated_egg = False
74
75
76def _yield_distributions(include_dist, include_egg, paths=sys.path):
77 """
78 Yield .dist-info and .egg(-info) distributions, based on the arguments
79
80 :parameter include_dist: yield .dist-info distributions
81 :parameter include_egg: yield .egg(-info) distributions
82 """
83 for path in paths:
84 realpath = os.path.realpath(path)
85 if not os.path.isdir(realpath):
86 continue
87 for dir in os.listdir(realpath):
88 dist_path = os.path.join(realpath, dir)
89 if include_dist and dir.endswith('.dist-info'):
90 yield Distribution(dist_path)
91 elif include_egg and (dir.endswith('.egg-info') or
92 dir.endswith('.egg')):
93 yield EggInfoDistribution(dist_path)
94
95
def _generate_cache(use_egg_info=False, paths=None):
    """
    Populate the module-level caches by scanning *paths*.

    Nothing is done when the requested kind of distributions has already
    been cached.  ``.dist-info`` distributions are scanned only if they were
    not cached before; ``.egg``/``.egg-info`` ones only if *use_egg_info*
    is true.

    :parameter use_egg_info: also cache .egg(-info) distributions
    :parameter paths: iterable of directories to scan; ``None`` (the
                      default) means the current value of ``sys.path``
    """
    global _cache_generated, _cache_generated_egg

    if _cache_generated_egg or (_cache_generated and not use_egg_info):
        return

    if paths is None:
        paths = sys.path

    gen_dist = not _cache_generated
    gen_egg = use_egg_info

    for dist in _yield_distributions(gen_dist, gen_egg, paths):
        if isinstance(dist, Distribution):
            _cache_path[dist.path] = dist
            _cache_name.setdefault(dist.name, []).append(dist)
        else:
            _cache_path_egg[dist.path] = dist
            _cache_name_egg.setdefault(dist.name, []).append(dist)

    # The flags are only raised once the scan has completed.
    if gen_dist:
        _cache_generated = True
    if gen_egg:
        _cache_generated_egg = True
121
122
class Distribution:
    """Created with the *path* of the ``.dist-info`` directory provided to the
    constructor. It reads the metadata contained in ``METADATA`` when it is
    instantiated."""

    name = ''
    """The name of the distribution."""

    version = ''
    """The version of the distribution."""

    metadata = None
    """A :class:`packaging.metadata.Metadata` instance loaded with
    the distribution's ``METADATA`` file."""

    requested = False
    """A boolean that indicates whether the ``REQUESTED`` metadata file is
    present (in other words, whether the package was installed by user
    request or it was installed as a dependency)."""

    def __init__(self, path):
        """
        Load the distribution found in the ``.dist-info`` directory *path*.

        When the cache is enabled and already holds an entry for *path*,
        its metadata object is reused instead of reading ``METADATA`` again;
        otherwise the new instance is stored in the cache.
        """
        if _cache_enabled and path in _cache_path:
            self.metadata = _cache_path[path].metadata
        else:
            metadata_path = os.path.join(path, 'METADATA')
            self.metadata = Metadata(path=metadata_path)

        self.name = self.metadata['Name']
        self.version = self.metadata['Version']
        self.path = path
        # Honour the documented meaning of ``requested``: it mirrors the
        # presence of the REQUESTED marker file.
        self.requested = os.path.isfile(os.path.join(path, 'REQUESTED'))

        if _cache_enabled and path not in _cache_path:
            _cache_path[path] = self

    def __repr__(self):
        return '<Distribution %r %s at %r>' % (
            self.name, self.version, self.path)

    @staticmethod
    def _to_local(path):
        """Turn a ``'/'``-separated RECORD path into a local absolute path.

        The separators are replaced by ``os.sep`` and the result is joined
        to ``sys.prefix`` (an already absolute path is left as is by
        ``os.path.join``).
        """
        return os.path.join(sys.prefix, path.replace('/', os.sep))

    def _get_records(self, local=False):
        """
        Yield a ``(path, checksum, size)`` tuple for each row of ``RECORD``.

        Blank rows are skipped.  Rows may have fewer than three columns:
        the missing values are reported as ``None``.  Columns past the third
        are ignored.

        :parameter local: if true, yield local absolute paths instead of the
                          raw RECORD values
        """
        with self.get_distinfo_file('RECORD') as record:
            record_reader = csv.reader(record, delimiter=',',
                                       lineterminator='\n')
            for row in record_reader:
                if not row:
                    # blank line: no path to report
                    continue
                # pad short rows so that the unpacking always succeeds
                path, checksum, size = (row + [None, None])[:3]
                if local:
                    path = self._to_local(path)
                yield path, checksum, size

    def get_resource_path(self, relative_path):
        """
        Return the destination recorded in ``RESOURCES`` for *relative_path*.

        :raises KeyError: if no row of ``RESOURCES`` has *relative_path* as
                          its first column
        """
        with self.get_distinfo_file('RESOURCES') as resources_file:
            resources_reader = csv.reader(resources_file, delimiter=',',
                                          lineterminator='\n')
            for row in resources_reader:
                if not row:
                    continue
                relative, destination = row
                if relative == relative_path:
                    return destination
        raise KeyError(
            'no resource file with relative path %r is installed' %
            relative_path)

    def list_installed_files(self, local=False):
        """
        Iterates over the ``RECORD`` entries and returns a tuple
        ``(path, md5, size)`` for each line. If *local* is ``True``,
        the returned path is transformed into a local absolute path.
        Otherwise the raw value from RECORD is returned.

        A local absolute path is an absolute path in which occurrences of
        ``'/'`` have been replaced by the system separator given by ``os.sep``.

        :parameter local: flag to say if the path should be returned a local
                          absolute path

        :type local: boolean
        :returns: iterator of (path, md5, size)
        """
        return self._get_records(local)

    def uses(self, path):
        """
        Returns ``True`` if path is listed in ``RECORD``. *path* can be a local
        absolute path or a relative ``'/'``-separated path.

        :rtype: boolean
        """
        records = self._get_records()
        try:
            for p, checksum, size in records:
                if path == p:
                    return True
                # Accept both the plain join with sys.prefix and the form
                # produced by list_installed_files(local=True), which also
                # converts '/' to os.sep.
                if path in (os.path.join(sys.prefix, p), self._to_local(p)):
                    return True
            return False
        finally:
            # close RECORD right away, even on an early return
            records.close()

    def get_distinfo_file(self, path, binary=False):
        """
        Returns a file located under the ``.dist-info`` directory. Returns a
        ``file`` instance for the file pointed by *path*.

        :parameter path: a ``'/'``-separated path relative to the
                         ``.dist-info`` directory or an absolute path;
                         If *path* is an absolute path and doesn't start
                         with the ``.dist-info`` directory path,
                         a :class:`PackagingError` is raised
        :type path: string
        :parameter binary: If *binary* is ``True``, opens the file in read-only
                           binary mode (``rb``), otherwise opens it in
                           read-only mode (``r``).
        :rtype: file object
        :raises PackagingError: if *path* belongs to another directory or
                                is not one of ``DIST_FILES``
        """
        open_flags = 'rb' if binary else 'r'

        # Check if it is an absolute path  # XXX use relpath, add tests
        if os.sep in path:
            # only the last two components are looked at: the parent
            # directory name and the file name
            parent_dirname, path = path.split(os.sep)[-2:]
            if parent_dirname != self.path.split(os.sep)[-1]:
                raise PackagingError(
                    'dist-info file %r does not belong to the %r %s '
                    'distribution' % (path, self.name, self.version))

        # The file must be relative
        if path not in DIST_FILES:
            raise PackagingError('invalid path for a dist-info file: %r' %
                                 path)

        path = os.path.join(self.path, path)
        return open(path, open_flags)

    def list_distinfo_files(self, local=False):
        """
        Iterates over the ``RECORD`` entries and returns paths for each line if
        the path is pointing to a file located in the ``.dist-info`` directory
        or one of its subdirectories.

        :parameter local: If *local* is ``True``, each returned path is
                          transformed into a local absolute path. Otherwise the
                          raw value from ``RECORD`` is returned.
        :type local: boolean
        :returns: iterator of paths
        """
        base = os.path.abspath(self.path)
        inside = base + os.sep
        for path, checksum, size in self._get_records(local):
            # the location test is always done on the local absolute form,
            # whatever form is handed back to the caller
            location = path if local else self._to_local(path)
            location = os.path.normpath(location)
            if location == base or location.startswith(inside):
                yield path

    def __eq__(self, other):
        return isinstance(other, Distribution) and self.path == other.path

    # Equality is based on the path, so the hash must be too: objects that
    # compare equal are required to have the same hash value.
    # See http://docs.python.org/reference/datamodel#object.__hash__
    def __hash__(self):
        return hash(self.path)
273
274
class EggInfoDistribution:
    """Created with the *path* of the ``.egg-info`` directory or file provided
    to the constructor. It reads the metadata contained in the file itself, or
    if the given path happens to be a directory, the metadata is read from the
    file ``PKG-INFO`` under that directory."""

    name = ''
    """The name of the distribution."""

    version = ''
    """The version of the distribution."""

    metadata = None
    """A :class:`packaging.metadata.Metadata` instance loaded with
    the distribution's ``METADATA`` file."""

    _REQUIREMENT = re.compile(
        r'(?P<name>[-A-Za-z0-9_.]+)\s*'
        r'(?P<first>(?:<|<=|!=|==|>=|>)[-A-Za-z0-9_.]+)?\s*'
        r'(?P<rest>(?:\s*,\s*(?:<|<=|!=|==|>=|>)[-A-Za-z0-9_.]+)*)\s*'
        r'(?P<extras>\[.*\])?')

    def __init__(self, path):
        """
        Load the distribution found at *path*.

        *path* must end with ``.egg`` (directory or zip file) or
        ``.egg-info`` (directory or single file).  When a ``requires.txt``
        file is found next to the metadata, its requirements are appended to
        the ``Requires-Dist`` metadata field.

        :raises ValueError: if *path* has neither of the two suffixes
        """
        self.path = path
        if _cache_enabled and path in _cache_path_egg:
            self.metadata = _cache_path_egg[path].metadata
            self.name = self.metadata['Name']
            self.version = self.metadata['Version']
            return

        requires = None

        if path.endswith('.egg'):
            if os.path.isdir(path):
                meta_path = os.path.join(path, 'EGG-INFO', 'PKG-INFO')
                self.metadata = Metadata(path=meta_path)
                requires = self._read_requires(
                    os.path.join(path, 'EGG-INFO', 'requires.txt'))
            else:
                # FIXME handle the case where zipfile is not available
                zipf = zipimport.zipimporter(path)
                fileobj = io.StringIO(
                    zipf.get_data('EGG-INFO/PKG-INFO').decode('utf8'))
                self.metadata = Metadata(fileobj=fileobj)
                try:
                    # get_data returns bytes; the parsing below needs text
                    requires = zipf.get_data(
                        'EGG-INFO/requires.txt').decode('utf8')
                except IOError:
                    requires = None

        elif path.endswith('.egg-info'):
            if os.path.isdir(path):
                # requires.txt lives in the directory itself, so it must be
                # located before *path* is redirected to PKG-INFO
                requires = self._read_requires(
                    os.path.join(path, 'requires.txt'))
                path = os.path.join(path, 'PKG-INFO')
            self.metadata = Metadata(path=path)

        else:
            raise ValueError('path must end with .egg-info or .egg, got %r' %
                             path)

        self.name = self.metadata['Name']
        self.version = self.metadata['Version']

        if requires is not None:
            if self.metadata['Metadata-Version'] == '1.1':
                # we can't have 1.1 metadata *and* Setuptools requires
                for field in ('Obsoletes', 'Requires', 'Provides'):
                    del self.metadata[field]

            reqs = self._parse_requires(requires)
            if reqs:
                self.metadata['Requires-Dist'] += reqs

        if _cache_enabled:
            _cache_path_egg[self.path] = self

    @staticmethod
    def _read_requires(req_path):
        """Return the text of the file *req_path*, or None if unreadable."""
        try:
            with open(req_path, 'r') as fp:
                return fp.read()
        except IOError:
            return None

    def _parse_requires(self, requires):
        """
        Convert the text of a ``requires.txt`` file into a list of
        requirement strings of the form ``'name'`` or ``'name (versions)'``.

        Blank lines and ``#`` comments are skipped.  Parsing stops at the
        first ``[section]`` line or at the first line that does not look
        like a requirement.
        """
        reqs = []
        for line in requires.splitlines():
            line = line.strip()
            # skip blank lines/comments
            if not line or line.startswith('#'):
                continue
            if line.startswith('['):
                logger.warning(
                    'extensions in requires.txt are not supported '
                    '(used by %r %s)', self.name, self.version)
                break

            match = self._REQUIREMENT.match(line)
            if not match:
                # this happens when we encounter extras; since they
                # are written at the end of the file we just exit
                break

            if match.group('extras'):
                logger.warning(
                    'extra requirements are not supported '
                    '(used by %r %s)', self.name, self.version)

            name = match.group('name')
            # "rest" may be present without "first" (e.g. "foo, >=1"), so
            # both groups are optional when the version is put together
            version = (match.group('first') or '') + \
                      (match.group('rest') or '')
            version = version.replace(' ', '').lstrip(',')  # trim spaces
            if version:
                reqs.append('%s (%s)' % (name, version))
            else:
                reqs.append(name)
        return reqs

    def __repr__(self):
        return '<EggInfoDistribution %r %s at %r>' % (
            self.name, self.version, self.path)

    def list_installed_files(self, local=False):
        """
        Return a list of ``(path, md5, size)`` tuples for the egg itself.

        If the distribution path is a file, the list has that single file;
        otherwise every file found by walking the path is listed.

        :parameter local: if true, ``'/'`` in the distribution path is
                          replaced by ``os.sep`` before it is used
        """

        def _md5(path):
            with open(path, 'rb') as f:
                content = f.read()
            return md5(content).hexdigest()

        def _size(path):
            return os.stat(path).st_size

        path = self.path
        if local:
            path = path.replace('/', os.sep)

        # XXX What about scripts and data files ?
        if os.path.isfile(path):
            return [(path, _md5(path), _size(path))]

        files = []
        for root, dirnames, filenames in os.walk(path):
            for item in filenames:
                item = os.path.join(root, item)
                files.append((item, _md5(item), _size(item)))
        return files

    def uses(self, path):
        """Always return False: no record of used files is consulted."""
        return False

    def __eq__(self, other):
        return (isinstance(other, EggInfoDistribution) and
                self.path == other.path)

    # Equality is based on the path, so the hash must be too: objects that
    # compare equal are required to have the same hash value.
    # See http://docs.python.org/reference/datamodel#object.__hash__
    def __hash__(self):
        return hash(self.path)
445
446
def distinfo_dirname(name, version):
    """
    Build the name of the ``.dist-info`` directory for *name* and *version*.

    The result has the form ``<name>-<version>.dist-info``.

    :parameter name: distribution name; every ``'-'`` it contains is
                     replaced with ``'_'``
    :type name: string
    :parameter version: version string; it is passed through
                        ``suggest_normalized_version`` and used unchanged
                        when no normalized form can be suggested
    :type version: string
    :returns: directory name
    :rtype: string"""
    escaped_name = name.replace('-', '_')
    suggestion = suggest_normalized_version(version)
    # This is a lookup procedure, so a directory name is produced even for
    # a version that cannot be normalized: the raw value is used instead.
    version_part = version if suggestion is None else suggestion
    return escaped_name + '-' + version_part + '.dist-info'
474
475
def get_distributions(use_egg_info=False, paths=None):
    """
    Provides an iterator that looks for ``.dist-info`` directories in
    *paths* and returns :class:`Distribution` instances for each one of
    them. If the parameter *use_egg_info* is ``True``, then the ``.egg-info``
    files and directories are iterated as well.

    :parameter paths: iterable of directories to scan; ``None`` (the
                      default) means the current value of ``sys.path``
    :rtype: iterator of :class:`Distribution` and :class:`EggInfoDistribution`
            instances
    """
    if paths is None:
        paths = sys.path

    if not _cache_enabled:
        for dist in _yield_distributions(True, use_egg_info, paths):
            yield dist
    else:
        _generate_cache(use_egg_info, paths)

        # Iterate over snapshots: the consumer may create distributions
        # between two items, which adds entries to the cache dictionaries.
        for dist in list(_cache_path.values()):
            yield dist

        if use_egg_info:
            for dist in list(_cache_path_egg.values()):
                yield dist
498
499
def get_distribution(name, use_egg_info=False, paths=None):
    """
    Scans all elements in *paths* and looks for all directories
    ending with ``.dist-info``. Returns a :class:`Distribution`
    corresponding to the ``.dist-info`` directory that contains the
    ``METADATA`` that matches *name* for the *name* metadata field.
    If no distribution exists with the given *name* and the parameter
    *use_egg_info* is set to ``True``, then all files and directories ending
    with ``.egg-info`` are scanned. A :class:`EggInfoDistribution` instance is
    returned if one is found that has metadata that matches *name* for the
    *name* metadata field.

    This function only returns the first result found, as no more than one
    value is expected. If the directory is not found, ``None`` is returned.

    :parameter paths: iterable of directories to scan; ``None`` (the
                      default) means the current value of ``sys.path``
    :rtype: :class:`Distribution` or :class:`EggInfoDistribution` or None
    """
    if paths is None:
        paths = sys.path

    if not _cache_enabled:
        for dist in _yield_distributions(True, use_egg_info, paths):
            if dist.name == name:
                return dist
        return None

    _generate_cache(use_egg_info, paths)

    if name in _cache_name:
        return _cache_name[name][0]
    if use_egg_info and name in _cache_name_egg:
        return _cache_name_egg[name][0]
    return None
533
534
def obsoletes_distribution(name, version=None, use_egg_info=False):
    """
    Yield every distribution that declares *name* as obsoleted.

    Both the ``Obsoletes-Dist`` and ``Obsoletes`` metadata fields are
    examined.  Each distribution is yielded at most once.

    :parameter name: name of the distribution to look for
    :type name: string
    :parameter version: when given, entries that carry a version predicate
                        only count if *version* matches that predicate
    :type version: string
    :parameter use_egg_info: also consider ``.egg-info`` distributions
    :raises PackagingError: if an examined entry is not a valid version
                            predicate
    """
    for dist in get_distributions(use_egg_info):
        entries = (dist.metadata['Obsoletes-Dist'] +
                   dist.metadata['Obsoletes'])
        for entry in entries:
            parts = entry.split(' ', 1)
            obsoleted_name = parts[0]

            if version is not None and len(parts) != 1:
                # the entry has a version part and the caller wants it used
                try:
                    predicate = VersionPredicate(entry)
                except ValueError:
                    raise PackagingError(
                        'distribution %r has ill-formed obsoletes field: '
                        '%r' % (dist.name, entry))
                hit = name == obsoleted_name and predicate.match(version)
            else:
                hit = name == obsoleted_name

            if hit:
                yield dist
                break
567
568
def provides_distribution(name, version=None, use_egg_info=False):
    """
    Yield every distribution that declares that it provides *name*.

    Both the ``Provides-Dist`` and ``Provides`` metadata fields are
    examined.  Each distribution is yielded at most once.  If the argument
    *use_egg_info* is set to ``True``, ``.egg-info`` distributions are
    considered as well.

    :parameter version: a version specifier that indicates the version
                        required, conforming to the format in ``PEP-345``;
                        when given, entries that carry a version only count
                        if that version matches the specifier

    :type name: string
    :type version: string
    :raises PackagingError: if *name* and *version* do not form a valid
                            predicate, or if an examined entry has a
                            version part that is not wrapped in parentheses
    """
    if version is None:
        predicate = None
    else:
        try:
            predicate = VersionPredicate(name + ' (' + version + ')')
        except ValueError:
            raise PackagingError('invalid name or version: %r, %r' %
                                 (name, version))

    for dist in get_distributions(use_egg_info):
        metadata = dist.metadata
        entries = metadata['Provides-Dist'] + metadata['Provides']

        for entry in entries:
            parts = entry.rsplit(' ', 1)

            if predicate is None or len(parts) == 1:
                # nothing to compare versions with: the name decides
                if parts[0] == name:
                    yield dist
                    break
                continue

            provided_name, provided_version = parts
            well_formed = (len(provided_version) >= 2 and
                           provided_version[0] == '(' and
                           provided_version[-1] == ')')
            if not well_formed:
                raise PackagingError(
                    'distribution %r has invalid Provides field: %r' %
                    (dist.name, entry))
            provided_version = provided_version[1:-1]  # drop the parentheses
            if provided_name == name and predicate.match(provided_version):
                yield dist
                break
616
617
def get_file_users(path):
    """
    Yield the distributions whose ``uses`` method accepts *path*.

    The distributions examined are the ones returned by
    :func:`get_distributions` called without arguments.

    :parameter path: can be a local absolute path or a relative
                     ``'/'``-separated path.
    :type path: string
    :rtype: iterator of :class:`Distribution` instances
    """
    for candidate in get_distributions():
        if not candidate.uses(path):
            continue
        yield candidate
Tarek Ziadea17d8882011-05-30 10:57:44 +0200631
632
def get_file_path(distribution_name, relative_path):
    """Return the path to a resource file.

    The distribution is looked up with :func:`get_distribution` and asked
    for the path of *relative_path* through ``get_resource_path``.

    :raises LookupError: if no distribution named *distribution_name* is
                         found
    """
    dist = get_distribution(distribution_name)
    # identity test: ``!=`` would go through the distribution's own __eq__
    if dist is None:
        raise LookupError('no distribution named %r found' %
                          distribution_name)
    return dist.get_resource_path(relative_path)
639
640
def get_file(distribution_name, relative_path, *args, **kwargs):
    """Open and return a resource file.

    The file is located with :func:`get_file_path`; any extra positional
    and keyword arguments are handed to :func:`open` after the path.
    """
    resource_path = get_file_path(distribution_name, relative_path)
    return open(resource_path, *args, **kwargs)