"""Implementation of the Metadata for Python packages PEPs.

Supports all metadata formats (1.0, 1.1, 1.2).
"""
5
6import re
7import logging
8
9from io import StringIO
10from email import message_from_file
11from packaging import logger
12from packaging.markers import interpret
13from packaging.version import (is_valid_predicate, is_valid_version,
14 is_valid_versions)
15from packaging.errors import (MetadataMissingError,
16 MetadataConflictError,
17 MetadataUnrecognizedVersionError)
18
19try:
20 # docutils is installed
21 from docutils.utils import Reporter
22 from docutils.parsers.rst import Parser
23 from docutils import frontend
24 from docutils import nodes
25
26 class SilentReporter(Reporter):
27
28 def __init__(self, source, report_level, halt_level, stream=None,
29 debug=0, encoding='ascii', error_handler='replace'):
30 self.messages = []
Éric Araujo80223142011-10-14 17:04:39 +020031 super(SilentReporter, self).__init__(
32 source, report_level, halt_level, stream,
33 debug, encoding, error_handler)
Tarek Ziade1231a4e2011-05-19 13:07:25 +020034
35 def system_message(self, level, message, *children, **kwargs):
36 self.messages.append((level, message, children, kwargs))
37
38 _HAS_DOCUTILS = True
39except ImportError:
40 # docutils is not installed
41 _HAS_DOCUTILS = False
42
# public API of this module
__all__ = ['Metadata', 'PKG_INFO_ENCODING', 'PKG_INFO_PREFERRED_VERSION']

# Encoding used for the PKG-INFO files
PKG_INFO_ENCODING = 'utf-8'

# Preferred metadata version.  _best_version() returns it when the fields in
# use carry no 1.1- or 1.2-specific marker and this version is still a
# candidate.  Hopefully will be changed to 1.2 once PEP 345 is supported
# everywhere.
PKG_INFO_PREFERRED_VERSION = '1.0'
52
53_LINE_PREFIX = re.compile('\n \|')
54_241_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform',
55 'Summary', 'Description',
56 'Keywords', 'Home-page', 'Author', 'Author-email',
57 'License')
58
59_314_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform',
60 'Supported-Platform', 'Summary', 'Description',
61 'Keywords', 'Home-page', 'Author', 'Author-email',
62 'License', 'Classifier', 'Download-URL', 'Obsoletes',
63 'Provides', 'Requires')
64
Éric Araujoe6db7a32011-09-10 05:22:48 +020065_314_MARKERS = ('Obsoletes', 'Provides', 'Requires', 'Classifier',
66 'Download-URL')
Tarek Ziade1231a4e2011-05-19 13:07:25 +020067
68_345_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform',
69 'Supported-Platform', 'Summary', 'Description',
70 'Keywords', 'Home-page', 'Author', 'Author-email',
71 'Maintainer', 'Maintainer-email', 'License',
72 'Classifier', 'Download-URL', 'Obsoletes-Dist',
73 'Project-URL', 'Provides-Dist', 'Requires-Dist',
74 'Requires-Python', 'Requires-External')
75
76_345_MARKERS = ('Provides-Dist', 'Requires-Dist', 'Requires-Python',
77 'Obsoletes-Dist', 'Requires-External', 'Maintainer',
78 'Maintainer-email', 'Project-URL')
79
80_ALL_FIELDS = set()
81_ALL_FIELDS.update(_241_FIELDS)
82_ALL_FIELDS.update(_314_FIELDS)
83_ALL_FIELDS.update(_345_FIELDS)
84
85
def _version2fieldlist(version):
    """Return the tuple of field names defined by metadata *version*.

    Raises MetadataUnrecognizedVersionError when *version* is not one of
    '1.0', '1.1' or '1.2'.
    """
    layouts = (('1.0', _241_FIELDS),
               ('1.1', _314_FIELDS),
               ('1.2', _345_FIELDS))
    for known_version, field_names in layouts:
        if version == known_version:
            return field_names
    raise MetadataUnrecognizedVersionError(version)
94
95
def _best_version(fields):
    """Detect the best version depending on the fields used.

    *fields* is an iterable of field names.  Returns '1.0', '1.1' or '1.2'.
    Raises MetadataConflictError when no version defines every field, or
    when both 1.1-only and 1.2-only marker fields are present.
    """
    keys = list(fields)

    # a version stays a candidate only if it defines every field in use
    layouts = (('1.0', _241_FIELDS),
               ('1.1', _314_FIELDS),
               ('1.2', _345_FIELDS))
    candidates = [version for version, allowed in layouts
                  if all(key in allowed for key in keys)]

    if not candidates:
        raise MetadataConflictError('Unknown metadata set')
    if len(candidates) == 1:
        return candidates[0]  # found !

    # several versions fit: look for fields that single one of them out
    uses_1_1 = ('1.1' in candidates and
                any(marker in keys for marker in _314_MARKERS))
    uses_1_2 = ('1.2' in candidates and
                any(marker in keys for marker in _345_MARKERS))
    if uses_1_1 and uses_1_2:
        raise MetadataConflictError('You used incompatible 1.1 and 1.2 fields')

    # we have the choice, either 1.0, or 1.2
    #   - 1.0 has a broken Summary field but works with all tools
    #   - 1.1 is to avoid
    #   - 1.2 fixes Summary but is not widespread yet
    if uses_1_1:
        return '1.1'
    if not uses_1_2 and PKG_INFO_PREFERRED_VERSION in candidates:
        # we couldn't find any specific marker
        return PKG_INFO_PREFERRED_VERSION

    # default when the preferred version is disqualified
    return '1.2'
141
142
# Maps attribute-style names (lowercase, underscores) to the matching header
# field names.  Used by Metadata._convert_name() and Metadata.update().
_ATTR2FIELD = {
    'metadata_version': 'Metadata-Version',
    'name': 'Name',
    'version': 'Version',
    'platform': 'Platform',
    'supported_platform': 'Supported-Platform',
    'summary': 'Summary',
    'description': 'Description',
    'keywords': 'Keywords',
    'home_page': 'Home-page',
    'author': 'Author',
    'author_email': 'Author-email',
    'maintainer': 'Maintainer',
    'maintainer_email': 'Maintainer-email',
    'license': 'License',
    'classifier': 'Classifier',
    'download_url': 'Download-URL',
    'obsoletes_dist': 'Obsoletes-Dist',
    'provides_dist': 'Provides-Dist',
    'requires_dist': 'Requires-Dist',
    'requires_python': 'Requires-Python',
    'requires_external': 'Requires-External',
    'requires': 'Requires',
    'provides': 'Provides',
    'obsoletes': 'Obsoletes',
    'project_url': 'Project-URL',
}

# Fields whose values are checked with is_valid_predicate()
_PREDICATE_FIELDS = ('Requires-Dist', 'Obsoletes-Dist', 'Provides-Dist')
# Fields whose value is checked with is_valid_versions()
_VERSIONS_FIELDS = ('Requires-Python',)
# Fields whose value is checked with is_valid_version()
_VERSION_FIELDS = ('Version',)
# Multi-valued fields: stored as lists, default to [], one line per value
# when written
_LISTFIELDS = ('Platform', 'Classifier', 'Obsoletes',
               'Requires', 'Provides', 'Obsoletes-Dist',
               'Provides-Dist', 'Requires-Dist', 'Requires-External',
               'Project-URL', 'Supported-Platform')
# Multi-valued fields whose items are tuples (split on ',' when read from a
# file, joined with ',' when written)
_LISTTUPLEFIELDS = ('Project-URL',)

# Fields holding comma-separated elements on a single line
_ELEMENTSFIELD = ('Keywords',)

# Fields that Metadata.get() returns exactly as stored
_UNICODEFIELDS = ('Author', 'Maintainer', 'Summary', 'Description')

# Sentinel telling "no default given" apart from an explicit None default
_MISSING = object()

# Runs of characters other than ASCII letters, digits and '.'; used by
# Metadata.get_fullname() to build file-safe names
_FILESAFE = re.compile('[^A-Za-z0-9.]+')
Tarek Ziade1231a4e2011-05-19 13:07:25 +0200187
Éric Araujo784cd4c2011-10-19 08:50:49 +0200188
class Metadata:
    """The metadata of a release.

    Supports versions 1.0, 1.1 and 1.2 (auto-detected).  You can
    instantiate the class with one of these arguments (or none):
    - *path*, the path to a METADATA file
    - *fileobj*, a file-like object with METADATA as content
    - *mapping*, a dict-like object

    Fields can be addressed by their header name ('Home-page') or by the
    attribute-style alias ('home_page').  Reading a field that was never
    set returns [] for multi-valued fields and 'UNKNOWN' otherwise.
    """
    # TODO document that execution_context and platform_dependent are used
    # to filter on query, not when setting a key
    # also document the mapping API and UNKNOWN default key

    def __init__(self, path=None, platform_dependent=False,
                 execution_context=None, fileobj=None, mapping=None):
        """Create the metadata, optionally loading it from one source.

        Raises TypeError when more than one of *path*, *fileobj* and
        *mapping* is given.
        """
        self._fields = {}
        self.requires_files = []
        self.docutils_support = _HAS_DOCUTILS
        self.platform_dependent = platform_dependent
        self.execution_context = execution_context
        sources = (path, fileobj, mapping)
        if sum(source is not None for source in sources) > 1:
            raise TypeError('path, fileobj and mapping are exclusive')
        if path is not None:
            self.read(path)
        elif fileobj is not None:
            self.read_file(fileobj)
        elif mapping is not None:
            self.update(mapping)

    def _set_best_version(self):
        """Recompute Metadata-Version from the fields currently stored."""
        self._fields['Metadata-Version'] = _best_version(self._fields)

    def _write_field(self, file, name, value):
        """Write one ``name: value`` header line to *file*."""
        file.write('%s: %s\n' % (name, value))

    def __getitem__(self, name):
        return self.get(name)

    def __setitem__(self, name, value):
        return self.set(name, value)

    def __delitem__(self, name):
        field_name = self._convert_name(name)
        try:
            del self._fields[field_name]
        except KeyError:
            # report the name the caller used, not the converted one
            raise KeyError(name)
        self._set_best_version()

    def __contains__(self, name):
        return (name in self._fields or
                self._convert_name(name) in self._fields)

    def _convert_name(self, name):
        """Return the header field name for *name*.

        Known field names are returned unchanged; other names are lowercased
        with '-' turned into '_' and looked up among the attribute-style
        aliases.  Unknown names come back in that normalized form.
        """
        if name in _ALL_FIELDS:
            return name
        name = name.replace('-', '_').lower()
        return _ATTR2FIELD.get(name, name)

    def _default_value(self, name):
        """Return the value reported for a field that has not been set."""
        if name in _LISTFIELDS or name in _ELEMENTSFIELD:
            return []
        return 'UNKNOWN'

    def _check_rst_data(self, data):
        """Return warnings when the provided data has syntax errors.

        The result is the list of ``(level, message, children, kwargs)``
        tuples collected by SilentReporter while docutils parses *data*.
        """
        source_path = StringIO()
        parser = Parser()
        settings = frontend.OptionParser().get_default_values()
        settings.tab_width = 4
        settings.pep_references = None
        settings.rfc_references = None
        reporter = SilentReporter(source_path,
                                  settings.report_level,
                                  settings.halt_level,
                                  stream=settings.warning_stream,
                                  debug=settings.debug,
                                  encoding=settings.error_encoding,
                                  error_handler=settings.error_encoding_error_handler)

        document = nodes.document(settings, reporter, source=source_path)
        document.note_source(source_path, -1)
        try:
            parser.parse(data, document)
        except AttributeError:
            reporter.messages.append((-1, 'Could not finish the parsing.',
                                      '', {}))

        return reporter.messages

    def _platform(self, value):
        """Split an optional ``; marker`` suffix off *value* and evaluate it.

        Returns ``(valid, value)``.  *valid* is True unless the instance is
        platform dependent and *value* is a string carrying a marker, in
        which case it is the result of interpreting that marker in the
        execution context.
        """
        if (not self.platform_dependent or not isinstance(value, str) or
                ';' not in value):
            return True, value
        # split only once: everything after the first ';' is the marker, so
        # a further ';' no longer breaks the two-name unpacking
        value, marker = value.split(';', 1)
        return interpret(marker, self.execution_context), value

    def _remove_line_prefix(self, value):
        """Strip the continuation prefix that follows newlines in *value*."""
        return _LINE_PREFIX.sub('\n', value)

    #
    # Public API
    #
    def get_fullname(self, filesafe=False):
        """Return the distribution name with version.

        If filesafe is true, return a filename-escaped form."""
        name, version = self['Name'], self['Version']
        if filesafe:
            # For both name and version any runs of non-alphanumeric or '.'
            # characters are replaced with a single '-'.  Additionally any
            # spaces in the version string become '.'
            name = _FILESAFE.sub('-', name)
            version = _FILESAFE.sub('-', version.replace(' ', '.'))
        return '%s-%s' % (name, version)

    def is_metadata_field(self, name):
        """Return True if name is a valid metadata key."""
        name = self._convert_name(name)
        return name in _ALL_FIELDS

    def is_multi_field(self, name):
        """Return True if name is a field that holds a list of values."""
        name = self._convert_name(name)
        return name in _LISTFIELDS

    def read(self, filepath):
        """Read the metadata values from a file path."""
        with open(filepath, 'r', encoding='utf-8') as fp:
            self.read_file(fp)

    def read_file(self, fileob):
        """Read the metadata values from a file object.

        Only the fields defined by the Metadata-Version found in the file
        are loaded; an unrecognized or missing version raises
        MetadataUnrecognizedVersionError.
        """
        msg = message_from_file(fileob)
        self._fields['Metadata-Version'] = msg['metadata-version']

        for field in _version2fieldlist(self['Metadata-Version']):
            if field in _LISTFIELDS:
                # we can have multiple lines
                values = msg.get_all(field)
                if field in _LISTTUPLEFIELDS and values is not None:
                    # split on the first comma only: get() exposes exactly
                    # two items per entry, so splitting on every comma
                    # dropped the tail of a value containing commas
                    values = [tuple(value.split(',', 1))
                              for value in values]
                self.set(field, values)
            else:
                # single line
                value = msg[field]
                if value is not None and value != 'UNKNOWN':
                    self.set(field, value)

    def write(self, filepath):
        """Write the metadata fields to filepath."""
        with open(filepath, 'w', encoding='utf-8') as fp:
            self.write_file(fp)

    def write_file(self, fileobject):
        """Write the PKG-INFO format data to a file object."""
        self._set_best_version()
        for field in _version2fieldlist(self['Metadata-Version']):
            values = self.get(field)
            if field in _ELEMENTSFIELD:
                self._write_field(fileobject, field, ','.join(values))
                continue
            if field not in _LISTFIELDS:
                if field == 'Description':
                    # mark continuation lines; _remove_line_prefix() undoes
                    # this when the value is set again
                    values = values.replace('\n', '\n |')
                values = [values]

            if field in _LISTTUPLEFIELDS:
                values = [','.join(value) for value in values]

            for value in values:
                self._write_field(fileobject, field, value)

    def update(self, other=None, **kwargs):
        """Set metadata values from the given iterable `other` and kwargs.

        Behavior is like `dict.update`: If `other` has a ``keys`` method,
        they are looped over and ``self[key]`` is assigned ``other[key]``.
        Else, ``other`` is an iterable of ``(key, value)`` iterables.

        Keys that don't match a metadata field or that have an empty value are
        dropped.
        """
        # XXX the code should just use self.set, which does the same checks
        # and conversions already, but that would break packaging.pypi: it
        # uses the update method, which does not call _set_best_version
        # (which set does), and thus allows having a Metadata object (as long
        # as you don't modify or write it) with extra fields from PyPI that
        # are not fields defined in Metadata PEPs.  to solve it, the
        # best_version system should be reworked so that it's called only for
        # writing, or in a new strict mode, or with a new, more lax Metadata
        # subclass in p7g.pypi
        def _set(key, value):
            # only attribute-style keys with a non-empty value are kept
            if key in _ATTR2FIELD and value:
                self.set(self._convert_name(key), value)

        if not other:
            # other is None or empty container
            pass
        elif hasattr(other, 'keys'):
            for k in other.keys():
                _set(k, other[k])
        else:
            for k, v in other:
                _set(k, v)

        if kwargs:
            for k, v in kwargs.items():
                _set(k, v)

    def set(self, name, value):
        """Control then set a metadata field.

        Values of element and list fields are normalized to lists.  Invalid
        predicates or versions are only reported through the logger; the
        value is stored regardless, and Metadata-Version is recomputed.
        """
        name = self._convert_name(name)

        if not isinstance(value, (list, tuple)):
            if name in _ELEMENTSFIELD or name == 'Platform':
                # comma-separated string -> list of stripped items
                if isinstance(value, str):
                    value = [v.strip() for v in value.split(',')]
                else:
                    value = []
            elif name in _LISTFIELDS:
                if isinstance(value, str):
                    value = [value]
                else:
                    value = []

        # The checks only produce warnings, so they are skipped entirely
        # when warnings are not emitted (project_name is only bound here).
        if logger.isEnabledFor(logging.WARNING):
            project_name = self['Name']

            if name in _PREDICATE_FIELDS and value is not None:
                for v in value:
                    # check that the values are valid predicates
                    if not is_valid_predicate(v.split(';')[0]):
                        logger.warning(
                            '%r: %r is not a valid predicate (field %r)',
                            project_name, v, name)
            # FIXME this rejects UNKNOWN, is that right?
            elif name in _VERSIONS_FIELDS and value is not None:
                if not is_valid_versions(value):
                    logger.warning('%r: %r is not a valid version (field %r)',
                                   project_name, value, name)
            elif name in _VERSION_FIELDS and value is not None:
                if not is_valid_version(value):
                    logger.warning('%r: %r is not a valid version (field %r)',
                                   project_name, value, name)

        if name in _UNICODEFIELDS and name == 'Description':
            value = self._remove_line_prefix(value)

        self._fields[name] = value
        self._set_best_version()

    def get(self, name, default=_MISSING):
        """Get a metadata field.

        When the field is not set, return *default* if given, else the
        field's default value.  On a platform-dependent instance, values
        whose marker does not match are filtered out: they are skipped in
        list fields, an element field yields [] and other fields yield None.
        """
        name = self._convert_name(name)
        if name not in self._fields:
            if default is _MISSING:
                default = self._default_value(name)
            return default

        value = self._fields[name]
        if name in _UNICODEFIELDS:
            return value

        if name in _LISTFIELDS:
            if value is None:
                return []
            res = []
            for val in value:
                valid, val = self._platform(val)
                if not valid:
                    continue
                if name not in _LISTTUPLEFIELDS:
                    res.append(val)
                else:
                    # That's for Project-URL
                    res.append((val[0], val[1]))
            return res

        # the marker is evaluated once for the remaining field kinds
        valid, value = self._platform(value)
        if name in _ELEMENTSFIELD:
            if not valid:
                return []
            if isinstance(value, str):
                return value.split(',')
            return value
        if not valid:
            return None
        return value

    def check(self, strict=False, restructuredtext=False):
        """Check if the metadata is compliant.

        Return a ``(missing, warnings)`` tuple: the names of missing fields
        and the problems found in the values.  If strict is True then raise
        MetadataMissingError if no Name or Version are provided.
        """
        # XXX should check the versions (if the file was loaded)
        missing, warnings = [], []

        for attr in ('Name', 'Version'):  # required by PEP 345
            if attr not in self:
                missing.append(attr)

        if strict and missing:
            msg = 'missing required metadata: %s' % ', '.join(missing)
            raise MetadataMissingError(msg)

        for attr in ('Home-page', 'Author'):
            if attr not in self:
                missing.append(attr)

        if _HAS_DOCUTILS and restructuredtext:
            warnings.extend(self._check_rst_data(self['Description']))

        # checking metadata 1.2 (XXX needs to check 1.1, 1.0)
        if self['Metadata-Version'] != '1.2':
            return missing, warnings

        def is_valid_predicates(value):
            for v in value:
                if not is_valid_predicate(v.split(';')[0]):
                    return False
            return True

        for fields, controller in ((_PREDICATE_FIELDS, is_valid_predicates),
                                   (_VERSIONS_FIELDS, is_valid_versions),
                                   (_VERSION_FIELDS, is_valid_version)):
            for field in fields:
                value = self.get(field, None)
                if value is not None and not controller(value):
                    warnings.append('Wrong value for %r: %s' % (field, value))

        return missing, warnings

    def todict(self):
        """Return fields as a dict.

        Field names will be converted to use the underscore-lowercase style
        instead of hyphen-mixed case (i.e. home_page instead of Home-page).
        """
        data = {
            'metadata_version': self['Metadata-Version'],
            'name': self['Name'],
            'version': self['Version'],
            'summary': self['Summary'],
            'home_page': self['Home-page'],
            'author': self['Author'],
            'author_email': self['Author-email'],
            'license': self['License'],
            'description': self['Description'],
            'keywords': self['Keywords'],
            'platform': self['Platform'],
            'classifier': self['Classifier'],
            'download_url': self['Download-URL'],
        }

        # add the fields that only exist in the detected version
        if self['Metadata-Version'] == '1.2':
            data['requires_dist'] = self['Requires-Dist']
            data['requires_python'] = self['Requires-Python']
            data['requires_external'] = self['Requires-External']
            data['provides_dist'] = self['Provides-Dist']
            data['obsoletes_dist'] = self['Obsoletes-Dist']
            data['project_url'] = [','.join(url) for url in
                                   self['Project-URL']]

        elif self['Metadata-Version'] == '1.1':
            data['provides'] = self['Provides']
            data['requires'] = self['Requires']
            data['obsoletes'] = self['Obsoletes']

        return data

    # Mapping API
    # XXX these methods should return views or sets in 3.x

    def keys(self):
        """Return the field names defined by the current metadata version."""
        return list(_version2fieldlist(self['Metadata-Version']))

    def __iter__(self):
        for key in self.keys():
            yield key

    def values(self):
        """Return the value of every field listed by keys()."""
        return [self[key] for key in self.keys()]

    def items(self):
        """Return ``(name, value)`` pairs for every field listed by keys()."""
        return [(key, self[key]) for key in self.keys()]