blob: e071a2d1b23edea366504078091da5b410c8c434 [file] [log] [blame]
"""A collection of string constants.

Public module variables:

whitespace -- a string containing all ASCII whitespace
ascii_lowercase -- a string containing all ASCII lowercase letters
ascii_uppercase -- a string containing all ASCII uppercase letters
ascii_letters -- a string containing all ASCII letters
digits -- a string containing all ASCII decimal digits
hexdigits -- a string containing all ASCII hexadecimal digits
octdigits -- a string containing all ASCII octal digits
punctuation -- a string containing all ASCII punctuation characters
printable -- a string containing all ASCII characters considered printable

"""
16
# Some strings for ctype-style character classification
whitespace = ' \t\n\r\v\f'
ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
ascii_letters = ascii_lowercase + ascii_uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'
# Raw string: the literal contains a backslash followed by ']', which is not
# a valid escape sequence in a normal string literal.
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
printable = digits + ascii_letters + punctuation + whitespace
Guido van Rossumc6360141990-10-13 19:23:40 +000027
Barry Warsaw8bee7612004-08-25 02:22:30 +000028# Functions which aren't available as string methods.
29
# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s [,sep]) -> string

    Split the argument into words using split, capitalize each
    word using capitalize, and join the capitalized words using
    join.  If the optional second argument sep is absent or None,
    runs of whitespace characters are replaced by a single space
    and leading and trailing whitespace are removed, otherwise
    sep is used to split and join the words.

    """
    # "sep or ' '" picks a single space as the joiner when splitting was
    # done on whitespace (sep is None).
    return (sep or ' ').join(x.capitalize() for x in s.split(sep))
41
42
# Construct a translation map for bytes.translate
def maketrans(frm: bytes, to: bytes) -> bytes:
    """maketrans(frm, to) -> bytes

    Return a translation table (a bytes object of length 256)
    suitable for use in bytes.translate where each byte in frm is
    mapped to the byte at the same position in to.
    The strings frm and to must be of the same length.

    This function is deprecated: a DeprecationWarning is issued on every
    call; use bytes.maketrans instead.

    Raises TypeError if either argument is not a bytes object, and
    ValueError if the two arguments differ in length.
    """
    import warnings
    warnings.warn("string.maketrans is deprecated, use bytes.maketrans instead",
                  DeprecationWarning, 2)
    # Validate the types first so that non-bytes arguments are always
    # reported as a TypeError, whatever their lengths happen to be.
    if not (isinstance(frm, bytes) and isinstance(to, bytes)):
        raise TypeError("maketrans arguments must be bytes objects")
    if len(frm) != len(to):
        raise ValueError("maketrans arguments must have same length")
    # Start from the identity table and overwrite the mapped positions;
    # iterating bytes yields ints, which index the bytearray directly.
    table = bytearray(range(256))
    for src, dst in zip(frm, to):
        table[src] = dst
    return bytes(table)
Barry Warsaw8bee7612004-08-25 02:22:30 +000063
Raymond Hettinger57aef9c2004-12-07 07:55:07 +000064
Raymond Hettinger0d58e2b2004-08-26 00:21:13 +000065####################################################################
Barry Warsaw8bee7612004-08-25 02:22:30 +000066import re as _re
67
Barry Warsaw46b629c2004-09-13 14:35:04 +000068class _multimap:
69 """Helper class for combining multiple mappings.
70
71 Used by .{safe_,}substitute() to combine the mapping and keyword
72 arguments.
73 """
74 def __init__(self, primary, secondary):
75 self._primary = primary
76 self._secondary = secondary
77
78 def __getitem__(self, key):
79 try:
80 return self._primary[key]
81 except KeyError:
82 return self._secondary[key]
83
84
Barry Warsaw12827c12004-09-10 03:08:08 +000085class _TemplateMetaclass(type):
86 pattern = r"""
Raymond Hettinger55593c32004-09-26 18:56:44 +000087 %(delim)s(?:
88 (?P<escaped>%(delim)s) | # Escape sequence of two delimiters
89 (?P<named>%(id)s) | # delimiter and a Python identifier
90 {(?P<braced>%(id)s)} | # delimiter and a braced identifier
91 (?P<invalid>) # Other ill-formed delimiter exprs
92 )
Barry Warsaw12827c12004-09-10 03:08:08 +000093 """
94
95 def __init__(cls, name, bases, dct):
96 super(_TemplateMetaclass, cls).__init__(name, bases, dct)
97 if 'pattern' in dct:
98 pattern = cls.pattern
99 else:
100 pattern = _TemplateMetaclass.pattern % {
Barry Warsaw17cb6002004-09-18 00:06:34 +0000101 'delim' : _re.escape(cls.delimiter),
Barry Warsaw12827c12004-09-10 03:08:08 +0000102 'id' : cls.idpattern,
103 }
104 cls.pattern = _re.compile(pattern, _re.IGNORECASE | _re.VERBOSE)
105
106
class Template(metaclass=_TemplateMetaclass):
    """A string class for supporting $-substitutions.

    The metaclass turns the ``delimiter`` and ``idpattern`` class attributes
    into the compiled ``pattern`` used by substitute() and
    safe_substitute().  The template text is kept in ``self.template``.
    """

    delimiter = '$'
    idpattern = r'[_a-z][_a-z0-9]*'

    def __init__(self, template):
        self.template = template

    # Search for $$, $identifier, ${identifier}, and any bare $'s

    def _invalid(self, mo):
        """Raise ValueError giving the line and column of a bad placeholder.

        mo is a match object whose 'invalid' group matched.
        """
        i = mo.start('invalid')
        lines = self.template[:i].splitlines(True)
        if not lines:
            colno = 1
            lineno = 1
        else:
            # Offset within the last line = absolute offset minus the length
            # of all the complete lines before it.
            colno = i - len(''.join(lines[:-1]))
            lineno = len(lines)
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    @staticmethod
    def _select_mapping(args, kws):
        """Return the lookup mapping for the (mapping?, **kws) call forms.

        At most one positional mapping is allowed; when both a mapping and
        keywords are given, keywords take precedence.
        """
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if not args:
            return kws
        if kws:
            return _multimap(kws, args[0])
        return args[0]

    def substitute(*args, **kws):
        """Return the template with every placeholder replaced.

        Values come from an optional positional mapping and/or keyword
        arguments (keywords win on conflicts).  A placeholder missing from
        the mapping propagates the mapping's lookup error (KeyError for a
        dict); an ill-formed placeholder raises ValueError.
        """
        # self is taken from *args rather than named in the signature so
        # that a placeholder called "self" can be passed as a keyword.
        if not args:
            raise TypeError("descriptor 'substitute' of 'Template' object "
                            "needs an argument")
        self, *args = args
        mapping = self._select_mapping(args, kws)

        # Helper function for .sub()
        def convert(mo):
            # Check the most common path first.
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                return str(mapping[named])
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)

    def safe_substitute(*args, **kws):
        """Like substitute(), but never fail on a placeholder.

        Placeholders whose key lookup raises KeyError, and ill-formed
        placeholders, are left in the result exactly as they appear in the
        template.
        """
        # See substitute() for why self is unpacked from *args.
        if not args:
            raise TypeError("descriptor 'safe_substitute' of 'Template' "
                            "object needs an argument")
        self, *args = args
        mapping = self._select_mapping(args, kws)

        # Helper function for .sub()
        def convert(mo):
            named = mo.group('named')
            if named is None:
                named = mo.group('braced')
            if named is not None:
                try:
                    return str(mapping[named])
                except KeyError:
                    # Keep the original text untouched instead of
                    # rebuilding it from the delimiter and the name.
                    return mo.group()
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                return mo.group()
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)
Eric Smith8c663262007-08-25 02:26:07 +0000188
189
190
191########################################################################
192# the Formatter class
193# see PEP 3101 for details and purpose of this class
194
Benjamin Petersonf608c612008-11-16 18:33:53 +0000195# The hard parts are reused from the C implementation. They're exposed as "_"
196# prefixed methods of str and unicode.
Eric Smith8c663262007-08-25 02:26:07 +0000197
Eric Smithe226b552007-08-27 11:28:18 +0000198# The overall parser is implemented in str._formatter_parser.
199# The field name parser is implemented in str._formatter_field_name_split
Eric Smith8c663262007-08-25 02:26:07 +0000200
class Formatter:
    """Python-level driver for PEP 3101 style format strings.

    The public methods (parse, get_field, get_value, convert_field,
    format_field, check_unused_args) are the hooks that vformat() calls,
    so subclasses can override any of them.
    """

    def format(self, format_string, *args, **kwargs):
        """Format format_string using the given positional/keyword args."""
        return self.vformat(format_string, args, kwargs)

    def vformat(self, format_string, args, kwargs):
        """Do the work of format() with args and kwargs already collected.

        After formatting, check_unused_args() is called with the set of
        argument keys that were actually referenced.
        """
        used_args = set()
        # The trailing 2 is the nesting budget for fields inside format specs.
        result = self._vformat(format_string, args, kwargs, used_args, 2)
        self.check_unused_args(used_args, args, kwargs)
        return result

    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
        """Format one level of format_string; recurses into format specs.

        Raises ValueError once recursion_depth drops below zero.
        """
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                #  the formatting

                # given the field_name, find the object it references
                #  and the argument it came from
                obj, arg_used = self.get_field(field_name, args, kwargs)
                used_args.add(arg_used)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # expand the format spec, if needed
                format_spec = self._vformat(format_spec, args, kwargs,
                                            used_args, recursion_depth-1)

                # format the object and append to the result
                result.append(self.format_field(obj, format_spec))

        return ''.join(result)

    def get_value(self, key, args, kwargs):
        """Return args[key] for an int key, otherwise kwargs[key]."""
        if isinstance(key, int):
            return args[key]
        else:
            return kwargs[key]

    def check_unused_args(self, used_args, args, kwargs):
        """Hook called by vformat() after formatting; does nothing here."""
        pass

    def format_field(self, value, format_spec):
        """Format value with the built-in format() and return the result."""
        return format(value, format_spec)

    def convert_field(self, value, conversion):
        """Apply a '!x' conversion to value.

        None returns value unchanged, 's' applies str(), 'r' applies repr()
        and 'a' applies ascii().  Any other specifier raises ValueError.
        """
        # do any conversion on the resulting object
        if conversion is None:
            return value
        elif conversion == 's':
            return str(value)
        elif conversion == 'r':
            return repr(value)
        elif conversion == 'a':
            return ascii(value)
        raise ValueError("Unknown conversion specifier {0!s}".format(conversion))

    # returns an iterable that contains tuples of the form:
    # (literal_text, field_name, format_spec, conversion)
    # literal_text can be zero length
    # field_name can be None, in which case there's no
    #  object to format and output
    # if field_name is not None, it is looked up, formatted
    #  with format_spec and conversion and then used
    def parse(self, format_string):
        """Split format_string into (literal, field, spec, conversion) tuples.

        Delegates to format_string._formatter_parser().
        """
        # NOTE(review): this relies on a private str helper method; confirm
        # that the targeted Python version still provides it.
        return format_string._formatter_parser()

    # given a field_name, find the object it references.
    #  field_name:   the field being looked up, e.g. "0.name"
    #                 or "lookup[3]"
    #  args, kwargs: as passed in to vformat
    def get_field(self, field_name, args, kwargs):
        """Resolve field_name and return (object, first_key).

        first_key is the leading part of the field name, i.e. the key that
        was handed to get_value().
        """
        first, rest = field_name._formatter_field_name_split()

        obj = self.get_value(first, args, kwargs)

        # loop through the rest of the field_name, doing
        #  getattr or getitem as needed
        for is_attr, i in rest:
            if is_attr:
                obj = getattr(obj, i)
            else:
                obj = obj[i]

        return obj, first
Eric Smith3bcc42a2007-08-31 02:26:31 +0000300 return obj, first