blob: ea0d359002fbcab33d7262e156ba578c852a9393 [file] [log] [blame]
Neal Norwitz9d72bb42007-04-17 08:48:32 +00001"""A collection of string constants.
Guido van Rossum20032041997-12-29 19:26:28 +00002
3Public module variables:
4
Georg Brandl50767402008-11-22 08:31:09 +00005whitespace -- a string containing all ASCII whitespace
6ascii_lowercase -- a string containing all ASCII lowercase letters
7ascii_uppercase -- a string containing all ASCII uppercase letters
8ascii_letters -- a string containing all ASCII letters
9digits -- a string containing all ASCII decimal digits
10hexdigits -- a string containing all ASCII hexadecimal digits
11octdigits -- a string containing all ASCII octal digits
12punctuation -- a string containing all ASCII punctuation characters
13printable -- a string containing all ASCII characters considered printable
Guido van Rossum20032041997-12-29 19:26:28 +000014
15"""
16
Guido van Rossumc6360141990-10-13 19:23:40 +000017# Some strings for ctype-style character classification
# ASCII whitespace: space, tab, newline, carriage return, vertical tab,
# form feed.
whitespace = ' \t\n\r\v\f'
ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
ascii_letters = ascii_lowercase + ascii_uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'
# Raw string: the backslash is itself one of the punctuation characters, and
# a non-raw "\]" is an invalid escape sequence that newer interpreters warn
# about.  The resulting value is unchanged.
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
printable = digits + ascii_letters + punctuation + whitespace
Guido van Rossumc6360141990-10-13 19:23:40 +000027
Barry Warsaw8bee7612004-08-25 02:22:30 +000028# Functions which aren't available as string methods.
29
30# Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Break s into words with s.split(sep), capitalize every word with its
    capitalize() method, and glue the words back together.  The words are
    joined with sep, or with a single space when sep is None (or empty),
    so with the default separator every run of whitespace collapses to
    one space.

    """
    glue = sep or ' '
    words = s.split(sep)
    return glue.join(word.capitalize() for word in words)
41
42
Georg Brandl7f13e6b2007-08-31 10:37:15 +000043# Construct a translation map for bytes.translate
def maketrans(frm: bytes, to: bytes) -> bytes:
    """maketrans(frm, to) -> bytes

    Return a translation table (a bytes object of length 256)
    suitable for use in bytes.translate where each byte in frm is
    mapped to the byte at the same position in to.
    The bytes objects frm and to must be of the same length.

    Raises TypeError if either argument is not a bytes object, and
    ValueError if the two arguments differ in length.
    """
    # Check the types first: otherwise two str arguments of different
    # lengths would be reported as a length problem (ValueError) rather
    # than as the wrong kind of argument.
    if not (isinstance(frm, bytes) and isinstance(to, bytes)):
        raise TypeError("maketrans arguments must be bytes objects")
    if len(frm) != len(to):
        raise ValueError("maketrans arguments must have same length")
    # Start from the identity mapping and overwrite the requested entries;
    # if a byte occurs more than once in frm, its last occurrence wins.
    table = bytearray(range(256))
    for src, dst in zip(frm, to):
        table[src] = dst
    return bytes(table)
Barry Warsaw8bee7612004-08-25 02:22:30 +000060
Raymond Hettinger57aef9c2004-12-07 07:55:07 +000061
Raymond Hettinger0d58e2b2004-08-26 00:21:13 +000062####################################################################
Barry Warsaw8bee7612004-08-25 02:22:30 +000063import re as _re
64
class _multimap:
    """Read-only view that layers one mapping on top of another.

    Template.substitute() and Template.safe_substitute() use it to merge
    their keyword arguments with the positional mapping: a key is looked
    up in the primary mapping first and only then in the secondary one.
    """

    def __init__(self, primary, secondary):
        self._primary, self._secondary = primary, secondary

    def __getitem__(self, key):
        # Only a KeyError from the primary mapping triggers the fallback;
        # a KeyError from the secondary mapping propagates to the caller.
        try:
            value = self._primary[key]
        except KeyError:
            value = self._secondary[key]
        return value
80
81
class _TemplateMetaclass(type):
    """Metaclass that compiles the placeholder regex of each class it creates.

    A class that defines its own 'pattern' attribute has that pattern
    compiled as given.  Any other class gets the default pattern below,
    filled in with the class's (escaped) delimiter and its idpattern.
    Either way the compiled regular expression is stored back on the
    class as cls.pattern.
    """

    pattern = r"""
    %(delim)s(?:
      (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
      (?P<named>%(id)s)      |   # delimiter and a Python identifier
      {(?P<braced>%(id)s)}   |   # delimiter and a braced identifier
      (?P<invalid>)              # Other ill-formed delimiter exprs
    )
    """

    def __init__(cls, name, bases, dct):
        super().__init__(name, bases, dct)
        if 'pattern' not in dct:
            # No pattern in the class body: build it from the default.
            source = _TemplateMetaclass.pattern % {
                'delim': _re.escape(cls.delimiter),
                'id': cls.idpattern,
            }
        else:
            source = cls.pattern
        flags = _re.IGNORECASE | _re.VERBOSE
        cls.pattern = _re.compile(source, flags)
102
103
class Template(metaclass=_TemplateMetaclass):
    """A string class for supporting $-substitutions.

    The class attributes 'delimiter' and 'idpattern' are read by the
    metaclass to build the compiled regular expression stored in
    'pattern'; subclasses may override them (or 'pattern' itself).
    """

    delimiter = '$'
    idpattern = r'[_a-z][_a-z0-9]*'

    def __init__(self, template):
        self.template = template

    # The compiled pattern searches for $$, $identifier, ${identifier},
    # and any bare $'s.

    @staticmethod
    def _mapping_from(args, kws):
        """Combine the positional mapping (at most one) with the keywords.

        Keyword arguments take precedence over the positional mapping.
        Raises TypeError if more than one positional argument was given.
        """
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if not args:
            return kws
        if kws:
            return _multimap(kws, args[0])
        return args[0]

    def _invalid(self, mo):
        """Raise ValueError giving the line and column of a bad placeholder."""
        i = mo.start('invalid')
        # Split everything before the offending position into lines,
        # keeping the line endings so the lengths still add up to offsets.
        lines = self.template[:i].splitlines(True)
        if not lines:
            colno = 1
            lineno = 1
        else:
            colno = i - len(''.join(lines[:-1]))
            lineno = len(lines)
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    def substitute(self, *args, **kws):
        """Return the template with every placeholder replaced.

        Values are taken from an optional positional mapping and from
        keyword arguments (keywords win).  A placeholder that is missing
        from the mapping propagates the mapping's lookup error (KeyError
        for a dict); an ill-formed placeholder raises ValueError.
        """
        mapping = self._mapping_from(args, kws)

        # Helper function for .sub()
        def convert(mo):
            # Check the most common path first.
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                val = mapping[named]
                # Coerce the looked-up value to a string.
                return '%s' % (val,)
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)

    def safe_substitute(self, *args, **kws):
        """Like substitute(), but leave unresolvable placeholders in place.

        A placeholder whose name is missing from the mapping, and an
        ill-formed placeholder, are copied to the result exactly as they
        appear in the template instead of raising.
        """
        mapping = self._mapping_from(args, kws)

        # Helper function for .sub()
        def convert(mo):
            named = mo.group('named')
            if named is None:
                named = mo.group('braced')
            if named is not None:
                try:
                    # Coerce the looked-up value to a string.
                    return '%s' % (mapping[named],)
                except KeyError:
                    # Return the matched text itself rather than rebuilding
                    # it from self.delimiter: the pattern may have matched
                    # the delimiter case-insensitively, or may be a custom
                    # pattern whose syntax differs from the default.
                    return mo.group()
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                return mo.group()
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)
Eric Smith8c663262007-08-25 02:26:07 +0000185
186
187
188########################################################################
189# the Formatter class
190# see PEP 3101 for details and purpose of this class
191
# The hard parts are reused from the C implementation.  They're exposed as "_"
# prefixed methods of str.
Eric Smith8c663262007-08-25 02:26:07 +0000194
Eric Smithe226b552007-08-27 11:28:18 +0000195# The overall parser is implemented in str._formatter_parser.
196# The field name parser is implemented in str._formatter_field_name_split
Eric Smith8c663262007-08-25 02:26:07 +0000197
class Formatter:
    """Overridable implementation of the format-string machinery.

    format() and vformat() are the entry points; the remaining methods
    are hooks that a subclass can replace to change how a format string
    is parsed, how fields are looked up, converted and formatted, and
    what happens to unused arguments.
    """

    def format(self, format_string, *args, **kwargs):
        """Format format_string using the given positional/keyword args."""
        return self.vformat(format_string, args, kwargs)

    def vformat(self, format_string, args, kwargs):
        """Like format(), but takes args and kwargs as explicit containers.

        After formatting, check_unused_args() is called with the set of
        argument keys that were actually referenced.
        """
        used_args = set()
        # A recursion budget of 2 bounds how deeply format specs may
        # themselves contain replacement fields.
        result = self._vformat(format_string, args, kwargs, used_args, 2)
        self.check_unused_args(used_args, args, kwargs)
        return result

    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
        """Do the formatting work, recording referenced keys in used_args.

        Raises ValueError once recursion_depth drops below zero.
        """
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                #  the formatting

                # given the field_name, find the object it references
                #  and the argument it came from
                obj, arg_used = self.get_field(field_name, args, kwargs)
                used_args.add(arg_used)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # expand the format spec, if needed
                format_spec = self._vformat(format_spec, args, kwargs,
                                            used_args, recursion_depth-1)

                # format the object and append to the result
                result.append(self.format_field(obj, format_spec))

        return ''.join(result)

    def get_value(self, key, args, kwargs):
        """Fetch an argument: from args for an int key, else from kwargs."""
        if isinstance(key, int):
            return args[key]
        else:
            return kwargs[key]

    def check_unused_args(self, used_args, args, kwargs):
        """Hook called by vformat() after formatting; does nothing here."""
        pass

    def format_field(self, value, format_spec):
        """Format a single value by delegating to the built-in format()."""
        return format(value, format_spec)

    def convert_field(self, value, conversion):
        """Apply a conversion to value: 'r' -> repr(), 's' -> str().

        A conversion of None returns value unchanged; anything else
        raises ValueError.
        """
        # do any conversion on the resulting object
        if conversion == 'r':
            return repr(value)
        elif conversion == 's':
            return str(value)
        elif conversion is None:
            return value
        raise ValueError("Unknown conversion specifier {0!s}".format(conversion))

    # returns an iterable that contains tuples of the form:
    # (literal_text, field_name, format_spec, conversion)
    # literal_text can be zero length
    # field_name can be None, in which case there's no
    #  object to format and output
    # if field_name is not None, it is looked up, formatted
    #  with format_spec and conversion and then used
    def parse(self, format_string):
        """Split format_string into its literal and field parts."""
        # Delegates to the private parser method on the string object.
        return format_string._formatter_parser()

    # given a field_name, find the object it references.
    #  field_name:   the field being looked up, e.g. "0.name"
    #                 or "lookup[3]"
    #  args, kwargs: as passed in to vformat
    # returns a tuple (obj, first), where first is the leading key of
    #  field_name, i.e. the argument the object was reached from
    def get_field(self, field_name, args, kwargs):
        """Resolve field_name to an object plus the argument key it used."""
        first, rest = field_name._formatter_field_name_split()

        obj = self.get_value(first, args, kwargs)

        # loop through the rest of the field_name, doing
        #  getattr or getitem as needed
        for is_attr, i in rest:
            if is_attr:
                obj = getattr(obj, i)
            else:
                obj = obj[i]

        return obj, first
Eric Smith3bcc42a2007-08-31 02:26:31 +0000297 return obj, first