blob: 489777b10c25df7ea1c53444f0df800022af56b4 [file] [log] [blame]
"""A collection of string constants.

Public module variables:

whitespace -- a string containing all ASCII whitespace
ascii_lowercase -- a string containing all ASCII lowercase letters
ascii_uppercase -- a string containing all ASCII uppercase letters
ascii_letters -- a string containing all ASCII letters
digits -- a string containing all ASCII decimal digits
hexdigits -- a string containing all ASCII hexadecimal digits
octdigits -- a string containing all ASCII octal digits
punctuation -- a string containing all ASCII punctuation characters
printable -- a string containing all ASCII characters considered printable

"""
16
# Public names of this module, i.e. what ``from string import *`` binds.
__all__ = [
    "ascii_letters",
    "ascii_lowercase",
    "ascii_uppercase",
    "capwords",
    "digits",
    "hexdigits",
    "octdigits",
    "printable",
    "punctuation",
    "whitespace",
    "Formatter",
    "Template",
]
20
Georg Brandl66c221e2010-10-14 07:04:07 +000021import _string
22
# Strings used for ctype-style character classification.  Several of them
# are derived from the others so the definitions cannot drift apart.
whitespace = ' \t\n\r\v\f'
ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
ascii_letters = ascii_lowercase + ascii_uppercase
digits = '0123456789'
# The octal digits are the first eight decimal digits.
octdigits = digits[:8]
# Decimal digits followed by the lowercase, then uppercase, hex letters.
hexdigits = digits + 'abcdefABCDEF'
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
printable = ''.join((digits, ascii_letters, punctuation, whitespace))
Guido van Rossumc6360141990-10-13 19:23:40 +000033
Barry Warsaw8bee7612004-08-25 02:22:30 +000034# Functions which aren't available as string methods.
35
# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s [,sep]) -> string

    Break s into words with s.split(sep), apply capitalize() to every
    word, and glue the results back together with join.

    When sep is omitted or None, words are delimited by runs of
    whitespace: leading and trailing whitespace disappears and each
    run collapses to one space in the result.  Otherwise sep is used
    both to split s and to join the capitalized words.

    """
    # Bind the joiner before splitting; a falsy sep falls back to a space.
    join_words = (sep or ' ').join
    return join_words([word.capitalize() for word in s.split(sep)])
Barry Warsaw8bee7612004-08-25 02:22:30 +000049
50
Raymond Hettinger0d58e2b2004-08-26 00:21:13 +000051####################################################################
Barry Warsaw8bee7612004-08-25 02:22:30 +000052import re as _re
Zachary Warec17a0b82016-06-04 14:35:05 -050053from collections import ChainMap as _ChainMap
Barry Warsaw46b629c2004-09-13 14:35:04 +000054
# Default for the positional ``mapping`` argument of Template.substitute()
# and Template.safe_substitute().  It is compared by identity, so a dict
# passed explicitly (even an empty one) is still treated as a mapping.
_sentinel_dict = {}


class Template:
    """A string class for supporting $-substitutions."""

    delimiter = '$'
    # r'[a-z]' matches to non-ASCII letters when used with IGNORECASE, but
    # without the ASCII flag.  We can't add re.ASCII to flags because of
    # backward compatibility.  So we use the ?a local flag and [a-z] pattern.
    # See https://bugs.python.org/issue31672
    idpattern = r'(?a:[_a-z][_a-z0-9]*)'
    braceidpattern = None
    flags = _re.IGNORECASE

    def __init_subclass__(cls):
        """Compile ``cls.pattern``.

        A ``pattern`` defined directly on the class is used as given;
        otherwise one is assembled from ``delimiter``, ``idpattern`` and
        ``braceidpattern`` (which falls back to ``idpattern`` when falsy).
        The result is compiled with ``cls.flags | re.VERBOSE``.
        """
        super().__init_subclass__()
        if 'pattern' in cls.__dict__:
            source = cls.pattern
        else:
            delim = _re.escape(cls.delimiter)
            ident = cls.idpattern
            braced_ident = cls.braceidpattern or cls.idpattern
            # Matches $$, $identifier, ${identifier}, and any bare $'s.
            source = fr"""
            {delim}(?:
              (?P<escaped>{delim})              |   # Escape sequence of two delimiters
              (?P<named>{ident})                |   # delimiter and a Python identifier
              {{(?P<braced>{braced_ident})}}    |   # delimiter and a braced identifier
              (?P<invalid>)                         # Other ill-formed delimiter exprs
            )
            """
        cls.pattern = _re.compile(source, cls.flags | _re.VERBOSE)

    def __init__(self, template):
        """Store *template*, the string containing the placeholders."""
        self.template = template

    def _invalid(self, mo):
        """Raise ValueError reporting where the 'invalid' group of *mo* is.

        The position is given as a 1-based line and column within
        ``self.template``.
        """
        offset = mo.start('invalid')
        preceding = self.template[:offset].splitlines(keepends=True)
        if preceding:
            lineno = len(preceding)
            # Column = offset minus the length of all complete lines before
            # the one containing the match.
            colno = offset - sum(len(line) for line in preceding[:-1])
        else:
            lineno = colno = 1
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    def substitute(self, mapping=_sentinel_dict, /, **kws):
        """Return the template with every placeholder replaced.

        Values are looked up in *mapping* and/or the keyword arguments;
        when both are given the keyword arguments take precedence.  Each
        value is converted with str().  A doubled delimiter yields a single
        delimiter.

        A lookup failure propagates from the mapping (KeyError for a
        dict).  An ill-formed placeholder raises ValueError.
        """
        if mapping is _sentinel_dict:
            values = kws
        elif kws:
            values = _ChainMap(kws, mapping)
        else:
            values = mapping

        # Helper function for .sub()
        def replace(match):
            # Check the most common path first.
            name = match.group('named') or match.group('braced')
            if name is not None:
                return str(values[name])
            if match.group('escaped') is not None:
                return self.delimiter
            if match.group('invalid') is not None:
                self._invalid(match)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)

        return self.pattern.sub(replace, self.template)

    def safe_substitute(self, mapping=_sentinel_dict, /, **kws):
        """Like substitute(), but leave unresolvable placeholders in place.

        A placeholder whose lookup raises KeyError, and any ill-formed
        placeholder, is copied to the result unchanged instead of raising.
        """
        if mapping is _sentinel_dict:
            values = kws
        elif kws:
            values = _ChainMap(kws, mapping)
        else:
            values = mapping

        # Helper function for .sub()
        def replace(match):
            name = match.group('named') or match.group('braced')
            if name is not None:
                try:
                    return str(values[name])
                except KeyError:
                    # Unknown name: keep the placeholder text as written.
                    return match.group()
            if match.group('escaped') is not None:
                return self.delimiter
            if match.group('invalid') is not None:
                return match.group()
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)

        return self.pattern.sub(replace, self.template)


# Initialize Template.pattern.  __init_subclass__() is automatically called
# only for subclasses, not for the Template class itself.
Template.__init_subclass__()
Eric Smith8c663262007-08-25 02:26:07 +0000147
148
########################################################################
# the Formatter class
# see PEP 3101 for details and purpose of this class

# The hard parts are reused from the C implementation.  They're exposed as
# functions of the private "_string" module.

# The overall parser is implemented in _string.formatter_parser.
# The field name parser is implemented in _string.formatter_field_name_split.
Eric Smith8c663262007-08-25 02:26:07 +0000158
class Formatter:
    """Python-level driver for PEP 3101 ("{}"-style) string formatting.

    The parsing is delegated to the ``_string`` helper module; every other
    step (value lookup, conversion, formatting, unused-argument checking)
    is a separate method so that subclasses can override it.
    """

    def format(self, format_string, /, *args, **kwargs):
        """Format *format_string* using *args* and *kwargs*.

        Thin wrapper that forwards to vformat().
        """
        return self.vformat(format_string, args, kwargs)

    def vformat(self, format_string, args, kwargs):
        """Format *format_string* with a sequence *args* and mapping *kwargs*.

        After formatting, check_unused_args() is called with the set of
        argument keys that were referenced.  Nested replacement fields in
        format specs are expanded up to a recursion depth of 2.
        """
        used_args = set()
        result, _ = self._vformat(format_string, args, kwargs, used_args, 2)
        self.check_unused_args(used_args, args, kwargs)
        return result

    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth,
                 auto_arg_index=0):
        """Do the formatting work for vformat().

        Returns a ``(text, auto_arg_index)`` tuple.  *auto_arg_index* is the
        next index for automatically numbered fields, or False once a
        manually numbered field has been seen; it is threaded through the
        recursive calls that expand format specs.  Keys of the arguments
        that were referenced are added to *used_args*.

        Raises ValueError when *recursion_depth* drops below zero or when
        automatic and manual field numbering are mixed.
        """
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                # the formatting

                # Only the leading component of the field name decides the
                # numbering mode: '' (as in "{}", "{.attr}" or "{[0]}")
                # means automatic numbering, an int (as in "{0}" or
                # "{0.attr}") means manual numbering.  Looking at the whole
                # field name would misclassify names with attribute or
                # index suffixes.
                field_first, _ = _string.formatter_field_name_split(
                    field_name)

                if field_first == '':
                    if auto_arg_index is False:
                        raise ValueError('cannot switch from manual field '
                                         'specification to automatic field '
                                         'numbering')
                    # Prepend the index so any ".attr"/"[key]" suffix is kept.
                    field_name = str(auto_arg_index) + field_name
                    auto_arg_index += 1
                elif isinstance(field_first, int):
                    if auto_arg_index:
                        raise ValueError('cannot switch from automatic field '
                                         'numbering to manual field '
                                         'specification')
                    # disable auto arg incrementing, if it gets
                    # used later on, then an exception will be raised
                    auto_arg_index = False

                # given the field_name, find the object it references
                #  and the argument it came from
                obj, arg_used = self.get_field(field_name, args, kwargs)
                used_args.add(arg_used)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # expand the format spec, if needed
                format_spec, auto_arg_index = self._vformat(
                    format_spec, args, kwargs,
                    used_args, recursion_depth-1,
                    auto_arg_index=auto_arg_index)

                # format the object and append to the result
                result.append(self.format_field(obj, format_spec))

        return ''.join(result), auto_arg_index

    def get_value(self, key, args, kwargs):
        """Return the argument selected by *key*.

        An int *key* indexes *args*; any other key is looked up in *kwargs*.
        """
        if isinstance(key, int):
            return args[key]
        else:
            return kwargs[key]

    def check_unused_args(self, used_args, args, kwargs):
        """Hook called by vformat() after formatting; does nothing here.

        *used_args* is the set of argument keys that were referenced.
        """
        pass

    def format_field(self, value, format_spec):
        """Return *value* formatted with the built-in format()."""
        return format(value, format_spec)

    def convert_field(self, value, conversion):
        """Apply the "!" conversion named by *conversion* to *value*.

        None leaves the value unchanged; 's', 'r' and 'a' apply str(),
        repr() and ascii().  Anything else raises ValueError.
        """
        # do any conversion on the resulting object
        if conversion is None:
            return value
        elif conversion == 's':
            return str(value)
        elif conversion == 'r':
            return repr(value)
        elif conversion == 'a':
            return ascii(value)
        raise ValueError("Unknown conversion specifier {0!s}".format(conversion))

    # returns an iterable that contains tuples of the form:
    # (literal_text, field_name, format_spec, conversion)
    # literal_text can be zero length
    # field_name can be None, in which case there's no
    #  object to format and output
    # if field_name is not None, it is looked up, formatted
    #  with format_spec and conversion and then used
    def parse(self, format_string):
        """Split *format_string* into literal text and replacement fields.

        Delegates to _string.formatter_parser.
        """
        return _string.formatter_parser(format_string)

    # given a field_name, find the object it references.
    #  field_name:   the field being looked up, e.g. "0.name"
    #                 or "lookup[3]"
    #  args, kwargs: as passed in to vformat
    def get_field(self, field_name, args, kwargs):
        """Resolve *field_name* to an object.

        Returns ``(obj, first)`` where *first* is the leading component of
        the field name (the key passed to get_value()) and *obj* is the
        result of applying the remaining attribute and index lookups.
        """
        first, rest = _string.formatter_field_name_split(field_name)

        obj = self.get_value(first, args, kwargs)

        # loop through the rest of the field_name, doing
        #  getattr or getitem as needed
        for is_attr, i in rest:
            if is_attr:
                obj = getattr(obj, i)
            else:
                obj = obj[i]

        return obj, first
Eric Smith3bcc42a2007-08-31 02:26:31 +0000280 return obj, first