blob: a9898e8a06e0e6be66bdae876ac006b212e702d2 [file] [log] [blame]
"""A collection of string constants.

Public module variables:

whitespace -- a string containing all ASCII whitespace
ascii_lowercase -- a string containing all ASCII lowercase letters
ascii_uppercase -- a string containing all ASCII uppercase letters
ascii_letters -- a string containing all ASCII letters
digits -- a string containing all ASCII decimal digits
hexdigits -- a string containing all ASCII hexadecimal digits
octdigits -- a string containing all ASCII octal digits
punctuation -- a string containing all ASCII punctuation characters
printable -- a string containing all ASCII characters considered printable

"""
16
# Some strings for ctype-style character classification
whitespace = ' \t\n\r\v\f'
ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
ascii_letters = ascii_lowercase + ascii_uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'
# Raw literal: the backslash before ']' is a literal character, not an
# escape.  A non-raw literal yields the same value but relies on an
# invalid escape sequence being passed through, which newer interpreters
# warn about.
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
printable = digits + ascii_letters + punctuation + whitespace
Guido van Rossumc6360141990-10-13 19:23:40 +000027
Barry Warsaw8bee7612004-08-25 02:22:30 +000028# Functions which aren't available as string methods.
29
# Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s [,sep]) -> string

    Break s into words with split, apply capitalize to every word,
    and glue the results back together with join.  When sep is
    omitted or None, any run of whitespace separates words, the
    words are re-joined with a single space, and leading/trailing
    whitespace disappears.  Otherwise sep is used both to split
    and to join.

    """
    # Bind the joiner first so a bad sep fails in the same place it
    # always did (on the join lookup, before any splitting happens).
    glue = (sep if sep else ' ').join
    return glue(word.capitalize() for word in s.split(sep))
Barry Warsaw8bee7612004-08-25 02:22:30 +000043
44
# Build the 256-entry lookup table consumed by bytes.translate
def maketrans(frm: bytes, to: bytes) -> bytes:
    """maketrans(frm, to) -> bytes

    Produce a translation table (a bytes object of length 256) for
    bytes.translate: every byte value maps to itself, except that
    each byte of frm maps to the byte at the same index in to.

    Emits a DeprecationWarning on every call.  Raises ValueError if
    frm and to differ in length, and TypeError if either one is not
    a bytes object (the length check runs first).
    """
    import warnings
    warnings.warn("string.maketrans is deprecated, use bytes.maketrans instead",
                  DeprecationWarning, 2)
    if len(frm) != len(to):
        raise ValueError("maketrans arguments must have same length")
    if not isinstance(frm, bytes) or not isinstance(to, bytes):
        raise TypeError("maketrans arguments must be bytes objects")
    # Start from the identity mapping, then overwrite the listed bytes;
    # when a byte occurs more than once in frm, the last pairing wins.
    table = bytearray(range(256))
    for source_byte, target_byte in zip(frm, to):
        table[source_byte] = target_byte
    return bytes(table)
Barry Warsaw8bee7612004-08-25 02:22:30 +000065
Raymond Hettinger57aef9c2004-12-07 07:55:07 +000066
Raymond Hettinger0d58e2b2004-08-26 00:21:13 +000067####################################################################
Barry Warsaw8bee7612004-08-25 02:22:30 +000068import re as _re
69
Barry Warsaw46b629c2004-09-13 14:35:04 +000070class _multimap:
71 """Helper class for combining multiple mappings.
72
73 Used by .{safe_,}substitute() to combine the mapping and keyword
74 arguments.
75 """
76 def __init__(self, primary, secondary):
77 self._primary = primary
78 self._secondary = secondary
79
80 def __getitem__(self, key):
81 try:
82 return self._primary[key]
83 except KeyError:
84 return self._secondary[key]
85
86
Barry Warsaw12827c12004-09-10 03:08:08 +000087class _TemplateMetaclass(type):
88 pattern = r"""
Raymond Hettinger55593c32004-09-26 18:56:44 +000089 %(delim)s(?:
90 (?P<escaped>%(delim)s) | # Escape sequence of two delimiters
91 (?P<named>%(id)s) | # delimiter and a Python identifier
92 {(?P<braced>%(id)s)} | # delimiter and a braced identifier
93 (?P<invalid>) # Other ill-formed delimiter exprs
94 )
Barry Warsaw12827c12004-09-10 03:08:08 +000095 """
96
97 def __init__(cls, name, bases, dct):
98 super(_TemplateMetaclass, cls).__init__(name, bases, dct)
99 if 'pattern' in dct:
100 pattern = cls.pattern
101 else:
102 pattern = _TemplateMetaclass.pattern % {
Barry Warsaw17cb6002004-09-18 00:06:34 +0000103 'delim' : _re.escape(cls.delimiter),
Barry Warsaw12827c12004-09-10 03:08:08 +0000104 'id' : cls.idpattern,
105 }
106 cls.pattern = _re.compile(pattern, _re.IGNORECASE | _re.VERBOSE)
107
108
class Template(metaclass=_TemplateMetaclass):
    """A string class for supporting $-substitutions."""

    # Placeholder introducer and the regex for a placeholder name; the
    # metaclass combines them into the compiled ``pattern`` attribute.
    delimiter = '$'
    idpattern = r'[_a-z][_a-z0-9]*'

    def __init__(self, template):
        self.template = template

    # Search for $$, $identifier, ${identifier}, and any bare $'s

    def _invalid(self, mo):
        """Raise ValueError giving the line and column of a bad placeholder."""
        offset = mo.start('invalid')
        # keepends=True so that the line lengths add up to real offsets.
        preceding = self.template[:offset].splitlines(True)
        if preceding:
            lineno = len(preceding)
            colno = offset - sum(len(line) for line in preceding[:-1])
        else:
            lineno = colno = 1
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    def substitute(self, *args, **kws):
        """Return the template with every placeholder replaced.

        Takes at most one positional mapping plus keyword arguments;
        keywords take precedence over the mapping.  Whatever the lookup
        raises for a missing name propagates, and an ill-formed
        placeholder raises ValueError.
        """
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if args and kws:
            mapping = _multimap(kws, args[0])
        elif args:
            mapping = args[0]
        else:
            mapping = kws

        # Replacement callback handed to pattern.sub()
        def convert(mo):
            # Placeholders are the common case, so test for them first.
            key = mo.group('named') or mo.group('braced')
            if key is None:
                if mo.group('escaped') is not None:
                    return self.delimiter
                if mo.group('invalid') is not None:
                    self._invalid(mo)
                raise ValueError('Unrecognized named group in pattern',
                                 self.pattern)
            # %-format through a 1-tuple so that a tuple value is rendered
            # as one object instead of being unpacked as arguments.
            return '%s' % (mapping[key],)

        return self.pattern.sub(convert, self.template)

    def safe_substitute(self, *args, **kws):
        """Like substitute(), but tolerant of missing names and bad syntax.

        A placeholder whose lookup raises KeyError is left in the output
        unchanged, and an ill-formed delimiter is emitted as the bare
        delimiter rather than raising ValueError.
        """
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if args and kws:
            mapping = _multimap(kws, args[0])
        elif args:
            mapping = args[0]
        else:
            mapping = kws

        # Replacement callback handed to pattern.sub()
        def convert(mo):
            name = mo.group('named')
            if name is not None:
                try:
                    # 1-tuple: see the note in substitute().
                    return '%s' % (mapping[name],)
                except KeyError:
                    return self.delimiter + name
            name = mo.group('braced')
            if name is not None:
                try:
                    return '%s' % (mapping[name],)
                except KeyError:
                    return self.delimiter + '{' + name + '}'
            # Both an escaped delimiter and an ill-formed one come out as
            # a single delimiter.
            if (mo.group('escaped') is not None
                    or mo.group('invalid') is not None):
                return self.delimiter
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)

        return self.pattern.sub(convert, self.template)
Eric Smith8c663262007-08-25 02:26:07 +0000190
191
192
########################################################################
# the Formatter class
# see PEP 3101 for details and purpose of this class

# The hard parts are reused from the C implementation.  They're exposed as "_"
# prefixed methods of str.

# The overall parser is implemented in str._formatter_parser.
# The field name parser is implemented in str._formatter_field_name_split
Eric Smith8c663262007-08-25 02:26:07 +0000202
class Formatter:
    """Pure-Python driver for PEP 3101 format strings.

    format() and vformat() are the entry points.  The remaining public
    methods (parse, get_field, get_value, check_unused_args,
    convert_field, format_field) are called by the driver and may be
    overridden by subclasses to customize a single step.
    """

    def format(self, format_string, *args, **kwargs):
        """Format format_string using the given positional/keyword args."""
        return self.vformat(format_string, args, kwargs)

    def vformat(self, format_string, args, kwargs):
        """Format format_string with an args sequence and a kwargs mapping.

        After formatting, check_unused_args() is called with the set of
        argument keys that were actually referenced.
        """
        used_args = set()
        # 2 is the nesting budget handed to _vformat for format specs
        # that themselves contain replacement fields.
        result = self._vformat(format_string, args, kwargs, used_args, 2)
        self.check_unused_args(used_args, args, kwargs)
        return result

    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
        """Expand format_string, recording referenced args in used_args.

        Raises ValueError once recursion_depth drops below zero.
        """
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                #  the formatting

                # given the field_name, find the object it references
                #  and the argument it came from
                obj, arg_used = self.get_field(field_name, args, kwargs)
                used_args.add(arg_used)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # expand the format spec, if needed
                format_spec = self._vformat(format_spec, args, kwargs,
                                            used_args, recursion_depth-1)

                # format the object and append to the result
                result.append(self.format_field(obj, format_spec))

        return ''.join(result)

    def get_value(self, key, args, kwargs):
        """Fetch an argument: int keys index args, other keys index kwargs."""
        if isinstance(key, int):
            return args[key]
        else:
            return kwargs[key]

    def check_unused_args(self, used_args, args, kwargs):
        """Hook called by vformat() after formatting; does nothing here."""
        pass

    def format_field(self, value, format_spec):
        """Format one value by delegating to the built-in format()."""
        return format(value, format_spec)

    def convert_field(self, value, conversion):
        """Apply a '!x' conversion to value.

        None leaves the value untouched; 'r', 's' and 'a' apply repr(),
        str() and ascii() respectively.  Anything else raises ValueError.
        """
        # do any conversion on the resulting object
        if conversion is None:
            return value
        elif conversion == 'r':
            return repr(value)
        elif conversion == 's':
            return str(value)
        elif conversion == 'a':
            # '!a' is accepted by str.format(), so accept it here too.
            return ascii(value)
        raise ValueError("Unknown conversion specifier {0!s}".format(conversion))

    # returns an iterable that contains tuples of the form:
    # (literal_text, field_name, format_spec, conversion)
    # literal_text can be zero length
    # field_name can be None, in which case there's no
    #  object to format and output
    # if field_name is not None, it is looked up, formatted
    #  with format_spec and conversion and then used
    def parse(self, format_string):
        """Split format_string into (literal, field, spec, conversion) tuples."""
        if hasattr(str, '_formatter_parser'):
            return format_string._formatter_parser()
        # Interpreters without the private str method expose the same
        # parser through the _string helper module.
        import _string
        return _string.formatter_parser(format_string)

    # given a field_name, find the object it references.
    #  field_name:   the field being looked up, e.g. "0.name"
    #                 or "lookup[3]"
    #  args, kwargs: as passed in to vformat
    def get_field(self, field_name, args, kwargs):
        """Resolve field_name; return (object, key of the argument used)."""
        if hasattr(str, '_formatter_field_name_split'):
            first, rest = field_name._formatter_field_name_split()
        else:
            # Same fallback as in parse().
            import _string
            first, rest = _string.formatter_field_name_split(field_name)

        obj = self.get_value(first, args, kwargs)

        # loop through the rest of the field_name, doing
        #  getattr or getitem as needed
        for is_attr, i in rest:
            if is_attr:
                obj = getattr(obj, i)
            else:
                obj = obj[i]

        return obj, first
Eric Smith3bcc42a2007-08-31 02:26:31 +0000302 return obj, first