blob: defe894414c07e9bcdba01a1c13435c727c1bcaf [file] [log] [blame]
"""A collection of string constants.

Public module variables:

whitespace -- a string containing all ASCII whitespace
ascii_lowercase -- a string containing all ASCII lowercase letters
ascii_uppercase -- a string containing all ASCII uppercase letters
ascii_letters -- a string containing all ASCII letters
digits -- a string containing all ASCII decimal digits
hexdigits -- a string containing all ASCII hexadecimal digits
octdigits -- a string containing all ASCII octal digits
punctuation -- a string containing all ASCII punctuation characters
printable -- a string containing all ASCII characters considered printable

"""
16
# Some strings for ctype-style character classification
whitespace = ' \t\n\r\v\f'
ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
ascii_letters = ascii_lowercase + ascii_uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'
# Raw string: the backslash is itself one of the punctuation characters, not
# the start of an (invalid) "\]" escape sequence.
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
printable = digits + ascii_letters + punctuation + whitespace
Guido van Rossumc6360141990-10-13 19:23:40 +000027
# Functions which aren't available as string methods.

# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s [,sep]) -> string

    Split the argument into words using split, capitalize each
    word using capitalize, and join the capitalized words using
    join.  If the optional second argument sep is absent or None,
    runs of whitespace characters are replaced by a single space
    and leading and trailing whitespace are removed, otherwise
    sep is used to split and join the words.

    """
    # `sep or ' '`: when sep is None (or empty) the words are re-joined with
    # a single space, matching the whitespace-collapsing split(None).
    return (sep or ' ').join(x.capitalize() for x in s.split(sep))
Barry Warsaw8bee7612004-08-25 02:22:30 +000043
44
Raymond Hettinger0d58e2b2004-08-26 00:21:13 +000045####################################################################
Barry Warsaw8bee7612004-08-25 02:22:30 +000046import re as _re
47
class _multimap:
    """Helper class for combining multiple mappings.

    Used by .{safe_,}substitute() to combine the mapping and keyword
    arguments.  Only item lookup is supported: a key is looked up in
    the primary mapping first and, if that raises KeyError, in the
    secondary mapping.
    """

    def __init__(self, primary, secondary):
        self._primary = primary
        self._secondary = secondary

    def __getitem__(self, key):
        try:
            return self._primary[key]
        except KeyError:
            # A KeyError from the secondary mapping propagates to the caller.
            return self._secondary[key]
63
64
class _TemplateMetaclass(type):
    """Metaclass that compiles the placeholder regex of each class it creates.

    If the class body defines its own ``pattern`` it is compiled as given;
    otherwise the default pattern below is filled in with the class's
    ``delimiter`` (regex-escaped) and ``idpattern``.  In both cases the
    result is compiled with ``cls.flags | re.VERBOSE`` and stored back on
    the class as ``cls.pattern``.
    """

    # Default pattern template; %(delim)s and %(id)s are substituted in
    # __init__.  It is compiled in VERBOSE mode, so the whitespace and the
    # trailing "#" comments are not part of the expression.
    pattern = r"""
    %(delim)s(?:
      (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
      (?P<named>%(id)s)      |   # delimiter and a Python identifier
      {(?P<braced>%(id)s)}   |   # delimiter and a braced identifier
      (?P<invalid>)              # Other ill-formed delimiter exprs
    )
    """

    def __init__(cls, name, bases, dct):
        super().__init__(name, bases, dct)
        if 'pattern' in dct:
            # The class body supplied its own pattern; use it unchanged.
            pattern = cls.pattern
        else:
            pattern = _TemplateMetaclass.pattern % {
                'delim': _re.escape(cls.delimiter),
                'id': cls.idpattern,
            }
        cls.pattern = _re.compile(pattern, cls.flags | _re.VERBOSE)
Barry Warsaw12827c12004-09-10 03:08:08 +000085
86
class Template(metaclass=_TemplateMetaclass):
    """A string class for supporting $-substitutions.

    Placeholders have the form ``$identifier`` or ``${identifier}``; a
    doubled delimiter (``$$``) stands for a single literal delimiter.
    ``self.pattern`` is used as a compiled regular expression with the
    groups ``named``, ``braced``, ``escaped`` and ``invalid``.
    """

    delimiter = '$'
    idpattern = r'[_a-z][_a-z0-9]*'
    flags = _re.IGNORECASE

    def __init__(self, template):
        self.template = template

    # Search for $$, $identifier, ${identifier}, and any bare $'s

    def _invalid(self, mo):
        """Raise ValueError giving the line and column of a bad placeholder.

        mo is the match object whose 'invalid' group matched.
        """
        i = mo.start('invalid')
        # keepends=True so the joined lengths below still add up to offsets
        # into self.template.
        lines = self.template[:i].splitlines(True)
        if not lines:
            colno = 1
            lineno = 1
        else:
            colno = i - len(''.join(lines[:-1]))
            lineno = len(lines)
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    @staticmethod
    def _select_mapping(args, kws):
        """Return the lookup mapping for one (safe_)substitute() call.

        args may hold at most one positional mapping; kws are the keyword
        arguments.  Raises TypeError if more than one positional argument
        was given.
        """
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if not args:
            return kws
        if kws:
            # Keyword arguments take precedence over the positional mapping.
            return _multimap(kws, args[0])
        return args[0]

    def substitute(self, *args, **kws):
        """Return the template with every placeholder replaced.

        Values are looked up in the optional positional mapping and/or the
        keyword arguments (keywords win on conflict).  A missing key
        propagates as the mapping's lookup error (KeyError for a dict), and
        an ill-formed placeholder raises ValueError.
        """
        mapping = self._select_mapping(args, kws)

        # Helper function for .sub()
        def convert(mo):
            # Check the most common path first.
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                return str(mapping[named])
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)

    def safe_substitute(self, *args, **kws):
        """Like substitute(), but leave unresolvable placeholders in place.

        A placeholder whose key raises KeyError is emitted unchanged, and an
        ill-formed placeholder yields just the delimiter instead of raising
        ValueError.
        """
        mapping = self._select_mapping(args, kws)

        # Helper function for .sub()
        def convert(mo):
            named = mo.group('named')
            if named is not None:
                try:
                    return str(mapping[named])
                except KeyError:
                    return self.delimiter + named
            braced = mo.group('braced')
            if braced is not None:
                try:
                    return str(mapping[braced])
                except KeyError:
                    return self.delimiter + '{' + braced + '}'
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                return self.delimiter
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)
Eric Smith8c663262007-08-25 02:26:07 +0000169
170
171
########################################################################
# the Formatter class
# see PEP 3101 for details and purpose of this class

# The hard parts are reused from the C implementation.  They're exposed as "_"
# prefixed methods of str and unicode.

# The overall parser is implemented in str._formatter_parser.
# The field name parser is implemented in str._formatter_field_name_split
Eric Smith8c663262007-08-25 02:26:07 +0000181
class Formatter:
    """Pure-Python driver for PEP 3101 style format strings.

    format()/vformat() walk the parsed format string and delegate each
    step to an overridable hook: parse(), get_field(), get_value(),
    convert_field(), format_field() and check_unused_args().
    """

    def format(self, format_string, *args, **kwargs):
        """Format format_string with the given positional/keyword arguments."""
        return self.vformat(format_string, args, kwargs)

    def vformat(self, format_string, args, kwargs):
        """Like format(), but args and kwargs are passed as a sequence and a
        mapping instead of being unpacked."""
        used_args = set()
        # 2 is the allowed nesting depth for replacement fields that appear
        # inside a format spec.
        result = self._vformat(format_string, args, kwargs, used_args, 2)
        self.check_unused_args(used_args, args, kwargs)
        return result

    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
        """Format one level of format_string, recording used argument keys
        in used_args.  Raises ValueError when recursion_depth drops below 0."""
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                #  the formatting

                # given the field_name, find the object it references
                #  and the argument it came from
                obj, arg_used = self.get_field(field_name, args, kwargs)
                used_args.add(arg_used)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # expand the format spec, if needed
                format_spec = self._vformat(format_spec, args, kwargs,
                                            used_args, recursion_depth-1)

                # format the object and append to the result
                result.append(self.format_field(obj, format_spec))

        return ''.join(result)

    def get_value(self, key, args, kwargs):
        """Return args[key] for an integer key, otherwise kwargs[key]."""
        if isinstance(key, int):
            return args[key]
        else:
            return kwargs[key]

    def check_unused_args(self, used_args, args, kwargs):
        """Hook called after formatting; the default implementation does
        nothing."""
        pass

    def format_field(self, value, format_spec):
        """Format a single value using the built-in format()."""
        return format(value, format_spec)

    def convert_field(self, value, conversion):
        """Apply a '!r', '!s' or '!a' conversion; None means no conversion.

        Raises ValueError for any other conversion specifier.
        """
        # do any conversion on the resulting object
        if conversion is None:
            return value
        elif conversion == 's':
            return str(value)
        elif conversion == 'r':
            return repr(value)
        elif conversion == 'a':
            return ascii(value)
        raise ValueError("Unknown conversion specifier {0!s}".format(conversion))

    # returns an iterable that contains tuples of the form:
    # (literal_text, field_name, format_spec, conversion)
    # literal_text can be zero length
    # field_name can be None, in which case there's no
    #  object to format and output
    # if field_name is not None, it is looked up, formatted
    #  with format_spec and conversion and then used
    def parse(self, format_string):
        """Split format_string into (literal_text, field_name, format_spec,
        conversion) tuples."""
        try:
            # Imported locally so the block does not need a new file-level
            # import; the C parser lives in the _string helper module.
            import _string
        except ImportError:
            # Fallback for interpreters that expose the parser as a private
            # str method instead.
            return format_string._formatter_parser()
        return _string.formatter_parser(format_string)

    # given a field_name, find the object it references.
    #  field_name:   the field being looked up, e.g. "0.name"
    #                 or "lookup[3]"
    #  used_args:    a set of which args have been used
    #  args, kwargs: as passed in to vformat
    def get_field(self, field_name, args, kwargs):
        """Resolve field_name to an object.

        Returns (obj, first), where first is the leading key of field_name
        that was passed to get_value().
        """
        try:
            import _string
        except ImportError:
            # Same fallback as in parse().
            first, rest = field_name._formatter_field_name_split()
        else:
            first, rest = _string.formatter_field_name_split(field_name)

        obj = self.get_value(first, args, kwargs)

        # loop through the rest of the field_name, doing
        #  getattr or getitem as needed
        for is_attr, i in rest:
            if is_attr:
                obj = getattr(obj, i)
            else:
                obj = obj[i]

        return obj, first
Eric Smith3bcc42a2007-08-31 02:26:31 +0000281 return obj, first