blob: 4599997e98a5e81b62682e38c2dc8cae41560bbf [file] [log] [blame]
"""A collection of string constants.

Public module variables:

whitespace -- a string containing all ASCII whitespace
ascii_lowercase -- a string containing all ASCII lowercase letters
ascii_uppercase -- a string containing all ASCII uppercase letters
ascii_letters -- a string containing all ASCII letters
digits -- a string containing all ASCII decimal digits
hexdigits -- a string containing all ASCII hexadecimal digits
octdigits -- a string containing all ASCII octal digits
punctuation -- a string containing all ASCII punctuation characters
printable -- a string containing all ASCII characters considered printable

"""
16
# Some strings for ctype-style character classification
whitespace = ' \t\n\r\v\f'
ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
ascii_letters = ascii_lowercase + ascii_uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'
# Raw literal: the value contains a backslash followed by ']', which is not a
# valid escape sequence in a non-raw string.  The resulting value is the same.
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
printable = digits + ascii_letters + punctuation + whitespace
Guido van Rossumc6360141990-10-13 19:23:40 +000027
Barry Warsaw8bee7612004-08-25 02:22:30 +000028# Functions which aren't available as string methods.
29
30# Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Break s into words with split(sep), capitalize() every word, and
    glue the words back together with join.  The joiner is sep, or a
    single space when sep is None or otherwise false, so in that case
    runs of whitespace characters come out as a single space.

    """
    joiner = sep or ' '
    words = s.split(sep)
    return joiner.join([word.capitalize() for word in words])
41
42
Raymond Hettinger0d58e2b2004-08-26 00:21:13 +000043####################################################################
Barry Warsaw8bee7612004-08-25 02:22:30 +000044import re as _re
45
Barry Warsaw46b629c2004-09-13 14:35:04 +000046class _multimap:
47 """Helper class for combining multiple mappings.
48
49 Used by .{safe_,}substitute() to combine the mapping and keyword
50 arguments.
51 """
52 def __init__(self, primary, secondary):
53 self._primary = primary
54 self._secondary = secondary
55
56 def __getitem__(self, key):
57 try:
58 return self._primary[key]
59 except KeyError:
60 return self._secondary[key]
61
62
Barry Warsaw12827c12004-09-10 03:08:08 +000063class _TemplateMetaclass(type):
64 pattern = r"""
Raymond Hettinger55593c32004-09-26 18:56:44 +000065 %(delim)s(?:
66 (?P<escaped>%(delim)s) | # Escape sequence of two delimiters
67 (?P<named>%(id)s) | # delimiter and a Python identifier
68 {(?P<braced>%(id)s)} | # delimiter and a braced identifier
69 (?P<invalid>) # Other ill-formed delimiter exprs
70 )
Barry Warsaw12827c12004-09-10 03:08:08 +000071 """
72
73 def __init__(cls, name, bases, dct):
74 super(_TemplateMetaclass, cls).__init__(name, bases, dct)
75 if 'pattern' in dct:
76 pattern = cls.pattern
77 else:
78 pattern = _TemplateMetaclass.pattern % {
Barry Warsaw17cb6002004-09-18 00:06:34 +000079 'delim' : _re.escape(cls.delimiter),
Barry Warsaw12827c12004-09-10 03:08:08 +000080 'id' : cls.idpattern,
81 }
82 cls.pattern = _re.compile(pattern, _re.IGNORECASE | _re.VERBOSE)
83
84
class Template(metaclass=_TemplateMetaclass):
    """A string class for supporting $-substitutions.

    The metaclass replaces the class attribute ``pattern`` with a compiled
    regular expression, built from ``delimiter`` and ``idpattern`` unless
    the class body supplies its own ``pattern``.  The methods below read the
    named groups ``named``, ``braced``, ``escaped`` and ``invalid`` from
    each match.
    """

    delimiter = '$'
    idpattern = r'[_a-z][_a-z0-9]*'

    def __init__(self, template):
        self.template = template

    # Search for $$, $identifier, ${identifier}, and any bare $'s

    def _invalid(self, mo):
        """Raise ValueError giving the line/column of the bad placeholder.

        mo is a match object whose 'invalid' group participated in the match.
        """
        i = mo.start('invalid')
        lines = self.template[:i].splitlines(True)
        if not lines:
            colno = 1
            lineno = 1
        else:
            # Column = offset within the last (current) line.
            colno = i - len(''.join(lines[:-1]))
            lineno = len(lines)
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    @staticmethod
    def _unpack_call(method_name, args, kws):
        """Split a raw (*args, **kws) call into (self, mapping).

        args[0] is the Template instance; at most one further positional
        argument (a mapping) is accepted.  When both a mapping and keyword
        arguments are given, keyword arguments take precedence.

        Raises TypeError when the instance is missing or when more than one
        mapping is passed positionally.
        """
        if not args:
            raise TypeError("descriptor %r of 'Template' object "
                            "needs an argument" % (method_name,))
        self, *positional = args
        if len(positional) > 1:
            raise TypeError('Too many positional arguments')
        if not positional:
            mapping = kws
        elif kws:
            mapping = _multimap(kws, positional[0])
        else:
            mapping = positional[0]
        return self, mapping

    def substitute(*args, **kws):
        """Return the template with every placeholder replaced.

        Call as substitute([mapping], **kws).  Raises KeyError (from the
        mapping lookup) for a placeholder with no value, and ValueError for
        an ill-formed placeholder.

        The instance is taken from *args rather than a named parameter so
        that 'self' can itself be used as a keyword placeholder name.
        """
        self, mapping = Template._unpack_call('substitute', args, kws)

        # Helper function for .sub()
        def convert(mo):
            # Check the most common path first.
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                return str(mapping[named])
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)

    def safe_substitute(*args, **kws):
        """Like substitute(), but leave unresolved placeholders in place.

        Call as safe_substitute([mapping], **kws).  A placeholder whose
        lookup raises KeyError is emitted as delimiter + name (or
        delimiter + '{name}'); an ill-formed placeholder is emitted as the
        bare delimiter instead of raising ValueError.

        The instance is taken from *args rather than a named parameter so
        that 'self' can itself be used as a keyword placeholder name.
        """
        self, mapping = Template._unpack_call('safe_substitute', args, kws)

        # Helper function for .sub()
        def convert(mo):
            named = mo.group('named')
            if named is not None:
                try:
                    return str(mapping[named])
                except KeyError:
                    return self.delimiter + named
            braced = mo.group('braced')
            if braced is not None:
                try:
                    return str(mapping[braced])
                except KeyError:
                    return self.delimiter + '{' + braced + '}'
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                return self.delimiter
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)
Eric Smith8c663262007-08-25 02:26:07 +0000166
167
168
169########################################################################
170# the Formatter class
171# see PEP 3101 for details and purpose of this class
172
# The hard parts are reused from the C implementation.  They're exposed as "_"
# prefixed methods of str.
Eric Smith8c663262007-08-25 02:26:07 +0000175
Eric Smithe226b552007-08-27 11:28:18 +0000176# The overall parser is implemented in str._formatter_parser.
177# The field name parser is implemented in str._formatter_field_name_split
Eric Smith8c663262007-08-25 02:26:07 +0000178
class Formatter:
    """Python-level driver for str.format()-style formatting (see PEP 3101).

    format()/vformat() walk the parsed format string and delegate each step
    to an overridable method: parse(), get_field(), get_value(),
    convert_field(), format_field() and check_unused_args().
    """

    def format(self, format_string, *args, **kwargs):
        """Format format_string with the given positional/keyword arguments."""
        return self.vformat(format_string, args, kwargs)

    def vformat(self, format_string, args, kwargs):
        """Like format(), but takes args and kwargs as a sequence and a dict."""
        used_args = set()
        # 2 = how many levels of nested replacement fields are allowed
        # inside format specs before _vformat raises ValueError.
        result = self._vformat(format_string, args, kwargs, used_args, 2)
        self.check_unused_args(used_args, args, kwargs)
        return result

    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
        """Do the actual formatting, recording consumed args in used_args.

        Raises ValueError when recursion_depth drops below zero.
        """
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                #  the formatting

                # given the field_name, find the object it references
                #  and the argument it came from
                obj, arg_used = self.get_field(field_name, args, kwargs)
                used_args.add(arg_used)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # expand the format spec, if needed
                format_spec = self._vformat(format_spec, args, kwargs,
                                            used_args, recursion_depth-1)

                # format the object and append to the result
                result.append(self.format_field(obj, format_spec))

        return ''.join(result)

    def get_value(self, key, args, kwargs):
        """Return args[key] for an int key, otherwise kwargs[key]."""
        if isinstance(key, int):
            return args[key]
        else:
            return kwargs[key]

    def check_unused_args(self, used_args, args, kwargs):
        """Hook called by vformat() after formatting; does nothing here."""
        pass

    def format_field(self, value, format_spec):
        """Format value with the builtin format() using format_spec."""
        return format(value, format_spec)

    def convert_field(self, value, conversion):
        """Apply the '!r', '!s' or '!a' conversion; None means no conversion.

        Raises ValueError for any other conversion specifier.
        """
        # do any conversion on the resulting object
        if conversion == 'r':
            return repr(value)
        elif conversion == 's':
            return str(value)
        elif conversion == 'a':
            return ascii(value)
        elif conversion is None:
            return value
        raise ValueError("Unknown conversion specifier {0!s}".format(conversion))

    # returns an iterable that contains tuples of the form:
    # (literal_text, field_name, format_spec, conversion)
    # literal_text can be zero length
    # field_name can be None, in which case there's no
    #  object to format and output
    # if field_name is not None, it is looked up, formatted
    #  with format_spec and conversion and then used
    def parse(self, format_string):
        """Split format_string into (literal, field, spec, conversion) tuples."""
        parser = getattr(format_string, '_formatter_parser', None)
        if parser is not None:
            return parser()
        # The private str method is absent on this interpreter; use the
        # equivalent function from the _string helper module instead.
        import _string
        return _string.formatter_parser(format_string)

    # given a field_name, find the object it references.
    #  field_name:   the field being looked up, e.g. "0.name"
    #                 or "lookup[3]"
    #  args, kwargs: as passed in to vformat
    # returns (object, first), where first is the leading argument
    #  index or keyword name of field_name
    def get_field(self, field_name, args, kwargs):
        """Resolve field_name to an object; return (object, first_key)."""
        splitter = getattr(field_name, '_formatter_field_name_split', None)
        if splitter is not None:
            first, rest = splitter()
        else:
            # Same fallback as in parse().
            import _string
            first, rest = _string.formatter_field_name_split(field_name)

        obj = self.get_value(first, args, kwargs)

        # loop through the rest of the field_name, doing
        #  getattr or getitem as needed
        for is_attr, i in rest:
            if is_attr:
                obj = getattr(obj, i)
            else:
                obj = obj[i]

        return obj, first
Eric Smith3bcc42a2007-08-31 02:26:31 +0000278 return obj, first