blob: cef5029cbe97cec7294922c46a4c1b666ab6a5cd [file] [log] [blame]
"""A collection of string constants.

Public module variables:

whitespace -- a string containing all characters considered whitespace
ascii_lowercase -- a string containing all ASCII lowercase letters
ascii_uppercase -- a string containing all ASCII uppercase letters
ascii_letters -- a string containing all ASCII letters
digits -- a string containing all characters considered decimal digits
hexdigits -- a string containing all characters considered hexadecimal digits
octdigits -- a string containing all characters considered octal digits
punctuation -- a string containing all characters considered punctuation
printable -- a string containing all characters considered printable

"""
16
# Some strings for ctype-style character classification
whitespace = ' \t\n\r\v\f'
ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
ascii_letters = ascii_lowercase + ascii_uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'
# Raw literal: the backslash is itself one of the punctuation characters,
# and a non-raw "\]" would be an invalid escape sequence.
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
printable = digits + ascii_letters + punctuation + whitespace

# Case conversion helpers
# Identity table: the character at index i is chr(i), for i in 0..255.
_idmap = ''.join(map(chr, range(256)))
Guido van Rossumc6360141990-10-13 19:23:40 +000031
Barry Warsaw8bee7612004-08-25 02:22:30 +000032# Functions which aren't available as string methods.
33
# Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Cut s into words with s.split(sep), run capitalize() on every
    word, and glue the results back together.  The glue is sep
    itself, or a single space when sep is None (or otherwise false);
    in the default case runs of whitespace therefore collapse to one
    space.

    """
    # Bind the joiner first so an unusable sep fails before s is split.
    glue = (sep or ' ').join
    return glue([word.capitalize() for word in s.split(sep)])
45
46
# Construct a translation string
# Lazily built identity table (list of the 256 characters chr(0)..chr(255));
# created on the first maketrans() call and copied for each new table.
_idmapL = None
def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Return a translation table (a string of 256 characters)
    suitable for use in string.translate.  The character at index
    ord(c) is the replacement for c; characters not listed in frm
    map to themselves.

    The strings frm and to must be of the same length, otherwise
    ValueError is raised.  Every character of frm must have an
    ordinal below 256, since the table only has 256 slots.

    """
    if len(fromstr) != len(tostr):
        raise ValueError("maketrans arguments must have same length")
    global _idmapL
    if _idmapL is None:
        _idmapL = [chr(code) for code in range(256)]
    # Work on a copy so the cached identity table is never modified.
    table = _idmapL[:]
    for src, dst in zip(fromstr, tostr):
        table[ord(src)] = dst
    return ''.join(table)
66
67
Raymond Hettinger57aef9c2004-12-07 07:55:07 +000068
Raymond Hettinger0d58e2b2004-08-26 00:21:13 +000069####################################################################
Barry Warsaw8bee7612004-08-25 02:22:30 +000070import re as _re
71
class _multimap:
    """Lookup-only view that layers one mapping on top of another.

    A key is looked up in the primary mapping first; the secondary
    mapping is consulted only when the primary raises KeyError.

    Used by .{safe_,}substitute() to combine the mapping and keyword
    arguments.
    """
    def __init__(self, primary, secondary):
        self._primary, self._secondary = primary, secondary

    def __getitem__(self, key):
        try:
            found = self._primary[key]
        except KeyError:
            # Not in the primary; a miss here propagates as KeyError.
            found = self._secondary[key]
        return found
87
88
class _TemplateMetaclass(type):
    """Metaclass that compiles the placeholder regex of a Template class.

    When a class body defines its own 'pattern', that pattern is
    compiled as is.  Otherwise the default pattern below is filled in
    with the class's (regex-escaped) delimiter and its idpattern.
    Either way the class attribute 'pattern' ends up holding a regular
    expression compiled with the IGNORECASE and VERBOSE flags.
    """
    pattern = r"""
    %(delim)s(?:
      (?P<escaped>%(delim)s) | # Escape sequence of two delimiters
      (?P<named>%(id)s) | # delimiter and a Python identifier
      {(?P<braced>%(id)s)} | # delimiter and a braced identifier
      (?P<invalid>) # Other ill-formed delimiter exprs
    )
    """

    def __init__(cls, name, bases, dct):
        super(_TemplateMetaclass, cls).__init__(name, bases, dct)
        if 'pattern' in dct:
            pattern = cls.pattern
        else:
            pattern = _TemplateMetaclass.pattern % {
                'delim' : _re.escape(cls.delimiter),
                'id' : cls.idpattern,
                }
        cls.pattern = _re.compile(pattern, _re.IGNORECASE | _re.VERBOSE)


class Template(metaclass=_TemplateMetaclass):
    """A string class for supporting $-substitutions.

    Class attributes that subclasses may override:

    delimiter -- the string that introduces a placeholder
    idpattern -- regular expression for a placeholder name
    pattern -- complete placeholder regular expression; it must define
               the groups 'escaped', 'named', 'braced' and 'invalid'
    """

    delimiter = '$'
    idpattern = r'[_a-z][_a-z0-9]*'

    def __init__(self, template):
        self.template = template

    # Search for $$, $identifier, ${identifier}, and any bare $'s

    def _invalid(self, mo):
        """Raise ValueError giving the line and column of a bad placeholder.

        mo is a match object whose 'invalid' group took part in the match.
        """
        i = mo.start('invalid')
        lines = self.template[:i].splitlines(True)
        if not lines:
            colno = 1
            lineno = 1
        else:
            # Column = offset into the last line of the text before i.
            colno = i - len(''.join(lines[:-1]))
            lineno = len(lines)
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    def substitute(self, *args, **kws):
        """Return the template with every placeholder replaced.

        Values are looked up in an optional positional mapping and in the
        keyword arguments; when both are given, keyword arguments are
        consulted first.

        Raises TypeError if more than one positional argument is given
        and ValueError for an ill-formed placeholder.  A lookup failure
        for a placeholder name (e.g. KeyError) propagates to the caller.
        """
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if not args:
            mapping = kws
        elif kws:
            mapping = _multimap(kws, args[0])
        else:
            mapping = args[0]
        # Helper function for .sub()
        def convert(mo):
            # Check the most common path first.
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                return str(mapping[named])
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)

    def safe_substitute(self, *args, **kws):
        """Like substitute(), but leave unresolvable placeholders alone.

        A placeholder whose name raises KeyError, and any ill-formed
        placeholder, is copied to the result exactly as it appears in the
        template instead of raising.

        Raises TypeError if more than one positional argument is given.
        """
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if not args:
            mapping = kws
        elif kws:
            mapping = _multimap(kws, args[0])
        else:
            mapping = args[0]
        # Helper function for .sub()
        def convert(mo):
            for group in ('named', 'braced'):
                key = mo.group(group)
                if key is not None:
                    try:
                        return str(mapping[key])
                    except KeyError:
                        # Echo the matched text rather than rebuilding it
                        # from delimiter and braces: a subclass may have
                        # overridden 'pattern' with a different syntax.
                        return mo.group()
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                return mo.group()
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)
Eric Smith8c663262007-08-25 02:26:07 +0000192
193
194
195########################################################################
196# the Formatter class
197# see PEP 3101 for details and purpose of this class
198
# The hard parts are reused from the C implementation.  They're
# exposed here through private methods of the built-in str type, which
# is always available and doesn't have to be dynamically loaded.
202
Eric Smithe226b552007-08-27 11:28:18 +0000203# The overall parser is implemented in str._formatter_parser.
204# The field name parser is implemented in str._formatter_field_name_split
Eric Smith8c663262007-08-25 02:26:07 +0000205
class Formatter:
    """Customizable string formatting engine (see PEP 3101).

    format() and vformat() drive the process; every other method is a
    hook that a subclass may override to change one step of it.
    """

    def format(self, format_string, *args, **kwargs):
        """Format format_string using the given positional and keyword
        arguments; a convenience wrapper around vformat()."""
        return self.vformat(format_string, args, kwargs)

    def vformat(self, format_string, args, kwargs):
        """Format format_string, taking values from the sequence args and
        the mapping kwargs, and return the resulting string.

        check_unused_args() is called once all fields have been
        processed.
        """
        used_args = self.get_empty_used_args()
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):
            if literal_text is None:
                # this is some markup, find the object and do
                # the formatting

                # given the field_name, find the object it references
                obj = self.get_field(field_name, args, kwargs, used_args)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # format the object and append to the result
                result.append(self.format_field(obj, format_spec))
            else:
                # this is literal text, use it directly
                result.append(literal_text)
        self.check_unused_args(used_args, args, kwargs)
        return ''.join(result)

    def get_empty_used_args(self):
        """Return a new, empty container for recording used arguments."""
        return set()

    def get_value(self, key, args, kwargs):
        """Return the argument selected by key: args[key] when key is an
        int, kwargs[key] otherwise."""
        if isinstance(key, int):
            return args[key]
        else:
            return kwargs[key]

    def check_unused_args(self, used_args, args, kwargs):
        """Hook called at the end of vformat(); does nothing by default."""
        pass

    def format_field(self, value, format_spec):
        """Return value formatted according to format_spec, using the
        built-in format()."""
        return format(value, format_spec)

    def convert_field(self, value, conversion):
        """Apply a conversion to value before it is formatted.

        conversion is 'r' (repr), 's' (str) or None (leave value
        unchanged).  Any other conversion raises ValueError.
        """
        if conversion is None:
            return value
        if conversion == 'r':
            return repr(value)
        if conversion == 's':
            return str(value)
        # Raise explicitly: an assert would be stripped under -O and let
        # an unknown conversion through silently.
        raise ValueError('Unknown conversion specifier %r' % (conversion,))

    # returns an iterable that contains tuples of the form:
    # (literal_text, field_name, format_spec, conversion)
    def parse(self, format_string):
        """Split format_string into the tuples consumed by vformat();
        delegates to the string's _formatter_parser() method."""
        return format_string._formatter_parser()

    # given a field_name, find the object it references.
    #  field_name:   the field being looked up, e.g. "0.name"
    #                 or "lookup[3]"
    #  used_args:    a set of which args have been used
    #  args, kwargs: as passed in to vformat
    # also, mark it as used in 'used_args'
    def get_field(self, field_name, args, kwargs, used_args):
        """Return the object that field_name refers to.

        The leading part of the field name is recorded in used_args and
        resolved with get_value(); the remaining parts are applied one
        by one as attribute or item lookups.
        """
        first, rest = field_name._formatter_field_name_split()

        used_args.add(first)
        obj = self.get_value(first, args, kwargs)

        # loop through the rest of the field_name, doing
        #  getattr or getitem as needed
        for is_attr, i in rest:
            if is_attr:
                obj = getattr(obj, i)
            else:
                obj = obj[i]

        return obj
290 return obj