blob: edf5ae22aec148f14a66c4d08d03322bbd8abc40 [file] [log] [blame]
"""A collection of string constants.

Public module variables:

whitespace -- a string containing all ASCII whitespace characters
ascii_lowercase -- a string containing all ASCII lowercase letters
ascii_uppercase -- a string containing all ASCII uppercase letters
ascii_letters -- a string containing all ASCII letters
digits -- a string containing all ASCII decimal digits
hexdigits -- a string containing all ASCII hexadecimal digits
octdigits -- a string containing all ASCII octal digits
punctuation -- a string containing all ASCII punctuation characters
printable -- a string containing all ASCII characters considered printable

"""
16
# Some strings for ctype-style character classification
whitespace = ' \t\n\r\v\f'
ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
ascii_letters = ascii_lowercase + ascii_uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'
# Raw string: the backslash is itself one of the punctuation characters.
# Written non-raw, "\]" is an invalid escape sequence and triggers a
# warning on current Python versions.
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
printable = digits + ascii_letters + punctuation + whitespace

# Case conversion helpers
# Identity table: the 256 characters with ordinals 0..255, in order.
_idmap = ''.join(map(chr, range(256)))
Guido van Rossumc6360141990-10-13 19:23:40 +000031
Barry Warsaw8bee7612004-08-25 02:22:30 +000032# Functions which aren't available as string methods.
33
34# Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Break s into words with s.split(sep), capitalize every word with
    its capitalize() method, and glue the words back together with sep
    (a single space when sep is None).  With the default sep, runs of
    whitespace therefore collapse into one space.

    """
    glue = sep or ' '
    words = s.split(sep)
    return glue.join(word.capitalize() for word in words)
45
46
47# Construct a translation string
# Lazily built list form of the 256-entry identity table (see maketrans).
_idmapL = None


def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Return a translation table (a string of 256 characters) in which
    each character of fromstr is mapped to the character at the same
    position in tostr, and every other position maps to itself.

    Raises ValueError if fromstr and tostr differ in length.  Each
    character of fromstr must have an ordinal below 256; a larger
    ordinal makes the table assignment raise IndexError.

    """
    if len(fromstr) != len(tostr):
        raise ValueError("maketrans arguments must have same length")
    global _idmapL
    if not _idmapL:
        # Build the identity table once and cache it at module level.
        _idmapL = [chr(i) for i in range(256)]
    # Work on a copy so the cached identity table is never modified.
    table = _idmapL[:]
    for src, dst in zip(fromstr, tostr):
        table[ord(src)] = dst
    return ''.join(table)
66
67
Raymond Hettinger57aef9c2004-12-07 07:55:07 +000068
Raymond Hettinger0d58e2b2004-08-26 00:21:13 +000069####################################################################
Barry Warsaw8bee7612004-08-25 02:22:30 +000070import re as _re
71
class _multimap:
    """Read-only lookup that layers one mapping on top of another.

    Used by .{safe_,}substitute() to combine the keyword arguments
    with the explicit mapping without copying either of them.
    """

    def __init__(self, primary, secondary):
        self._primary, self._secondary = primary, secondary

    def __getitem__(self, key):
        # The primary mapping wins; the secondary one is consulted only
        # when the primary raises KeyError for this key.
        try:
            found = self._primary[key]
        except KeyError:
            found = self._secondary[key]
        return found
87
88
class _TemplateMetaclass(type):
    """Metaclass that compiles the placeholder regex of each class."""

    # Skeleton of the default placeholder regex.  %(delim)s and %(id)s
    # are filled in from the class's delimiter and idpattern attributes.
    pattern = r"""
    %(delim)s(?:
      (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
      (?P<named>%(id)s)      |   # delimiter and a Python identifier
      {(?P<braced>%(id)s)}   |   # delimiter and a braced identifier
      (?P<invalid>)              # Other ill-formed delimiter exprs
    )
    """

    def __init__(cls, name, bases, dct):
        super().__init__(name, bases, dct)
        # A class that spells out its own pattern keeps it; any other
        # class gets the skeleton specialised for its own delimiter
        # and idpattern.
        if 'pattern' in dct:
            source = cls.pattern
        else:
            source = _TemplateMetaclass.pattern % dict(
                delim=_re.escape(cls.delimiter),
                id=cls.idpattern,
            )
        cls.pattern = _re.compile(source, _re.IGNORECASE | _re.VERBOSE)
109
110
class Template(metaclass=_TemplateMetaclass):
    """A string class for supporting $-substitutions.

    The template text may contain "$name" and "${name}" placeholders
    and "$$" for a literal delimiter.  Matching is driven by the
    compiled regex in the class attribute "pattern"; "delimiter" and
    "idpattern" are the class attributes that pattern is built from.
    """

    delimiter = '$'
    idpattern = r'[_a-z][_a-z0-9]*'

    def __init__(self, template):
        self.template = template

    # Search for $$, $identifier, ${identifier}, and any bare $'s

    def _invalid(self, mo):
        """Raise ValueError naming the line and column of a bad placeholder.

        mo is the match object whose 'invalid' group matched.
        """
        i = mo.start('invalid')
        lines = self.template[:i].splitlines(True)
        if not lines:
            colno = 1
            lineno = 1
        else:
            # Column is the offset into the last (possibly partial) line.
            colno = i - len(''.join(lines[:-1]))
            lineno = len(lines)
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    def substitute(*args, **kws):
        """Return the template text with every placeholder replaced.

        Accepts at most one positional mapping plus keyword arguments;
        when both are given, keyword arguments take precedence.

        Raises TypeError for more than one positional mapping and
        ValueError for an ill-formed placeholder.  A placeholder that
        is missing from the mapping propagates the mapping's own lookup
        error (KeyError for a dict).
        """
        if not args:
            raise TypeError("descriptor 'substitute' of 'Template' object "
                            "needs an argument")
        # self is taken from *args rather than declared as a parameter
        # so that a keyword argument named "self" can fill "$self".
        self, *args = args
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if not args:
            mapping = kws
        elif kws:
            mapping = _multimap(kws, args[0])
        else:
            mapping = args[0]

        # Helper function for .sub()
        def convert(mo):
            # Check the most common path first.
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                val = mapping[named]
                # The 1-tuple keeps a tuple value from being spread
                # over several format arguments.
                return '%s' % (val,)
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)

    def safe_substitute(*args, **kws):
        """Like substitute(), but leave unresolved placeholders in place.

        A placeholder whose lookup raises KeyError is emitted unchanged
        ("$name" or "${name}"), and an ill-formed placeholder yields the
        bare delimiter instead of raising ValueError.

        Raises TypeError for more than one positional mapping.
        """
        if not args:
            raise TypeError("descriptor 'safe_substitute' of 'Template' "
                            "object needs an argument")
        # See substitute(): lets callers pass a keyword named "self".
        self, *args = args
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if not args:
            mapping = kws
        elif kws:
            mapping = _multimap(kws, args[0])
        else:
            mapping = args[0]

        # Helper function for .sub()
        def convert(mo):
            named = mo.group('named')
            if named is not None:
                try:
                    # The 1-tuple keeps a tuple value from being spread
                    # over several format arguments.
                    return '%s' % (mapping[named],)
                except KeyError:
                    return self.delimiter + named
            braced = mo.group('braced')
            if braced is not None:
                try:
                    return '%s' % (mapping[braced],)
                except KeyError:
                    return self.delimiter + '{' + braced + '}'
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                return self.delimiter
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)
Eric Smith8c663262007-08-25 02:26:07 +0000192
193
194
195########################################################################
196# the Formatter class
197# see PEP 3101 for details and purpose of this class
198
199# The hard parts are reused from the C implementation. They're
200# exposed here via the sys module. sys was chosen because it's always
201# available and doesn't have to be dynamically loaded.
202
Eric Smith7ade6482007-08-26 22:27:13 +0000203# The overall parser is implemented in sys._formatter_parser.
204# The field name parser is implemented in sys._formatter_field_name_split
Eric Smith8c663262007-08-25 02:26:07 +0000205
Eric Smith7ade6482007-08-26 22:27:13 +0000206from sys import _formatter_parser, _formatter_field_name_split
Eric Smith8c663262007-08-25 02:26:07 +0000207
class Formatter:
    """Python-level driver for PEP 3101 format strings.

    Parsing is delegated to _formatter_parser and
    _formatter_field_name_split.  vformat() calls get_value(),
    format_field() and check_unused_args() on self, so each of those
    steps can be replaced individually.
    """

    def format(self, format_string, *args, **kwargs):
        """Format format_string with the given positional/keyword args."""
        return self.vformat(format_string, args, kwargs)

    def vformat(self, format_string, args, kwargs):
        """Format format_string, taking values from args and kwargs.

        Returns the concatenation of the literal text and the formatted
        replacement fields, in the order the parser yields them.
        """
        seen = set()
        pieces = []
        for chunk in _formatter_parser(format_string):
            is_markup, literal, field_name, format_spec, conversion = chunk

            if not is_markup:
                pieces.append(literal)
                continue

            # Split the field name into its first part and an iterator
            # that walks over the rest of it.
            head, tail = _formatter_field_name_split(field_name)
            seen.add(head)
            value = self.get_value(head, args, kwargs)

            # Follow the rest of the field name, doing an attribute
            # lookup or an item lookup as each step requires.
            for is_attr, step in tail:
                value = getattr(value, step) if is_attr else value[step]

            # Apply the requested conversion, if any.
            if conversion == 'r':
                value = repr(value)
            elif conversion == 's':
                value = str(value)

            # Format the object and keep the resulting text.
            pieces.append(self.format_field(value, format_spec))

        self.check_unused_args(seen, args, kwargs)
        return ''.join(pieces)

    def get_value(self, key, args, kwargs):
        """Return args[key] for an int key, kwargs[key] otherwise."""
        return args[key] if isinstance(key, int) else kwargs[key]

    def check_unused_args(self, used_args, args, kwargs):
        """Called once all fields are processed; does nothing here."""
        pass

    def format_field(self, value, format_spec):
        """Return value formatted by the built-in format()."""
        return format(value, format_spec)