"""A collection of string constants.

Public module variables:

whitespace -- a string containing all characters considered whitespace
ascii_lowercase -- a string containing all ASCII lowercase letters
ascii_uppercase -- a string containing all ASCII uppercase letters
ascii_letters -- a string containing all ASCII letters
digits -- a string containing all characters considered decimal digits
hexdigits -- a string containing all characters considered hexadecimal digits
octdigits -- a string containing all characters considered octal digits
punctuation -- a string containing all characters considered punctuation
printable -- a string containing all characters considered printable

"""
 | 16 |  | 
# Some strings for ctype-style character classification.
# Every constant below is built from ASCII literals only.
whitespace = ' \t\n\r\v\f'
ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
ascii_letters = ascii_lowercase + ascii_uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'
# Raw string: the backslash is itself a punctuation character, not the
# start of an escape sequence ("\]" is not a valid escape).
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
printable = digits + ascii_letters + punctuation + whitespace

# Case conversion helpers
# Identity map: the character at index i is chr(i), for i in 0..255.
_idmap = ''.join(chr(c) for c in range(256))
| Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 31 |  | 
| Barry Warsaw | 8bee761 | 2004-08-25 02:22:30 +0000 | [diff] [blame] | 32 | # Functions which aren't available as string methods. | 
 | 33 |  | 
 | 34 | # Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def". | 
| Barry Warsaw | 8bee761 | 2004-08-25 02:22:30 +0000 | [diff] [blame] | 35 | def capwords(s, sep=None): | 
 | 36 |     """capwords(s, [sep]) -> string | 
 | 37 |  | 
 | 38 |     Split the argument into words using split, capitalize each | 
 | 39 |     word using capitalize, and join the capitalized words using | 
 | 40 |     join. Note that this replaces runs of whitespace characters by | 
 | 41 |     a single space. | 
 | 42 |  | 
 | 43 |     """ | 
 | 44 |     return (sep or ' ').join([x.capitalize() for x in s.split(sep)]) | 
 | 45 |  | 
 | 46 |  | 
 | 47 | # Construct a translation string | 
 | 48 | _idmapL = None | 
 | 49 | def maketrans(fromstr, tostr): | 
 | 50 |     """maketrans(frm, to) -> string | 
 | 51 |  | 
 | 52 |     Return a translation table (a string of 256 bytes long) | 
 | 53 |     suitable for use in string.translate.  The strings frm and to | 
 | 54 |     must be of the same length. | 
 | 55 |  | 
 | 56 |     """ | 
 | 57 |     if len(fromstr) != len(tostr): | 
 | 58 |         raise ValueError, "maketrans arguments must have same length" | 
 | 59 |     global _idmapL | 
 | 60 |     if not _idmapL: | 
| Guido van Rossum | c1f779c | 2007-07-03 08:25:58 +0000 | [diff] [blame] | 61 |         _idmapL = list(_idmap) | 
| Barry Warsaw | 8bee761 | 2004-08-25 02:22:30 +0000 | [diff] [blame] | 62 |     L = _idmapL[:] | 
| Guido van Rossum | c1f779c | 2007-07-03 08:25:58 +0000 | [diff] [blame] | 63 |     for i, c in enumerate(fromstr): | 
 | 64 |         L[ord(c)] = tostr[i] | 
| Barry Warsaw | 8bee761 | 2004-08-25 02:22:30 +0000 | [diff] [blame] | 65 |     return ''.join(L) | 
 | 66 |  | 
 | 67 |  | 
| Raymond Hettinger | 57aef9c | 2004-12-07 07:55:07 +0000 | [diff] [blame] | 68 |  | 
| Raymond Hettinger | 0d58e2b | 2004-08-26 00:21:13 +0000 | [diff] [blame] | 69 | #################################################################### | 
| Barry Warsaw | 8bee761 | 2004-08-25 02:22:30 +0000 | [diff] [blame] | 70 | import re as _re | 
 | 71 |  | 
class _multimap:
    """Read-only view that layers one mapping on top of another.

    Used by .{safe_,}substitute() to merge the keyword arguments with
    the positional mapping: a key is looked up in the primary mapping
    first, and only if that raises KeyError in the secondary one.
    """
    def __init__(self, primary, secondary):
        self._primary, self._secondary = primary, secondary

    def __getitem__(self, key):
        try:
            value = self._primary[key]
        except KeyError:
            # Not in the primary mapping; let the secondary one answer
            # (or raise its own KeyError).
            value = self._secondary[key]
        return value
 | 88 |  | 
class _TemplateMetaclass(type):
    """Metaclass that compiles the placeholder regex for each class.

    A class that defines its own ``pattern`` in its body has that
    pattern compiled as is.  Otherwise the default pattern below is
    filled in with the class's (regex-escaped) ``delimiter`` and its
    ``idpattern``.  Either way ``cls.pattern`` ends up as a compiled,
    case-insensitive, verbose regular expression.
    """
    pattern = r"""
    %(delim)s(?:
      (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
      (?P<named>%(id)s)      |   # delimiter and a Python identifier
      {(?P<braced>%(id)s)}   |   # delimiter and a braced identifier
      (?P<invalid>)              # Other ill-formed delimiter exprs
    )
    """

    def __init__(cls, name, bases, dct):
        super().__init__(name, bases, dct)
        if 'pattern' not in dct:
            # No explicit pattern in the class body: derive one from
            # the delimiter and identifier pattern.
            substitutions = {
                'delim' : _re.escape(cls.delimiter),
                'id'    : cls.idpattern,
                }
            source = _TemplateMetaclass.pattern % substitutions
        else:
            source = cls.pattern
        cls.pattern = _re.compile(source, _re.IGNORECASE | _re.VERBOSE)
 | 109 |  | 
 | 110 |  | 
class Template(metaclass=_TemplateMetaclass):
    """A string class for supporting $-substitutions.

    The class attributes ``delimiter`` and ``idpattern`` are turned into
    the compiled regular expression ``pattern`` by the metaclass.  The
    substitution methods rely on that pattern defining the named groups
    ``escaped``, ``named``, ``braced`` and ``invalid``.
    """

    delimiter = '$'
    idpattern = r'[_a-z][_a-z0-9]*'

    def __init__(self, template):
        self.template = template

    # Search for $$, $identifier, ${identifier}, and any bare $'s

    def _invalid(self, mo):
        """Raise ValueError describing where the bad placeholder in mo is.

        The line and column are computed from the text that precedes the
        start of the ``invalid`` group in the template.
        """
        i = mo.start('invalid')
        lines = self.template[:i].splitlines(True)
        if not lines:
            colno = 1
            lineno = 1
        else:
            # Column is the offset past everything on the earlier lines.
            colno = i - len(''.join(lines[:-1]))
            lineno = len(lines)
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    def substitute(self, *args, **kws):
        """Return the template with every placeholder replaced.

        Values come from an optional single positional mapping and from
        the keyword arguments; when both supply a name, the keyword
        argument wins.

        Raises TypeError if more than one positional argument is given
        and ValueError if the template contains an ill-formed
        placeholder.  A placeholder with no value propagates the
        mapping's own lookup error (KeyError for a dict).
        """
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if not args:
            mapping = kws
        elif kws:
            mapping = _multimap(kws, args[0])
        else:
            mapping = args[0]
        # Helper function for .sub()
        def convert(mo):
            # Check the most common path first.
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                return str(mapping[named])
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)

    def safe_substitute(self, *args, **kws):
        """Like substitute(), but never fail on a placeholder.

        A placeholder whose name raises KeyError in the mapping, and any
        ill-formed placeholder, is left in the result exactly as it was
        written in the template.  An escaped delimiter is still reduced
        to a single delimiter.

        Raises TypeError if more than one positional argument is given.
        """
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if not args:
            mapping = kws
        elif kws:
            mapping = _multimap(kws, args[0])
        else:
            mapping = args[0]
        # Helper function for .sub()
        def convert(mo):
            named = mo.group('named')
            if named is None:
                named = mo.group('braced')
            if named is not None:
                try:
                    return str(mapping[named])
                except KeyError:
                    # Return the matched text itself rather than
                    # rebuilding it from the delimiter: a custom pattern
                    # may use different bracing, and the match is
                    # case-insensitive.
                    return mo.group()
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                return mo.group()
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)
| Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 192 |  | 
 | 193 |  | 
 | 194 |  | 
 | 195 | ######################################################################## | 
 | 196 | # the Formatter class | 
 | 197 | # see PEP 3101 for details and purpose of this class | 
 | 198 |  | 
# The hard parts are reused from the C implementation.  They're
# exposed here as private methods of the built-in str type, which is
# always available and doesn't have to be dynamically loaded.

# The overall parser is implemented in str._formatter_parser.
# The field name parser is implemented in str._formatter_field_name_split.
| Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 205 |  | 
 | 206 | class Formatter: | 
 | 207 |     def format(self, format_string, *args, **kwargs): | 
 | 208 |         return self.vformat(format_string, args, kwargs) | 
 | 209 |  | 
 | 210 |     def vformat(self, format_string, args, kwargs): | 
| Eric Smith | 9e7c8da | 2007-08-28 11:15:20 +0000 | [diff] [blame] | 211 |         used_args = self.get_empty_used_args() | 
| Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 212 |         result = [] | 
| Eric Smith | 9e7c8da | 2007-08-28 11:15:20 +0000 | [diff] [blame] | 213 |         for literal_text, field_name, format_spec, conversion in \ | 
 | 214 |                 self.parse(format_string): | 
 | 215 |             if literal_text is None: | 
 | 216 |                 # this is some markup, find the object and do | 
 | 217 |                 #  the formatting | 
 | 218 |  | 
| Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 219 |                 # given the field_name, find the object it references | 
| Eric Smith | 9e7c8da | 2007-08-28 11:15:20 +0000 | [diff] [blame] | 220 |                 obj = self.get_field(field_name, args, kwargs, used_args) | 
| Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 221 |  | 
 | 222 |                 # do any conversion on the resulting object | 
| Eric Smith | 9e7c8da | 2007-08-28 11:15:20 +0000 | [diff] [blame] | 223 |                 obj = self.convert_field(obj, conversion) | 
| Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 224 |  | 
 | 225 |                 # format the object and append to the result | 
 | 226 |                 result.append(self.format_field(obj, format_spec)) | 
| Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 227 |             else: | 
| Eric Smith | 9e7c8da | 2007-08-28 11:15:20 +0000 | [diff] [blame] | 228 |                 # this is literal text, use it directly | 
 | 229 |                 result.append(literal_text) | 
| Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 230 |         self.check_unused_args(used_args, args, kwargs) | 
| Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 231 |         return ''.join(result) | 
 | 232 |  | 
| Eric Smith | 9e7c8da | 2007-08-28 11:15:20 +0000 | [diff] [blame] | 233 |  | 
 | 234 |     def get_empty_used_args(self): | 
 | 235 |         return set() | 
 | 236 |  | 
 | 237 |  | 
| Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 238 |     def get_value(self, key, args, kwargs): | 
| Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 239 |         if isinstance(key, int): | 
 | 240 |             return args[key] | 
 | 241 |         else: | 
 | 242 |             return kwargs[key] | 
| Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 243 |  | 
| Eric Smith | 9e7c8da | 2007-08-28 11:15:20 +0000 | [diff] [blame] | 244 |  | 
| Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 245 |     def check_unused_args(self, used_args, args, kwargs): | 
 | 246 |         pass | 
 | 247 |  | 
| Eric Smith | 9e7c8da | 2007-08-28 11:15:20 +0000 | [diff] [blame] | 248 |  | 
| Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 249 |     def format_field(self, value, format_spec): | 
| Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 250 |         return format(value, format_spec) | 
| Eric Smith | 9e7c8da | 2007-08-28 11:15:20 +0000 | [diff] [blame] | 251 |  | 
 | 252 |  | 
 | 253 |     def convert_field(self, value, conversion): | 
 | 254 |         # do any conversion on the resulting object | 
 | 255 |         if conversion == 'r': | 
 | 256 |             return repr(value) | 
 | 257 |         elif conversion == 's': | 
 | 258 |             return str(value) | 
 | 259 |         else: | 
 | 260 |             assert conversion is None | 
 | 261 |             return value | 
 | 262 |  | 
 | 263 |  | 
 | 264 |     # returns an iterable that contains tuples of the form: | 
 | 265 |     # (literal_text, field_name, format_spec, conversion) | 
 | 266 |     def parse(self, format_string): | 
 | 267 |         return format_string._formatter_parser() | 
 | 268 |  | 
 | 269 |  | 
 | 270 |     # given a field_name, find the object it references. | 
 | 271 |     #  field_name:   the field being looked up, e.g. "0.name" | 
 | 272 |     #                 or "lookup[3]" | 
 | 273 |     #  used_args:    a set of which args have been used | 
 | 274 |     #  args, kwargs: as passed in to vformat | 
 | 275 |     # also, mark it as used in 'used_args' | 
 | 276 |     def get_field(self, field_name, args, kwargs, used_args): | 
 | 277 |         first, rest = field_name._formatter_field_name_split() | 
 | 278 |  | 
 | 279 |         used_args.add(first) | 
 | 280 |         obj = self.get_value(first, args, kwargs) | 
 | 281 |  | 
 | 282 |         # loop through the rest of the field_name, doing | 
 | 283 |         #  getattr or getitem as needed | 
 | 284 |         for is_attr, i in rest: | 
 | 285 |             if is_attr: | 
 | 286 |                 obj = getattr(obj, i) | 
 | 287 |             else: | 
 | 288 |                 obj = obj[i] | 
 | 289 |  | 
 | 290 |         return obj |