blob: e5b5284572ff80d4a98015e320bddbabcd0ac41a [file] [log] [blame]
"""A collection of string constants.

Public module variables:

whitespace -- a string containing all characters considered whitespace
lowercase -- a string containing all characters considered lowercase letters
uppercase -- a string containing all characters considered uppercase letters
letters -- a string containing all characters considered letters
ascii_lowercase -- the same string as lowercase
ascii_uppercase -- the same string as uppercase
ascii_letters -- ascii_lowercase followed by ascii_uppercase
digits -- a string containing all characters considered decimal digits
hexdigits -- a string containing all characters considered hexadecimal digits
octdigits -- a string containing all characters considered octal digits
punctuation -- a string containing all characters considered punctuation
printable -- a string containing all characters considered printable

"""
16
# Some strings for ctype-style character classification
whitespace = ' \t\n\r\v\f'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
ascii_lowercase = lowercase
ascii_uppercase = uppercase
ascii_letters = ascii_lowercase + ascii_uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'
# Raw string: the backslash is a literal member of the set, not the start
# of an escape sequence ("\]" is not a valid escape).
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
printable = digits + letters + punctuation + whitespace

# Case conversion helpers
# Identity table: the character with ordinal i sits at index i, for the
# 256 ordinals 0..255.  Built with range() so no temporary name is left
# behind at module level.
_idmap = ''.join(chr(i) for i in range(256))
Guido van Rossumc6360141990-10-13 19:23:40 +000036
Barry Warsaw8bee7612004-08-25 02:22:30 +000037# Functions which aren't available as string methods.
38
# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Break s into words with s.split(sep), apply capitalize() to every
    word, and glue the results back together.  The glue is sep when sep
    is truthy, otherwise a single space.  With the default sep of None,
    split() discards leading/trailing whitespace and treats each run of
    whitespace as one separator, so such runs come back as one space.

    """
    glue = sep or ' '
    words = s.split(sep)
    return glue.join(word.capitalize() for word in words)
50
51
52# Construct a translation string
# Lazily built list form of the 256-entry identity table; filled in on the
# first maketrans() call and copied (never mutated) on every call.
_idmapL = None
def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Return a translation table (a string 256 characters long) suitable
    for use with translate.  The character at index ord(c) is the
    replacement for c: each character of frm maps to the character at
    the same position in to, and every other index maps to itself.
    If a character occurs more than once in frm, its last occurrence
    wins.

    Raises ValueError if frm and to differ in length, and IndexError if
    frm contains a character whose ordinal does not fit the 256-entry
    table.

    """
    global _idmapL
    if len(fromstr) != len(tostr):
        raise ValueError("maketrans arguments must have same length")
    if not _idmapL:
        # Build the identity list directly from the ordinals instead of
        # relying on the Python 2-only map(None, ...) idiom.
        _idmapL = [chr(i) for i in range(256)]
    table = _idmapL[:]
    for src, dst in zip(fromstr, tostr):
        table[ord(src)] = dst
    return ''.join(table)
72
73
Raymond Hettinger57aef9c2004-12-07 07:55:07 +000074
Raymond Hettinger0d58e2b2004-08-26 00:21:13 +000075####################################################################
Barry Warsaw8bee7612004-08-25 02:22:30 +000076import re as _re
77
class _multimap:
    """Read-only view that looks a key up in two mappings in turn.

    .{safe_,}substitute() build one of these when they are given both a
    mapping and keyword arguments, so that a single lookup object can be
    handed to the substitution callback.
    """

    def __init__(self, primary, secondary):
        self._primary, self._secondary = primary, secondary

    def __getitem__(self, key):
        # The primary mapping is consulted first; only a KeyError from it
        # sends the lookup on to the secondary mapping, whose own KeyError
        # (if any) propagates to the caller.
        try:
            found = self._primary[key]
        except KeyError:
            found = self._secondary[key]
        return found
93
94
class _TemplateMetaclass(type):
    # Default pattern source.  The %(delim)s and %(id)s slots are filled
    # in from the class being created; it is compiled with re.VERBOSE, so
    # the layout and the inline comments are not part of the expression.
    pattern = r"""
    %(delim)s(?:
      (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
      (?P<named>%(id)s)      |   # delimiter and a Python identifier
      {(?P<braced>%(id)s)}   |   # delimiter and a braced identifier
      (?P<invalid>)              # Other ill-formed delimiter exprs
    )
    """

    def __init__(cls, name, bases, dct):
        """Compile and store the placeholder pattern on the new class.

        A class whose body defines 'pattern' has that value compiled
        as-is; any other class gets the default pattern above, filled in
        with its escaped 'delimiter' and its 'idpattern'.
        """
        super().__init__(name, bases, dct)
        if 'pattern' not in dct:
            substitutions = {
                'delim': _re.escape(cls.delimiter),
                'id': cls.idpattern,
            }
            source = _TemplateMetaclass.pattern % substitutions
        else:
            source = cls.pattern
        cls.pattern = _re.compile(source, _re.IGNORECASE | _re.VERBOSE)
115
116
class Template(metaclass=_TemplateMetaclass):
    """A string class for supporting $-substitutions.

    The template text may contain $identifier and ${identifier}
    placeholders; a doubled delimiter ($$) stands for one literal
    delimiter.  The class attributes 'delimiter' and 'idpattern' feed the
    regular expression that the metaclass compiles into 'pattern'.
    """

    delimiter = '$'
    idpattern = r'[_a-z][_a-z0-9]*'

    def __init__(self, template):
        self.template = template

    # Search for $$, $identifier, ${identifier}, and any bare $'s

    def _invalid(self, mo):
        """Raise ValueError locating the ill-formed placeholder in mo.

        The line and column are derived from the template text that
        precedes the start of the 'invalid' group.
        """
        i = mo.start('invalid')
        lines = self.template[:i].splitlines(True)
        if not lines:
            colno = 1
            lineno = 1
        else:
            # Subtract the length of all complete preceding lines to get
            # the offset within the current line.
            colno = i - len(''.join(lines[:-1]))
            lineno = len(lines)
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    @staticmethod
    def _resolve_mapping(args, kws):
        """Return the lookup object for a substitute()-style call.

        args may hold at most one positional mapping.  When both a
        mapping and keyword arguments are given, keyword arguments are
        consulted first.  Raises TypeError for more than one positional
        argument.
        """
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if not args:
            return kws
        if kws:
            return _multimap(kws, args[0])
        return args[0]

    def substitute(self, *args, **kws):
        """Return the template with every placeholder replaced.

        Values come from an optional positional mapping and/or keyword
        arguments.  A placeholder whose name is missing raises KeyError
        from the lookup; an ill-formed placeholder raises ValueError.
        """
        mapping = self._resolve_mapping(args, kws)

        # Helper function for .sub()
        def convert(mo):
            # Check the most common path first.
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                val = mapping[named]
                # We use this idiom instead of str() because the latter will
                # fail if val is a Unicode containing non-ASCII characters.
                return '%s' % (val,)
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)

    def safe_substitute(self, *args, **kws):
        """Like substitute(), but leave unresolvable placeholders alone.

        A placeholder whose name is missing from the mapping, and any
        ill-formed placeholder, is copied to the result exactly as it
        appears in the template instead of raising.
        """
        mapping = self._resolve_mapping(args, kws)

        # Helper function for .sub()
        def convert(mo):
            for group in ('named', 'braced'):
                key = mo.group(group)
                if key is not None:
                    try:
                        # We use this idiom instead of str() because the
                        # latter will fail if val is a Unicode containing
                        # non-ASCII characters.
                        return '%s' % (mapping[key],)
                    except KeyError:
                        # Return the matched text itself rather than
                        # rebuilding it from the delimiter, so the
                        # placeholder survives byte-for-byte.
                        return mo.group()
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                return mo.group()
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)
Barry Warsaw8bee7612004-08-25 02:22:30 +0000198
199
# Try importing optional built-in module "strop" -- if it exists,
# it redefines some string operations that are 100-1000 times faster;
# only maketrans is taken from it here.
# It also defines values for whitespace, lowercase and uppercase
# that match <ctype.h>'s definitions, but those are not imported.
Guido van Rossum2db91351992-10-18 17:09:59 +0000204
205try:
Neal Norwitz9d72bb42007-04-17 08:48:32 +0000206 from strop import maketrans
Guido van Rossumb6775db1994-08-01 11:34:53 +0000207except ImportError:
Fred Drake857c4c32000-02-10 16:21:11 +0000208 pass # Use the original versions