blob: d166f3875e68682d8dc7724674f3c4e93b07743e [file] [log] [blame]
"""A collection of string operations (most are no longer used).

Warning: most of the code you see here isn't normally used nowadays.
Beginning with Python 1.6, many of these functions are implemented as
methods on the standard string object. They used to be implemented by
a built-in module called strop, but strop is now obsolete itself.

Public module variables:

whitespace -- a string containing all characters considered whitespace
lowercase -- a string containing all characters considered lowercase letters
uppercase -- a string containing all characters considered uppercase letters
letters -- a string containing all characters considered letters
digits -- a string containing all characters considered decimal digits
hexdigits -- a string containing all characters considered hexadecimal digits
octdigits -- a string containing all characters considered octal digits
punctuation -- a string containing all characters considered punctuation
printable -- a string containing all characters considered printable

"""
21
# Character-class strings, in the spirit of C's <ctype.h> predicates.

# Numeric classes
digits = '0123456789'
octdigits = '01234567'
hexdigits = digits + 'abcdef' + 'ABCDEF'

# Alphabetic classes.  The ascii_* names are bound to the same values as
# lowercase/uppercase at this point.
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
ascii_lowercase = lowercase
ascii_uppercase = uppercase
ascii_letters = ascii_lowercase + ascii_uppercase

# Everything else
whitespace = ' \t\n\r\v\f'
punctuation = """!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""

# printable is the concatenation of the four classes below, in this order
printable = digits + letters + punctuation + whitespace
Guido van Rossumc6360141990-10-13 19:23:40 +000035
# Case conversion helpers
# _idmap is the identity table: the 256 characters chr(0)..chr(255) in order.
# The separator goes through str() to convert the Unicode literal in case
# of -U.
# Note that Cookie.py bogusly uses _idmap :(
_idmap = str('').join(map(chr, xrange(256)))
Guido van Rossumc6360141990-10-13 19:23:40 +000042
Barry Warsaw8bee7612004-08-25 02:22:30 +000043# Functions which aren't available as string methods.
44
# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
# See also regsub.capwords().
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Break s into words with s.split(sep), capitalize every word with
    capitalize, and glue the results back together with sep (or with
    a single space when sep is None).  With the default sep, runs of
    whitespace characters are therefore replaced by a single space.

    """
    # Resolve the joining method first, then build the word list.
    glue = (sep or ' ').join
    words = []
    for word in s.split(sep):
        words.append(word.capitalize())
    return glue(words)
57
58
# Construct a translation string
_idmapL = None
def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Return a translation table (a string of 256 bytes long)
    suitable for use in string.translate.  Each character of frm is
    mapped to the character at the same position in to; every other
    position keeps the character found in the identity table _idmap.
    The strings frm and to must be of the same length.

    Raises ValueError if frm and to differ in length.

    """
    if len(fromstr) != len(tostr):
        # Call-form raise, matching the other raise statements in this file.
        raise ValueError("maketrans arguments must have same length")
    global _idmapL
    if not _idmapL:
        # Cache the identity table as a list; every call works on a copy.
        _idmapL = list(_idmap)
    table = _idmapL[:]
    # Convert all source characters to ordinals before touching the table.
    codes = [ord(ch) for ch in fromstr]
    for code, replacement in zip(codes, tostr):
        table[code] = replacement
    return ''.join(table)
79
80
81
82import re as _re
83
class Template(unicode):
    """A string class for supporting $-substitutions.

    The template text is the string itself.  Applying the % operator
    with a mapping returns the text with each placeholder replaced:

      $$             becomes a single $
      $identifier    becomes unicode(mapping['identifier'])
      ${identifier}  becomes unicode(mapping['identifier'])

    Any other $ raises ValueError.  A lookup error raised by the mapping
    for an unknown name is not caught here.
    """
    __slots__ = []

    # Search for $$, $identifier, ${identifier}, and any bare $'s
    pattern = _re.compile(r"""
# Match exactly two $'s -- this is the escape sequence
(?P<escaped>\${2})|
# Match a $ followed by a Python identifier
\$(?P<named>[_a-z][_a-z0-9]*)|
# Match a $ followed by a brace delimited identifier
\${(?P<braced>[_a-z][_a-z0-9]*)}|
# Match any other $'s
(?P<bogus>\$)
""", _re.IGNORECASE | _re.VERBOSE)

    def __mod__(self, mapping):
        """Return the template text with placeholders filled from mapping."""
        def convert(mo):
            # Each match comes from exactly one alternative of the pattern.
            if mo.group('escaped') is not None:
                return '$'
            if mo.group('bogus') is not None:
                raise ValueError('Invalid placeholder at index %d' %
                                 mo.start('bogus'))
            key = mo.group('named') or mo.group('braced')
            return unicode(mapping[key])
        return self.pattern.sub(convert, self)
111
112
class SafeTemplate(Template):
    """A string class for supporting $-substitutions.

    This class is 'safe' in the sense that you will never get KeyErrors if
    there are placeholders missing from the interpolation dictionary.  In
    that case, the placeholder is left in the result exactly as written
    ($name or ${name}).  A bare $ still raises ValueError.
    """
    __slots__ = []

    def __mod__(self, mapping):
        """Return the template text, substituting only the names found."""
        def convert(mo):
            if mo.group('escaped') is not None:
                return '$'
            if mo.group('bogus') is not None:
                raise ValueError('Invalid placeholder at index %d' %
                                 mo.start('bogus'))
            # Work out the key and the text to fall back on, which
            # reproduces the placeholder in the form it was written.
            key = mo.group('named')
            if key is not None:
                fallback = '$' + key
            else:
                key = mo.group('braced')
                fallback = '${' + key + '}'
            try:
                return unicode(mapping[key])
            except KeyError:
                return fallback
        return self.pattern.sub(convert, self)
142
143
144
145# NOTE: Everything below here is deprecated. Use string methods instead.
146# This stuff will go away in Python 3.0.
147
# Backward compatible names for exceptions: all four are ValueError itself.
index_error = atoi_error = atof_error = atol_error = ValueError
153
# Convert UPPER CASE letters to lower case
def lower(s):
    """lower(s) -> string

    Return the result of s.lower(): a copy of the string s
    converted to lowercase.

    """
    lowered = s.lower()
    return lowered
Guido van Rossumc6360141990-10-13 19:23:40 +0000162
# Convert lower case letters to UPPER CASE
def upper(s):
    """upper(s) -> string

    Return the result of s.upper(): a copy of the string s
    converted to uppercase.

    """
    uppered = s.upper()
    return uppered
Guido van Rossumc6360141990-10-13 19:23:40 +0000171
# Swap lower case letters and UPPER CASE
def swapcase(s):
    """swapcase(s) -> string

    Return the result of s.swapcase(): a copy of the string s with
    upper case characters converted to lowercase and vice versa.

    """
    swapped = s.swapcase()
    return swapped
Guido van Rossumc6360141990-10-13 19:23:40 +0000181
# Strip leading and trailing whitespace (or the given characters)
def strip(s, chars=None):
    """strip(s [,chars]) -> string

    Return the result of s.strip(chars): a copy of the string s with
    leading and trailing whitespace removed.
    If chars is given and not None, remove characters in chars instead.
    If chars is unicode, s will be converted to unicode before stripping.

    """
    stripped = s.strip(chars)
    return stripped
Guido van Rossumc6360141990-10-13 19:23:40 +0000193
# Strip leading whitespace (or the given characters)
def lstrip(s, chars=None):
    """lstrip(s [,chars]) -> string

    Return the result of s.lstrip(chars): a copy of the string s with
    leading whitespace removed.
    If chars is given and not None, remove characters in chars instead.

    """
    stripped = s.lstrip(chars)
    return stripped
Guido van Rossum306a8a61996-08-08 18:40:59 +0000203
# Strip trailing whitespace (or the given characters)
def rstrip(s, chars=None):
    """rstrip(s [,chars]) -> string

    Return the result of s.rstrip(chars): a copy of the string s with
    trailing whitespace removed.
    If chars is given and not None, remove characters in chars instead.

    """
    stripped = s.rstrip(chars)
    return stripped
Guido van Rossum306a8a61996-08-08 18:40:59 +0000213
214
# Split a string into a list of words
def split(s, sep=None, maxsplit=-1):
    """split(s [,sep [,maxsplit]]) -> list of strings

    Return s.split(sep, maxsplit): a list of the words in the string
    s, using sep as the delimiter string.  If maxsplit is given,
    splits at no more than maxsplit places (resulting in at most
    maxsplit+1 words).  If sep is not specified, any whitespace
    string is a separator.

    (split and splitfields are synonymous)

    """
    words = s.split(sep, maxsplit)
    return words

# splitfields is the same function object under an older name
splitfields = split
Guido van Rossumfac38b71991-04-07 13:42:19 +0000229
# Split a string into a list of words, working from the right
def rsplit(s, sep=None, maxsplit=-1):
    """rsplit(s [,sep [,maxsplit]]) -> list of strings

    Return s.rsplit(sep, maxsplit): a list of the words in the string
    s, using sep as the delimiter string, starting at the end of the
    string and working to the front.  If maxsplit is given, at most
    maxsplit splits are done.  If sep is not specified or is None, any
    whitespace string is a separator.
    """
    words = s.rsplit(sep, maxsplit)
    return words
241
# Join fields with optional separator
def join(words, sep = ' '):
    """join(list [,sep]) -> string

    Return sep.join(list): a string composed of the words in list,
    with intervening occurrences of sep.  The default separator is a
    single space.

    (joinfields and join are synonymous)

    """
    joined = sep.join(words)
    return joined

# joinfields is the same function object under an older name
joinfields = join
255
# Find substring, raise exception if not found
def index(s, *args):
    """index(s, sub [,start [,end]]) -> int

    Return s.index(sub [,start [,end]]).  Like find but raises
    ValueError when the substring is not found.

    """
    locate = s.index
    return locate(*args)
Guido van Rossumd3166071993-05-24 14:16:22 +0000264
# Find last substring, raise exception if not found
def rindex(s, *args):
    """rindex(s, sub [,start [,end]]) -> int

    Return s.rindex(sub [,start [,end]]).  Like rfind but raises
    ValueError when the substring is not found.

    """
    locate = s.rindex
    return locate(*args)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000273
# Count non-overlapping occurrences of substring
def count(s, *args):
    """count(s, sub[, start[,end]]) -> int

    Return s.count(sub[, start[,end]]): the number of occurrences of
    substring sub in string s[start:end].  Optional arguments start
    and end are interpreted as in slice notation.

    """
    tally = s.count
    return tally(*args)
Guido van Rossume65cce51993-11-08 15:05:21 +0000284
# Find substring, return -1 if not found
def find(s, *args):
    """find(s, sub [,start [,end]]) -> int

    Return the lowest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    """
    # All positional arguments after s are forwarded unchanged to s.find.
    return s.find(*args)
Guido van Rossumc6360141990-10-13 19:23:40 +0000297
# Find last substring, return -1 if not found
def rfind(s, *args):
    """rfind(s, sub [,start [,end]]) -> int

    Return the highest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    """
    # All positional arguments after s are forwarded unchanged to s.rfind.
    return s.rfind(*args)
Guido van Rossume65cce51993-11-08 15:05:21 +0000310
# Module-level aliases of the builtin converters used by atof, atoi and
# atol -- for a bit of speed
_float, _int, _long = float, int, long
Guido van Rossumd0753e21997-12-10 22:59:55 +0000315
# Convert string to float
def atof(s):
    """atof(s) -> float

    Return _float(s): the floating point number represented by the
    string s.

    """
    value = _float(s)
    return value
324
Guido van Rossume61fa0a1993-10-22 13:56:35 +0000325
# Convert string to integer
def atoi(s, base=10):
    """atoi(s [,base]) -> int

    Return _int(s, base): the integer represented by the string s in
    the given base, which defaults to 10.  The string s must consist
    of one or more digits, possibly preceded by a sign.  If base is
    0, it is chosen from the leading characters of s, 0 for octal, 0x
    or 0X for hexadecimal.  If base is 16, a preceding 0x or 0X is
    accepted.

    """
    value = _int(s, base)
    return value
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000339
Guido van Rossumc6360141990-10-13 19:23:40 +0000340
# Convert string to long integer
def atol(s, base=10):
    """atol(s [,base]) -> long

    Return _long(s, base): the long integer represented by the string
    s in the given base, which defaults to 10.  The string s must
    consist of one or more digits, possibly preceded by a sign.  If
    base is 0, it is chosen from the leading characters of s, 0 for
    octal, 0x or 0X for hexadecimal.  If base is 16, a preceding 0x
    or 0X is accepted.  A trailing L or l is not accepted, unless
    base is 0.

    """
    value = _long(s, base)
    return value
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000355
Guido van Rossume61fa0a1993-10-22 13:56:35 +0000356
# Left-justify a string
def ljust(s, width, *args):
    """ljust(s, width[, fillchar]) -> string

    Return s.ljust(width[, fillchar]): a left-justified version of s,
    in a field of the specified width, padded with spaces as needed.
    The string is never truncated.  If specified the fillchar is used
    instead of spaces.

    """
    pad = s.ljust
    return pad(width, *args)
Guido van Rossumc6360141990-10-13 19:23:40 +0000367
# Right-justify a string
def rjust(s, width, *args):
    """rjust(s, width[, fillchar]) -> string

    Return s.rjust(width[, fillchar]): a right-justified version of
    s, in a field of the specified width, padded with spaces as
    needed.  The string is never truncated.  If specified the
    fillchar is used instead of spaces.

    """
    pad = s.rjust
    return pad(width, *args)
Guido van Rossumc6360141990-10-13 19:23:40 +0000378
# Center a string
def center(s, width, *args):
    """center(s, width[, fillchar]) -> string

    Return s.center(width[, fillchar]): a centered version of s, in a
    field of the specified width, padded with spaces as needed.  The
    string is never truncated.  If specified the fillchar is used
    instead of spaces.

    """
    pad = s.center
    return pad(width, *args)
Guido van Rossumc6360141990-10-13 19:23:40 +0000389
# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
# Decadent feature: the argument may be a string or a number
# (Use of this is deprecated; it should be a string as with ljust c.s.)
def zfill(x, width):
    """zfill(x, width) -> string

    Pad a numeric string x with zeros on the left, to fill a field
    of the specified width.  The string x is never truncated.  An x
    that is not a string is first converted with repr().

    """
    if isinstance(x, basestring):
        text = x
    else:
        text = repr(x)
    return text.zfill(width)
Guido van Rossum6ff2e901992-03-27 15:13:31 +0000403
# Expand tabs in a string.
# Doesn't take non-printing chars into account, but does understand \n.
def expandtabs(s, tabsize=8):
    """expandtabs(s [,tabsize]) -> string

    Return s.expandtabs(tabsize): a copy of the string s with all tab
    characters replaced by the appropriate number of spaces,
    depending on the current column, and the tabsize (default 8).

    """
    expanded = s.expandtabs(tabsize)
    return expanded
Guido van Rossum2db91351992-10-18 17:09:59 +0000415
# Character translation through look-up table.
def translate(s, table, deletions=""):
    """translate(s,table [,deletions]) -> string

    Return a copy of the string s, where all characters occurring
    in the optional argument deletions are removed, and the
    remaining characters have been mapped through the given
    translation table, which must be a string of length 256.  The
    deletions argument is not allowed for Unicode strings.

    """
    if not deletions:
        # Add s[:0] so that if s is Unicode and table is an 8-bit string,
        # table is converted to Unicode.  This means that table *cannot*
        # be a dictionary -- for that feature, use u.translate() directly.
        widened = table + s[:0]
        return s.translate(widened)
    return s.translate(table, deletions)
Guido van Rossum2db91351992-10-18 17:09:59 +0000434
# Capitalize a string, e.g. "aBc  dEf" -> "Abc  def".
def capitalize(s):
    """capitalize(s) -> string

    Return s.capitalize(): a copy of the string s with only its
    first character capitalized.

    """
    capitalized = s.capitalize()
    return capitalized
Guido van Rossum8775d8b1996-06-11 18:43:00 +0000444
# Substring replacement (global)
def replace(s, old, new, maxsplit=-1):
    """replace (s, old, new[, maxsplit]) -> string

    Return s.replace(old, new, maxsplit): a copy of string s with all
    occurrences of substring old replaced by new.  If the optional
    argument maxsplit is given, only the first maxsplit occurrences
    are replaced.

    """
    replaced = s.replace(old, new, maxsplit)
    return replaced
Guido van Rossum1eb9a811997-03-25 16:50:31 +0000455
456
# Try importing optional built-in module "strop" -- if it exists,
# it redefines some string operations that are 100-1000 times faster.
# It also defines values for whitespace, lowercase and uppercase
# that match <ctype.h>'s definitions.
#
# On success the import rebinds the module-level names maketrans,
# lowercase, uppercase and whitespace, and letters is rebuilt from the
# imported lowercase and uppercase.  No other name is reassigned in this
# block.  If strop cannot be imported, the definitions made earlier in
# this file stay in effect.

try:
    from strop import maketrans, lowercase, uppercase, whitespace
    letters = lowercase + uppercase
except ImportError:
    pass # Use the original versions