blob: c3e6f6f9ed49f5ac35e8d12e24e77103f8f8be98 [file] [log] [blame]
# module 'string' -- A collection of string operations
2
3# Warning: most of the code you see here isn't normally used nowadays. With
4# Python 1.6, many of these functions are implemented as methods on the
5# standard string object. They used to be implemented by a built-in module
6# called strop, but strop is now obsolete itself.
7
8"""Common string manipulations.
9
10Public module variables:
11
12whitespace -- a string containing all characters considered whitespace
13lowercase -- a string containing all characters considered lowercase letters
14uppercase -- a string containing all characters considered uppercase letters
15letters -- a string containing all characters considered letters
16digits -- a string containing all characters considered decimal digits
17hexdigits -- a string containing all characters considered hexadecimal digits
18octdigits -- a string containing all characters considered octal digits
19
20"""
21
# ctype-style character classes, each spelled out as a plain string
whitespace = ' \t\n\r\v\f'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
octdigits = '01234567'
# The decimal digits are the octal digits plus 8 and 9
digits = octdigits + '89'
# Hexadecimal digits: decimal digits, then a-f in both cases
hexdigits = digits + 'abcdefABCDEF'
letters = lowercase + uppercase
30
# Case conversion helpers
# _idmap is the 256-character identity table: _idmap[n] == chr(n)
_idmap = ''.join(map(chr, range(256)))
35
# Backward compatible names for exceptions: every alias is ValueError itself
index_error = atoi_error = atof_error = atol_error = ValueError
41
# Convert UPPER CASE letters to lower case
def lower(s):
    """lower(s) -> string

    Return the result of s.lower(): a copy of s in which the
    uppercase letters have been converted to lowercase.

    """
    lowered = s.lower()
    return lowered
50
# Convert lower case letters to UPPER CASE
def upper(s):
    """upper(s) -> string

    Return the result of s.upper(): a copy of s in which the
    lowercase letters have been converted to uppercase.

    """
    raised = s.upper()
    return raised
59
# Exchange lower case letters and UPPER CASE letters
def swapcase(s):
    """swapcase(s) -> string

    Return the result of s.swapcase(): a copy of s in which
    uppercase characters become lowercase and lowercase
    characters become uppercase.

    """
    swapped = s.swapcase()
    return swapped
69
# Strip whitespace from both ends
def strip(s):
    """strip(s) -> string

    Return the result of s.strip(): a copy of s without its
    leading and trailing whitespace.

    """
    trimmed = s.strip()
    return trimmed
79
# Strip whitespace from the left end only
def lstrip(s):
    """lstrip(s) -> string

    Return the result of s.lstrip(): a copy of s without its
    leading whitespace.

    """
    trimmed = s.lstrip()
    return trimmed
88
# Strip whitespace from the right end only
def rstrip(s):
    """rstrip(s) -> string

    Return the result of s.rstrip(): a copy of s without its
    trailing whitespace.

    """
    trimmed = s.rstrip()
    return trimmed
98
99
# Break a string into a list of words (whitespace-separated by default)
# NB: split(s) is NOT the same as splitfields(s, ' ')!
def split(s, sep=None, maxsplit=0):
    """split(str [,sep [,maxsplit]]) -> list of strings

    Return the list of words produced by s.split(sep, maxsplit).
    When sep is not specified, any whitespace string is a separator;
    otherwise sep is the delimiter string.  maxsplit limits the
    number of splits and defaults to 0.

    (split and splitfields are synonymous)

    """
    # NOTE(review): how a maxsplit of 0 is interpreted is decided entirely
    # by s.split -- confirm against the interpreter version in use.
    words = s.split(sep, maxsplit)
    return words
splitfields = split
115
# Concatenate words, placing a separator between neighbours
def join(words, sep=' '):
    """join(list [,sep]) -> string

    Return the string built by sep.join(words): the words of the
    list with sep inserted between each pair of neighbours.  When
    sep is omitted a single space is used.

    (joinfields and join are synonymous)

    """
    joined = sep.join(words)
    return joined
joinfields = join
129
# for a little bit of speed: keep the built-in apply() reachable under a
# module-level name
_apply = apply
132
# Find substring, raise exception if not found
def index(s, *args):
    """index(s, sub [,start [,end]]) -> int

    Return the value of s.index(sub [,start [,end]]).  Like find, but
    ValueError is raised when the substring is not found.  sub and the
    optional start and end are handed to the string method unchanged.

    """
    # Extended call syntax forwards the arguments without needing apply().
    return s.index(*args)
141
# Find last substring, raise exception if not found
def rindex(s, *args):
    """rindex(s, sub [,start [,end]]) -> int

    Return the value of s.rindex(sub [,start [,end]]).  Like rfind, but
    ValueError is raised when the substring is not found.  sub and the
    optional start and end are handed to the string method unchanged.

    """
    # Extended call syntax forwards the arguments without needing apply().
    return s.rindex(*args)
150
# Count non-overlapping occurrences of substring
def count(s, *args):
    """count(s, sub[, start[,end]]) -> int

    Return the number of occurrences of substring sub in string
    s[start:end], as computed by s.count.  Optional arguments start
    and end are interpreted as in slice notation.

    """
    # Extended call syntax forwards the arguments without needing apply().
    return s.count(*args)
161
# Find substring, return -1 if not found
def find(s, *args):
    """find(s, sub [,start [,end]]) -> int

    Return the lowest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    """
    # Extended call syntax forwards the arguments without needing apply().
    return s.find(*args)
174
# Find last substring, return -1 if not found
def rfind(s, *args):
    """rfind(s, sub [,start [,end]]) -> int

    Return the highest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    """
    # Extended call syntax forwards the arguments without needing apply().
    return s.rfind(*args)
187
# for a bit of speed: module-level names for the built-in conversion
# functions, plus the type object of an ordinary string literal
_float = float
_int = int
_long = long
_StringType = type('')
193
# Convert string to float
def atof(s):
    """atof(s) -> float

    Return the floating point number that the string s represents.
    TypeError is raised when s is not of the ordinary string type.

    """
    # Reject anything that is not exactly a string before converting.
    if type(s) != _StringType:
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    return _float(s)
206
# Convert string to integer
def atoi(*args):
    """atoi(s [,base]) -> int

    Return the integer that the string s represents in the given
    base (10 when omitted).  s must consist of one or more digits,
    possibly preceded by a sign.  With base 0 the base is chosen
    from the leading characters of s: 0 for octal, 0x or 0X for
    hexadecimal.  With base 16 a preceding 0x or 0X is accepted.

    TypeError is raised when no argument is given or when s is not
    of the ordinary string type.

    """
    if not args:
        raise TypeError('function requires at least 1 argument: %d given' %
                        len(args))
    s = args[0]
    if type(s) != _StringType:
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    # Too many arguments are deliberately left for int() to reject: its
    # message differs from the historic one but the error type is the same.
    return _int(*args)
232
233
# Convert string to long integer
def atol(*args):
    """atol(s [,base]) -> long

    Return the long integer that the string s represents in the
    given base (10 when omitted).  s must consist of one or more
    digits, possibly preceded by a sign.  With base 0 the base is
    chosen from the leading characters of s: 0 for octal, 0x or 0X
    for hexadecimal.  With base 16 a preceding 0x or 0X is accepted.
    A trailing L or l is not accepted, unless base is 0.

    TypeError is raised when no argument is given or when s is not
    of the ordinary string type.

    """
    if not args:
        raise TypeError('function requires at least 1 argument: %d given' %
                        len(args))
    s = args[0]
    if type(s) != _StringType:
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    # Too many arguments are deliberately left for long() to reject: its
    # message differs from the historic one but the error type is the same.
    return _long(*args)
260
261
# Left-justify a string
def ljust(s, width):
    """ljust(s, width) -> string

    Return s followed by as many spaces as are needed to reach the
    given field width.  A string that already fills the field is
    returned as is; nothing is ever cut off.

    """
    shortfall = width - len(s)
    if shortfall <= 0:
        return s
    padding = ' ' * shortfall
    return s + padding
274
# Right-justify a string
def rjust(s, width):
    """rjust(s, width) -> string

    Return s preceded by as many spaces as are needed to reach the
    given field width.  A string that already fills the field is
    returned as is; nothing is ever cut off.

    """
    shortfall = width - len(s)
    if shortfall <= 0:
        return s
    padding = ' ' * shortfall
    return padding + s
287
# Center a string
def center(s, width):
    """center(s, width) -> string

    Return a centered version of s, in a field of the specified
    width, padded with spaces as needed.  The string is never
    truncated: when s already fills the field it is returned as is.

    When the padding cannot be split evenly, the extra space goes on
    the left if width is odd and on the right if width is even.

    """
    n = width - len(s)
    if n <= 0:
        return s
    # divmod gives an integer half regardless of how the "/" operator
    # divides, which a plain n/2 does not guarantee.
    half, odd = divmod(n, 2)
    if odd and width % 2:
        # This ensures that center(center(s, i), j) = center(s, j)
        half = half + 1
    return ' ' * half + s + ' ' * (n - half)
304
# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
# Decadent feature: the argument may be a string or a number
# (Use of this is deprecated; it should be a string as with ljust c.s.)
def zfill(x, width):
    """zfill(x, width) -> string

    Pad a numeric string x with zeros on the left, to fill a field
    of the specified width.  A leading '-' or '+' sign stays in front
    of the inserted zeros.  The string x is never truncated.  An x
    that is not a string is first converted with repr().

    """
    if isinstance(x, type('')):
        s = x
    else:
        s = repr(x)
    n = len(s)
    if n >= width:
        return s
    sign = ''
    # Slice instead of indexing so that an empty string is simply padded
    # rather than raising IndexError.
    if s[:1] in ('-', '+'):
        sign, s = s[0], s[1:]
    return sign + '0' * (width - n) + s
323
# Expand tabs in a string.
# Doesn't take non-printing chars into account, but does understand \n.
def expandtabs(s, tabsize=8):
    """expandtabs(s [,tabsize]) -> string

    Return a copy of the string s with all tab characters replaced
    by the appropriate number of spaces, depending on the current
    column, and the tabsize (default 8).  The column count restarts
    after every newline.  With a tabsize of zero or less, tab
    characters are simply removed.

    """
    pieces = []
    column = 0
    for c in s:
        if c == '\t':
            if tabsize > 0:
                # Pad up to the next multiple of tabsize.
                c = ' ' * (tabsize - column % tabsize)
            else:
                # No tab stops: drop the tab instead of dividing by zero.
                c = ''
        pieces.append(c)
        if c == '\n':
            column = 0
        else:
            column = column + len(c)
    # One join at the end avoids rebuilding the result for every character.
    return ''.join(pieces)
343
# Character translation through look-up table.
def translate(s, table, deletions=""):
    """translate(s,table [,deletechars]) -> string

    Return the result of s.translate(table, deletions): a copy of s
    from which every character found in the optional deletechars
    argument has been dropped, the surviving characters being mapped
    through table, which must be a string of length 256.

    """
    translated = s.translate(table, deletions)
    return translated
355
# Capitalize a string, e.g. "aBc dEf" -> "Abc def".
def capitalize(s):
    """capitalize(s) -> string

    Return the result of s.capitalize(): a copy of s in which only
    the first character is capitalized.

    """
    capitalized = s.capitalize()
    return capitalized
365
# Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def".
# See also regsub.capwords().
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Split the argument into words with s.split(sep), capitalize each
    word using capitalize, and put the capitalized words back together
    using join.  The words are joined with sep when one is given and
    with a single space otherwise, so that without sep every run of
    whitespace characters ends up replaced by a single space.

    """
    words = map(capitalize, s.split(sep))
    glue = sep or ' '
    return join(words, glue)
378
# Construct a translation string
# _idmapL caches the identity table as a list of 256 one-character strings;
# it is filled in on the first call to maketrans.
_idmapL = None
def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Return a translation table (a string 256 characters long)
    suitable for use in string.translate: the character frm[i] is
    mapped to to[i] and every other character is mapped to itself.
    The strings frm and to must be of the same length, otherwise
    ValueError is raised.

    """
    global _idmapL
    if len(fromstr) != len(tostr):
        raise ValueError("maketrans arguments must have same length")
    if not _idmapL:
        # Build the identity table once; each call works on its own copy.
        _idmapL = list(map(chr, range(256)))
    table = _idmapL[:]
    # Plain index loop: pairs up frm[i] with to[i] without extra helpers.
    for i in range(len(fromstr)):
        table[ord(fromstr[i])] = tostr[i]
    return "".join(table)
399
# Substring replacement (global)
def replace(s, old, new, maxsplit=0):
    """replace(str, old, new[, maxsplit]) -> string

    Return the result of s.replace(old, new, maxsplit): a copy of
    string str in which occurrences of substring old are replaced by
    new.  If the optional argument maxsplit is given, only the first
    maxsplit occurrences are replaced.

    """
    # NOTE(review): what the default maxsplit of 0 means is decided by
    # s.replace -- confirm against the interpreter version in use.
    replaced = s.replace(old, new, maxsplit)
    return replaced
410
411
# XXX: transitional
#
# When string objects have no methods (probed here through 'upper'), the
# old string.py library has to be used instead; it relies on strop for many
# more things than just the few outlined below.
if not hasattr('', 'upper'):
    from stringold import *
421
# Try importing optional built-in module "strop" -- if it exists,
# it redefines some string operations that are 100-1000 times faster.
# It also defines values for whitespace, lowercase and uppercase
# that match <ctype.h>'s definitions.

try:
    from strop import maketrans, lowercase, uppercase, whitespace
except ImportError:
    pass # Use the original versions
else:
    # Recompute letters from the lowercase and uppercase just imported
    letters = lowercase + uppercase