blob: 56dcf6db61c12b3c1aa183893140fef00c08fde9 [file] [log] [blame]
Fred Drake295da241998-08-10 19:42:37 +00001\section{\module{string} ---
2 Common string operations.}
Fred Drakeb91e9341998-07-23 17:59:49 +00003\declaremodule{standard}{string}
4
5\modulesynopsis{Common string operations.}
6
Guido van Rossum5fdeeea1994-01-02 01:22:07 +00007
8This module defines some constants useful for checking character
Fred Drake6d2bdb61997-12-16 04:04:25 +00009classes and some useful string functions. See the module
Fred Drakecce10901998-03-17 06:33:25 +000010\module{re}\refstmodindex{re} for string functions based on regular
11expressions.
Guido van Rossum0bf4d891995-03-02 12:37:30 +000012
The constants defined in this module are:
Guido van Rossum5fdeeea1994-01-02 01:22:07 +000014
Guido van Rossum5fdeeea1994-01-02 01:22:07 +000015\begin{datadesc}{digits}
16 The string \code{'0123456789'}.
17\end{datadesc}
18
19\begin{datadesc}{hexdigits}
20 The string \code{'0123456789abcdefABCDEF'}.
21\end{datadesc}
22
23\begin{datadesc}{letters}
  The concatenation of the strings \code{lowercase} and
  \code{uppercase} described below.
Guido van Rossum5fdeeea1994-01-02 01:22:07 +000026\end{datadesc}
27
28\begin{datadesc}{lowercase}
29 A string containing all the characters that are considered lowercase
30 letters. On most systems this is the string
Guido van Rossum86751151995-02-28 17:14:32 +000031 \code{'abcdefghijklmnopqrstuvwxyz'}. Do not change its definition ---
Fred Drakecce10901998-03-17 06:33:25 +000032 the effect on the routines \function{upper()} and
33 \function{swapcase()} is undefined.
Guido van Rossum5fdeeea1994-01-02 01:22:07 +000034\end{datadesc}
35
36\begin{datadesc}{octdigits}
37 The string \code{'01234567'}.
38\end{datadesc}
39
40\begin{datadesc}{uppercase}
41 A string containing all the characters that are considered uppercase
42 letters. On most systems this is the string
Guido van Rossum86751151995-02-28 17:14:32 +000043 \code{'ABCDEFGHIJKLMNOPQRSTUVWXYZ'}. Do not change its definition ---
Fred Drakecce10901998-03-17 06:33:25 +000044 the effect on the routines \function{lower()} and
45 \function{swapcase()} is undefined.
Guido van Rossum5fdeeea1994-01-02 01:22:07 +000046\end{datadesc}
47
48\begin{datadesc}{whitespace}
49 A string containing all characters that are considered whitespace.
50 On most systems this includes the characters space, tab, linefeed,
Guido van Rossum86751151995-02-28 17:14:32 +000051 return, formfeed, and vertical tab. Do not change its definition ---
Fred Drakecce10901998-03-17 06:33:25 +000052 the effect on the routines \function{strip()} and \function{split()}
53 is undefined.
Guido van Rossum5fdeeea1994-01-02 01:22:07 +000054\end{datadesc}
55
Guido van Rossum0bf4d891995-03-02 12:37:30 +000056The functions defined in this module are:
Guido van Rossum5fdeeea1994-01-02 01:22:07 +000057
Guido van Rossum5fdeeea1994-01-02 01:22:07 +000058
59\begin{funcdesc}{atof}{s}
Fred Drakee8489761998-12-21 18:56:13 +000060 Convert a string to a floating point number. The string must have
61 the standard syntax for a floating point literal in Python,
Fred Drake951eea41999-02-18 03:49:20 +000062 optionally preceded by a sign (\samp{+} or \samp{-}), or be
63 \code{'NaN'} (case insensitive) to indicate the IEEE ``Not a
  Number'' value. Note that this behaves identically to the built-in
65 function \function{float()}\bifuncindex{float} when passed a
66 string.
Guido van Rossum5fdeeea1994-01-02 01:22:07 +000067\end{funcdesc}
68
Fred Drakecce10901998-03-17 06:33:25 +000069\begin{funcdesc}{atoi}{s\optional{, base}}
Fred Drakee8489761998-12-21 18:56:13 +000070 Convert string \var{s} to an integer in the given \var{base}. The
71 string must consist of one or more digits, optionally preceded by a
72 sign (\samp{+} or \samp{-}). The \var{base} defaults to 10. If it
73 is 0, a default base is chosen depending on the leading characters
74 of the string (after stripping the sign): \samp{0x} or \samp{0X}
75 means 16, \samp{0} means 8, anything else means 10. If \var{base}
76 is 16, a leading \samp{0x} or \samp{0X} is always accepted. Note
77 that when invoked without \var{base} or with \var{base} set to 10,
  this behaves identically to the built-in function \function{int()}
79 when passed a string. (Also note: for a more flexible
80 interpretation of numeric literals, use the built-in function
81 \function{eval()}\bifuncindex{eval}.)
Guido van Rossum5fdeeea1994-01-02 01:22:07 +000082\end{funcdesc}
83
Fred Drakecce10901998-03-17 06:33:25 +000084\begin{funcdesc}{atol}{s\optional{, base}}
Fred Drakee8489761998-12-21 18:56:13 +000085 Convert string \var{s} to a long integer in the given \var{base}.
86 The string must consist of one or more digits, optionally preceded
87 by a sign (\samp{+} or \samp{-}). The \var{base} argument has the
88 same meaning as for \function{atoi()}. A trailing \samp{l} or
89 \samp{L} is not allowed, except if the base is 0. Note that when
90 invoked without \var{base} or with \var{base} set to 10, this
  behaves identically to the built-in function
92 \function{long()}\bifuncindex{long} when passed a string.
Guido van Rossum5fdeeea1994-01-02 01:22:07 +000093\end{funcdesc}
94
Guido van Rossume5e55d71996-08-09 21:44:51 +000095\begin{funcdesc}{capitalize}{word}
Fred Drakee8489761998-12-21 18:56:13 +000096 Capitalize the first character of the argument.
Guido van Rossume5e55d71996-08-09 21:44:51 +000097\end{funcdesc}
98
99\begin{funcdesc}{capwords}{s}
Fred Drakee8489761998-12-21 18:56:13 +0000100 Split the argument into words using \function{split()}, capitalize
101 each word using \function{capitalize()}, and join the capitalized
102 words using \function{join()}. Note that this replaces runs of
103 whitespace characters by a single space, and removes leading and
104 trailing whitespace.
Guido van Rossume5e55d71996-08-09 21:44:51 +0000105\end{funcdesc}
106
\begin{funcdesc}{expandtabs}{s\optional{, tabsize}}
Fred Drakee8489761998-12-21 18:56:13 +0000108 Expand tabs in a string, i.e.\ replace them by one or more spaces,
109 depending on the current column and the given tab size. The column
110 number is reset to zero after each newline occurring in the string.
111 This doesn't understand other non-printing characters or escape
Guido van Rossum9700e9b1999-01-25 22:31:53 +0000112 sequences. The tab size defaults to 8.
Guido van Rossum5fdeeea1994-01-02 01:22:07 +0000113\end{funcdesc}
114
\begin{funcdesc}{find}{s, sub\optional{, start\optional{, end}}}
Fred Drakee8489761998-12-21 18:56:13 +0000116 Return the lowest index in \var{s} where the substring \var{sub} is
117 found such that \var{sub} is wholly contained in
118 \code{\var{s}[\var{start}:\var{end}]}. Return \code{-1} on failure.
  Defaults for \var{start} and \var{end} and interpretation of
  negative values are the same as for slices.
Guido van Rossum5fdeeea1994-01-02 01:22:07 +0000121\end{funcdesc}
122
Fred Drakecce10901998-03-17 06:33:25 +0000123\begin{funcdesc}{rfind}{s, sub\optional{, start\optional{, end}}}
Fred Drakee8489761998-12-21 18:56:13 +0000124 Like \function{find()} but find the highest index.
Guido van Rossum5fdeeea1994-01-02 01:22:07 +0000125\end{funcdesc}
126
Fred Drakecce10901998-03-17 06:33:25 +0000127\begin{funcdesc}{index}{s, sub\optional{, start\optional{, end}}}
Fred Drakee8489761998-12-21 18:56:13 +0000128 Like \function{find()} but raise \exception{ValueError} when the
129 substring is not found.
Guido van Rossum5fdeeea1994-01-02 01:22:07 +0000130\end{funcdesc}
131
Fred Drakecce10901998-03-17 06:33:25 +0000132\begin{funcdesc}{rindex}{s, sub\optional{, start\optional{, end}}}
Fred Drakee8489761998-12-21 18:56:13 +0000133 Like \function{rfind()} but raise \exception{ValueError} when the
134 substring is not found.
Guido van Rossum5fdeeea1994-01-02 01:22:07 +0000135\end{funcdesc}
136
Fred Drakecce10901998-03-17 06:33:25 +0000137\begin{funcdesc}{count}{s, sub\optional{, start\optional{, end}}}
Fred Drakee8489761998-12-21 18:56:13 +0000138 Return the number of (non-overlapping) occurrences of substring
139 \var{sub} in string \code{\var{s}[\var{start}:\var{end}]}.
  Defaults for \var{start} and \var{end} and interpretation of
  negative values are the same as for slices.
Guido van Rossumab3a2501994-08-01 12:18:36 +0000142\end{funcdesc}
143
Guido van Rossum5fdeeea1994-01-02 01:22:07 +0000144\begin{funcdesc}{lower}{s}
Fred Drakee8489761998-12-21 18:56:13 +0000145 Return a copy of \var{s}, but with upper case letters converted to
146 lower case.
Guido van Rossum5fdeeea1994-01-02 01:22:07 +0000147\end{funcdesc}
148
Guido van Rossumf4d0d571996-07-30 18:23:05 +0000149\begin{funcdesc}{maketrans}{from, to}
Fred Drakee8489761998-12-21 18:56:13 +0000150 Return a translation table suitable for passing to
151 \function{translate()} or \function{regex.compile()}, that will map
152 each character in \var{from} into the character at the same position
153 in \var{to}; \var{from} and \var{to} must have the same length.
Guido van Rossuma3eebe61998-06-11 16:03:30 +0000154
Fred Drakee8489761998-12-21 18:56:13 +0000155 \strong{Warning:} don't use strings derived from \code{lowercase}
156 and \code{uppercase} as arguments; in some locales, these don't have
157 the same length. For case conversions, always use
158 \function{lower()} and \function{upper()}.
Guido van Rossumf4d0d571996-07-30 18:23:05 +0000159\end{funcdesc}
160
Fred Drakecce10901998-03-17 06:33:25 +0000161\begin{funcdesc}{split}{s\optional{, sep\optional{, maxsplit}}}
Fred Drakee8489761998-12-21 18:56:13 +0000162 Return a list of the words of the string \var{s}. If the optional
163 second argument \var{sep} is absent or \code{None}, the words are
164 separated by arbitrary strings of whitespace characters (space, tab,
165 newline, return, formfeed). If the second argument \var{sep} is
166 present and not \code{None}, it specifies a string to be used as the
  word separator. The returned list will then have one more item
168 than the number of non-overlapping occurrences of the separator in
169 the string. The optional third argument \var{maxsplit} defaults to
  0. If it is nonzero, at most \var{maxsplit} splits occur,
171 and the remainder of the string is returned as the final element of
172 the list (thus, the list will have at most \code{\var{maxsplit}+1}
173 elements).
Guido van Rossum5fdeeea1994-01-02 01:22:07 +0000174\end{funcdesc}
175
Fred Drakecce10901998-03-17 06:33:25 +0000176\begin{funcdesc}{splitfields}{s\optional{, sep\optional{, maxsplit}}}
Fred Drakee8489761998-12-21 18:56:13 +0000177 This function behaves identically to \function{split()}. (In the
178 past, \function{split()} was only used with one argument, while
179 \function{splitfields()} was only used with two arguments.)
Guido van Rossum5fdeeea1994-01-02 01:22:07 +0000180\end{funcdesc}
181
Fred Drakecce10901998-03-17 06:33:25 +0000182\begin{funcdesc}{join}{words\optional{, sep}}
Fred Drakee8489761998-12-21 18:56:13 +0000183 Concatenate a list or tuple of words with intervening occurrences of
184 \var{sep}. The default value for \var{sep} is a single space
185 character. It is always true that
186 \samp{string.join(string.split(\var{s}, \var{sep}), \var{sep})}
187 equals \var{s}.
Guido van Rossume5e55d71996-08-09 21:44:51 +0000188\end{funcdesc}
189
Fred Drakecce10901998-03-17 06:33:25 +0000190\begin{funcdesc}{joinfields}{words\optional{, sep}}
  This function behaves identically to \function{join()}. (In the past,
192 \function{join()} was only used with one argument, while
193 \function{joinfields()} was only used with two arguments.)
Guido van Rossume5e55d71996-08-09 21:44:51 +0000194\end{funcdesc}
195
196\begin{funcdesc}{lstrip}{s}
Fred Drakee8489761998-12-21 18:56:13 +0000197 Return a copy of \var{s} but without leading whitespace characters.
Guido van Rossume5e55d71996-08-09 21:44:51 +0000198\end{funcdesc}
199
200\begin{funcdesc}{rstrip}{s}
Fred Drakee8489761998-12-21 18:56:13 +0000201 Return a copy of \var{s} but without trailing whitespace
202 characters.
Guido van Rossum5fdeeea1994-01-02 01:22:07 +0000203\end{funcdesc}
204
205\begin{funcdesc}{strip}{s}
Fred Drakee8489761998-12-21 18:56:13 +0000206 Return a copy of \var{s} without leading or trailing whitespace.
Guido van Rossum5fdeeea1994-01-02 01:22:07 +0000207\end{funcdesc}
208
209\begin{funcdesc}{swapcase}{s}
Fred Drakee8489761998-12-21 18:56:13 +0000210 Return a copy of \var{s}, but with lower case letters
211 converted to upper case and vice versa.
Guido van Rossum5fdeeea1994-01-02 01:22:07 +0000212\end{funcdesc}
213
Guido van Rossumf4d0d571996-07-30 18:23:05 +0000214\begin{funcdesc}{translate}{s, table\optional{, deletechars}}
Fred Drakee8489761998-12-21 18:56:13 +0000215 Delete all characters from \var{s} that are in \var{deletechars} (if
216 present), and then translate the characters using \var{table}, which
217 must be a 256-character string giving the translation for each
218 character value, indexed by its ordinal.
Guido van Rossumf65f2781995-09-13 17:37:21 +0000219\end{funcdesc}
220
Guido van Rossum5fdeeea1994-01-02 01:22:07 +0000221\begin{funcdesc}{upper}{s}
Fred Drakee8489761998-12-21 18:56:13 +0000222 Return a copy of \var{s}, but with lower case letters converted to
223 upper case.
Guido van Rossum5fdeeea1994-01-02 01:22:07 +0000224\end{funcdesc}
225
Fred Drakecce10901998-03-17 06:33:25 +0000226\begin{funcdesc}{ljust}{s, width}
227\funcline{rjust}{s, width}
228\funcline{center}{s, width}
Fred Drakee8489761998-12-21 18:56:13 +0000229 These functions respectively left-justify, right-justify and center
230 a string in a field of given width. They return a string that is at
231 least \var{width} characters wide, created by padding the string
232 \var{s} with spaces until the given width on the right, left or both
233 sides. The string is never truncated.
Guido van Rossum5fdeeea1994-01-02 01:22:07 +0000234\end{funcdesc}
235
Fred Drakecce10901998-03-17 06:33:25 +0000236\begin{funcdesc}{zfill}{s, width}
Fred Drakee8489761998-12-21 18:56:13 +0000237 Pad a numeric string on the left with zero digits until the given
238 width is reached. Strings starting with a sign are handled
239 correctly.
Guido van Rossum5fdeeea1994-01-02 01:22:07 +0000240\end{funcdesc}
Guido van Rossum0bf4d891995-03-02 12:37:30 +0000241
Guido van Rossum740eb821997-04-02 05:56:16 +0000242\begin{funcdesc}{replace}{str, old, new\optional{, maxsplit}}
Fred Drakee8489761998-12-21 18:56:13 +0000243 Return a copy of string \var{str} with all occurrences of substring
244 \var{old} replaced by \var{new}. If the optional argument
245 \var{maxsplit} is given, the first \var{maxsplit} occurrences are
246 replaced.
Guido van Rossumc8a80cd1997-03-25 16:41:31 +0000247\end{funcdesc}
248
Guido van Rossum0bf4d891995-03-02 12:37:30 +0000249This module is implemented in Python. Much of its functionality has
Fred Drakecce10901998-03-17 06:33:25 +0000250been reimplemented in the built-in module
251\module{strop}\refbimodindex{strop}. However, you
Guido van Rossum0bf4d891995-03-02 12:37:30 +0000252should \emph{never} import the latter module directly. When
Fred Drakecce10901998-03-17 06:33:25 +0000253\module{string} discovers that \module{strop} exists, it transparently
254replaces parts of itself with the implementation from \module{strop}.
Guido van Rossum0bf4d891995-03-02 12:37:30 +0000255After initialization, there is \emph{no} overhead in using
Fred Drakecce10901998-03-17 06:33:25 +0000256\module{string} instead of \module{strop}.