\section{Standard Module \sectcode{string}}

\stmodindex{string}
4
5This module defines some constants useful for checking character
Guido van Rossum0bf4d891995-03-02 12:37:30 +00006classes and some useful string functions. See the modules
7\code{regex} and \code{regsub} for string functions based on regular
8expressions.
9
The constants defined in this module are:
Guido van Rossum5fdeeea1994-01-02 01:22:07 +000011
12\renewcommand{\indexsubitem}{(data in module string)}
13\begin{datadesc}{digits}
14 The string \code{'0123456789'}.
15\end{datadesc}
16
17\begin{datadesc}{hexdigits}
18 The string \code{'0123456789abcdefABCDEF'}.
19\end{datadesc}
20
21\begin{datadesc}{letters}
22 The concatenation of the strings \code{lowercase} and
23 \code{uppercase} described below.
24\end{datadesc}
25
26\begin{datadesc}{lowercase}
27 A string containing all the characters that are considered lowercase
28 letters. On most systems this is the string
Guido van Rossum86751151995-02-28 17:14:32 +000029 \code{'abcdefghijklmnopqrstuvwxyz'}. Do not change its definition ---
Guido van Rossum5fdeeea1994-01-02 01:22:07 +000030 the effect on the routines \code{upper} and \code{swapcase} is
31 undefined.
32\end{datadesc}
33
34\begin{datadesc}{octdigits}
35 The string \code{'01234567'}.
36\end{datadesc}
37
38\begin{datadesc}{uppercase}
39 A string containing all the characters that are considered uppercase
40 letters. On most systems this is the string
Guido van Rossum86751151995-02-28 17:14:32 +000041 \code{'ABCDEFGHIJKLMNOPQRSTUVWXYZ'}. Do not change its definition ---
Guido van Rossum5fdeeea1994-01-02 01:22:07 +000042 the effect on the routines \code{lower} and \code{swapcase} is
43 undefined.
44\end{datadesc}
45
46\begin{datadesc}{whitespace}
47 A string containing all characters that are considered whitespace.
48 On most systems this includes the characters space, tab, linefeed,
Guido van Rossum86751151995-02-28 17:14:32 +000049 return, formfeed, and vertical tab. Do not change its definition ---
Guido van Rossum5fdeeea1994-01-02 01:22:07 +000050 the effect on the routines \code{strip} and \code{split} is
51 undefined.
52\end{datadesc}
53
Guido van Rossum0bf4d891995-03-02 12:37:30 +000054The functions defined in this module are:
Guido van Rossum5fdeeea1994-01-02 01:22:07 +000055
56\renewcommand{\indexsubitem}{(in module string)}
57
58\begin{funcdesc}{atof}{s}
59Convert a string to a floating point number. The string must have
60the standard syntax for a floating point literal in Python, optionally
61preceded by a sign (\samp{+} or \samp{-}).
62\end{funcdesc}
63
64\begin{funcdesc}{atoi}{s}
65Convert a string to an integer. The string must consist of one or more
66digits, optionally preceded by a sign (\samp{+} or \samp{-}).
67\end{funcdesc}
68
69\begin{funcdesc}{atol}{s}
70Convert a string to a long integer. The string must consist of one
71or more digits, optionally preceded by a sign (\samp{+} or \samp{-}).
72\end{funcdesc}
73
74\begin{funcdesc}{expandtabs}{s\, tabsize}
Expand tabs in a string, i.e.\ replace them by one or more spaces,
depending on the current column and the given tab size. The column
number is reset to zero after each newline occurring in the string.
This function doesn't understand other non-printing characters or
escape sequences.
80\end{funcdesc}
81
Guido van Rossum16d6e711994-08-08 12:30:22 +000082\begin{funcdesc}{find}{s\, sub\optional{\, start}}
83Return the lowest index in \var{s} not smaller than \var{start} where the
Guido van Rossum5fdeeea1994-01-02 01:22:07 +000084substring \var{sub} is found. Return \code{-1} when \var{sub}
Guido van Rossum16d6e711994-08-08 12:30:22 +000085does not occur as a substring of \var{s} with index at least \var{start}.
86If \var{start} is omitted, it defaults to \code{0}. If \var{start} is
Guido van Rossum5fdeeea1994-01-02 01:22:07 +000087negative, \code{len(\var{s})} is added.
88\end{funcdesc}
89
Guido van Rossum16d6e711994-08-08 12:30:22 +000090\begin{funcdesc}{rfind}{s\, sub\optional{\, start}}
Like \code{find} but find the highest index.
92\end{funcdesc}
93
Guido van Rossum16d6e711994-08-08 12:30:22 +000094\begin{funcdesc}{index}{s\, sub\optional{\, start}}
Guido van Rossum2828e9d1994-08-17 13:16:34 +000095Like \code{find} but raise \code{ValueError} when the substring is
Guido van Rossum5fdeeea1994-01-02 01:22:07 +000096not found.
97\end{funcdesc}
98
Guido van Rossum16d6e711994-08-08 12:30:22 +000099\begin{funcdesc}{rindex}{s\, sub\optional{\, start}}
Guido van Rossum2828e9d1994-08-17 13:16:34 +0000100Like \code{rfind} but raise \code{ValueError} when the substring is
Guido van Rossum5fdeeea1994-01-02 01:22:07 +0000101not found.
102\end{funcdesc}
103
\begin{funcdesc}{count}{s\, sub\optional{\, i}}
105Return the number of (non-overlapping) occurrences of substring
106\var{sub} in string \var{s} with index at least \var{i}.
107If \var{i} is omitted, it defaults to \code{0}.
108\end{funcdesc}
109
Guido van Rossum5fdeeea1994-01-02 01:22:07 +0000110\begin{funcdesc}{lower}{s}
111Convert letters to lower case.
112\end{funcdesc}
113
114\begin{funcdesc}{split}{s}
Return a list of the whitespace-delimited words of the string
\var{s}.
117\end{funcdesc}
118
119\begin{funcdesc}{splitfields}{s\, sep}
 Return a list containing the fields of the string \var{s}, using
 the string \var{sep} as a separator. The list will have one more
 item than the number of non-overlapping occurrences of the
 separator in the string. Thus, \code{string.splitfields(\var{s}, ' ')}
 is not the same as \code{string.split(\var{s})}, as the latter
 only returns non-empty words. As a special case,
 \code{splitfields(\var{s}, '')} returns \code{[\var{s}]}, for any string
 \var{s}. (See also \code{regsub.split()}.)
128\end{funcdesc}
129
130\begin{funcdesc}{join}{words}
131Concatenate a list or tuple of words with intervening spaces.
132\end{funcdesc}
133
134\begin{funcdesc}{joinfields}{words\, sep}
135Concatenate a list or tuple of words with intervening separators.
136It is always true that
137\code{string.joinfields(string.splitfields(\var{t}, \var{sep}), \var{sep})}
138equals \var{t}.
139\end{funcdesc}
140
141\begin{funcdesc}{strip}{s}
Remove leading and trailing whitespace from the string
\var{s}.
144\end{funcdesc}
145
146\begin{funcdesc}{swapcase}{s}
Convert lower case letters to upper case and vice versa.
148\end{funcdesc}
149
150\begin{funcdesc}{upper}{s}
151Convert letters to upper case.
152\end{funcdesc}
153
154\begin{funcdesc}{ljust}{s\, width}
155\funcline{rjust}{s\, width}
156\funcline{center}{s\, width}
These functions respectively left-justify, right-justify and center a
string in a field of given width.
They return a string that is at least
\var{width}
characters wide, created by padding the string
\var{s}
with spaces on the right, left or both sides until the given width is
reached.
The string is never truncated.
165\end{funcdesc}
166
167\begin{funcdesc}{zfill}{s\, width}
168Pad a numeric string on the left with zero digits until the given
169width is reached. Strings starting with a sign are handled correctly.
170\end{funcdesc}
Guido van Rossum0bf4d891995-03-02 12:37:30 +0000171
172This module is implemented in Python. Much of its functionality has
173been reimplemented in the built-in module \code{strop}. However, you
174should \emph{never} import the latter module directly. When
175\code{string} discovers that \code{strop} exists, it transparently
176replaces parts of itself with the implementation from \code{strop}.
177After initialization, there is \emph{no} overhead in using
178\code{string} instead of \code{strop}.
179\bimodindex{strop}