blob: b41b7004866d560b1c4b1badad11d4577fe75f21 [file] [log] [blame]
\section{\module{regsub} ---
         String operations using regular expressions}

\declaremodule{standard}{regsub}
\modulesynopsis{Substitution and splitting operations that use
                regular expressions.  \strong{Obsolete!}}
Fred Drakeb91e9341998-07-23 17:59:49 +00007
Fred Drake54c39471998-04-09 14:03:00 +00008
This module defines a number of functions useful for working with
regular expressions (see built-in module \refmodule{regex}).

Warning: these functions are not thread-safe.

\strong{Obsolescence note:}
This module is obsolete as of Python version 1.5; it is still being
maintained because much existing code still uses it.  All new code in
need of regular expressions should use the new \refmodule{re} module, which
supports the more powerful and regular Perl-style regular expressions.
Existing code should be converted.  The standard library module
\module{reconvert} helps in converting \refmodule{regex} style regular
expressions to \refmodule{re} style regular expressions.  (For more
conversion help, see Andrew Kuchling's\index{Kuchling, Andrew}
``regex-to-re HOWTO'' at
\url{http://www.python.org/doc/howto/regex-to-re/}.)
Guido van Rossum77796191997-12-30 04:54:47 +000025
Guido van Rossum0b3f9511996-08-09 21:43:21 +000026
\begin{funcdesc}{sub}{pat, repl, str}
Replace the first occurrence of pattern \var{pat} in string
\var{str} by replacement \var{repl}.  If the pattern isn't found,
the string is returned unchanged.  The pattern may be a string or an
already compiled pattern.  The replacement may contain references
\samp{\e \var{digit}} to subpatterns and escaped backslashes.
\end{funcdesc}
34
\begin{funcdesc}{gsub}{pat, repl, str}
Replace all (non-overlapping) occurrences of pattern \var{pat} in
string \var{str} by replacement \var{repl}.  The same rules as for
\function{sub()} apply.  Empty matches for the pattern are replaced only
when not adjacent to a previous match, so e.g.\
\code{gsub('', '-', 'abc')} returns \code{'-a-b-c-'}.
\end{funcdesc}
42
\begin{funcdesc}{split}{str, pat\optional{, maxsplit}}
Split the string \var{str} into fields separated by delimiters matching
the pattern \var{pat}, and return a list containing the fields.  Only
non-empty matches for the pattern are considered, so e.g.\
\code{split('a:b', ':*')} returns \code{['a', 'b']} and
\code{split('abc', '')} returns \code{['abc']}.  The \var{maxsplit}
argument defaults to 0.  If it is nonzero, at most \var{maxsplit} splits
occur, and the remainder of the string is returned as the final
element of the list.
\end{funcdesc}
53
\begin{funcdesc}{splitx}{str, pat\optional{, maxsplit}}
Split the string \var{str} into fields separated by delimiters matching
the pattern \var{pat}, and return a list containing the fields as well
as the separators.  For example, \code{splitx('a:::b', ':*')} returns
\code{['a', ':::', 'b']}.  Otherwise, this function behaves the same
as \function{split()}.
\end{funcdesc}
61
\begin{funcdesc}{capwords}{s\optional{, pat}}
Capitalize the words in the string \var{s}, where words are separated
by the optional pattern \var{pat}.  The default
pattern uses any characters except letters, digits and underscores as
word delimiters.  Capitalization is done by changing the first
character of each word to upper case.
\end{funcdesc}
Barry Warsaw736bb061997-02-18 18:59:37 +000068
\begin{funcdesc}{clear_cache}{}
The \module{regsub} module maintains a cache of compiled regular
expressions, keyed on the regular expression string and the syntax of
the \refmodule{regex} module at the time the expression was compiled.
This function clears that cache.
\end{funcdesc}