blob: 2614314cb95403159a296c02ba79b912537e82fb [file] [log] [blame]
\section{\module{stringprep} ---
         Internet String Preparation}
3
\declaremodule{standard}{stringprep}
\modulesynopsis{String preparation, as per RFC 3454}
\moduleauthor{Martin v. L\"owis}{martin@v.loewis.de}
\sectionauthor{Martin v. L\"owis}{martin@v.loewis.de}
8
\versionadded{2.3}
10
When identifying things (such as host names) on the Internet, it is
often necessary to compare such identifications for
``equality''. Exactly how this comparison is executed may depend on
the application domain, e.g.\ whether it should be case-insensitive or
not. It may also be necessary to restrict the possible
identifications, to allow only identifications consisting of
``printable'' characters.
18
\rfc{3454} defines a procedure for ``preparing'' Unicode strings in
internet protocols. Before passing strings onto the wire, they are
processed with the preparation procedure, after which they have a
certain normalized form. The RFC defines a set of tables, which can be
combined into profiles. Each profile must define which tables it uses,
and what other optional parts of the \code{stringprep} procedure are
part of the profile. One example of a \code{stringprep} profile is
\code{nameprep}, which is used for internationalized domain names.
27
The module \module{stringprep} only exposes the tables from
\rfc{3454}. As these tables would be very large if represented as
dictionaries or lists, the module uses the Unicode character database
internally. The module source code itself was generated using the
\code{mkstringprep.py} utility.
33
As a result, these tables are exposed as functions, not as data
structures. There are two kinds of tables in the RFC: sets and
mappings. For a set, \module{stringprep} provides the ``characteristic
function'', i.e.\ a function that returns true if the parameter is part
of the set. For mappings, it provides the mapping function: given the
key, it returns the associated value. Below is a list of all functions
available in the module.
41
\begin{funcdesc}{in_table_a1}{code}
Determine whether \var{code} is in table~A.1 (Unassigned code points
in Unicode 3.2).
\end{funcdesc}
46
\begin{funcdesc}{in_table_b1}{code}
Determine whether \var{code} is in table~B.1 (Commonly mapped to
nothing).
\end{funcdesc}
51
\begin{funcdesc}{map_table_b2}{code}
Return the mapped value for \var{code} according to table~B.2
(Mapping for case-folding used with NFKC).
\end{funcdesc}
56
\begin{funcdesc}{map_table_b3}{code}
Return the mapped value for \var{code} according to table~B.3
(Mapping for case-folding used with no normalization).
\end{funcdesc}
61
\begin{funcdesc}{in_table_c11}{code}
Determine whether \var{code} is in table~C.1.1
(ASCII space characters).
\end{funcdesc}
66
\begin{funcdesc}{in_table_c12}{code}
Determine whether \var{code} is in table~C.1.2
(Non-ASCII space characters).
\end{funcdesc}
71
\begin{funcdesc}{in_table_c11_c12}{code}
Determine whether \var{code} is in table~C.1
(Space characters, union of C.1.1 and C.1.2).
\end{funcdesc}
76
\begin{funcdesc}{in_table_c21}{code}
Determine whether \var{code} is in table~C.2.1
(ASCII control characters).
\end{funcdesc}
81
\begin{funcdesc}{in_table_c22}{code}
Determine whether \var{code} is in table~C.2.2
(Non-ASCII control characters).
\end{funcdesc}
86
\begin{funcdesc}{in_table_c21_c22}{code}
Determine whether \var{code} is in table~C.2
(Control characters, union of C.2.1 and C.2.2).
\end{funcdesc}
91
\begin{funcdesc}{in_table_c3}{code}
Determine whether \var{code} is in table~C.3
(Private use).
\end{funcdesc}
96
\begin{funcdesc}{in_table_c4}{code}
Determine whether \var{code} is in table~C.4
(Non-character code points).
\end{funcdesc}
101
\begin{funcdesc}{in_table_c5}{code}
Determine whether \var{code} is in table~C.5
(Surrogate codes).
\end{funcdesc}
106
\begin{funcdesc}{in_table_c6}{code}
Determine whether \var{code} is in table~C.6
(Inappropriate for plain text).
\end{funcdesc}
111
\begin{funcdesc}{in_table_c7}{code}
Determine whether \var{code} is in table~C.7
(Inappropriate for canonical representation).
\end{funcdesc}
116
\begin{funcdesc}{in_table_c8}{code}
Determine whether \var{code} is in table~C.8
(Change display properties or are deprecated).
\end{funcdesc}
121
\begin{funcdesc}{in_table_c9}{code}
Determine whether \var{code} is in table~C.9
(Tagging characters).
\end{funcdesc}
126
\begin{funcdesc}{in_table_d1}{code}
Determine whether \var{code} is in table~D.1
(Characters with bidirectional property ``R'' or ``AL'').
\end{funcdesc}
131
\begin{funcdesc}{in_table_d2}{code}
Determine whether \var{code} is in table~D.2
(Characters with bidirectional property ``L'').
\end{funcdesc}
136