blob: 3492d021fe1529b886c5edb3c6d8a2022a47bc41 [file] [log] [blame]
Martin v. Löwis2548c732003-04-18 10:39:54 +00001\section{\module{stringprep} ---
2 Internet String Preparation}
3
4\declaremodule{standard}{stringprep}
5\modulesynopsis{String preparation, as per RFC 3454}
6\moduleauthor{Martin v. L\"owis}{martin@v.loewis.de}
7\sectionauthor{Martin v. L\"owis}{martin@v.loewis.de}
8
9When identifying things (such as host names) on the Internet, it is
10often necessary to compare such identifications for
11``equality''. Exactly how this comparison is executed may depend on
12the application domain, e.g.\ whether it should be case-insensitive or
13not. It may also be necessary to restrict the possible
14identifications, to allow only identifications consisting of
15``printable'' characters.
16
17\rfc{3454} defines a procedure for ``preparing'' Unicode strings in
18internet protocols. Before passing strings onto the wire, they are
19processed with the preparation procedure, after which they have a
20certain normalized form. The RFC defines a set of tables, which can be
21combined into profiles. Each profile must define which tables it uses,
22and what other optional parts of the \code{stringprep} procedure are
23part of the profile. One example of a \code{stringprep} profile is
24\code{nameprep}, which is used for internationalized domain names.
25
26The module \module{stringprep} only exposes the tables from
27\rfc{3454}. As these tables would be very large if represented as
28dictionaries or lists, the module uses the Unicode character database
29internally. The module source code itself was generated using the
30\code{mkstringprep.py} utility.
31
32As a result, these tables are exposed as functions, not as data
33structures. There are two kinds of tables in the RFC: sets and
34mappings. For a set, \module{stringprep} provides the ``characteristic
35function'', i.e.\ a function that returns true if the parameter is part
36of the set. For mappings, it provides the mapping function: given the
37key, it returns the associated value. Below is a list of all functions
38available in the module.
39
40\begin{funcdesc}{in_table_a1}{code}
41Determine whether \var{code} is in table~A.1 (Unassigned code points
42in Unicode 3.2).
43\end{funcdesc}
44
45\begin{funcdesc}{in_table_b1}{code}
46Determine whether \var{code} is in table~B.1 (Commonly mapped to
47nothing).
48\end{funcdesc}
49
50\begin{funcdesc}{map_table_b2}{code}
51Return the mapped value for \var{code} according to table~B.2
52(Mapping for case-folding used with NFKC).
53\end{funcdesc}
54
55\begin{funcdesc}{map_table_b3}{code}
56Return the mapped value for \var{code} according to table~B.3
57(Mapping for case-folding used with no normalization).
58\end{funcdesc}
59
60\begin{funcdesc}{in_table_c11}{code}
61Determine whether \var{code} is in table~C.1.1
62(ASCII space characters).
63\end{funcdesc}
64
65\begin{funcdesc}{in_table_c12}{code}
66Determine whether \var{code} is in table~C.1.2
67(Non-ASCII space characters).
68\end{funcdesc}
69
70\begin{funcdesc}{in_table_c11_c12}{code}
71Determine whether \var{code} is in table~C.1
72(Space characters, union of C.1.1 and C.1.2).
73\end{funcdesc}
74
75\begin{funcdesc}{in_table_c21}{code}
76Determine whether \var{code} is in table~C.2.1
77(ASCII control characters).
78\end{funcdesc}
79
80\begin{funcdesc}{in_table_c22}{code}
81Determine whether \var{code} is in table~C.2.2
82(Non-ASCII control characters).
83\end{funcdesc}
84
85\begin{funcdesc}{in_table_c21_c22}{code}
86Determine whether \var{code} is in table~C.2
87(Control characters, union of C.2.1 and C.2.2).
88\end{funcdesc}
89
90\begin{funcdesc}{in_table_c3}{code}
91Determine whether \var{code} is in table~C.3
92(Private use).
93\end{funcdesc}
94
95\begin{funcdesc}{in_table_c4}{code}
96Determine whether \var{code} is in table~C.4
97(Non-character code points).
98\end{funcdesc}
99
100\begin{funcdesc}{in_table_c5}{code}
101Determine whether \var{code} is in table~C.5
102(Surrogate codes).
103\end{funcdesc}
104
105\begin{funcdesc}{in_table_c6}{code}
106Determine whether \var{code} is in table~C.6
107(Inappropriate for plain text).
108\end{funcdesc}
109
110\begin{funcdesc}{in_table_c7}{code}
111Determine whether \var{code} is in table~C.7
112(Inappropriate for canonical representation).
113\end{funcdesc}
114
115\begin{funcdesc}{in_table_c8}{code}
116Determine whether \var{code} is in table~C.8
117(Change display properties or are deprecated).
118\end{funcdesc}
119
120\begin{funcdesc}{in_table_c9}{code}
121Determine whether \var{code} is in table~C.9
122(Tagging characters).
123\end{funcdesc}
124
125\begin{funcdesc}{in_table_d1}{code}
126Determine whether \var{code} is in table~D.1
127(Characters with bidirectional property ``R'' or ``AL'').
128\end{funcdesc}
129
130\begin{funcdesc}{in_table_d2}{code}
131Determine whether \var{code} is in table~D.2
132(Characters with bidirectional property ``L'').
133\end{funcdesc}
134