blob: 1eae492c18161d4d7079a9b15e5f697f60d3e68b [file] [log] [blame]
Fred Drake28b29442000-06-13 20:50:50 +00001\section{\module{unicodedata} ---
2 Unicode Database}
3
4\declaremodule{standard}{unicodedata}
5\modulesynopsis{Access the Unicode Database.}
6\moduleauthor{Marc-Andre Lemburg}{mal@lemburg.com}
7\sectionauthor{Marc-Andre Lemburg}{mal@lemburg.com}
8
9
10\index{Unicode}
11\index{character}
12\indexii{Unicode}{database}
13
14This module provides access to the Unicode Character Database which
15defines character properties for all Unicode characters. The data in
16this database is based on the \file{UnicodeData.txt} file version
173.0.0 which is publicly available from \url{ftp://ftp.unicode.org/}.
18
19The module uses the same names and symbols as defined by the
20UnicodeData File Format 3.0.0 (see
21\url{ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.html}). It
22defines the following functions:
23
24\begin{funcdesc}{decimal}{unichr\optional{, default}}
25 Returns the decimal value assigned to the Unicode character
26 \var{unichr} as an integer. If no such value is defined,
27 \var{default} is returned, or, if not given,
28 \exception{ValueError} is raised.
29\end{funcdesc}
30
31\begin{funcdesc}{digit}{unichr\optional{, default}}
32 Returns the digit value assigned to the Unicode character
33 \var{unichr} as an integer. If no such value is defined,
34 \var{default} is returned, or, if not given,
35 \exception{ValueError} is raised.
36\end{funcdesc}
37
38\begin{funcdesc}{numeric}{unichr\optional{, default}}
39 Returns the numeric value assigned to the Unicode character
40 \var{unichr} as a float. If no such value is defined, \var{default} is
41 returned, or, if not given, \exception{ValueError} is raised.
42\end{funcdesc}
43
44\begin{funcdesc}{category}{unichr}
45 Returns the general category assigned to the Unicode character
46 \var{unichr} as a string.
47\end{funcdesc}
48
49\begin{funcdesc}{bidirectional}{unichr}
50 Returns the bidirectional category assigned to the Unicode character
51 \var{unichr} as a string. If no such value is defined, an empty string
52 is returned.
53\end{funcdesc}
54
55\begin{funcdesc}{combining}{unichr}
56 Returns the canonical combining class assigned to the Unicode
57 character \var{unichr} as an integer. Returns \code{0} if no combining
58 class is defined.
59\end{funcdesc}
60
61\begin{funcdesc}{mirrored}{unichr}
62 Returns the mirrored property assigned to the Unicode character
63 \var{unichr} as an integer. Returns \code{1} if the character has been
64 identified as a ``mirrored'' character in bidirectional text,
65 \code{0} otherwise.
66\end{funcdesc}
67
68\begin{funcdesc}{decomposition}{unichr}
69 Returns the character decomposition mapping assigned to the Unicode
70 character \var{unichr} as a string. An empty string is returned in case
71 no such mapping is defined.
72\end{funcdesc}