blob: c12cb96af2c7f58288d8fd94f32d7c6c23a16006 [file] [log] [blame]
Guido van Rossumd8225182000-03-10 22:33:05 +00001#ifndef Py_UNICODEOBJECT_H
2#define Py_UNICODEOBJECT_H
Guido van Rossumd8225182000-03-10 22:33:05 +00003
4/*
5
6Unicode implementation based on original code by Fredrik Lundh,
7modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
8Unicode Integration Proposal (see file Misc/unicode.txt).
9
Guido van Rossum16b1ad92000-08-03 16:24:25 +000010Copyright (c) Corporation for National Research Initiatives.
Guido van Rossumd8225182000-03-10 22:33:05 +000011
12
13 Original header:
14 --------------------------------------------------------------------
15
16 * Yet another Unicode string type for Python. This type supports the
17 * 16-bit Basic Multilingual Plane (BMP) only.
18 *
19 * Written by Fredrik Lundh, January 1999.
20 *
21 * Copyright (c) 1999 by Secret Labs AB.
22 * Copyright (c) 1999 by Fredrik Lundh.
23 *
24 * fredrik@pythonware.com
25 * http://www.pythonware.com
26 *
27 * --------------------------------------------------------------------
28 * This Unicode String Type is
29 *
30 * Copyright (c) 1999 by Secret Labs AB
31 * Copyright (c) 1999 by Fredrik Lundh
32 *
33 * By obtaining, using, and/or copying this software and/or its
34 * associated documentation, you agree that you have read, understood,
35 * and will comply with the following terms and conditions:
36 *
37 * Permission to use, copy, modify, and distribute this software and its
38 * associated documentation for any purpose and without fee is hereby
39 * granted, provided that the above copyright notice appears in all
40 * copies, and that both that copyright notice and this permission notice
41 * appear in supporting documentation, and that the name of Secret Labs
42 * AB or the author not be used in advertising or publicity pertaining to
43 * distribution of the software without specific, written prior
44 * permission.
45 *
46 * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
47 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
48 * FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
49 * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
50 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
51 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
52 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
53 * -------------------------------------------------------------------- */
54
Marc-André Lemburg5e6007c2001-09-19 11:21:03 +000055#include <ctype.h>
Guido van Rossumd8225182000-03-10 22:33:05 +000056
57/* === Internal API ======================================================= */
58
59/* --- Internal Unicode Format -------------------------------------------- */
60
Fredrik Lundh9b14ab32001-06-26 22:59:49 +000061/* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is
62 properly set, but the default rules below don't set it. I'll
63 sort this out some other day -- fredrik@pythonware.com */
64
65#ifndef Py_UNICODE_SIZE
66#error Must define Py_UNICODE_SIZE
67#endif
68
Fredrik Lundh8f455852001-06-27 18:59:43 +000069/* Setting Py_UNICODE_WIDE enables UCS-4 storage. Otherwise, Unicode
70 strings are stored as UCS-2 (with limited support for UTF-16) */
71
72#if Py_UNICODE_SIZE >= 4
73#define Py_UNICODE_WIDE
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +000074#endif
Fredrik Lundh1294ad02001-06-26 17:17:07 +000075
Guido van Rossumd8225182000-03-10 22:33:05 +000076/* Set these flags if the platform has "wchar.h", "wctype.h" and the
77 wchar_t type is a 16-bit unsigned type */
78/* #define HAVE_WCHAR_H */
79/* #define HAVE_USABLE_WCHAR_T */
80
81/* Defaults for various platforms */
/* Choose a default storage type for Py_UNICODE, but only when the build
   has not already selected one by defining PY_UNICODE_TYPE. */
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +000082#ifndef PY_UNICODE_TYPE
Guido van Rossumd8225182000-03-10 22:33:05 +000083
Fredrik Lundh1294ad02001-06-26 17:17:07 +000084/* Windows has a usable wchar_t type (unless we're using UCS-4) */
Fredrik Lundh8f455852001-06-27 18:59:43 +000085# if defined(MS_WIN32) && Py_UNICODE_SIZE == 2
Guido van Rossumd8225182000-03-10 22:33:05 +000086# define HAVE_USABLE_WCHAR_T
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +000087# define PY_UNICODE_TYPE wchar_t
88# endif
89
/* Wide builds store one Py_UCS4 per character.  Py_UCS4 itself is only
   typedef'ed further down; that is fine because this macro is not
   expanded until the later `typedef PY_UNICODE_TYPE Py_UNICODE;`. */
Fredrik Lundh8f455852001-06-27 18:59:43 +000090# if defined(Py_UNICODE_WIDE)
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +000091# define PY_UNICODE_TYPE Py_UCS4
Guido van Rossumd8225182000-03-10 22:33:05 +000092# endif
93
/* NOTE(review): when neither branch above applies (a narrow build without
   MS_WIN32) this block leaves PY_UNICODE_TYPE undefined -- presumably the
   build configuration is expected to supply it; confirm. */
94#endif
95
96/* If the compiler provides a wchar_t type we try to support it
97 through the interface functions PyUnicode_FromWideChar() and
98 PyUnicode_AsWideChar(). */
99
100#ifdef HAVE_USABLE_WCHAR_T
Marc-André Lemburg1a731c62000-08-11 11:43:10 +0000101# ifndef HAVE_WCHAR_H
102# define HAVE_WCHAR_H
103# endif
Guido van Rossumd8225182000-03-10 22:33:05 +0000104#endif
105
106#ifdef HAVE_WCHAR_H
Guido van Rossum24bdb042000-03-28 20:29:59 +0000107/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
108# ifdef _HAVE_BSDI
109# include <time.h>
110# endif
Marc-André Lemburg5e6007c2001-09-19 11:21:03 +0000111# include <wchar.h>
Guido van Rossumd8225182000-03-10 22:33:05 +0000112#endif
113
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +0000114/*
115 * Use this typedef when you need to represent a UTF-16 surrogate pair
116 * as a single unsigned integer.
117 */
/* Py_UCS4 is the first of `unsigned int` / `unsigned long` whose
   configured size (SIZEOF_INT / SIZEOF_LONG, in bytes) is at least 4.
   NOTE(review): there is no #else branch, so if neither test succeeds
   (for example because the SIZEOF_* macros are not defined when this
   header is read) no typedef is emitted at all -- confirm that the
   configuration header is always included first. */
118#if SIZEOF_INT >= 4
119typedef unsigned int Py_UCS4;
120#elif SIZEOF_LONG >= 4
121typedef unsigned long Py_UCS4;
Guido van Rossumd8225182000-03-10 22:33:05 +0000122#endif
123
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +0000124typedef PY_UNICODE_TYPE Py_UNICODE;
Marc-André Lemburg43279102000-07-07 09:01:41 +0000125
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000126/* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */
127
128/* Unicode API names are mangled to ensure that UCS-2 and UCS-4 builds
129 produce different external names and thus cause import errors when
130 Python interpreters and extensions compiled with different
131 Unicode width assumptions are combined. */
132
/* Alias tables: every listed PyUnicode_* / _PyUnicode_* name is redirected
   to a width-specific symbol.  Both tables list the same names in the same
   order; only the UCS2 / UCS4 infix differs, so an entry added to one
   table needs a matching entry in the other.
   NOTE(review): PyUnicode_BuildEncodingMap is declared later in this header
   but has no alias in either table -- confirm whether that is intentional. */
133#ifndef Py_UNICODE_WIDE
134
/* Narrow build (Py_UNICODE_WIDE not defined): names map to PyUnicodeUCS2_*. */
135# define PyUnicode_AsASCIIString PyUnicodeUCS2_AsASCIIString
136# define PyUnicode_AsCharmapString PyUnicodeUCS2_AsCharmapString
Marc-André Lemburgd2d45982004-07-08 17:57:32 +0000137# define PyUnicode_AsEncodedObject PyUnicodeUCS2_AsEncodedObject
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000138# define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString
139# define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String
140# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString
141# define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String
142# define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String
143# define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
144# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString
145# define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar
146# define PyUnicode_Compare PyUnicodeUCS2_Compare
147# define PyUnicode_Concat PyUnicodeUCS2_Concat
148# define PyUnicode_Contains PyUnicodeUCS2_Contains
149# define PyUnicode_Count PyUnicodeUCS2_Count
150# define PyUnicode_Decode PyUnicodeUCS2_Decode
151# define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
152# define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
153# define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
154# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
155# define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
Walter Dörwald69652032004-09-07 20:24:22 +0000156# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000157# define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
Walter Dörwald69652032004-09-07 20:24:22 +0000158# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000159# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape
160# define PyUnicode_Encode PyUnicodeUCS2_Encode
161# define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII
162# define PyUnicode_EncodeCharmap PyUnicodeUCS2_EncodeCharmap
163# define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal
164# define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1
165# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape
166# define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16
167# define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8
168# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape
169# define PyUnicode_Find PyUnicodeUCS2_Find
170# define PyUnicode_Format PyUnicodeUCS2_Format
171# define PyUnicode_FromEncodedObject PyUnicodeUCS2_FromEncodedObject
172# define PyUnicode_FromObject PyUnicodeUCS2_FromObject
Marc-André Lemburg9c329de2002-08-12 08:19:10 +0000173# define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000174# define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
175# define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
176# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
177# define PyUnicode_GetMax PyUnicodeUCS2_GetMax
178# define PyUnicode_GetSize PyUnicodeUCS2_GetSize
179# define PyUnicode_Join PyUnicodeUCS2_Join
Thomas Wouters477c8d52006-05-27 19:21:47 +0000180# define PyUnicode_Partition PyUnicodeUCS2_Partition
181# define PyUnicode_RPartition PyUnicodeUCS2_RPartition
182# define PyUnicode_RSplit PyUnicodeUCS2_RSplit
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000183# define PyUnicode_Replace PyUnicodeUCS2_Replace
184# define PyUnicode_Resize PyUnicodeUCS2_Resize
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000185# define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000186# define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding
187# define PyUnicode_Split PyUnicodeUCS2_Split
188# define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines
189# define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch
190# define PyUnicode_Translate PyUnicodeUCS2_Translate
191# define PyUnicode_TranslateCharmap PyUnicodeUCS2_TranslateCharmap
192# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString
193# define _PyUnicode_Fini _PyUnicodeUCS2_Fini
194# define _PyUnicode_Init _PyUnicodeUCS2_Init
195# define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha
196# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit
197# define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit
198# define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak
199# define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase
200# define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric
201# define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase
202# define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase
203# define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace
204# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit
205# define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit
206# define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase
207# define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric
208# define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase
209# define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase
210
211#else
212
/* Wide build (Py_UNICODE_WIDE defined): names map to PyUnicodeUCS4_*. */
213# define PyUnicode_AsASCIIString PyUnicodeUCS4_AsASCIIString
214# define PyUnicode_AsCharmapString PyUnicodeUCS4_AsCharmapString
Marc-André Lemburgd2d45982004-07-08 17:57:32 +0000215# define PyUnicode_AsEncodedObject PyUnicodeUCS4_AsEncodedObject
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000216# define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString
217# define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String
218# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString
219# define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String
220# define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String
221# define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
222# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString
223# define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar
224# define PyUnicode_Compare PyUnicodeUCS4_Compare
225# define PyUnicode_Concat PyUnicodeUCS4_Concat
226# define PyUnicode_Contains PyUnicodeUCS4_Contains
227# define PyUnicode_Count PyUnicodeUCS4_Count
228# define PyUnicode_Decode PyUnicodeUCS4_Decode
229# define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
230# define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
231# define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
232# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
233# define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
Walter Dörwald69652032004-09-07 20:24:22 +0000234# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000235# define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
Walter Dörwald69652032004-09-07 20:24:22 +0000236# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000237# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape
238# define PyUnicode_Encode PyUnicodeUCS4_Encode
239# define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII
240# define PyUnicode_EncodeCharmap PyUnicodeUCS4_EncodeCharmap
241# define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal
242# define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1
243# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape
244# define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16
245# define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8
246# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape
247# define PyUnicode_Find PyUnicodeUCS4_Find
248# define PyUnicode_Format PyUnicodeUCS4_Format
249# define PyUnicode_FromEncodedObject PyUnicodeUCS4_FromEncodedObject
250# define PyUnicode_FromObject PyUnicodeUCS4_FromObject
Marc-André Lemburg9c329de2002-08-12 08:19:10 +0000251# define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000252# define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
253# define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
254# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
255# define PyUnicode_GetMax PyUnicodeUCS4_GetMax
256# define PyUnicode_GetSize PyUnicodeUCS4_GetSize
257# define PyUnicode_Join PyUnicodeUCS4_Join
Thomas Wouters477c8d52006-05-27 19:21:47 +0000258# define PyUnicode_Partition PyUnicodeUCS4_Partition
259# define PyUnicode_RPartition PyUnicodeUCS4_RPartition
260# define PyUnicode_RSplit PyUnicodeUCS4_RSplit
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000261# define PyUnicode_Replace PyUnicodeUCS4_Replace
262# define PyUnicode_Resize PyUnicodeUCS4_Resize
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000263# define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000264# define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding
265# define PyUnicode_Split PyUnicodeUCS4_Split
266# define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines
267# define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch
268# define PyUnicode_Translate PyUnicodeUCS4_Translate
269# define PyUnicode_TranslateCharmap PyUnicodeUCS4_TranslateCharmap
270# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString
271# define _PyUnicode_Fini _PyUnicodeUCS4_Fini
272# define _PyUnicode_Init _PyUnicodeUCS4_Init
273# define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha
274# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit
275# define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit
276# define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak
277# define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase
278# define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric
279# define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase
280# define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase
281# define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace
282# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit
283# define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit
284# define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase
285# define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric
286# define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase
287# define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase
288
289
290#endif
291
Guido van Rossumd8225182000-03-10 22:33:05 +0000292/* --- Internal Unicode Operations ---------------------------------------- */
293
294/* If you want Python to use the compiler's wctype.h functions instead
Barry Warsaw51ac5802000-03-20 16:36:48 +0000295 of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS or
Raymond Hettinger57341c32004-10-31 05:46:59 +0000296 configure Python using --with-wctype-functions. This reduces the
Barry Warsaw51ac5802000-03-20 16:36:48 +0000297 interpreter's code size. */
Guido van Rossumd8225182000-03-10 22:33:05 +0000298
/* Select the implementation behind the Py_UNICODE_IS*() / Py_UNICODE_TO*()
   character macros.  Both branches define the same set of macro names. */
299#if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)
300
/* wctype branch: ISSPACE, ISLOWER, ISUPPER, TOLOWER, TOUPPER and ISALPHA
   forward to the <wctype.h> functions.  The titlecase, linebreak, decimal,
   digit and numeric macros still call the _PyUnicode_* functions. */
Marc-André Lemburg5e6007c2001-09-19 11:21:03 +0000301#include <wctype.h>
Guido van Rossumd8225182000-03-10 22:33:05 +0000302
303#define Py_UNICODE_ISSPACE(ch) iswspace(ch)
304
305#define Py_UNICODE_ISLOWER(ch) iswlower(ch)
306#define Py_UNICODE_ISUPPER(ch) iswupper(ch)
307#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
308#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
309
310#define Py_UNICODE_TOLOWER(ch) towlower(ch)
311#define Py_UNICODE_TOUPPER(ch) towupper(ch)
312#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
313
314#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
315#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
316#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
317
318#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
319#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
320#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
321
Marc-André Lemburgf03e7412000-07-05 09:45:59 +0000322#define Py_UNICODE_ISALPHA(ch) iswalpha(ch)
323
Guido van Rossumd8225182000-03-10 22:33:05 +0000324#else
325
/* Default branch: every macro forwards to the matching _PyUnicode_*
   function. */
326#define Py_UNICODE_ISSPACE(ch) _PyUnicode_IsWhitespace(ch)
327
328#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
329#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
330#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
331#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
332
333#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
334#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
335#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
336
337#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
338#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
339#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
340
341#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
342#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
343#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
344
Marc-André Lemburgf03e7412000-07-05 09:45:59 +0000345#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
Guido van Rossumd8225182000-03-10 22:33:05 +0000346
Marc-André Lemburgf03e7412000-07-05 09:45:59 +0000347#endif
Marc-André Lemburga9c103b2000-07-03 10:52:13 +0000348
/* True when ch is alphabetic, a decimal digit, a digit, or numeric.
   The tests are OR-ed in that order and short-circuit.  `ch` appears four
   times in the expansion, so an argument with side effects may be
   evaluated more than once. */
349#define Py_UNICODE_ISALNUM(ch) \
350 (Py_UNICODE_ISALPHA(ch) || \
351 Py_UNICODE_ISDECIMAL(ch) || \
352 Py_UNICODE_ISDIGIT(ch) || \
353 Py_UNICODE_ISNUMERIC(ch))
354
/* Copy `length` Py_UNICODE elements from source to target via Py_MEMCPY.
   `length` is an element count; the byte count is computed here as
   length * sizeof(Py_UNICODE), with no overflow check on that product.
   NOTE(review): Py_MEMCPY is defined outside this file; presumably it has
   memcpy semantics (no overlapping regions) -- confirm. */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000355#define Py_UNICODE_COPY(target, source, length) \
356 Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE))
Guido van Rossumd8225182000-03-10 22:33:05 +0000357
/* Store `value` into the first `length` elements of the Py_UNICODE buffer
   `target`.

   Each argument is evaluated exactly once, in the order target, value,
   length, so expressions with side effects are safe to pass.  The bound is
   captured in a Py_ssize_t before the loop starts; previously (length) was
   re-expanded in the loop condition and therefore re-evaluated on every
   iteration.  A length of zero or less stores nothing.

   Expands to a single statement; the caller supplies the semicolon. */
#define Py_UNICODE_FILL(target, value, length) \
    do { \
        Py_UNICODE *t_ = (target); \
        Py_UNICODE v_ = (value); \
        Py_ssize_t n_ = (length); \
        Py_ssize_t i_; \
        for (i_ = 0; i_ < n_; i_++) \
            t_[i_] = v_; \
    } while (0)
Guido van Rossumd8225182000-03-10 22:33:05 +0000362
Thomas Wouters477c8d52006-05-27 19:21:47 +0000363/* check if substring matches at given offset. the offset must be
364 valid, and the substring must not be empty */
/* Non-zero when `substring` occurs in `string` starting at `offset`.
   `string` and `substring` are accessed through ->str and ->length.
   The first and the last element are compared before memcmp() runs over
   the whole span, so most mismatches are rejected without the call.
   The last-element test reads index length-1, which is why an empty
   substring is not allowed.  All three arguments appear several times in
   the expansion, so they should be free of side effects. */
365#define Py_UNICODE_MATCH(string, offset, substring) \
366 ((*((string)->str + (offset)) == *((substring)->str)) && \
367 ((*((string)->str + (offset) + (substring)->length-1) == *((substring)->str + (substring)->length-1))) && \
368 !memcmp((string)->str + (offset), (substring)->str, (substring)->length*sizeof(Py_UNICODE)))
Guido van Rossumd8225182000-03-10 22:33:05 +0000369
Barry Warsaw51ac5802000-03-20 16:36:48 +0000370#ifdef __cplusplus
371extern "C" {
372#endif
373
Guido van Rossumd8225182000-03-10 22:33:05 +0000374/* --- Unicode Type ------------------------------------------------------- */
375
/* Instance layout of a Unicode object.  `length` counts Py_UNICODE
   elements, not bytes: the PyUnicode_GET_DATA_SIZE macro in this header
   multiplies it by sizeof(Py_UNICODE) to obtain the byte size. */
376typedef struct {
377 PyObject_HEAD
Martin v. Löwis18e16552006-02-15 17:27:45 +0000378 Py_ssize_t length; /* Length of raw Unicode data in buffer */
Guido van Rossumd8225182000-03-10 22:33:05 +0000379 Py_UNICODE *str; /* Raw Unicode buffer */
380 long hash; /* Hash value; -1 if not set */
Marc-André Lemburgbff879c2000-08-03 18:46:08 +0000381 PyObject *defenc; /* (Default) Encoded version as Python
382 string, or NULL; this is used for
383 implementing the buffer protocol */
Guido van Rossumd8225182000-03-10 22:33:05 +0000384} PyUnicodeObject;
385
Mark Hammond91a681d2002-08-12 07:21:58 +0000386PyAPI_DATA(PyTypeObject) PyUnicode_Type;
Guido van Rossumd8225182000-03-10 22:33:05 +0000387
/* PyUnicode_Check(op): tests op's type through PyType_FastSubclass() with
   the Py_TPFLAGS_UNICODE_SUBCLASS flag -- presumably true for the Unicode
   type and its subclasses; confirm against the PyType_FastSubclass
   definition.
   PyUnicode_CheckExact(op): true only when op's type is exactly
   PyUnicode_Type.
   Both macros dereference op->ob_type directly, so op must be a valid,
   non-NULL object pointer. */
Thomas Wouters27d517b2007-02-25 20:39:11 +0000388#define PyUnicode_Check(op) \
389 PyType_FastSubclass((op)->ob_type, Py_TPFLAGS_UNICODE_SUBCLASS)
Tim Peters78e0fc72001-09-11 03:07:38 +0000390#define PyUnicode_CheckExact(op) ((op)->ob_type == &PyUnicode_Type)
Guido van Rossumd8225182000-03-10 22:33:05 +0000391
392/* Fast access macros */
/* These macros cast op to PyUnicodeObject * and read a field directly.
   No type check is performed, so op must already be known to be a
   Unicode object. */
/* Number of Py_UNICODE elements stored in op. */
393#define PyUnicode_GET_SIZE(op) \
394 (((PyUnicodeObject *)(op))->length)
/* Size of op's buffer in bytes: length * sizeof(Py_UNICODE). */
395#define PyUnicode_GET_DATA_SIZE(op) \
396 (((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE))
/* Pointer to op's Py_UNICODE buffer. */
397#define PyUnicode_AS_UNICODE(op) \
398 (((PyUnicodeObject *)(op))->str)
/* The same buffer viewed as const char *. */
399#define PyUnicode_AS_DATA(op) \
400 ((const char *)((PyUnicodeObject *)(op))->str)
401
402/* --- Constants ---------------------------------------------------------- */
403
404/* This Unicode character will be used as replacement character during
405 decoding if the errors argument is set to "replace". Note: the
406 Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
407 Unicode 3.0. */
408
409#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)
410
411/* === Public API ========================================================= */
412
413/* --- Plain Py_UNICODE --------------------------------------------------- */
414
415/* Create a Unicode Object from the Py_UNICODE buffer u of the given
Marc-André Lemburg8155e0e2001-04-23 14:44:21 +0000416 size.
417
418 u may be NULL which causes the contents to be undefined. It is the
419 user's responsibility to fill in the needed data afterwards. Note
420 that modifying the Unicode object contents after construction is
421 only allowed if u was set to NULL.
Guido van Rossumd8225182000-03-10 22:33:05 +0000422
423 The buffer is copied into the new object. */
424
Mark Hammond91a681d2002-08-12 07:21:58 +0000425PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
Guido van Rossumd8225182000-03-10 22:33:05 +0000426 const Py_UNICODE *u, /* Unicode buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000427 Py_ssize_t size /* size of buffer */
Guido van Rossumd8225182000-03-10 22:33:05 +0000428 );
429
430/* Return a read-only pointer to the Unicode object's internal
431 Py_UNICODE buffer. */
432
Mark Hammond91a681d2002-08-12 07:21:58 +0000433PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
Guido van Rossumd8225182000-03-10 22:33:05 +0000434 PyObject *unicode /* Unicode object */
435 );
436
437/* Get the length of the Unicode object. */
438
Martin v. Löwis18e16552006-02-15 17:27:45 +0000439PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
Guido van Rossumd8225182000-03-10 22:33:05 +0000440 PyObject *unicode /* Unicode object */
441 );
442
Martin v. Löwisce9b5a52001-06-27 06:28:56 +0000443/* Get the maximum ordinal for a Unicode character. */
Mark Hammond91a681d2002-08-12 07:21:58 +0000444PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
Martin v. Löwisce9b5a52001-06-27 06:28:56 +0000445
Guido van Rossum52c23592000-04-10 13:41:41 +0000446/* Resize an already allocated Unicode object to the new size length.
447
448 *unicode is modified to point to the new (resized) object and 0
449 returned on success.
450
451 This API may only be called by the function which also called the
452 Unicode constructor. The refcount on the object must be 1. Otherwise,
453 an error is returned.
454
455 Error handling is implemented as follows: an exception is set, -1
456 is returned and *unicode left untouched.
457
458*/
459
Mark Hammond91a681d2002-08-12 07:21:58 +0000460PyAPI_FUNC(int) PyUnicode_Resize(
Guido van Rossum52c23592000-04-10 13:41:41 +0000461 PyObject **unicode, /* Pointer to the Unicode object */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000462 Py_ssize_t length /* New length */
Guido van Rossum52c23592000-04-10 13:41:41 +0000463 );
464
Guido van Rossumd8225182000-03-10 22:33:05 +0000465/* Coerce obj to a Unicode object and return a reference with
466 *incremented* refcount.
467
468 Coercion is done in the following way:
469
Guido van Rossumb8c65bc2001-10-19 02:01:31 +0000470 1. String and other char buffer compatible objects are decoded
Fred Drakecb093fe2000-05-09 19:51:53 +0000471 under the assumptions that they contain data using the current
472 default encoding. Decoding is done in "strict" mode.
Guido van Rossumd8225182000-03-10 22:33:05 +0000473
Guido van Rossumb8c65bc2001-10-19 02:01:31 +0000474 2. All other objects (including Unicode objects) raise an
475 exception.
Guido van Rossumd8225182000-03-10 22:33:05 +0000476
477 The API returns NULL in case of an error. The caller is responsible
478 for decref'ing the returned objects.
479
480*/
481
Mark Hammond91a681d2002-08-12 07:21:58 +0000482PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
Marc-André Lemburg5a5c81a2000-07-07 13:46:42 +0000483 register PyObject *obj, /* Object */
484 const char *encoding, /* encoding */
485 const char *errors /* error handling */
486 );
487
Guido van Rossumb8c65bc2001-10-19 02:01:31 +0000488/* Coerce obj to a Unicode object and return a reference with
Marc-André Lemburg5a5c81a2000-07-07 13:46:42 +0000489 *incremented* refcount.
Guido van Rossumb8c65bc2001-10-19 02:01:31 +0000490
491 Unicode objects are passed back as-is (subclasses are converted to
492 true Unicode objects), all other objects are delegated to
493 PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
494 using the default encoding as basis for decoding the object.
Marc-André Lemburg5a5c81a2000-07-07 13:46:42 +0000495
496 The API returns NULL in case of an error. The caller is responsible
497 for decref'ing the returned objects.
498
499*/
500
Mark Hammond91a681d2002-08-12 07:21:58 +0000501PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
Guido van Rossumd8225182000-03-10 22:33:05 +0000502 register PyObject *obj /* Object */
503 );
504
505/* --- wchar_t support for platforms which support it --------------------- */
506
507#ifdef HAVE_WCHAR_H
508
509/* Create a Unicode Object from the wchar_t buffer w of the given
510 size.
511
512 The buffer is copied into the new object. */
513
Mark Hammond91a681d2002-08-12 07:21:58 +0000514PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
Guido van Rossumd8225182000-03-10 22:33:05 +0000515 register const wchar_t *w, /* wchar_t buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000516 Py_ssize_t size /* size of buffer */
Guido van Rossumd8225182000-03-10 22:33:05 +0000517 );
518
Marc-André Lemburga9cadcd2004-11-22 13:02:31 +0000519/* Copies the Unicode Object contents into the wchar_t buffer w. At
Guido van Rossumd8225182000-03-10 22:33:05 +0000520 most size wchar_t characters are copied.
521
Marc-André Lemburga9cadcd2004-11-22 13:02:31 +0000522 Note that the resulting wchar_t string may or may not be
523 0-terminated. It is the responsibility of the caller to make sure
524 that the wchar_t string is 0-terminated in case this is required by
525 the application.
526
527 Returns the number of wchar_t characters copied (excluding a
528 possibly trailing 0-termination character) or -1 in case of an
Guido van Rossumd8225182000-03-10 22:33:05 +0000529 error. */
530
Martin v. Löwis18e16552006-02-15 17:27:45 +0000531PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
Guido van Rossumd8225182000-03-10 22:33:05 +0000532 PyUnicodeObject *unicode, /* Unicode object */
533 register wchar_t *w, /* wchar_t buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000534 Py_ssize_t size /* size of buffer */
Guido van Rossumd8225182000-03-10 22:33:05 +0000535 );
536
537#endif
538
Marc-André Lemburgcc8764c2002-08-11 12:23:04 +0000539/* --- Unicode ordinals --------------------------------------------------- */
540
541/* Create a Unicode Object from the given Unicode code point ordinal.
542
543 The ordinal must be in range(0x10000) on narrow Python builds
544 (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
545 raised in case it is not.
546
547*/
548
Marc-André Lemburg9c329de2002-08-12 08:19:10 +0000549PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
Marc-André Lemburgcc8764c2002-08-11 12:23:04 +0000550
Guido van Rossumd8225182000-03-10 22:33:05 +0000551/* === Builtin Codecs =====================================================
552
553 Many of these APIs take two arguments encoding and errors. These
554 parameters encoding and errors have the same semantics as the ones
555 of the builtin unicode() API.
556
Fred Drakecb093fe2000-05-09 19:51:53 +0000557 Setting encoding to NULL causes the default encoding to be used.
Guido van Rossumd8225182000-03-10 22:33:05 +0000558
559 Error handling is set by errors which may also be set to NULL
560 meaning to use the default handling defined for the codec. Default
561 error handling for all builtin codecs is "strict" (ValueErrors are
562 raised).
563
564 The codecs all use a similar interface. Only deviations from the
565 generic ones are documented.
566
567*/
568
Fred Drakecb093fe2000-05-09 19:51:53 +0000569/* --- Manage the default encoding ---------------------------------------- */
570
Jeremy Hylton3ce45382001-07-30 22:34:24 +0000571/* Return a Python string holding the default encoded value of the
572 Unicode object.
573
574 The resulting string is cached in the Unicode object for subsequent
575 usage by this function. The cached version is needed to implement
576 the character buffer interface and will live (at least) as long as
577 the Unicode object itself.
578
579 The refcount of the string is *not* incremented.
580
581 *** Exported for internal use by the interpreter only !!! ***
582
583*/
584
/* The prototype leaves both parameters unnamed.
   NOTE(review): presumably the first is the Unicode object and the second
   an error-handling mode string, as in the other codec functions --
   confirm against the definition. */
Mark Hammond91a681d2002-08-12 07:21:58 +0000585PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
Jeremy Hylton3ce45382001-07-30 22:34:24 +0000586 PyObject *, const char *);
587
Fred Drakecb093fe2000-05-09 19:51:53 +0000588/* Returns the currently active default encoding.
589
590 The default encoding is currently implemented as a run-time settable
591 process global. This may change in future versions of the
592 interpreter to become a parameter which is managed on a per-thread
593 basis.
594
595 */
596
Mark Hammond91a681d2002-08-12 07:21:58 +0000597PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
Fred Drakecb093fe2000-05-09 19:51:53 +0000598
599/* Sets the currently active default encoding.
600
601 Returns 0 on success, -1 in case of an error.
602
603 */
604
Mark Hammond91a681d2002-08-12 07:21:58 +0000605PyAPI_FUNC(int) PyUnicode_SetDefaultEncoding(
Fred Drakecb093fe2000-05-09 19:51:53 +0000606 const char *encoding /* Encoding name in standard form */
607 );
608
Guido van Rossumd8225182000-03-10 22:33:05 +0000609/* --- Generic Codecs ----------------------------------------------------- */
610
611/* Create a Unicode object by decoding the encoded string s of the
612 given size. */
613
Mark Hammond91a681d2002-08-12 07:21:58 +0000614PyAPI_FUNC(PyObject*) PyUnicode_Decode(
Guido van Rossumd8225182000-03-10 22:33:05 +0000615 const char *s, /* encoded string */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000616 Py_ssize_t size, /* size of buffer */
Guido van Rossumd8225182000-03-10 22:33:05 +0000617 const char *encoding, /* encoding */
618 const char *errors /* error handling */
619 );
620
621/* Encodes a Py_UNICODE buffer of the given size and returns a
622 Python string object. */
623
Mark Hammond91a681d2002-08-12 07:21:58 +0000624PyAPI_FUNC(PyObject*) PyUnicode_Encode(
Guido van Rossumd8225182000-03-10 22:33:05 +0000625 const Py_UNICODE *s, /* Unicode char buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000626 Py_ssize_t size, /* number of Py_UNICODE chars to encode */
Guido van Rossumd8225182000-03-10 22:33:05 +0000627 const char *encoding, /* encoding */
628 const char *errors /* error handling */
629 );
630
Marc-André Lemburgd2d45982004-07-08 17:57:32 +0000631/* Encodes a Unicode object and returns the result as Python
632 object. */
633
634PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
635 PyObject *unicode, /* Unicode object */
636 const char *encoding, /* encoding */
637 const char *errors /* error handling */
638 );
639
Guido van Rossumd8225182000-03-10 22:33:05 +0000640/* Encodes a Unicode object and returns the result as Python string
641 object. */
642
Mark Hammond91a681d2002-08-12 07:21:58 +0000643PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
Guido van Rossumd8225182000-03-10 22:33:05 +0000644 PyObject *unicode, /* Unicode object */
645 const char *encoding, /* encoding */
646 const char *errors /* error handling */
647 );
648
/* Builds an encoding map object from a 256 character string.
   NOTE(review): presumably the result is intended for use as the
   mapping argument of the character map encoder -- confirm against
   the implementation. */
Thomas Wouters73e5a5b2006-06-08 15:35:45 +0000649PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
650 PyObject* string /* 256 character map */
651 );
652
653
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000654/* --- UTF-7 Codecs ------------------------------------------------------- */
655
/* Decodes length bytes from the UTF-7 encoded buffer string.
   errors selects the error handling (NULL means the codec default).
   NOTE(review): presumably returns a Unicode object like the generic
   decoder -- confirm. */
Mark Hammond91a681d2002-08-12 07:21:58 +0000656PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000657 const char *string, /* UTF-7 encoded string */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000658 Py_ssize_t length, /* size of string */
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000659 const char *errors /* error handling */
660 );
661
/* Encodes length Py_UNICODE characters from data using UTF-7.
   The two int flags force encoding of character classes that the
   encoder would otherwise be allowed to emit directly.
   NOTE(review): presumably returns a Python string object like the
   other Encode functions -- confirm. */
Mark Hammond91a681d2002-08-12 07:21:58 +0000662PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000663 const Py_UNICODE *data, /* Unicode char buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000664 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000665 int encodeSetO, /* force the encoder to encode characters in
666 Set O, as described in RFC2152 */
667 int encodeWhiteSpace, /* force the encoder to encode space, tab,
668 carriage return and linefeed characters */
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000669 const char *errors /* error handling */
670 );
671
Guido van Rossumd8225182000-03-10 22:33:05 +0000672/* --- UTF-8 Codecs ------------------------------------------------------- */
673
Mark Hammond91a681d2002-08-12 07:21:58 +0000674PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
Guido van Rossumd8225182000-03-10 22:33:05 +0000675 const char *string, /* UTF-8 encoded string */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000676 Py_ssize_t length, /* size of string */
Guido van Rossumd8225182000-03-10 22:33:05 +0000677 const char *errors /* error handling */
678 );
679
Walter Dörwald69652032004-09-07 20:24:22 +0000680PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
681 const char *string, /* UTF-8 encoded string */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000682 Py_ssize_t length, /* size of string */
Walter Dörwald69652032004-09-07 20:24:22 +0000683 const char *errors, /* error handling */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000684 Py_ssize_t *consumed /* bytes consumed */
Walter Dörwald69652032004-09-07 20:24:22 +0000685 );
686
Mark Hammond91a681d2002-08-12 07:21:58 +0000687PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
Guido van Rossumd8225182000-03-10 22:33:05 +0000688 PyObject *unicode /* Unicode object */
689 );
690
Mark Hammond91a681d2002-08-12 07:21:58 +0000691PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
Guido van Rossumd8225182000-03-10 22:33:05 +0000692 const Py_UNICODE *data, /* Unicode char buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000693 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
Guido van Rossumd8225182000-03-10 22:33:05 +0000694 const char *errors /* error handling */
695 );
696
697/* --- UTF-16 Codecs ------------------------------------------------------ */
698
Guido van Rossum9e896b32000-04-05 20:11:21 +0000699/* Decodes length bytes from a UTF-16 encoded buffer string and returns
Guido van Rossumd8225182000-03-10 22:33:05 +0000700 the corresponding Unicode object.
701
702 errors (if non-NULL) defines the error handling. It defaults
703 to "strict".
704
705 If byteorder is non-NULL, the decoder starts decoding using the
706 given byte order:
707
708 *byteorder == -1: little endian
709 *byteorder == 0: native order
710 *byteorder == 1: big endian
711
Marc-André Lemburg489b56e2001-05-21 20:30:15 +0000712 In native mode, the first two bytes of the stream are checked for a
713 BOM mark. If found, the BOM mark is analysed, the byte order
714 adjusted and the BOM skipped. In the other modes, no BOM mark
715 interpretation is done. After completion, *byteorder is set to the
716 current byte order at the end of input data.
Guido van Rossumd8225182000-03-10 22:33:05 +0000717
718 If byteorder is NULL, the codec starts in native order mode.
719
720*/
721
Mark Hammond91a681d2002-08-12 07:21:58 +0000722PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
Guido van Rossumd8225182000-03-10 22:33:05 +0000723 const char *string, /* UTF-16 encoded string */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000724 Py_ssize_t length, /* size of string */
Guido van Rossumd8225182000-03-10 22:33:05 +0000725 const char *errors, /* error handling */
726 int *byteorder /* pointer to byteorder to use
727 0=native;-1=LE,1=BE; updated on
728 exit */
729 );
730
Walter Dörwald69652032004-09-07 20:24:22 +0000731PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
732 const char *string, /* UTF-16 encoded string */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000733 Py_ssize_t length, /* size of string */
Walter Dörwald69652032004-09-07 20:24:22 +0000734 const char *errors, /* error handling */
735 int *byteorder, /* pointer to byteorder to use
736 0=native;-1=LE,1=BE; updated on
737 exit */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000738 Py_ssize_t *consumed /* bytes consumed */
Walter Dörwald69652032004-09-07 20:24:22 +0000739 );
740
Guido van Rossumd8225182000-03-10 22:33:05 +0000741/* Returns a Python string using the UTF-16 encoding in native byte
742 order. The string always starts with a BOM mark. */
743
Mark Hammond91a681d2002-08-12 07:21:58 +0000744PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
Guido van Rossumd8225182000-03-10 22:33:05 +0000745 PyObject *unicode /* Unicode object */
746 );
747
748/* Returns a Python string object holding the UTF-16 encoded value of
Guido van Rossum9e896b32000-04-05 20:11:21 +0000749 the Unicode data.
Guido van Rossumd8225182000-03-10 22:33:05 +0000750
751 If byteorder is not 0, output is written according to the following
752 byte order:
753
754 byteorder == -1: little endian
755 byteorder == 0: native byte order (writes a BOM mark)
756 byteorder == 1: big endian
757
758 If byteorder is 0, the output string will always start with the
759 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
760 prepended.
761
762 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
763 UCS-2. This trick makes it possible to add full UTF-16 capabilities
Thomas Wouters7e474022000-07-16 12:04:32 +0000764 at a later point without compromising the APIs.
Guido van Rossumd8225182000-03-10 22:33:05 +0000765
766*/
767
Mark Hammond91a681d2002-08-12 07:21:58 +0000768PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
Guido van Rossumd8225182000-03-10 22:33:05 +0000769 const Py_UNICODE *data, /* Unicode char buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000770 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
Guido van Rossumd8225182000-03-10 22:33:05 +0000771 const char *errors, /* error handling */
772 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
773 );
774
775/* --- Unicode-Escape Codecs ---------------------------------------------- */
776
Mark Hammond91a681d2002-08-12 07:21:58 +0000777PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
Guido van Rossumd8225182000-03-10 22:33:05 +0000778 const char *string, /* Unicode-Escape encoded string */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000779 Py_ssize_t length, /* size of string */
Guido van Rossumd8225182000-03-10 22:33:05 +0000780 const char *errors /* error handling */
781 );
782
Mark Hammond91a681d2002-08-12 07:21:58 +0000783PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
Guido van Rossumd8225182000-03-10 22:33:05 +0000784 PyObject *unicode /* Unicode object */
785 );
786
Mark Hammond91a681d2002-08-12 07:21:58 +0000787PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
Guido van Rossumd8225182000-03-10 22:33:05 +0000788 const Py_UNICODE *data, /* Unicode char buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000789 Py_ssize_t length /* Number of Py_UNICODE chars to encode */
Guido van Rossumd8225182000-03-10 22:33:05 +0000790 );
791
792/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
793
Mark Hammond91a681d2002-08-12 07:21:58 +0000794PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
Guido van Rossumd8225182000-03-10 22:33:05 +0000795 const char *string, /* Raw-Unicode-Escape encoded string */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000796 Py_ssize_t length, /* size of string */
Guido van Rossumd8225182000-03-10 22:33:05 +0000797 const char *errors /* error handling */
798 );
799
Mark Hammond91a681d2002-08-12 07:21:58 +0000800PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
Guido van Rossumd8225182000-03-10 22:33:05 +0000801 PyObject *unicode /* Unicode object */
802 );
803
Mark Hammond91a681d2002-08-12 07:21:58 +0000804PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
Guido van Rossumd8225182000-03-10 22:33:05 +0000805 const Py_UNICODE *data, /* Unicode char buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000806 Py_ssize_t length /* Number of Py_UNICODE chars to encode */
Guido van Rossumd8225182000-03-10 22:33:05 +0000807 );
808
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000809/* --- Unicode Internal Codec ---------------------------------------------
810
811 Only for internal use in _codecsmodule.c */
812
813PyObject *_PyUnicode_DecodeUnicodeInternal(
814 const char *string,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000815 Py_ssize_t length,
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000816 const char *errors
817 );
818
Guido van Rossumd8225182000-03-10 22:33:05 +0000819/* --- Latin-1 Codecs -----------------------------------------------------
820
821 Note: Latin-1 corresponds to the first 256 Unicode ordinals.
822
823*/
824
Mark Hammond91a681d2002-08-12 07:21:58 +0000825PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
Guido van Rossumd8225182000-03-10 22:33:05 +0000826 const char *string, /* Latin-1 encoded string */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000827 Py_ssize_t length, /* size of string */
Guido van Rossumd8225182000-03-10 22:33:05 +0000828 const char *errors /* error handling */
829 );
830
Mark Hammond91a681d2002-08-12 07:21:58 +0000831PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
Guido van Rossumd8225182000-03-10 22:33:05 +0000832 PyObject *unicode /* Unicode object */
833 );
834
Mark Hammond91a681d2002-08-12 07:21:58 +0000835PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
Guido van Rossumd8225182000-03-10 22:33:05 +0000836 const Py_UNICODE *data, /* Unicode char buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000837 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
Guido van Rossumd8225182000-03-10 22:33:05 +0000838 const char *errors /* error handling */
839 );
840
841/* --- ASCII Codecs -------------------------------------------------------
842
843 Only 7-bit ASCII data is accepted. All other codes generate errors.
844
845*/
846
Mark Hammond91a681d2002-08-12 07:21:58 +0000847PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
Guido van Rossumd8225182000-03-10 22:33:05 +0000848 const char *string, /* ASCII encoded string */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000849 Py_ssize_t length, /* size of string */
Guido van Rossumd8225182000-03-10 22:33:05 +0000850 const char *errors /* error handling */
851 );
852
Mark Hammond91a681d2002-08-12 07:21:58 +0000853PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
Guido van Rossumd8225182000-03-10 22:33:05 +0000854 PyObject *unicode /* Unicode object */
855 );
856
Mark Hammond91a681d2002-08-12 07:21:58 +0000857PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
Guido van Rossumd8225182000-03-10 22:33:05 +0000858 const Py_UNICODE *data, /* Unicode char buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000859 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
Guido van Rossumd8225182000-03-10 22:33:05 +0000860 const char *errors /* error handling */
861 );
862
863/* --- Character Map Codecs -----------------------------------------------
864
865 This codec uses mappings to encode and decode characters.
866
867 Decoding mappings must map single string characters to single
868 Unicode characters, integers (which are then interpreted as Unicode
869 ordinals) or None (meaning "undefined mapping" and causing an
870 error).
871
872 Encoding mappings must map single Unicode characters to single
873 string characters, integers (which are then interpreted as Latin-1
874 ordinals) or None (meaning "undefined mapping" and causing an
875 error).
876
877 If a character lookup fails with a LookupError, the character is
878 copied as-is meaning that its ordinal value will be interpreted as
879 Unicode or Latin-1 ordinal resp. Because of this, mappings only need
880 to contain those mappings which map characters to different code
881 points.
882
883*/
884
Mark Hammond91a681d2002-08-12 07:21:58 +0000885PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
Guido van Rossumd8225182000-03-10 22:33:05 +0000886 const char *string, /* Encoded string */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000887 Py_ssize_t length, /* size of string */
Guido van Rossumd8225182000-03-10 22:33:05 +0000888 PyObject *mapping, /* character mapping
889 (char ordinal -> unicode ordinal) */
890 const char *errors /* error handling */
891 );
892
Mark Hammond91a681d2002-08-12 07:21:58 +0000893PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
Guido van Rossumd8225182000-03-10 22:33:05 +0000894 PyObject *unicode, /* Unicode object */
895 PyObject *mapping /* character mapping
896 (unicode ordinal -> char ordinal) */
897 );
898
Mark Hammond91a681d2002-08-12 07:21:58 +0000899PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
Guido van Rossumd8225182000-03-10 22:33:05 +0000900 const Py_UNICODE *data, /* Unicode char buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000901 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
Guido van Rossumd8225182000-03-10 22:33:05 +0000902 PyObject *mapping, /* character mapping
903 (unicode ordinal -> char ordinal) */
904 const char *errors /* error handling */
905 );
906
907/* Translate a Py_UNICODE buffer of the given length by applying a
908 character mapping table to it and return the resulting Unicode
909 object.
910
911 The mapping table must map Unicode ordinal integers to Unicode
912 ordinal integers or None (causing deletion of the character).
913
914 Mapping tables may be dictionaries or sequences. Unmapped character
915 ordinals (ones which cause a LookupError) are left untouched and
916 are copied as-is.
917
918*/
919
Mark Hammond91a681d2002-08-12 07:21:58 +0000920PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
Guido van Rossumd8225182000-03-10 22:33:05 +0000921 const Py_UNICODE *data, /* Unicode char buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000922 Py_ssize_t length, /* Number of Py_UNICODE chars to translate */
Guido van Rossumd8225182000-03-10 22:33:05 +0000923 PyObject *table, /* Translate table: maps Unicode ordinals to
                        Unicode ordinals or None */
924 const char *errors /* error handling */
925 );
926
Guido van Rossumefec1152000-03-28 02:01:15 +0000927#ifdef MS_WIN32
Guido van Rossum24bdb042000-03-28 20:29:59 +0000928
Guido van Rossumefec1152000-03-28 02:01:15 +0000929/* --- MBCS codecs for Windows -------------------------------------------- */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000930
Mark Hammond91a681d2002-08-12 07:21:58 +0000931PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
Guido van Rossumefec1152000-03-28 02:01:15 +0000932 const char *string, /* MBCS encoded string */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000933 Py_ssize_t length, /* size of string */
Guido van Rossumefec1152000-03-28 02:01:15 +0000934 const char *errors /* error handling */
935 );
936
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000937PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
938 const char *string, /* MBCS encoded string */
939 Py_ssize_t length, /* size of string */
940 const char *errors, /* error handling */
941 Py_ssize_t *consumed /* bytes consumed */
942 );
943
Mark Hammond91a681d2002-08-12 07:21:58 +0000944PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
Guido van Rossumefec1152000-03-28 02:01:15 +0000945 PyObject *unicode /* Unicode object */
946 );
947
Mark Hammond91a681d2002-08-12 07:21:58 +0000948PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
Guido van Rossumefec1152000-03-28 02:01:15 +0000949 const Py_UNICODE *data, /* Unicode char buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000950 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
Guido van Rossumefec1152000-03-28 02:01:15 +0000951 const char *errors /* error handling */
952 );
953
Guido van Rossumefec1152000-03-28 02:01:15 +0000954#endif /* MS_WIN32 */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000955
Guido van Rossum9e896b32000-04-05 20:11:21 +0000956/* --- Decimal Encoder ---------------------------------------------------- */
957
958/* Takes a Unicode string holding a decimal value and writes it into
959 an output buffer using standard ASCII digit codes.
960
961 The output buffer has to provide at least length+1 bytes of storage
962 area. The output string is 0-terminated.
963
964 The encoder converts whitespace to ' ', decimal characters to their
965 corresponding ASCII digit and all other Latin-1 characters except
966 \0 as-is. Characters outside this range (Unicode ordinals 1-256)
967 are treated as errors. This includes embedded NULL bytes.
968
969 Error handling is defined by the errors argument:
970
971 NULL or "strict": raise a ValueError
972 "ignore": ignore the wrong characters (these are not copied to the
973 output buffer)
974 "replace": replaces illegal characters with '?'
975
976 Returns 0 on success, -1 on failure.
977
978*/
979
Mark Hammond91a681d2002-08-12 07:21:58 +0000980PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
Guido van Rossum9e896b32000-04-05 20:11:21 +0000981 Py_UNICODE *s, /* Unicode buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000982 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
Guido van Rossum9e896b32000-04-05 20:11:21 +0000983 char *output, /* Output buffer; must have size >= length+1
                     because the output is 0-terminated */
984 const char *errors /* error handling: NULL or "strict", "ignore"
                          or "replace" */
985 );
986
Guido van Rossumd8225182000-03-10 22:33:05 +0000987/* --- Methods & Slots ----------------------------------------------------
988
989 These are capable of handling Unicode objects and strings on input
990 (we refer to them as strings in the descriptions) and return
991 Unicode objects or integers as appropriate. */
992
993/* Concat two strings giving a new Unicode string. */
994
Mark Hammond91a681d2002-08-12 07:21:58 +0000995PyAPI_FUNC(PyObject*) PyUnicode_Concat(
Guido van Rossumd8225182000-03-10 22:33:05 +0000996 PyObject *left, /* Left string */
997 PyObject *right /* Right string */
998 );
999
1000/* Split a string giving a list of Unicode strings.
1001
1002 If sep is NULL, splitting will be done at all whitespace
1003 substrings. Otherwise, splits occur at the given separator.
1004
1005 At most maxsplit splits will be done. If negative, no limit is set.
1006
1007 Separators are not included in the resulting list.
1008
1009*/
1010
Mark Hammond91a681d2002-08-12 07:21:58 +00001011PyAPI_FUNC(PyObject*) PyUnicode_Split(
Guido van Rossumd8225182000-03-10 22:33:05 +00001012 PyObject *s, /* String to split */
1013 PyObject *sep, /* String separator */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001014 Py_ssize_t maxsplit /* Maxsplit count */
Guido van Rossumd8225182000-03-10 22:33:05 +00001015 );
1016
1017/* Split a string at line breaks giving a list of Unicode strings.
1018
1019 CRLF is considered to be one line break. Line breaks are not
1020 included in the resulting list unless keepends is true. */
1021
Mark Hammond91a681d2002-08-12 07:21:58 +00001022PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
Guido van Rossumd8225182000-03-10 22:33:05 +00001023 PyObject *s, /* String to split */
Guido van Rossum004d64f2000-04-11 15:39:46 +00001024 int keepends /* If true, line end markers are included */
Guido van Rossumd8225182000-03-10 22:33:05 +00001025 );
1026
Thomas Wouters477c8d52006-05-27 19:21:47 +00001027/* Partition a string using a given separator. */
1028
1029PyAPI_FUNC(PyObject*) PyUnicode_Partition(
1030 PyObject *s, /* String to partition */
1031 PyObject *sep /* String separator */
1032 );
1033
1034/* Partition a string using a given separator, searching from the end of the
1035 string. */
1036
1037PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
1038 PyObject *s, /* String to partition */
1039 PyObject *sep /* String separator */
1040 );
1041
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001042/* Split a string giving a list of Unicode strings.
1043
1044 If sep is NULL, splitting will be done at all whitespace
1045 substrings. Otherwise, splits occur at the given separator.
1046
1047 At most maxsplit splits will be done; if maxsplit is negative, no
1048 limit is set. Unlike PyUnicode_Split, PyUnicode_RSplit splits from
1049 the end of the string.
1050
1051 Separators are not included in the resulting list.
1052
1053*/
1054
1055PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
1056 PyObject *s, /* String to split */
1057 PyObject *sep, /* String separator */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001058 Py_ssize_t maxsplit /* Maxsplit count */
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001059 );
1060
Guido van Rossumd8225182000-03-10 22:33:05 +00001061/* Translate a string by applying a character mapping table to it and
1062 return the resulting Unicode object.
1063
1064 The mapping table must map Unicode ordinal integers to Unicode
1065 ordinal integers or None (causing deletion of the character).
1066
1067 Mapping tables may be dictionaries or sequences. Unmapped character
1068 ordinals (ones which cause a LookupError) are left untouched and
1069 are copied as-is.
1070
1071*/
1072
Mark Hammond91a681d2002-08-12 07:21:58 +00001073PyAPI_FUNC(PyObject *) PyUnicode_Translate(
Guido van Rossumd8225182000-03-10 22:33:05 +00001074 PyObject *str, /* String */
1075 PyObject *table, /* Translate table */
1076 const char *errors /* error handling */
1077 );
1078
1079/* Join a sequence of strings using the given separator and return
1080 the resulting Unicode string. */
1081
Mark Hammond91a681d2002-08-12 07:21:58 +00001082PyAPI_FUNC(PyObject*) PyUnicode_Join(
Guido van Rossumd8225182000-03-10 22:33:05 +00001083 PyObject *separator, /* Separator string */
1084 PyObject *seq /* Sequence object */
1085 );
1086
1087/* Return 1 if substr matches str[start:end] at the given tail end, 0
1088 otherwise. */
1089
Martin v. Löwis18e16552006-02-15 17:27:45 +00001090PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
Guido van Rossumd8225182000-03-10 22:33:05 +00001091 PyObject *str, /* String */
1092 PyObject *substr, /* Prefix or Suffix string */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001093 Py_ssize_t start, /* Start index */
1094 Py_ssize_t end, /* Stop index */
Guido van Rossumd8225182000-03-10 22:33:05 +00001095 int direction /* Tail end: -1 prefix, +1 suffix */
1096 );
1097
1098/* Return the first position of substr in str[start:end] using the
Marc-André Lemburg4da6fd62002-05-29 11:33:13 +00001099 given search direction or -1 if not found. -2 is returned in case
1100 an error occurred and an exception is set. */
Guido van Rossumd8225182000-03-10 22:33:05 +00001101
Martin v. Löwis18e16552006-02-15 17:27:45 +00001102PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
Guido van Rossumd8225182000-03-10 22:33:05 +00001103 PyObject *str, /* String */
1104 PyObject *substr, /* Substring to find */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001105 Py_ssize_t start, /* Start index */
1106 Py_ssize_t end, /* Stop index */
Guido van Rossumd8225182000-03-10 22:33:05 +00001107 int direction /* Find direction: +1 forward, -1 backward */
1108 );
1109
Barry Warsaw51ac5802000-03-20 16:36:48 +00001110/* Count the number of occurrences of substr in str[start:end]. */
Guido van Rossumd8225182000-03-10 22:33:05 +00001111
Martin v. Löwis18e16552006-02-15 17:27:45 +00001112PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
Guido van Rossumd8225182000-03-10 22:33:05 +00001113 PyObject *str, /* String */
1114 PyObject *substr, /* Substring to count */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001115 Py_ssize_t start, /* Start index */
1116 Py_ssize_t end /* Stop index */
Guido van Rossumd8225182000-03-10 22:33:05 +00001117 );
1118
Barry Warsaw51ac5802000-03-20 16:36:48 +00001119/* Replace at most maxcount occurrences of substr in str with replstr
Guido van Rossumd8225182000-03-10 22:33:05 +00001120 and return the resulting Unicode object. */
1121
Mark Hammond91a681d2002-08-12 07:21:58 +00001122PyAPI_FUNC(PyObject *) PyUnicode_Replace(
Guido van Rossumd8225182000-03-10 22:33:05 +00001123 PyObject *str, /* String */
1124 PyObject *substr, /* Substring to find */
1125 PyObject *replstr, /* Substring to replace */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001126 Py_ssize_t maxcount /* Max. number of replacements to apply;
Guido van Rossumd8225182000-03-10 22:33:05 +00001127 -1 = all */
1128 );
1129
1130/* Compare two strings and return -1, 0, 1 for less than, equal,
1131 greater than resp. */
1132
Mark Hammond91a681d2002-08-12 07:21:58 +00001133PyAPI_FUNC(int) PyUnicode_Compare(
Guido van Rossumd8225182000-03-10 22:33:05 +00001134 PyObject *left, /* Left string */
1135 PyObject *right /* Right string */
1136 );
1137
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001138/* Rich compare two strings and return one of the following:
1139
1140 - NULL in case an exception was raised
1141 - Py_True or Py_False for successful comparisons
1142 - Py_NotImplemented in case the type combination is unknown
1143
1144 Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
1145 case the conversion of the arguments to Unicode fails with a
1146 UnicodeDecodeError.
1147
1148 Possible values for op:
1149
1150 Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
1151
1152*/
1153
1154PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
1155 PyObject *left, /* Left string */
1156 PyObject *right, /* Right string */
1157 int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
1158 );
1159
Thomas Wouters7e474022000-07-16 12:04:32 +00001160/* Apply an argument tuple or dictionary to a format string and return
Guido van Rossumd8225182000-03-10 22:33:05 +00001161 the resulting Unicode string. */
1162
Mark Hammond91a681d2002-08-12 07:21:58 +00001163PyAPI_FUNC(PyObject *) PyUnicode_Format(
Guido van Rossumd8225182000-03-10 22:33:05 +00001164 PyObject *format, /* Format string */
1165 PyObject *args /* Argument tuple or dictionary */
1166 );
1167
Guido van Rossumd0d366b2000-03-13 23:22:24 +00001168/* Checks whether element is contained in container and return 1/0
1169 accordingly.
1170
1171 element has to coerce to a one-element Unicode string. -1 is
1172 returned in case of an error. */
1173
Mark Hammond91a681d2002-08-12 07:21:58 +00001174PyAPI_FUNC(int) PyUnicode_Contains(
Guido van Rossumd0d366b2000-03-13 23:22:24 +00001175 PyObject *container, /* Container string */
1176 PyObject *element /* Element string */
1177 );
1178
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001179/* Externally visible for str.strip(unicode) */
Mark Hammond91a681d2002-08-12 07:21:58 +00001180PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001181 PyUnicodeObject *self,
1182 int striptype,
1183 PyObject *sepobj
1184 );
1185
Guido van Rossumd8225182000-03-10 22:33:05 +00001186/* === Characters Type APIs =============================================== */
1187
1188/* These should not be used directly. Use the Py_UNICODE_IS* and
1189 Py_UNICODE_TO* macros instead.
1190
1191 These APIs are implemented in Objects/unicodectype.c.
1192
1193*/
1194
Mark Hammond91a681d2002-08-12 07:21:58 +00001195PyAPI_FUNC(int) _PyUnicode_IsLowercase(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001196 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001197 );
1198
Mark Hammond91a681d2002-08-12 07:21:58 +00001199PyAPI_FUNC(int) _PyUnicode_IsUppercase(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001200 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001201 );
1202
Mark Hammond91a681d2002-08-12 07:21:58 +00001203PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001204 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001205 );
1206
Mark Hammond91a681d2002-08-12 07:21:58 +00001207PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
Tim Peters2576c972005-10-29 02:33:18 +00001208 const Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001209 );
1210
Mark Hammond91a681d2002-08-12 07:21:58 +00001211PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
Tim Peters2576c972005-10-29 02:33:18 +00001212 const Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001213 );
1214
Mark Hammond91a681d2002-08-12 07:21:58 +00001215PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001216 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001217 );
1218
Mark Hammond91a681d2002-08-12 07:21:58 +00001219PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToUppercase(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001220 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001221 );
1222
Mark Hammond91a681d2002-08-12 07:21:58 +00001223PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToTitlecase(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001224 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001225 );
1226
Mark Hammond91a681d2002-08-12 07:21:58 +00001227PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001228 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001229 );
1230
Mark Hammond91a681d2002-08-12 07:21:58 +00001231PyAPI_FUNC(int) _PyUnicode_ToDigit(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001232 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001233 );
1234
Mark Hammond91a681d2002-08-12 07:21:58 +00001235PyAPI_FUNC(double) _PyUnicode_ToNumeric(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001236 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001237 );
1238
Mark Hammond91a681d2002-08-12 07:21:58 +00001239PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001240 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001241 );
1242
Mark Hammond91a681d2002-08-12 07:21:58 +00001243PyAPI_FUNC(int) _PyUnicode_IsDigit(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001244 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001245 );
1246
Mark Hammond91a681d2002-08-12 07:21:58 +00001247PyAPI_FUNC(int) _PyUnicode_IsNumeric(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001248 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001249 );
1250
Mark Hammond91a681d2002-08-12 07:21:58 +00001251PyAPI_FUNC(int) _PyUnicode_IsAlpha(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001252 Py_UNICODE ch /* Unicode character */
Marc-André Lemburgf03e7412000-07-05 09:45:59 +00001253 );
1254
Guido van Rossumd8225182000-03-10 22:33:05 +00001255#ifdef __cplusplus
1256}
1257#endif
Guido van Rossumd8225182000-03-10 22:33:05 +00001258#endif /* !Py_UNICODEOBJECT_H */