blob: 2a207797f30adaee0bb3836b2780a2472d611064 [file] [log] [blame]
Guido van Rossumd8225182000-03-10 22:33:05 +00001#ifndef Py_UNICODEOBJECT_H
2#define Py_UNICODEOBJECT_H
Guido van Rossumd8225182000-03-10 22:33:05 +00003
Christian Heimesaf98da12008-01-27 15:18:18 +00004#include <stdarg.h>
5
Guido van Rossumd8225182000-03-10 22:33:05 +00006/*
7
8Unicode implementation based on original code by Fredrik Lundh,
9modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
Alexander Belopolsky83283c22010-11-16 14:29:01 +000010Unicode Integration Proposal. (See
11http://www.egenix.com/files/python/unicode-proposal.txt).
Guido van Rossumd8225182000-03-10 22:33:05 +000012
Guido van Rossum16b1ad92000-08-03 16:24:25 +000013Copyright (c) Corporation for National Research Initiatives.
Guido van Rossumd8225182000-03-10 22:33:05 +000014
15
16 Original header:
17 --------------------------------------------------------------------
18
19 * Yet another Unicode string type for Python. This type supports the
20 * 16-bit Basic Multilingual Plane (BMP) only.
21 *
22 * Written by Fredrik Lundh, January 1999.
23 *
24 * Copyright (c) 1999 by Secret Labs AB.
25 * Copyright (c) 1999 by Fredrik Lundh.
26 *
27 * fredrik@pythonware.com
28 * http://www.pythonware.com
29 *
30 * --------------------------------------------------------------------
31 * This Unicode String Type is
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000032 *
Guido van Rossumd8225182000-03-10 22:33:05 +000033 * Copyright (c) 1999 by Secret Labs AB
34 * Copyright (c) 1999 by Fredrik Lundh
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000035 *
Guido van Rossumd8225182000-03-10 22:33:05 +000036 * By obtaining, using, and/or copying this software and/or its
37 * associated documentation, you agree that you have read, understood,
38 * and will comply with the following terms and conditions:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000039 *
Guido van Rossumd8225182000-03-10 22:33:05 +000040 * Permission to use, copy, modify, and distribute this software and its
41 * associated documentation for any purpose and without fee is hereby
42 * granted, provided that the above copyright notice appears in all
43 * copies, and that both that copyright notice and this permission notice
44 * appear in supporting documentation, and that the name of Secret Labs
45 * AB or the author not be used in advertising or publicity pertaining to
46 * distribution of the software without specific, written prior
47 * permission.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000048 *
Guido van Rossumd8225182000-03-10 22:33:05 +000049 * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
50 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
51 * FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
52 * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
53 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
54 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
55 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
56 * -------------------------------------------------------------------- */
57
Marc-André Lemburg5e6007c2001-09-19 11:21:03 +000058#include <ctype.h>
Guido van Rossumd8225182000-03-10 22:33:05 +000059
60/* === Internal API ======================================================= */
61
62/* --- Internal Unicode Format -------------------------------------------- */
63
Christian Heimes0625e892008-01-07 21:04:21 +000064/* Python 3.x requires unicode */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065#define Py_USING_UNICODE
Christian Heimes0625e892008-01-07 21:04:21 +000066
/* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is
   properly set, but the default rules below don't set it.  I'll
   sort this out some other day -- fredrik@pythonware.com */

/* Py_UNICODE_SIZE must already be defined when this header is read;
   the width and storage-type selection below tests its value. */
#ifndef Py_UNICODE_SIZE
#error Must define Py_UNICODE_SIZE
#endif
74
/* Setting Py_UNICODE_WIDE enables UCS-4 storage. Otherwise, Unicode
   strings are stored as UCS-2 (with limited support for UTF-16).
   The flag is derived from Py_UNICODE_SIZE: any value of 4 or more
   selects the wide variant. */

#if Py_UNICODE_SIZE >= 4
#define Py_UNICODE_WIDE
#endif
Fredrik Lundh1294ad02001-06-26 17:17:07 +000081
/* Set these flags if the platform has "wchar.h" and the
   wchar_t type is a 16-bit unsigned type */
/* #define HAVE_WCHAR_H */
/* #define HAVE_USABLE_WCHAR_T */

/* Defaults for various platforms.  They only apply when
   PY_UNICODE_TYPE has not been defined already. */
#ifndef PY_UNICODE_TYPE

/* Windows has a usable wchar_t type (unless we're using UCS-4) */
# if defined(MS_WIN32) && Py_UNICODE_SIZE == 2
# define HAVE_USABLE_WCHAR_T
# define PY_UNICODE_TYPE wchar_t
# endif

/* Wide builds store code units as Py_UCS4. */
# if defined(Py_UNICODE_WIDE)
# define PY_UNICODE_TYPE Py_UCS4
# endif

/* NOTE(review): a 2-byte build without MS_WIN32 leaves PY_UNICODE_TYPE
   undefined at this point -- presumably the build configuration is
   expected to supply it; confirm. */

#endif
101
/* If the compiler provides a wchar_t type we try to support it
   through the interface functions PyUnicode_FromWideChar(),
   PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */

/* A usable wchar_t implies that <wchar.h> can be included. */
#ifdef HAVE_USABLE_WCHAR_T
# ifndef HAVE_WCHAR_H
# define HAVE_WCHAR_H
# endif
#endif

#ifdef HAVE_WCHAR_H
/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
# ifdef _HAVE_BSDI
# include <time.h>
# endif
# include <wchar.h>
#endif
119
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +0000120/*
121 * Use this typedef when you need to represent a UTF-16 surrogate pair
122 * as single unsigned integer.
123 */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000124#if SIZEOF_INT >= 4
125typedef unsigned int Py_UCS4;
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +0000126#elif SIZEOF_LONG >= 4
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000127typedef unsigned long Py_UCS4;
Guido van Rossumd8225182000-03-10 22:33:05 +0000128#endif
129
/* Py_UNICODE is the native Unicode storage format (code unit) used by
   Python and represents a single Unicode element in the Unicode
   type.  Its underlying C type is whatever PY_UNICODE_TYPE expands to. */

typedef PY_UNICODE_TYPE Py_UNICODE;
Marc-André Lemburg43279102000-07-07 09:01:41 +0000135
/* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */

/* Unicode API names are mangled so that UCS-2 and UCS-4 builds produce
   different external names.  Combining an interpreter and extensions
   that were compiled with different Unicode width assumptions then
   causes import errors.

   Each public name maps to PyUnicodeUCS2_* when Py_UNICODE_WIDE is not
   defined and to PyUnicodeUCS4_* otherwise; the two tables list the
   same names in the same order.

   NOTE(review): PyUnicode_ClearFreeList maps to a name spelled
   "ClearFreelist" (lower-case l) in both tables; kept as found. */

#ifndef Py_UNICODE_WIDE

# define PyUnicode_AsASCIIString PyUnicodeUCS2_AsASCIIString
# define PyUnicode_AsCharmapString PyUnicodeUCS2_AsCharmapString
# define PyUnicode_AsDecodedObject PyUnicodeUCS2_AsDecodedObject
# define PyUnicode_AsDecodedUnicode PyUnicodeUCS2_AsDecodedUnicode
# define PyUnicode_AsEncodedObject PyUnicodeUCS2_AsEncodedObject
# define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString
# define PyUnicode_AsEncodedUnicode PyUnicodeUCS2_AsEncodedUnicode
# define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String
# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString
# define PyUnicode_AsUTF32String PyUnicodeUCS2_AsUTF32String
# define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String
# define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String
# define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString
# define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar
# define PyUnicode_AsWideCharString PyUnicodeUCS2_AsWideCharString
# define PyUnicode_ClearFreeList PyUnicodeUCS2_ClearFreelist
# define PyUnicode_Compare PyUnicodeUCS2_Compare
# define PyUnicode_CompareWithASCIIString PyUnicodeUCS2_CompareWithASCIIString
# define PyUnicode_Concat PyUnicodeUCS2_Concat
# define PyUnicode_Append PyUnicodeUCS2_Append
# define PyUnicode_AppendAndDel PyUnicodeUCS2_AppendAndDel
# define PyUnicode_Contains PyUnicodeUCS2_Contains
# define PyUnicode_Count PyUnicodeUCS2_Count
# define PyUnicode_Decode PyUnicodeUCS2_Decode
# define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
# define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
# define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
# define PyUnicode_DecodeFSDefault PyUnicodeUCS2_DecodeFSDefault
# define PyUnicode_DecodeFSDefaultAndSize PyUnicodeUCS2_DecodeFSDefaultAndSize
# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
# define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32
# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful
# define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful
# define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful
# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape
# define PyUnicode_Encode PyUnicodeUCS2_Encode
# define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII
# define PyUnicode_EncodeCharmap PyUnicodeUCS2_EncodeCharmap
# define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal
# define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1
# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape
# define PyUnicode_EncodeUTF32 PyUnicodeUCS2_EncodeUTF32
# define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16
# define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8
# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape
# define PyUnicode_Find PyUnicodeUCS2_Find
# define PyUnicode_Format PyUnicodeUCS2_Format
# define PyUnicode_FromEncodedObject PyUnicodeUCS2_FromEncodedObject
# define PyUnicode_FromFormat PyUnicodeUCS2_FromFormat
# define PyUnicode_FromFormatV PyUnicodeUCS2_FromFormatV
# define PyUnicode_FromObject PyUnicodeUCS2_FromObject
# define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal
# define PyUnicode_FromString PyUnicodeUCS2_FromString
# define PyUnicode_FromStringAndSize PyUnicodeUCS2_FromStringAndSize
# define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
# define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
# define PyUnicode_FSConverter PyUnicodeUCS2_FSConverter
# define PyUnicode_FSDecoder PyUnicodeUCS2_FSDecoder
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
# define PyUnicode_GetMax PyUnicodeUCS2_GetMax
# define PyUnicode_GetSize PyUnicodeUCS2_GetSize
# define PyUnicode_IsIdentifier PyUnicodeUCS2_IsIdentifier
# define PyUnicode_Join PyUnicodeUCS2_Join
# define PyUnicode_Partition PyUnicodeUCS2_Partition
# define PyUnicode_RPartition PyUnicodeUCS2_RPartition
# define PyUnicode_RSplit PyUnicodeUCS2_RSplit
# define PyUnicode_Replace PyUnicodeUCS2_Replace
# define PyUnicode_Resize PyUnicodeUCS2_Resize
# define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare
# define PyUnicode_Split PyUnicodeUCS2_Split
# define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines
# define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch
# define PyUnicode_Translate PyUnicodeUCS2_Translate
# define PyUnicode_TranslateCharmap PyUnicodeUCS2_TranslateCharmap
# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString
# define _PyUnicode_Fini _PyUnicodeUCS2_Fini
# define _PyUnicode_Init _PyUnicodeUCS2_Init
# define PyUnicode_strdup PyUnicodeUCS2_strdup

#else

# define PyUnicode_AsASCIIString PyUnicodeUCS4_AsASCIIString
# define PyUnicode_AsCharmapString PyUnicodeUCS4_AsCharmapString
# define PyUnicode_AsDecodedObject PyUnicodeUCS4_AsDecodedObject
# define PyUnicode_AsDecodedUnicode PyUnicodeUCS4_AsDecodedUnicode
# define PyUnicode_AsEncodedObject PyUnicodeUCS4_AsEncodedObject
# define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString
# define PyUnicode_AsEncodedUnicode PyUnicodeUCS4_AsEncodedUnicode
# define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String
# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString
# define PyUnicode_AsUTF32String PyUnicodeUCS4_AsUTF32String
# define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String
# define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String
# define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString
# define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar
# define PyUnicode_AsWideCharString PyUnicodeUCS4_AsWideCharString
# define PyUnicode_ClearFreeList PyUnicodeUCS4_ClearFreelist
# define PyUnicode_Compare PyUnicodeUCS4_Compare
# define PyUnicode_CompareWithASCIIString PyUnicodeUCS4_CompareWithASCIIString
# define PyUnicode_Concat PyUnicodeUCS4_Concat
# define PyUnicode_Append PyUnicodeUCS4_Append
# define PyUnicode_AppendAndDel PyUnicodeUCS4_AppendAndDel
# define PyUnicode_Contains PyUnicodeUCS4_Contains
# define PyUnicode_Count PyUnicodeUCS4_Count
# define PyUnicode_Decode PyUnicodeUCS4_Decode
# define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
# define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
# define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
# define PyUnicode_DecodeFSDefault PyUnicodeUCS4_DecodeFSDefault
# define PyUnicode_DecodeFSDefaultAndSize PyUnicodeUCS4_DecodeFSDefaultAndSize
# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
# define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32
# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful
# define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful
# define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful
# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape
# define PyUnicode_Encode PyUnicodeUCS4_Encode
# define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII
# define PyUnicode_EncodeCharmap PyUnicodeUCS4_EncodeCharmap
# define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal
# define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1
# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape
# define PyUnicode_EncodeUTF32 PyUnicodeUCS4_EncodeUTF32
# define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16
# define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8
# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape
# define PyUnicode_Find PyUnicodeUCS4_Find
# define PyUnicode_Format PyUnicodeUCS4_Format
# define PyUnicode_FromEncodedObject PyUnicodeUCS4_FromEncodedObject
# define PyUnicode_FromFormat PyUnicodeUCS4_FromFormat
# define PyUnicode_FromFormatV PyUnicodeUCS4_FromFormatV
# define PyUnicode_FromObject PyUnicodeUCS4_FromObject
# define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal
# define PyUnicode_FromString PyUnicodeUCS4_FromString
# define PyUnicode_FromStringAndSize PyUnicodeUCS4_FromStringAndSize
# define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
# define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
# define PyUnicode_FSConverter PyUnicodeUCS4_FSConverter
# define PyUnicode_FSDecoder PyUnicodeUCS4_FSDecoder
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
# define PyUnicode_GetMax PyUnicodeUCS4_GetMax
# define PyUnicode_GetSize PyUnicodeUCS4_GetSize
# define PyUnicode_IsIdentifier PyUnicodeUCS4_IsIdentifier
# define PyUnicode_Join PyUnicodeUCS4_Join
# define PyUnicode_Partition PyUnicodeUCS4_Partition
# define PyUnicode_RPartition PyUnicodeUCS4_RPartition
# define PyUnicode_RSplit PyUnicodeUCS4_RSplit
# define PyUnicode_Replace PyUnicodeUCS4_Replace
# define PyUnicode_Resize PyUnicodeUCS4_Resize
# define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare
# define PyUnicode_Split PyUnicodeUCS4_Split
# define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines
# define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch
# define PyUnicode_Translate PyUnicodeUCS4_Translate
# define PyUnicode_TranslateCharmap PyUnicodeUCS4_TranslateCharmap
# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString
# define _PyUnicode_Fini _PyUnicodeUCS4_Fini
# define _PyUnicode_Init _PyUnicodeUCS4_Init
# define PyUnicode_strdup PyUnicodeUCS4_strdup

#endif
312
/* --- Internal Unicode Operations ---------------------------------------- */

/* Since splitting on whitespace is an important use case, and
   whitespace in most situations is solely ASCII whitespace, we
   optimize for the common case by using a quick look-up table
   _Py_ascii_whitespace (see below) with an inlined check.

   Values below 128 are answered from the table; everything else is
   passed to _PyUnicode_IsWhitespace().  The argument appears more than
   once in the expansion, so it must be free of side effects.
 */
#define Py_UNICODE_ISSPACE(ch) \
    ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
Guido van Rossumd8225182000-03-10 22:33:05 +0000323
/* The macros below are thin aliases: each one forwards its argument
   unchanged to the _PyUnicode_* function of the matching name. */

/* Case and line-break predicates. */
#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)

/* Case conversions. */
#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)

/* Numeric and printable predicates. */
#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)

/* Numeric value conversions. */
#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)

/* Alphabetic predicate. */
#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
Guido van Rossumd8225182000-03-10 22:33:05 +0000343
/* True if ch satisfies any of Py_UNICODE_ISALPHA, Py_UNICODE_ISDECIMAL,
   Py_UNICODE_ISDIGIT or Py_UNICODE_ISNUMERIC.  The tests short-circuit
   in that order; ch may be expanded up to four times, so it must be
   free of side effects. */
#define Py_UNICODE_ISALNUM(ch) \
    (Py_UNICODE_ISALPHA(ch) || \
     Py_UNICODE_ISDECIMAL(ch) || \
     Py_UNICODE_ISDIGIT(ch) || \
     Py_UNICODE_ISNUMERIC(ch))
Marc-André Lemburga9c103b2000-07-03 10:52:13 +0000349
/* Copy `length` Py_UNICODE code units from `source` to `target` with
   Py_MEMCPY.  `length` counts code units, not bytes: the macro applies
   the sizeof(Py_UNICODE) scaling itself. */
#define Py_UNICODE_COPY(target, source, length) \
    Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE))
Guido van Rossumd8225182000-03-10 22:33:05 +0000352
/* Store `value` into the first `length` code units of `target`.

   Each argument is evaluated exactly once (in the order target, value,
   length) and captured in a local, so arguments with side effects or
   non-trivial cost are safe and the loop bound cannot change while the
   buffer is being filled.  A `length` of zero or less writes nothing. */
#define Py_UNICODE_FILL(target, value, length) \
    do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
        Py_ssize_t n_ = (length);\
        for (i_ = 0; i_ < n_; i_++) t_[i_] = v_;\
    } while (0)
Guido van Rossumd8225182000-03-10 22:33:05 +0000357
/* Test whether `substring` occurs in `string` starting at `offset`.

   The offset must be valid, and the substring must not be empty: the
   last code unit of the substring is addressed as index length-1.
   The first and last code units are compared before the full memcmp()
   so that most mismatches are rejected cheaply. */

#define Py_UNICODE_MATCH(string, offset, substring) \
    ((string)->str[(offset)] == (substring)->str[0] && \
     (string)->str[(offset) + (substring)->length - 1] == \
         (substring)->str[(substring)->length - 1] && \
     memcmp(&(string)->str[(offset)], (substring)->str, \
            (substring)->length * sizeof(Py_UNICODE)) == 0)
Guido van Rossumd8225182000-03-10 22:33:05 +0000365
Barry Warsaw51ac5802000-03-20 16:36:48 +0000366#ifdef __cplusplus
367extern "C" {
368#endif
369
/* --- Unicode Type ------------------------------------------------------- */

/* In-memory layout of a Unicode object.  The fast access macros
   defined after this struct read `length` and `str` directly. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t length;          /* Length of raw Unicode data in buffer */
    Py_UNICODE *str;            /* Raw Unicode buffer */
    Py_hash_t hash;             /* Hash value; -1 if not set */
    int state;                  /* != 0 if interned. In this case the two
                                 * references from the dictionary to this object
                                 * are *not* counted in ob_refcnt. */
    PyObject *defenc;           /* (Default) Encoded version as Python
                                   string, or NULL; this is used for
                                   implementing the buffer protocol */
} PyUnicodeObject;
384
/* Type objects.  PyUnicode_Type is the type that PyUnicode_CheckExact()
   compares against.  NOTE(review): PyUnicodeIter_Type is presumably the
   type of the iterator over Unicode objects -- confirm. */
PyAPI_DATA(PyTypeObject) PyUnicode_Type;
PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;

/* Interning states.  NOTE(review): presumably the values stored in
   PyUnicodeObject.state (0 meaning "not interned") -- confirm against
   the implementation. */
#define SSTATE_NOT_INTERNED 0
#define SSTATE_INTERNED_MORTAL 1
#define SSTATE_INTERNED_IMMORTAL 2
391
/* PyUnicode_Check(op) tests the Py_TPFLAGS_UNICODE_SUBCLASS flag on the
   type of op; PyUnicode_CheckExact(op) is true only when the type of op
   is exactly PyUnicode_Type. */
#define PyUnicode_Check(op) \
    PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
#define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
Guido van Rossumd8225182000-03-10 22:33:05 +0000395
/* Fast access macros.  Each one asserts PyUnicode_Check(op) and then
   reads a PyUnicodeObject field directly:

     PyUnicode_GET_SIZE       the length field
     PyUnicode_GET_DATA_SIZE  the length field times sizeof(Py_UNICODE)
     PyUnicode_AS_UNICODE     the str buffer
     PyUnicode_AS_DATA        the str buffer cast to const char *

   op is expanded twice (once inside the assert), so it must be free of
   side effects. */
#define PyUnicode_GET_SIZE(op) \
    (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length))
#define PyUnicode_GET_DATA_SIZE(op) \
    (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE)))
#define PyUnicode_AS_UNICODE(op) \
    (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->str))
#define PyUnicode_AS_DATA(op) \
    (assert(PyUnicode_Check(op)),((const char *)((PyUnicodeObject *)(op))->str))
Guido van Rossumd8225182000-03-10 22:33:05 +0000405
/* --- Constants ---------------------------------------------------------- */

/* This Unicode character will be used as replacement character during
   decoding if the errors argument is set to "replace". Note: the
   Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
   Unicode 3.0.  The value is cast to Py_UNICODE. */

#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)
414
415/* === Public API ========================================================= */
416
417/* --- Plain Py_UNICODE --------------------------------------------------- */
418
/* Create a Unicode Object from the Py_UNICODE buffer u of the given
   size.

   u may be NULL which causes the contents to be undefined. It is the
   user's responsibility to fill in the needed data afterwards. Note
   that modifying the Unicode object contents after construction is
   only allowed if u was set to NULL.

   The buffer is copied into the new object. */

PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
    const Py_UNICODE *u,        /* Unicode buffer */
    Py_ssize_t size             /* size of buffer */
    );
433
/* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded
   bytes and size gives the size of that char buffer. */
PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
    const char *u,              /* char buffer */
    Py_ssize_t size             /* size of buffer */
    );
439
/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
   UTF-8 encoded bytes, so no size argument is taken. */
PyAPI_FUNC(PyObject*) PyUnicode_FromString(
    const char *u               /* string */
    );
445
/* Return a read-only pointer to the Unicode object's internal
   Py_UNICODE buffer.  The buffer belongs to the object; callers must
   not write through the returned pointer. */

PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
    PyObject *unicode           /* Unicode object */
    );
452
/* Get the length of the Unicode object, returned as a Py_ssize_t. */

PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
    PyObject *unicode           /* Unicode object */
    );
458
/* Get the maximum ordinal for a Unicode character, returned as a
   Py_UNICODE value. */
PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
Martin v. Löwisce9b5a52001-06-27 06:28:56 +0000461
/* Resize an already allocated Unicode object to the given new length.

   *unicode is modified to point to the new (resized) object and 0 is
   returned on success.

   This API may only be called by the function which also called the
   Unicode constructor. The refcount on the object must be 1. Otherwise,
   an error is returned.

   Error handling is implemented as follows: an exception is set, -1
   is returned and *unicode is left untouched.

*/

PyAPI_FUNC(int) PyUnicode_Resize(
    PyObject **unicode,         /* Pointer to the Unicode object */
    Py_ssize_t length           /* New length */
    );
480
Guido van Rossumd8225182000-03-10 22:33:05 +0000481/* Coerce obj to an Unicode object and return a reference with
482 *incremented* refcount.
483
484 Coercion is done in the following way:
485
Georg Brandl952867a2010-06-27 10:17:12 +0000486 1. bytes, bytearray and other char buffer compatible objects are decoded
Alexander Belopolsky83283c22010-11-16 14:29:01 +0000487 under the assumptions that they contain data using the UTF-8
488 encoding. Decoding is done in "strict" mode.
Guido van Rossumd8225182000-03-10 22:33:05 +0000489
Guido van Rossumb8c65bc2001-10-19 02:01:31 +0000490 2. All other objects (including Unicode objects) raise an
491 exception.
Guido van Rossumd8225182000-03-10 22:33:05 +0000492
493 The API returns NULL in case of an error. The caller is responsible
494 for decref'ing the returned objects.
495
496*/
497
Mark Hammond91a681d2002-08-12 07:21:58 +0000498PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000499 register PyObject *obj, /* Object */
Marc-André Lemburg5a5c81a2000-07-07 13:46:42 +0000500 const char *encoding, /* encoding */
501 const char *errors /* error handling */
502 );
503
Guido van Rossumb8c65bc2001-10-19 02:01:31 +0000504/* Coerce obj to a Unicode object and return a reference with
Marc-André Lemburg5a5c81a2000-07-07 13:46:42 +0000505 *incremented* refcount.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000506
Guido van Rossumb8c65bc2001-10-19 02:01:31 +0000507 Unicode objects are passed back as-is (subclasses are converted to
508 true Unicode objects), all other objects are delegated to
509 PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
Georg Brandl952867a2010-06-27 10:17:12 +0000510 using UTF-8 encoding as basis for decoding the object.
Marc-André Lemburg5a5c81a2000-07-07 13:46:42 +0000511
512 The API returns NULL in case of an error. The caller is responsible
513 for decref'ing the returned objects.
514
515*/
516
Mark Hammond91a681d2002-08-12 07:21:58 +0000517PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000518 register PyObject *obj /* Object */
Guido van Rossumd8225182000-03-10 22:33:05 +0000519 );
520
Victor Stinner1205f272010-09-11 00:54:47 +0000521PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(
522 const char *format, /* ASCII-encoded string */
523 va_list vargs
524 );
525PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
526 const char *format, /* ASCII-encoded string */
527 ...
528 );
Walter Dörwaldd2034312007-05-18 16:29:38 +0000529
Eric Smith4a7d76d2008-05-30 18:10:19 +0000530/* Format the object based on the format_spec, as defined in PEP 3101
531 (Advanced String Formatting). */
532PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj,
533 Py_UNICODE *format_spec,
534 Py_ssize_t format_spec_len);
535
Walter Dörwald16807132007-05-25 13:52:07 +0000536PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
537PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
538PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(const char *);
539PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void);
540
541/* Evaluates to the `state` field of op (presumably its interned state;
   the struct definition is outside this view -- confirm there).
   op is cast to PyUnicodeObject * with no type check, so use this
   only if you know it's a string. */
542#define PyUnicode_CHECK_INTERNED(op) (((PyUnicodeObject *)(op))->state)
543
Guido van Rossumd8225182000-03-10 22:33:05 +0000544/* --- wchar_t support for platforms which support it --------------------- */
545
546#ifdef HAVE_WCHAR_H
547
Georg Brandl952867a2010-06-27 10:17:12 +0000548/* Create a Unicode Object from the wchar_t buffer w of the given
Guido van Rossumd8225182000-03-10 22:33:05 +0000549 size.
550
551 The buffer is copied into the new object. */
552
Mark Hammond91a681d2002-08-12 07:21:58 +0000553PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
Guido van Rossumd8225182000-03-10 22:33:05 +0000554 register const wchar_t *w, /* wchar_t buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000555 Py_ssize_t size /* size of buffer */
Guido van Rossumd8225182000-03-10 22:33:05 +0000556 );
557
Marc-André Lemburga9cadcd2004-11-22 13:02:31 +0000558/* Copies the Unicode Object contents into the wchar_t buffer w. At
Guido van Rossumd8225182000-03-10 22:33:05 +0000559 most size wchar_t characters are copied.
560
Marc-André Lemburga9cadcd2004-11-22 13:02:31 +0000561 Note that the resulting wchar_t string may or may not be
562 0-terminated. It is the responsibility of the caller to make sure
563 that the wchar_t string is 0-terminated in case this is required by
564 the application.
565
566 Returns the number of wchar_t characters copied (excluding a
567 possibly trailing 0-termination character) or -1 in case of an
Guido van Rossumd8225182000-03-10 22:33:05 +0000568 error. */
569
Martin v. Löwis18e16552006-02-15 17:27:45 +0000570PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
Guido van Rossumd8225182000-03-10 22:33:05 +0000571 PyUnicodeObject *unicode, /* Unicode object */
572 register wchar_t *w, /* wchar_t buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000573 Py_ssize_t size /* size of buffer */
Guido van Rossumd8225182000-03-10 22:33:05 +0000574 );
575
Victor Stinner137c34c2010-09-29 10:25:54 +0000576/* Convert the Unicode object to a wide character string. The output string
577 always ends with a nul character. If size is not NULL, write the number of
578 wide characters (including the nul character) into *size.
579
580 Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it)
581 on success. On error, returns NULL and raises a MemoryError; *size is
582 undefined. */
583
584PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
Victor Stinnerbeb4135b2010-10-07 01:02:42 +0000585 PyObject *unicode, /* Unicode object */
Victor Stinner137c34c2010-09-29 10:25:54 +0000586 Py_ssize_t *size /* number of characters of the result */
587 );
588
Guido van Rossumd8225182000-03-10 22:33:05 +0000589#endif
590
Marc-André Lemburgcc8764c2002-08-11 12:23:04 +0000591/* --- Unicode ordinals --------------------------------------------------- */
592
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000593/* Create a Unicode Object from the given Unicode code point ordinal.
594
Marc-André Lemburgcc8764c2002-08-11 12:23:04 +0000595 The ordinal must be in range(0x10000) on narrow Python builds
596 (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
597 raised in case it is not.
598
599*/
600
Marc-André Lemburg9c329de2002-08-12 08:19:10 +0000601PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
Marc-André Lemburgcc8764c2002-08-11 12:23:04 +0000602
Benjamin Peterson960cf0f2009-01-09 04:11:44 +0000603/* --- Free-list management ----------------------------------------------- */
604
605/* Clear the free list used by the Unicode implementation.
606
607 This can be used to release memory used for objects on the free
608 list back to the Python memory allocator.
609
610*/
611
612PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
613
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000614/* === Builtin Codecs =====================================================
Guido van Rossumd8225182000-03-10 22:33:05 +0000615
616 Many of these APIs take two arguments encoding and errors. These
617 parameters encoding and errors have the same semantics as the ones
Alexander Belopolsky83283c22010-11-16 14:29:01 +0000618 of the builtin str() API.
Guido van Rossumd8225182000-03-10 22:33:05 +0000619
Georg Brandl952867a2010-06-27 10:17:12 +0000620 Setting encoding to NULL causes the default encoding (UTF-8) to be used.
Guido van Rossumd8225182000-03-10 22:33:05 +0000621
622 Error handling is set by errors which may also be set to NULL
623 meaning to use the default handling defined for the codec. Default
624 error handling for all builtin codecs is "strict" (ValueErrors are
625 raised).
626
627 The codecs all use a similar interface. Only deviations from the
628 generic ones are documented.
629
630*/
631
Fred Drakecb093fe2000-05-09 19:51:53 +0000632/* --- Manage the default encoding ---------------------------------------- */
633
Jeremy Hylton3ce45382001-07-30 22:34:24 +0000634/* Return a Python string holding the default encoded value of the
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000635 Unicode object.
Jeremy Hylton3ce45382001-07-30 22:34:24 +0000636
Alexander Belopolsky83283c22010-11-16 14:29:01 +0000637 Same as PyUnicode_AsUTF8String() except
638 the resulting string is cached in the Unicode object for subsequent
Jeremy Hylton3ce45382001-07-30 22:34:24 +0000639 usage by this function. The cached version is needed to implement
640 the character buffer interface and will live (at least) as long as
641 the Unicode object itself.
642
643 The refcount of the string is *not* incremented.
644
645 *** Exported for internal use by the interpreter only !!! ***
646
647*/
648
Mark Hammond91a681d2002-08-12 07:21:58 +0000649PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000650 PyObject *unicode,
651 const char *errors);
Jeremy Hylton3ce45382001-07-30 22:34:24 +0000652
Alexander Belopolsky83283c22010-11-16 14:29:01 +0000653/* Returns a pointer to the default encoding (UTF-8) of the
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000654 Unicode object unicode and the size of the encoded representation
655 in bytes stored in *size.
Christian Heimes5894ba72007-11-04 11:43:14 +0000656
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000657 In case of an error, *size is not set.
Guido van Rossum7d1df6c2007-08-29 13:53:23 +0000658
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000659 *** This API is for interpreter INTERNAL USE ONLY and will likely
Alexander Belopolsky83283c22010-11-16 14:29:01 +0000660 *** be removed or changed in the future.
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000661
662 *** If you need to access the Unicode object as UTF-8 bytes string,
663 *** please use PyUnicode_AsUTF8String() instead.
664
Martin v. Löwis5b222132007-06-10 09:51:05 +0000665*/
666
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000667PyAPI_FUNC(char *) _PyUnicode_AsStringAndSize(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000668 PyObject *unicode,
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000669 Py_ssize_t *size);
Guido van Rossum7d1df6c2007-08-29 13:53:23 +0000670
Alexander Belopolsky83283c22010-11-16 14:29:01 +0000671/* Returns a pointer to the default encoding (UTF-8) of the
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000672 Unicode object unicode.
Guido van Rossum7d1df6c2007-08-29 13:53:23 +0000673
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000674 Use of this API is DEPRECATED since no size information can be
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000675 extracted from the returned data.
676
677 *** This API is for interpreter INTERNAL USE ONLY and will likely
678 *** be removed or changed for Python 3.1.
679
680 *** If you need to access the Unicode object as UTF-8 bytes string,
681 *** please use PyUnicode_AsUTF8String() instead.
Guido van Rossum7d1df6c2007-08-29 13:53:23 +0000682
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000683*/
Martin v. Löwis5b222132007-06-10 09:51:05 +0000684
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000685PyAPI_FUNC(char *) _PyUnicode_AsString(PyObject *unicode);
Martin v. Löwis5b222132007-06-10 09:51:05 +0000686
Alexander Belopolsky83283c22010-11-16 14:29:01 +0000687/* Returns "utf-8". */
Fred Drakecb093fe2000-05-09 19:51:53 +0000688
Mark Hammond91a681d2002-08-12 07:21:58 +0000689PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
Fred Drakecb093fe2000-05-09 19:51:53 +0000690
Guido van Rossumd8225182000-03-10 22:33:05 +0000691/* --- Generic Codecs ----------------------------------------------------- */
692
693/* Create a Unicode object by decoding the encoded string s of the
694 given size. */
695
Mark Hammond91a681d2002-08-12 07:21:58 +0000696PyAPI_FUNC(PyObject*) PyUnicode_Decode(
Guido van Rossumd8225182000-03-10 22:33:05 +0000697 const char *s, /* encoded string */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000698 Py_ssize_t size, /* size of buffer */
Guido van Rossumd8225182000-03-10 22:33:05 +0000699 const char *encoding, /* encoding */
700 const char *errors /* error handling */
701 );
702
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000703/* Decode a Unicode object unicode and return the result as Python
704 object. */
705
706PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000707 PyObject *unicode, /* Unicode object */
708 const char *encoding, /* encoding */
709 const char *errors /* error handling */
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000710 );
711
712/* Decode a Unicode object unicode and return the result as Unicode
713 object. */
714
715PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000716 PyObject *unicode, /* Unicode object */
717 const char *encoding, /* encoding */
718 const char *errors /* error handling */
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000719 );
720
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000721/* Encodes a Py_UNICODE buffer of the given size and returns a
Guido van Rossumd8225182000-03-10 22:33:05 +0000722 Python string object. */
723
Mark Hammond91a681d2002-08-12 07:21:58 +0000724PyAPI_FUNC(PyObject*) PyUnicode_Encode(
Guido van Rossumd8225182000-03-10 22:33:05 +0000725 const Py_UNICODE *s, /* Unicode char buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000726 Py_ssize_t size, /* number of Py_UNICODE chars to encode */
Guido van Rossumd8225182000-03-10 22:33:05 +0000727 const char *encoding, /* encoding */
728 const char *errors /* error handling */
729 );
730
Marc-André Lemburgd2d45982004-07-08 17:57:32 +0000731/* Encodes a Unicode object and returns the result as Python
732 object. */
733
734PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000735 PyObject *unicode, /* Unicode object */
736 const char *encoding, /* encoding */
737 const char *errors /* error handling */
Marc-André Lemburgd2d45982004-07-08 17:57:32 +0000738 );
739
Guido van Rossumd8225182000-03-10 22:33:05 +0000740/* Encodes a Unicode object and returns the result as Python string
741 object. */
742
Mark Hammond91a681d2002-08-12 07:21:58 +0000743PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000744 PyObject *unicode, /* Unicode object */
745 const char *encoding, /* encoding */
746 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +0000747 );
748
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000749/* Encodes a Unicode object and returns the result as Unicode
750 object. */
751
752PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000753 PyObject *unicode, /* Unicode object */
754 const char *encoding, /* encoding */
755 const char *errors /* error handling */
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000756 );
757
758/* Build an encoding map. */
759
Thomas Wouters73e5a5b2006-06-08 15:35:45 +0000760PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
761 PyObject* string /* 256 character map */
762 );
763
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000764/* --- UTF-7 Codecs ------------------------------------------------------- */
765
Mark Hammond91a681d2002-08-12 07:21:58 +0000766PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000767 const char *string, /* UTF-7 encoded string */
768 Py_ssize_t length, /* size of string */
769 const char *errors /* error handling */
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000770 );
771
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000772PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000773 const char *string, /* UTF-7 encoded string */
774 Py_ssize_t length, /* size of string */
775 const char *errors, /* error handling */
776 Py_ssize_t *consumed /* bytes consumed */
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000777 );
778
Mark Hammond91a681d2002-08-12 07:21:58 +0000779PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000780 const Py_UNICODE *data, /* Unicode char buffer */
781 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
782 int base64SetO, /* Encode RFC2152 Set O characters in base64 */
783 int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
784 const char *errors /* error handling */
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000785 );
786
Guido van Rossumd8225182000-03-10 22:33:05 +0000787/* --- UTF-8 Codecs ------------------------------------------------------- */
788
Mark Hammond91a681d2002-08-12 07:21:58 +0000789PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000790 const char *string, /* UTF-8 encoded string */
791 Py_ssize_t length, /* size of string */
792 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +0000793 );
794
Walter Dörwald69652032004-09-07 20:24:22 +0000795PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000796 const char *string, /* UTF-8 encoded string */
797 Py_ssize_t length, /* size of string */
798 const char *errors, /* error handling */
799 Py_ssize_t *consumed /* bytes consumed */
Walter Dörwald69652032004-09-07 20:24:22 +0000800 );
801
Mark Hammond91a681d2002-08-12 07:21:58 +0000802PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000803 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +0000804 );
805
Mark Hammond91a681d2002-08-12 07:21:58 +0000806PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000807 const Py_UNICODE *data, /* Unicode char buffer */
808 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
809 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +0000810 );
811
Walter Dörwald41980ca2007-08-16 21:55:45 +0000812/* --- UTF-32 Codecs ------------------------------------------------------ */
813
814/* Decodes length bytes from a UTF-32 encoded buffer string and returns
815 the corresponding Unicode object.
816
817 errors (if non-NULL) defines the error handling. It defaults
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000818 to "strict".
Walter Dörwald41980ca2007-08-16 21:55:45 +0000819
820 If byteorder is non-NULL, the decoder starts decoding using the
821 given byte order:
822
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000823 *byteorder == -1: little endian
824 *byteorder == 0: native order
825 *byteorder == 1: big endian
Walter Dörwald41980ca2007-08-16 21:55:45 +0000826
827 In native mode, the first four bytes of the stream are checked for a
828 BOM mark. If found, the BOM mark is analysed, the byte order
829 adjusted and the BOM skipped. In the other modes, no BOM mark
830 interpretation is done. After completion, *byteorder is set to the
831 current byte order at the end of input data.
832
833 If byteorder is NULL, the codec starts in native order mode.
834
835*/
836
837PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000838 const char *string, /* UTF-32 encoded string */
839 Py_ssize_t length, /* size of string */
840 const char *errors, /* error handling */
841 int *byteorder /* pointer to byteorder to use
842 0=native;-1=LE,1=BE; updated on
843 exit */
Walter Dörwald41980ca2007-08-16 21:55:45 +0000844 );
845
846PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000847 const char *string, /* UTF-32 encoded string */
848 Py_ssize_t length, /* size of string */
849 const char *errors, /* error handling */
850 int *byteorder, /* pointer to byteorder to use
851 0=native;-1=LE,1=BE; updated on
852 exit */
853 Py_ssize_t *consumed /* bytes consumed */
Walter Dörwald41980ca2007-08-16 21:55:45 +0000854 );
855
856/* Returns a Python string using the UTF-32 encoding in native byte
857 order. The string always starts with a BOM mark. */
858
859PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000860 PyObject *unicode /* Unicode object */
Walter Dörwald41980ca2007-08-16 21:55:45 +0000861 );
862
863/* Returns a Python string object holding the UTF-32 encoded value of
864 the Unicode data.
865
866 If byteorder is not 0, output is written according to the following
867 byte order:
868
869 byteorder == -1: little endian
870 byteorder == 0: native byte order (writes a BOM mark)
871 byteorder == 1: big endian
872
873 If byteorder is 0, the output string will always start with the
874 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
875 prepended.
876
877*/
878
879PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000880 const Py_UNICODE *data, /* Unicode char buffer */
881 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
882 const char *errors, /* error handling */
883 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
Walter Dörwald41980ca2007-08-16 21:55:45 +0000884 );
885
Guido van Rossumd8225182000-03-10 22:33:05 +0000886/* --- UTF-16 Codecs ------------------------------------------------------ */
887
Guido van Rossum9e896b32000-04-05 20:11:21 +0000888/* Decodes length bytes from a UTF-16 encoded buffer string and returns
Guido van Rossumd8225182000-03-10 22:33:05 +0000889 the corresponding Unicode object.
890
891 errors (if non-NULL) defines the error handling. It defaults
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000892 to "strict".
Guido van Rossumd8225182000-03-10 22:33:05 +0000893
894 If byteorder is non-NULL, the decoder starts decoding using the
895 given byte order:
896
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000897 *byteorder == -1: little endian
898 *byteorder == 0: native order
899 *byteorder == 1: big endian
Guido van Rossumd8225182000-03-10 22:33:05 +0000900
Marc-André Lemburg489b56e2001-05-21 20:30:15 +0000901 In native mode, the first two bytes of the stream are checked for a
902 BOM mark. If found, the BOM mark is analysed, the byte order
903 adjusted and the BOM skipped. In the other modes, no BOM mark
904 interpretation is done. After completion, *byteorder is set to the
905 current byte order at the end of input data.
Guido van Rossumd8225182000-03-10 22:33:05 +0000906
907 If byteorder is NULL, the codec starts in native order mode.
908
909*/
910
Mark Hammond91a681d2002-08-12 07:21:58 +0000911PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000912 const char *string, /* UTF-16 encoded string */
913 Py_ssize_t length, /* size of string */
914 const char *errors, /* error handling */
915 int *byteorder /* pointer to byteorder to use
916 0=native;-1=LE,1=BE; updated on
917 exit */
Guido van Rossumd8225182000-03-10 22:33:05 +0000918 );
919
Walter Dörwald69652032004-09-07 20:24:22 +0000920PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000921 const char *string, /* UTF-16 encoded string */
922 Py_ssize_t length, /* size of string */
923 const char *errors, /* error handling */
924 int *byteorder, /* pointer to byteorder to use
925 0=native;-1=LE,1=BE; updated on
926 exit */
927 Py_ssize_t *consumed /* bytes consumed */
Walter Dörwald69652032004-09-07 20:24:22 +0000928 );
929
Guido van Rossumd8225182000-03-10 22:33:05 +0000930/* Returns a Python string using the UTF-16 encoding in native byte
931 order. The string always starts with a BOM mark. */
932
Mark Hammond91a681d2002-08-12 07:21:58 +0000933PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000934 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +0000935 );
936
937/* Returns a Python string object holding the UTF-16 encoded value of
Guido van Rossum9e896b32000-04-05 20:11:21 +0000938 the Unicode data.
Guido van Rossumd8225182000-03-10 22:33:05 +0000939
940 If byteorder is not 0, output is written according to the following
941 byte order:
942
943 byteorder == -1: little endian
944 byteorder == 0: native byte order (writes a BOM mark)
945 byteorder == 1: big endian
946
947 If byteorder is 0, the output string will always start with the
948 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
949 prepended.
950
951 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
952 UCS-2. This trick makes it possible to add full UTF-16 capabilities
Thomas Wouters7e474022000-07-16 12:04:32 +0000953 at a later point without compromising the APIs.
Guido van Rossumd8225182000-03-10 22:33:05 +0000954
955*/
956
Mark Hammond91a681d2002-08-12 07:21:58 +0000957PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000958 const Py_UNICODE *data, /* Unicode char buffer */
959 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
960 const char *errors, /* error handling */
961 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
Guido van Rossumd8225182000-03-10 22:33:05 +0000962 );
963
964/* --- Unicode-Escape Codecs ---------------------------------------------- */
965
Mark Hammond91a681d2002-08-12 07:21:58 +0000966PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000967 const char *string, /* Unicode-Escape encoded string */
968 Py_ssize_t length, /* size of string */
969 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +0000970 );
971
Mark Hammond91a681d2002-08-12 07:21:58 +0000972PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000973 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +0000974 );
975
Mark Hammond91a681d2002-08-12 07:21:58 +0000976PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000977 const Py_UNICODE *data, /* Unicode char buffer */
978 Py_ssize_t length /* Number of Py_UNICODE chars to encode */
Guido van Rossumd8225182000-03-10 22:33:05 +0000979 );
980
981/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
982
Mark Hammond91a681d2002-08-12 07:21:58 +0000983PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000984 const char *string, /* Raw-Unicode-Escape encoded string */
985 Py_ssize_t length, /* size of string */
986 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +0000987 );
988
Mark Hammond91a681d2002-08-12 07:21:58 +0000989PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000990 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +0000991 );
992
Mark Hammond91a681d2002-08-12 07:21:58 +0000993PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000994 const Py_UNICODE *data, /* Unicode char buffer */
995 Py_ssize_t length /* Number of Py_UNICODE chars to encode */
Guido van Rossumd8225182000-03-10 22:33:05 +0000996 );
997
Walter Dörwalda47d1c02005-08-30 10:23:14 +0000998/* --- Unicode Internal Codec ---------------------------------------------
999
1000 Only for internal use in _codecsmodule.c */
1001
/* NOTE(review): unlike the other declarations in this header, this one is
   not wrapped in PyAPI_FUNC(); presumably intentional because it is only
   for internal use -- confirm before relying on it from another module. */
1002PyObject *_PyUnicode_DecodeUnicodeInternal(
1003 const char *string,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001004 Py_ssize_t length,
Walter Dörwalda47d1c02005-08-30 10:23:14 +00001005 const char *errors
1006 );
1007
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001008/* --- Latin-1 Codecs -----------------------------------------------------
Guido van Rossumd8225182000-03-10 22:33:05 +00001009
1010 Note: Latin-1 corresponds to the first 256 Unicode ordinals.
1011
1012*/
1013
Mark Hammond91a681d2002-08-12 07:21:58 +00001014PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001015 const char *string, /* Latin-1 encoded string */
1016 Py_ssize_t length, /* size of string */
1017 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001018 );
1019
Mark Hammond91a681d2002-08-12 07:21:58 +00001020PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001021 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +00001022 );
1023
Mark Hammond91a681d2002-08-12 07:21:58 +00001024PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001025 const Py_UNICODE *data, /* Unicode char buffer */
1026 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
1027 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001028 );
1029
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001030/* --- ASCII Codecs -------------------------------------------------------
Guido van Rossumd8225182000-03-10 22:33:05 +00001031
1032 Only 7-bit ASCII data is accepted. All other codes generate errors.
1033
1034*/
1035
Mark Hammond91a681d2002-08-12 07:21:58 +00001036PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001037 const char *string, /* ASCII encoded string */
1038 Py_ssize_t length, /* size of string */
1039 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001040 );
1041
Mark Hammond91a681d2002-08-12 07:21:58 +00001042PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001043 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +00001044 );
1045
Mark Hammond91a681d2002-08-12 07:21:58 +00001046PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001047 const Py_UNICODE *data, /* Unicode char buffer */
1048 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
1049 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001050 );
1051
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001052/* --- Character Map Codecs -----------------------------------------------
Guido van Rossumd8225182000-03-10 22:33:05 +00001053
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001054 This codec uses mappings to encode and decode characters.
Guido van Rossumd8225182000-03-10 22:33:05 +00001055
1056 Decoding mappings must map single string characters to single
1057 Unicode characters, integers (which are then interpreted as Unicode
1058 ordinals) or None (meaning "undefined mapping" and causing an
1059 error).
1060
1061 Encoding mappings must map single Unicode characters to single
1062 string characters, integers (which are then interpreted as Latin-1
1063 ordinals) or None (meaning "undefined mapping" and causing an
1064 error).
1065
1066 If a character lookup fails with a LookupError, the character is
1067 copied as-is meaning that its ordinal value will be interpreted as
1068 Unicode or Latin-1 ordinal resp. Because of this, mappings only need
1069 to contain those mappings which map characters to different code
1070 points.
1071
1072*/
1073
Mark Hammond91a681d2002-08-12 07:21:58 +00001074PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001075 const char *string, /* Encoded string */
1076 Py_ssize_t length, /* size of string */
1077 PyObject *mapping, /* character mapping
1078 (char ordinal -> unicode ordinal) */
1079 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001080 );
1081
Mark Hammond91a681d2002-08-12 07:21:58 +00001082PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001083 PyObject *unicode, /* Unicode object */
1084 PyObject *mapping /* character mapping
1085 (unicode ordinal -> char ordinal) */
Guido van Rossumd8225182000-03-10 22:33:05 +00001086 );
1087
Mark Hammond91a681d2002-08-12 07:21:58 +00001088PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001089 const Py_UNICODE *data, /* Unicode char buffer */
1090 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
1091 PyObject *mapping, /* character mapping
1092 (unicode ordinal -> char ordinal) */
1093 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001094 );
1095
1096/* Translate a Py_UNICODE buffer of the given length by applying a
1097 character mapping table to it and return the resulting Unicode
1098 object.
1099
1100 The mapping table must map Unicode ordinal integers to Unicode
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001101 ordinal integers or None (causing deletion of the character).
Guido van Rossumd8225182000-03-10 22:33:05 +00001102
1103 Mapping tables may be dictionaries or sequences. Unmapped character
1104 ordinals (ones which cause a LookupError) are left untouched and
1105 are copied as-is.
1106
1107*/
1108
Mark Hammond91a681d2002-08-12 07:21:58 +00001109PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001110 const Py_UNICODE *data, /* Unicode char buffer */
1111 Py_ssize_t length, /* Number of Py_UNICODE chars to translate */
1112 PyObject *table, /* Translate table (Unicode ordinal ->
                                   Unicode ordinal or None) */
1113 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001114 );
1115
Guido van Rossumefec1152000-03-28 02:01:15 +00001116#ifdef MS_WIN32
Guido van Rossum24bdb042000-03-28 20:29:59 +00001117
Guido van Rossumefec1152000-03-28 02:01:15 +00001118/* --- MBCS codecs for Windows -------------------------------------------- */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001119
Mark Hammond91a681d2002-08-12 07:21:58 +00001120PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
Guido van Rossumefec1152000-03-28 02:01:15 +00001121 const char *string, /* MBCS encoded string */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001122 Py_ssize_t length, /* size of string */
Guido van Rossumefec1152000-03-28 02:01:15 +00001123 const char *errors /* error handling */
1124 );
1125
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001126PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
1127 const char *string, /* MBCS encoded string */
1128 Py_ssize_t length, /* size of string */
1129 const char *errors, /* error handling */
1130 Py_ssize_t *consumed /* bytes consumed */
1131 );
1132
Mark Hammond91a681d2002-08-12 07:21:58 +00001133PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
Guido van Rossumefec1152000-03-28 02:01:15 +00001134 PyObject *unicode /* Unicode object */
1135 );
1136
Mark Hammond91a681d2002-08-12 07:21:58 +00001137PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
Guido van Rossumefec1152000-03-28 02:01:15 +00001138 const Py_UNICODE *data, /* Unicode char buffer */
Neal Norwitzd78f6cf2007-08-08 04:49:37 +00001139 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
Guido van Rossumefec1152000-03-28 02:01:15 +00001140 const char *errors /* error handling */
1141 );
1142
Guido van Rossumefec1152000-03-28 02:01:15 +00001143#endif /* MS_WIN32 */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001144
Guido van Rossum9e896b32000-04-05 20:11:21 +00001145/* --- Decimal Encoder ---------------------------------------------------- */
1146
1147/* Takes a Unicode string holding a decimal value and writes it into
1148 an output buffer using standard ASCII digit codes.
1149
1150 The output buffer has to provide at least length+1 bytes of storage
1151 area. The output string is 0-terminated.
1152
1153 The encoder converts whitespace to ' ', decimal characters to their
1154 corresponding ASCII digit and all other Latin-1 characters except
1155 \0 as-is. Characters outside this range (Unicode ordinals 1-255)
1156 are treated as errors. This includes embedded NUL characters.
1157
1158 Error handling is defined by the errors argument:
1159
1160 NULL or "strict": raise a ValueError
1161 "ignore": ignore the wrong characters (these are not copied to the
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001162 output buffer)
Guido van Rossum9e896b32000-04-05 20:11:21 +00001163 "replace": replaces illegal characters with '?'
1164
1165 Returns 0 on success, -1 on failure.
1166
1167*/
1168
Mark Hammond91a681d2002-08-12 07:21:58 +00001169PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001170 Py_UNICODE *s, /* Unicode buffer */
1171 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
1172 char *output, /* Output buffer; must have size >= length+1
                                   (the output string is 0-terminated) */
1173 const char *errors /* error handling: NULL, "strict", "ignore"
                                   or "replace" */
Guido van Rossum9e896b32000-04-05 20:11:21 +00001174 );
1175
Martin v. Löwis011e8422009-05-05 04:43:17 +00001176/* --- File system encoding ---------------------------------------------- */
1177
Victor Stinner47fcb5b2010-08-13 23:59:58 +00001178/* ParseTuple converter: encode str objects to bytes using
1179 PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */
Martin v. Löwis011e8422009-05-05 04:43:17 +00001180
1181PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
1182
Victor Stinner47fcb5b2010-08-13 23:59:58 +00001183/* ParseTuple converter: decode bytes objects to unicode using
1184 PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */
1185
1186PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
1187
Victor Stinner77c38622010-05-14 15:58:55 +00001188/* Decode a null-terminated string using Py_FileSystemDefaultEncoding
1189 and the "surrogateescape" error handler.
Martin v. Löwis011e8422009-05-05 04:43:17 +00001190
Victor Stinnerf3170cc2010-10-15 12:04:23 +00001191 If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1192 encoding.
Martin v. Löwis011e8422009-05-05 04:43:17 +00001193
Benjamin Petersonccbd6942010-05-15 17:43:18 +00001194 Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known.
Martin v. Löwis011e8422009-05-05 04:43:17 +00001195*/
1196
1197PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
1198 const char *s /* encoded string */
1199 );
1200
Victor Stinner77c38622010-05-14 15:58:55 +00001201/* Decode a string using Py_FileSystemDefaultEncoding
1202 and the "surrogateescape" error handler.
1203
Victor Stinnerf3170cc2010-10-15 12:04:23 +00001204 If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1205 encoding.
Victor Stinner77c38622010-05-14 15:58:55 +00001206*/
1207
Martin v. Löwis011e8422009-05-05 04:43:17 +00001208PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
1209 const char *s, /* encoded string */
1210 Py_ssize_t size /* size */
1211 );
1212
Victor Stinnerae6265f2010-05-15 16:27:27 +00001213/* Encode a Unicode object to Py_FileSystemDefaultEncoding with the
Benjamin Petersonccbd6942010-05-15 17:43:18 +00001214 "surrogateescape" error handler, and return bytes.
Victor Stinnerae6265f2010-05-15 16:27:27 +00001215
Victor Stinnerf3170cc2010-10-15 12:04:23 +00001216 If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1217 encoding.
Victor Stinnerae6265f2010-05-15 16:27:27 +00001218*/
1219
1220PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
1221 PyObject *unicode
1222 );
1223
Guido van Rossumd8225182000-03-10 22:33:05 +00001224/* --- Methods & Slots ----------------------------------------------------
1225
1226 These are capable of handling Unicode objects and strings on input
1227 (we refer to them as strings in the descriptions) and return
1228 Unicode objects or integers as appropriate. */
1229
1230/* Concat two strings giving a new Unicode string. */
1231
Mark Hammond91a681d2002-08-12 07:21:58 +00001232PyAPI_FUNC(PyObject*) PyUnicode_Concat(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001233 PyObject *left, /* Left string */
1234 PyObject *right /* Right string */
Guido van Rossumd8225182000-03-10 22:33:05 +00001235 );
1236
Walter Dörwald1ab83302007-05-18 17:15:44 +00001237/* Concat two strings and put the result in *pleft
1238 (sets *pleft to NULL on error) */
1239
1240PyAPI_FUNC(void) PyUnicode_Append(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001241 PyObject **pleft, /* Pointer to left string */
1242 PyObject *right /* Right string */
Walter Dörwald1ab83302007-05-18 17:15:44 +00001243 );
1244
1245/* Concat two strings, put the result in *pleft and drop the right object
1246 (sets *pleft to NULL on error) */
1247
1248PyAPI_FUNC(void) PyUnicode_AppendAndDel(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001249 PyObject **pleft, /* Pointer to left string */
1250 PyObject *right /* Right string */
Walter Dörwald1ab83302007-05-18 17:15:44 +00001251 );
1252
Guido van Rossumd8225182000-03-10 22:33:05 +00001253/* Split a string giving a list of Unicode strings.
1254
1255 If sep is NULL, splitting will be done at all whitespace
1256 substrings. Otherwise, splits occur at the given separator.
1257
1258 At most maxsplit splits will be done. If negative, no limit is set.
1259
1260 Separators are not included in the resulting list.
1261
1262*/
1263
Mark Hammond91a681d2002-08-12 07:21:58 +00001264PyAPI_FUNC(PyObject*) PyUnicode_Split(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001265 PyObject *s, /* String to split */
1266 PyObject *sep, /* String separator */
1267 Py_ssize_t maxsplit /* Maxsplit count */
1268 );
Guido van Rossumd8225182000-03-10 22:33:05 +00001269
1270/* Ditto, but split at line breaks.
1271
1272 CRLF is considered to be one line break. Line breaks are not
1273 included in the resulting list. */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001274
Mark Hammond91a681d2002-08-12 07:21:58 +00001275PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001276 PyObject *s, /* String to split */
1277 int keepends /* If true, line end markers are included */
1278 );
Guido van Rossumd8225182000-03-10 22:33:05 +00001279
Thomas Wouters477c8d52006-05-27 19:21:47 +00001280/* Partition a string using a given separator. */
1281
1282PyAPI_FUNC(PyObject*) PyUnicode_Partition(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001283 PyObject *s, /* String to partition */
1284 PyObject *sep /* String separator */
1285 );
Thomas Wouters477c8d52006-05-27 19:21:47 +00001286
1287/* Partition a string using a given separator, searching from the end of the
1288 string. */
1289
1290PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001291 PyObject *s, /* String to partition */
1292 PyObject *sep /* String separator */
1293 );
Thomas Wouters477c8d52006-05-27 19:21:47 +00001294
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001295/* Split a string giving a list of Unicode strings.
1296
1297 If sep is NULL, splitting will be done at all whitespace
1298 substrings. Otherwise, splits occur at the given separator.
1299
1300 At most maxsplit splits will be done. If negative, no limit is set.
1301 Unlike PyUnicode_Split, PyUnicode_RSplit splits from the end of the
1302 string.
1303
1304 Separators are not included in the resulting list.
1305
1306*/
1307
1308PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001309 PyObject *s, /* String to split */
1310 PyObject *sep, /* String separator */
1311 Py_ssize_t maxsplit /* Maxsplit count */
1312 );
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001313
Guido van Rossumd8225182000-03-10 22:33:05 +00001314/* Translate a string by applying a character mapping table to it and
1315 return the resulting Unicode object.
1316
1317 The mapping table must map Unicode ordinal integers to Unicode
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001318 ordinal integers or None (causing deletion of the character).
Guido van Rossumd8225182000-03-10 22:33:05 +00001319
1320 Mapping tables may be dictionaries or sequences. Unmapped character
1321 ordinals (ones which cause a LookupError) are left untouched and
1322 are copied as-is.
1323
1324*/
1325
Mark Hammond91a681d2002-08-12 07:21:58 +00001326PyAPI_FUNC(PyObject *) PyUnicode_Translate(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001327 PyObject *str, /* String */
1328 PyObject *table, /* Translate table */
1329 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001330 );
1331
1332/* Join a sequence of strings using the given separator and return
1333 the resulting Unicode string. */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001334
Mark Hammond91a681d2002-08-12 07:21:58 +00001335PyAPI_FUNC(PyObject*) PyUnicode_Join(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001336 PyObject *separator, /* Separator string */
1337 PyObject *seq /* Sequence object */
Guido van Rossumd8225182000-03-10 22:33:05 +00001338 );
1339
1340/* Return 1 if substr matches str[start:end] at the given tail end, 0
1341 otherwise. Return -1 if an error occurred. */
1342
Martin v. Löwis18e16552006-02-15 17:27:45 +00001343PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001344 PyObject *str, /* String */
1345 PyObject *substr, /* Prefix or Suffix string */
1346 Py_ssize_t start, /* Start index */
1347 Py_ssize_t end, /* Stop index */
1348 int direction /* Tail end: -1 prefix, +1 suffix */
Guido van Rossumd8225182000-03-10 22:33:05 +00001349 );
1350
1351/* Return the first position of substr in str[start:end] using the
Marc-André Lemburg4da6fd62002-05-29 11:33:13 +00001352 given search direction or -1 if not found. -2 is returned in case
1353 an error occurred and an exception is set. */
Guido van Rossumd8225182000-03-10 22:33:05 +00001354
Martin v. Löwis18e16552006-02-15 17:27:45 +00001355PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001356 PyObject *str, /* String */
1357 PyObject *substr, /* Substring to find */
1358 Py_ssize_t start, /* Start index */
1359 Py_ssize_t end, /* Stop index */
1360 int direction /* Find direction: +1 forward, -1 backward */
Guido van Rossumd8225182000-03-10 22:33:05 +00001361 );
1362
Barry Warsaw51ac5802000-03-20 16:36:48 +00001363/* Count the number of occurrences of substr in str[start:end]. */
Guido van Rossumd8225182000-03-10 22:33:05 +00001364
Martin v. Löwis18e16552006-02-15 17:27:45 +00001365PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001366 PyObject *str, /* String */
1367 PyObject *substr, /* Substring to count */
1368 Py_ssize_t start, /* Start index */
1369 Py_ssize_t end /* Stop index */
Guido van Rossumd8225182000-03-10 22:33:05 +00001370 );
1371
Barry Warsaw51ac5802000-03-20 16:36:48 +00001372/* Replace at most maxcount occurrences of substr in str with replstr
Guido van Rossumd8225182000-03-10 22:33:05 +00001373 and return the resulting Unicode object. */
1374
Mark Hammond91a681d2002-08-12 07:21:58 +00001375PyAPI_FUNC(PyObject *) PyUnicode_Replace(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001376 PyObject *str, /* String */
1377 PyObject *substr, /* Substring to find */
1378 PyObject *replstr, /* Substring to replace */
1379 Py_ssize_t maxcount /* Max. number of replacements to apply;
1380 -1 = all */
Guido van Rossumd8225182000-03-10 22:33:05 +00001381 );
1382
1383/* Compare two strings and return -1, 0, 1 for less than, equal,
1384 greater than resp. */
1385
Mark Hammond91a681d2002-08-12 07:21:58 +00001386PyAPI_FUNC(int) PyUnicode_Compare(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001387 PyObject *left, /* Left string */
1388 PyObject *right /* Right string */
Guido van Rossumd8225182000-03-10 22:33:05 +00001389 );
1390
/* Compare a Unicode object (left) with a C string (right).

   NOTE(review): presumably returns -1, 0, 1 for less than, equal,
   greater than like PyUnicode_Compare, and expects right to be a
   0-terminated ASCII string as the name suggests -- confirm against
   the implementation. */
Martin v. Löwis5b222132007-06-10 09:51:05 +00001391PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
1392 PyObject *left,
1393 const char *right
1394 );
1395
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001396/* Rich compare two strings and return one of the following:
1397
1398 - NULL in case an exception was raised
1399 - Py_True or Py_False for successful comparisons
1400 - Py_NotImplemented in case the type combination is unknown
1401
1402 Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
1403 case the conversion of the arguments to Unicode fails with a
1404 UnicodeDecodeError.
1405
1406 Possible values for op:
1407
1408 Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
1409
1410*/
1411
1412PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001413 PyObject *left, /* Left string */
1414 PyObject *right, /* Right string */
1415 int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001416 );
1417
Thomas Wouters7e474022000-07-16 12:04:32 +00001418/* Apply an argument tuple or dictionary to a format string and return
Guido van Rossumd8225182000-03-10 22:33:05 +00001419 the resulting Unicode string. */
1420
Mark Hammond91a681d2002-08-12 07:21:58 +00001421PyAPI_FUNC(PyObject *) PyUnicode_Format(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 PyObject *format, /* Format string */
1423 PyObject *args /* Argument tuple or dictionary */
Guido van Rossumd8225182000-03-10 22:33:05 +00001424 );
1425
Guido van Rossumd0d366b2000-03-13 23:22:24 +00001426/* Check whether element is contained in container and return 1/0
1427 accordingly.
1428
1429 element has to coerce to a one-element Unicode string. -1 is
1430 returned in case of an error. */
1431
Mark Hammond91a681d2002-08-12 07:21:58 +00001432PyAPI_FUNC(int) PyUnicode_Contains(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001433 PyObject *container, /* Container string */
1434 PyObject *element /* Element string */
Guido van Rossumd0d366b2000-03-13 23:22:24 +00001435 );
1436
Martin v. Löwis47383402007-08-15 07:32:56 +00001437/* Checks whether argument is a valid identifier. */
1438
1439PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
1440
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001441/* Externally visible for str.strip(unicode) */
Mark Hammond91a681d2002-08-12 07:21:58 +00001442PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001443 PyUnicodeObject *self,
1444 int striptype,
1445 PyObject *sepobj
1446 );
1447
Eric Smith5807c412008-05-11 21:00:57 +00001448/* Using the current locale, insert the thousands grouping
1449 into the string pointed to by buffer. For the argument descriptions,
1450 see Objects/stringlib/localeutil.h */
1451
Eric Smith0923d1d2009-04-16 20:16:10 +00001452PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGroupingLocale(Py_UNICODE *buffer,
1453 Py_ssize_t n_buffer,
1454 Py_UNICODE *digits,
1455 Py_ssize_t n_digits,
1456 Py_ssize_t min_width);
Eric Smith5807c412008-05-11 21:00:57 +00001457
Eric Smitha3b1ac82009-04-03 14:45:06 +00001458/* Using explicit passed-in values, insert the thousands grouping
1459 into the string pointed to by buffer. For the argument descriptions,
1460 see Objects/stringlib/localeutil.h */
Eric Smith0923d1d2009-04-16 20:16:10 +00001461PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(Py_UNICODE *buffer,
1462 Py_ssize_t n_buffer,
1463 Py_UNICODE *digits,
1464 Py_ssize_t n_digits,
1465 Py_ssize_t min_width,
1466 const char *grouping,
1467 const char *thousands_sep);
Guido van Rossumd8225182000-03-10 22:33:05 +00001468/* === Characters Type APIs =============================================== */
1469
Benjamin Peterson960cf0f2009-01-09 04:11:44 +00001470/* Helper array used by Py_UNICODE_ISSPACE(). */
1471
1472PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
1473
Guido van Rossumd8225182000-03-10 22:33:05 +00001474/* These should not be used directly. Use the Py_UNICODE_IS* and
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001475 Py_UNICODE_TO* macros instead.
Guido van Rossumd8225182000-03-10 22:33:05 +00001476
1477 These APIs are implemented in Objects/unicodectype.c.
1478
1479*/
1480
Mark Hammond91a681d2002-08-12 07:21:58 +00001481PyAPI_FUNC(int) _PyUnicode_IsLowercase(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001482 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001483 );
1484
Mark Hammond91a681d2002-08-12 07:21:58 +00001485PyAPI_FUNC(int) _PyUnicode_IsUppercase(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001486 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001487 );
1488
Mark Hammond91a681d2002-08-12 07:21:58 +00001489PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001490 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001491 );
1492
Martin v. Löwis13c3e382007-08-14 22:37:03 +00001493PyAPI_FUNC(int) _PyUnicode_IsXidStart(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001494 Py_UCS4 ch /* Unicode character */
Martin v. Löwis13c3e382007-08-14 22:37:03 +00001495 );
1496
1497PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001498 Py_UCS4 ch /* Unicode character */
Martin v. Löwis13c3e382007-08-14 22:37:03 +00001499 );
1500
Mark Hammond91a681d2002-08-12 07:21:58 +00001501PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001502 const Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001503 );
1504
Mark Hammond91a681d2002-08-12 07:21:58 +00001505PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001506 const Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001507 );
1508
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001509PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
1510 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001511 );
1512
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001513PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
1514 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001515 );
1516
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001517PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
1518 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001519 );
1520
Mark Hammond91a681d2002-08-12 07:21:58 +00001521PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001522 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001523 );
1524
Mark Hammond91a681d2002-08-12 07:21:58 +00001525PyAPI_FUNC(int) _PyUnicode_ToDigit(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001526 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001527 );
1528
Mark Hammond91a681d2002-08-12 07:21:58 +00001529PyAPI_FUNC(double) _PyUnicode_ToNumeric(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001530 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001531 );
1532
Mark Hammond91a681d2002-08-12 07:21:58 +00001533PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001534 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001535 );
1536
Mark Hammond91a681d2002-08-12 07:21:58 +00001537PyAPI_FUNC(int) _PyUnicode_IsDigit(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001538 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001539 );
1540
Mark Hammond91a681d2002-08-12 07:21:58 +00001541PyAPI_FUNC(int) _PyUnicode_IsNumeric(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001542 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001543 );
1544
Georg Brandl559e5d72008-06-11 18:37:52 +00001545PyAPI_FUNC(int) _PyUnicode_IsPrintable(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001546 Py_UCS4 ch /* Unicode character */
Georg Brandl559e5d72008-06-11 18:37:52 +00001547 );
1548
Mark Hammond91a681d2002-08-12 07:21:58 +00001549PyAPI_FUNC(int) _PyUnicode_IsAlpha(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001550 Py_UCS4 ch /* Unicode character */
Marc-André Lemburgf03e7412000-07-05 09:45:59 +00001551 );
1552
Victor Stinneref8d95c2010-08-16 22:03:11 +00001553PyAPI_FUNC(size_t) Py_UNICODE_strlen(
1554 const Py_UNICODE *u
1555 );
Martin v. Löwis5b222132007-06-10 09:51:05 +00001556
1557PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
Victor Stinneref8d95c2010-08-16 22:03:11 +00001558 Py_UNICODE *s1,
1559 const Py_UNICODE *s2);
Martin v. Löwis5b222132007-06-10 09:51:05 +00001560
Victor Stinnerc4eb7652010-09-01 23:43:50 +00001561PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat(
1562 Py_UNICODE *s1, const Py_UNICODE *s2);
1563
Martin v. Löwis5b222132007-06-10 09:51:05 +00001564PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
Victor Stinneref8d95c2010-08-16 22:03:11 +00001565 Py_UNICODE *s1,
1566 const Py_UNICODE *s2,
1567 size_t n);
Martin v. Löwis5b222132007-06-10 09:51:05 +00001568
1569PyAPI_FUNC(int) Py_UNICODE_strcmp(
Victor Stinneref8d95c2010-08-16 22:03:11 +00001570 const Py_UNICODE *s1,
1571 const Py_UNICODE *s2
1572 );
1573
1574PyAPI_FUNC(int) Py_UNICODE_strncmp(
1575 const Py_UNICODE *s1,
1576 const Py_UNICODE *s2,
1577 size_t n
1578 );
Martin v. Löwis5b222132007-06-10 09:51:05 +00001579
1580PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
Victor Stinneref8d95c2010-08-16 22:03:11 +00001581 const Py_UNICODE *s,
1582 Py_UNICODE c
Martin v. Löwis5b222132007-06-10 09:51:05 +00001583 );
1584
Victor Stinner331ea922010-08-10 16:37:20 +00001585PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
Victor Stinneref8d95c2010-08-16 22:03:11 +00001586 const Py_UNICODE *s,
1587 Py_UNICODE c
Victor Stinner331ea922010-08-10 16:37:20 +00001588 );
1589
Victor Stinner71133ff2010-09-01 23:43:53 +00001590/* Create a copy of a unicode string ending with a nul character. Return NULL
1591 and raise a MemoryError exception on memory allocation failure, otherwise
1592 return a newly allocated buffer (use PyMem_Free() to free the buffer). */
1593
Victor Stinner46408602010-09-03 16:18:00 +00001594PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
Victor Stinner71133ff2010-09-01 23:43:53 +00001595 PyObject *unicode
1596 );
1597
Guido van Rossumd8225182000-03-10 22:33:05 +00001598#ifdef __cplusplus
1599}
1600#endif
Guido van Rossumd8225182000-03-10 22:33:05 +00001601#endif /* !Py_UNICODEOBJECT_H */