blob: 39a6b2ef6557330455e363d3e59114f269e1e5ea [file] [log] [blame]
Guido van Rossumd8225182000-03-10 22:33:05 +00001#ifndef Py_UNICODEOBJECT_H
2#define Py_UNICODEOBJECT_H
Guido van Rossumd8225182000-03-10 22:33:05 +00003
Christian Heimesaf98da12008-01-27 15:18:18 +00004#include <stdarg.h>
5
Guido van Rossumd8225182000-03-10 22:33:05 +00006/*
7
8Unicode implementation based on original code by Fredrik Lundh,
9modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
10Unicode Integration Proposal (see file Misc/unicode.txt).
11
Guido van Rossum16b1ad92000-08-03 16:24:25 +000012Copyright (c) Corporation for National Research Initiatives.
Guido van Rossumd8225182000-03-10 22:33:05 +000013
14
15 Original header:
16 --------------------------------------------------------------------
17
18 * Yet another Unicode string type for Python. This type supports the
19 * 16-bit Basic Multilingual Plane (BMP) only.
20 *
21 * Written by Fredrik Lundh, January 1999.
22 *
23 * Copyright (c) 1999 by Secret Labs AB.
24 * Copyright (c) 1999 by Fredrik Lundh.
25 *
26 * fredrik@pythonware.com
27 * http://www.pythonware.com
28 *
29 * --------------------------------------------------------------------
30 * This Unicode String Type is
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000031 *
Guido van Rossumd8225182000-03-10 22:33:05 +000032 * Copyright (c) 1999 by Secret Labs AB
33 * Copyright (c) 1999 by Fredrik Lundh
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000034 *
Guido van Rossumd8225182000-03-10 22:33:05 +000035 * By obtaining, using, and/or copying this software and/or its
36 * associated documentation, you agree that you have read, understood,
37 * and will comply with the following terms and conditions:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000038 *
Guido van Rossumd8225182000-03-10 22:33:05 +000039 * Permission to use, copy, modify, and distribute this software and its
40 * associated documentation for any purpose and without fee is hereby
41 * granted, provided that the above copyright notice appears in all
42 * copies, and that both that copyright notice and this permission notice
43 * appear in supporting documentation, and that the name of Secret Labs
44 * AB or the author not be used in advertising or publicity pertaining to
45 * distribution of the software without specific, written prior
46 * permission.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000047 *
Guido van Rossumd8225182000-03-10 22:33:05 +000048 * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
49 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
50 * FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
51 * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
52 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
53 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
54 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
55 * -------------------------------------------------------------------- */
56
Marc-André Lemburg5e6007c2001-09-19 11:21:03 +000057#include <ctype.h>
Guido van Rossumd8225182000-03-10 22:33:05 +000058
59/* === Internal API ======================================================= */
60
61/* --- Internal Unicode Format -------------------------------------------- */
62
Christian Heimes0625e892008-01-07 21:04:21 +000063/* Python 3.x requires unicode */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000064#define Py_USING_UNICODE
Christian Heimes0625e892008-01-07 21:04:21 +000065
Fredrik Lundh9b14ab32001-06-26 22:59:49 +000066/* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is
67 properly set, but the default rules below don't set it. I'll
68 sort this out some other day -- fredrik@pythonware.com */
69
70#ifndef Py_UNICODE_SIZE
71#error Must define Py_UNICODE_SIZE
72#endif
73
Fredrik Lundh8f455852001-06-27 18:59:43 +000074/* Setting Py_UNICODE_WIDE enables UCS-4 storage. Otherwise, Unicode
75 strings are stored as UCS-2 (with limited support for UTF-16) */
76
77#if Py_UNICODE_SIZE >= 4
78#define Py_UNICODE_WIDE
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +000079#endif
Fredrik Lundh1294ad02001-06-26 17:17:07 +000080
Amaury Forgeot d'Arcfeb73072010-09-12 22:42:57 +000081/* Set these flags if the platform has "wchar.h" and the
Guido van Rossumd8225182000-03-10 22:33:05 +000082 wchar_t type is a 16-bit unsigned type */
83/* #define HAVE_WCHAR_H */
84/* #define HAVE_USABLE_WCHAR_T */
85
86/* Defaults for various platforms */
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +000087#ifndef PY_UNICODE_TYPE
Guido van Rossumd8225182000-03-10 22:33:05 +000088
Fredrik Lundh1294ad02001-06-26 17:17:07 +000089/* Windows has a usable wchar_t type (unless we're using UCS-4) */
Fredrik Lundh8f455852001-06-27 18:59:43 +000090# if defined(MS_WIN32) && Py_UNICODE_SIZE == 2
Guido van Rossumd8225182000-03-10 22:33:05 +000091# define HAVE_USABLE_WCHAR_T
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +000092# define PY_UNICODE_TYPE wchar_t
93# endif
94
Fredrik Lundh8f455852001-06-27 18:59:43 +000095# if defined(Py_UNICODE_WIDE)
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +000096# define PY_UNICODE_TYPE Py_UCS4
Guido van Rossumd8225182000-03-10 22:33:05 +000097# endif
98
99#endif
100
101/* If the compiler provides a wchar_t type we try to support it
Victor Stinner137c34c2010-09-29 10:25:54 +0000102 through the interface functions PyUnicode_FromWideChar(),
103 PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */
Guido van Rossumd8225182000-03-10 22:33:05 +0000104
105#ifdef HAVE_USABLE_WCHAR_T
Marc-André Lemburg1a731c62000-08-11 11:43:10 +0000106# ifndef HAVE_WCHAR_H
107# define HAVE_WCHAR_H
108# endif
Guido van Rossumd8225182000-03-10 22:33:05 +0000109#endif
110
111#ifdef HAVE_WCHAR_H
Guido van Rossum24bdb042000-03-28 20:29:59 +0000112/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
113# ifdef _HAVE_BSDI
114# include <time.h>
115# endif
Marc-André Lemburg5e6007c2001-09-19 11:21:03 +0000116# include <wchar.h>
Guido van Rossumd8225182000-03-10 22:33:05 +0000117#endif
118
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +0000119/*
120 * Use this typedef when you need to represent a UTF-16 surrogate pair
121 * as a single unsigned integer.
122 */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000123#if SIZEOF_INT >= 4
124typedef unsigned int Py_UCS4;
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +0000125#elif SIZEOF_LONG >= 4
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000126typedef unsigned long Py_UCS4;
Guido van Rossumd8225182000-03-10 22:33:05 +0000127#endif
128
Benjamin Peterson960cf0f2009-01-09 04:11:44 +0000129/* Py_UNICODE is the native Unicode storage format (code unit) used by
130 Python and represents a single Unicode element in the Unicode
131 type. */
132
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +0000133typedef PY_UNICODE_TYPE Py_UNICODE;
Marc-André Lemburg43279102000-07-07 09:01:41 +0000134
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000135/* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */
136
137/* Unicode API names are mangled to ensure that UCS-2 and UCS-4 builds
138 produce different external names and thus cause import errors in
139 case Python interpreters and extensions compiled with different
140 Unicode width assumptions are combined. */
141
142#ifndef Py_UNICODE_WIDE
143
144# define PyUnicode_AsASCIIString PyUnicodeUCS2_AsASCIIString
145# define PyUnicode_AsCharmapString PyUnicodeUCS2_AsCharmapString
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000146# define PyUnicode_AsDecodedObject PyUnicodeUCS2_AsDecodedObject
147# define PyUnicode_AsDecodedUnicode PyUnicodeUCS2_AsDecodedUnicode
Marc-André Lemburgd2d45982004-07-08 17:57:32 +0000148# define PyUnicode_AsEncodedObject PyUnicodeUCS2_AsEncodedObject
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000149# define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000150# define PyUnicode_AsEncodedUnicode PyUnicodeUCS2_AsEncodedUnicode
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000151# define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String
152# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString
Walter Dörwald41980ca2007-08-16 21:55:45 +0000153# define PyUnicode_AsUTF32String PyUnicodeUCS2_AsUTF32String
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000154# define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String
155# define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String
156# define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
157# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString
158# define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar
Victor Stinner137c34c2010-09-29 10:25:54 +0000159# define PyUnicode_AsWideCharString PyUnicodeUCS2_AsWideCharString
Alexandre Vassalotti15fafbe2008-12-28 02:13:22 +0000160# define PyUnicode_ClearFreeList PyUnicodeUCS2_ClearFreelist
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000161# define PyUnicode_Compare PyUnicodeUCS2_Compare
/* The mangled name follows the same scheme as the UCS4 table entry
   (PyUnicodeUCS4_CompareWithASCII): only the UCS2/UCS4 infix differs. */
# define PyUnicode_CompareWithASCII PyUnicodeUCS2_CompareWithASCII
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000163# define PyUnicode_Concat PyUnicodeUCS2_Concat
Walter Dörwald1ab83302007-05-18 17:15:44 +0000164# define PyUnicode_Append PyUnicodeUCS2_Append
165# define PyUnicode_AppendAndDel PyUnicodeUCS2_AppendAndDel
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000166# define PyUnicode_Contains PyUnicodeUCS2_Contains
167# define PyUnicode_Count PyUnicodeUCS2_Count
168# define PyUnicode_Decode PyUnicodeUCS2_Decode
169# define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
170# define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
171# define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
Guido van Rossum00bc0e02007-10-15 02:52:41 +0000172# define PyUnicode_DecodeFSDefault PyUnicodeUCS2_DecodeFSDefault
Christian Heimes5894ba72007-11-04 11:43:14 +0000173# define PyUnicode_DecodeFSDefaultAndSize PyUnicodeUCS2_DecodeFSDefaultAndSize
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000174# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
Walter Dörwald41980ca2007-08-16 21:55:45 +0000175# define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32
176# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000177# define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
Walter Dörwald69652032004-09-07 20:24:22 +0000178# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000179# define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
Walter Dörwald69652032004-09-07 20:24:22 +0000180# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000181# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape
182# define PyUnicode_Encode PyUnicodeUCS2_Encode
183# define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII
184# define PyUnicode_EncodeCharmap PyUnicodeUCS2_EncodeCharmap
185# define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal
186# define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1
187# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape
Walter Dörwald41980ca2007-08-16 21:55:45 +0000188# define PyUnicode_EncodeUTF32 PyUnicodeUCS2_EncodeUTF32
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000189# define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16
190# define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8
191# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape
192# define PyUnicode_Find PyUnicodeUCS2_Find
193# define PyUnicode_Format PyUnicodeUCS2_Format
194# define PyUnicode_FromEncodedObject PyUnicodeUCS2_FromEncodedObject
Alexandre Vassalotti15fafbe2008-12-28 02:13:22 +0000195# define PyUnicode_FromFormat PyUnicodeUCS2_FromFormat
196# define PyUnicode_FromFormatV PyUnicodeUCS2_FromFormatV
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000197# define PyUnicode_FromObject PyUnicodeUCS2_FromObject
Marc-André Lemburg9c329de2002-08-12 08:19:10 +0000198# define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal
Walter Dörwaldacaa5a12007-05-05 12:00:46 +0000199# define PyUnicode_FromString PyUnicodeUCS2_FromString
Walter Dörwaldd2034312007-05-18 16:29:38 +0000200# define PyUnicode_FromStringAndSize PyUnicodeUCS2_FromStringAndSize
Alexandre Vassalotti15fafbe2008-12-28 02:13:22 +0000201# define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
Walter Dörwald14176a52007-05-18 17:04:42 +0000202# define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
Martin v. Löwis011e8422009-05-05 04:43:17 +0000203# define PyUnicode_FSConverter PyUnicodeUCS2_FSConverter
Victor Stinner47fcb5b2010-08-13 23:59:58 +0000204# define PyUnicode_FSDecoder PyUnicodeUCS2_FSDecoder
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000205# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
206# define PyUnicode_GetMax PyUnicodeUCS2_GetMax
207# define PyUnicode_GetSize PyUnicodeUCS2_GetSize
Martin v. Löwis47383402007-08-15 07:32:56 +0000208# define PyUnicode_IsIdentifier PyUnicodeUCS2_IsIdentifier
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000209# define PyUnicode_Join PyUnicodeUCS2_Join
Thomas Wouters477c8d52006-05-27 19:21:47 +0000210# define PyUnicode_Partition PyUnicodeUCS2_Partition
211# define PyUnicode_RPartition PyUnicodeUCS2_RPartition
212# define PyUnicode_RSplit PyUnicodeUCS2_RSplit
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000213# define PyUnicode_Replace PyUnicodeUCS2_Replace
214# define PyUnicode_Resize PyUnicodeUCS2_Resize
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000215# define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000216# define PyUnicode_Split PyUnicodeUCS2_Split
217# define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines
218# define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch
219# define PyUnicode_Translate PyUnicodeUCS2_Translate
220# define PyUnicode_TranslateCharmap PyUnicodeUCS2_TranslateCharmap
221# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString
222# define _PyUnicode_Fini _PyUnicodeUCS2_Fini
223# define _PyUnicode_Init _PyUnicodeUCS2_Init
Victor Stinner71133ff2010-09-01 23:43:53 +0000224# define PyUnicode_strdup PyUnicodeUCS2_strdup
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000225
226#else
227
228# define PyUnicode_AsASCIIString PyUnicodeUCS4_AsASCIIString
229# define PyUnicode_AsCharmapString PyUnicodeUCS4_AsCharmapString
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000230# define PyUnicode_AsDecodedObject PyUnicodeUCS4_AsDecodedObject
231# define PyUnicode_AsDecodedUnicode PyUnicodeUCS4_AsDecodedUnicode
Marc-André Lemburgd2d45982004-07-08 17:57:32 +0000232# define PyUnicode_AsEncodedObject PyUnicodeUCS4_AsEncodedObject
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000233# define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000234# define PyUnicode_AsEncodedUnicode PyUnicodeUCS4_AsEncodedUnicode
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000235# define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String
236# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString
Walter Dörwald41980ca2007-08-16 21:55:45 +0000237# define PyUnicode_AsUTF32String PyUnicodeUCS4_AsUTF32String
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000238# define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String
239# define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String
240# define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
241# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString
242# define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar
Victor Stinner137c34c2010-09-29 10:25:54 +0000243# define PyUnicode_AsWideCharString PyUnicodeUCS4_AsWideCharString
Alexandre Vassalotti15fafbe2008-12-28 02:13:22 +0000244# define PyUnicode_ClearFreeList PyUnicodeUCS4_ClearFreelist
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000245# define PyUnicode_Compare PyUnicodeUCS4_Compare
Benjamin Petersonad465f92010-05-07 20:21:26 +0000246# define PyUnicode_CompareWithASCII PyUnicodeUCS4_CompareWithASCII
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000247# define PyUnicode_Concat PyUnicodeUCS4_Concat
Walter Dörwald1ab83302007-05-18 17:15:44 +0000248# define PyUnicode_Append PyUnicodeUCS4_Append
249# define PyUnicode_AppendAndDel PyUnicodeUCS4_AppendAndDel
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000250# define PyUnicode_Contains PyUnicodeUCS4_Contains
251# define PyUnicode_Count PyUnicodeUCS4_Count
252# define PyUnicode_Decode PyUnicodeUCS4_Decode
253# define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
254# define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
255# define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
Guido van Rossum00bc0e02007-10-15 02:52:41 +0000256# define PyUnicode_DecodeFSDefault PyUnicodeUCS4_DecodeFSDefault
Christian Heimes5894ba72007-11-04 11:43:14 +0000257# define PyUnicode_DecodeFSDefaultAndSize PyUnicodeUCS4_DecodeFSDefaultAndSize
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000258# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
Walter Dörwald41980ca2007-08-16 21:55:45 +0000259# define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32
260# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000261# define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
Walter Dörwald69652032004-09-07 20:24:22 +0000262# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000263# define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
Walter Dörwald69652032004-09-07 20:24:22 +0000264# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000265# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape
266# define PyUnicode_Encode PyUnicodeUCS4_Encode
267# define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII
268# define PyUnicode_EncodeCharmap PyUnicodeUCS4_EncodeCharmap
269# define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal
270# define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1
271# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape
Walter Dörwald41980ca2007-08-16 21:55:45 +0000272# define PyUnicode_EncodeUTF32 PyUnicodeUCS4_EncodeUTF32
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000273# define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16
274# define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8
275# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape
276# define PyUnicode_Find PyUnicodeUCS4_Find
277# define PyUnicode_Format PyUnicodeUCS4_Format
278# define PyUnicode_FromEncodedObject PyUnicodeUCS4_FromEncodedObject
Alexandre Vassalotti15fafbe2008-12-28 02:13:22 +0000279# define PyUnicode_FromFormat PyUnicodeUCS4_FromFormat
280# define PyUnicode_FromFormatV PyUnicodeUCS4_FromFormatV
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000281# define PyUnicode_FromObject PyUnicodeUCS4_FromObject
Marc-André Lemburg9c329de2002-08-12 08:19:10 +0000282# define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal
Walter Dörwaldacaa5a12007-05-05 12:00:46 +0000283# define PyUnicode_FromString PyUnicodeUCS4_FromString
Walter Dörwaldd2034312007-05-18 16:29:38 +0000284# define PyUnicode_FromStringAndSize PyUnicodeUCS4_FromStringAndSize
Alexandre Vassalotti15fafbe2008-12-28 02:13:22 +0000285# define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000286# define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
Martin v. Löwis011e8422009-05-05 04:43:17 +0000287# define PyUnicode_FSConverter PyUnicodeUCS4_FSConverter
Victor Stinner47fcb5b2010-08-13 23:59:58 +0000288# define PyUnicode_FSDecoder PyUnicodeUCS4_FSDecoder
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000289# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
290# define PyUnicode_GetMax PyUnicodeUCS4_GetMax
291# define PyUnicode_GetSize PyUnicodeUCS4_GetSize
Martin v. Löwis47383402007-08-15 07:32:56 +0000292# define PyUnicode_IsIdentifier PyUnicodeUCS4_IsIdentifier
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000293# define PyUnicode_Join PyUnicodeUCS4_Join
Thomas Wouters477c8d52006-05-27 19:21:47 +0000294# define PyUnicode_Partition PyUnicodeUCS4_Partition
295# define PyUnicode_RPartition PyUnicodeUCS4_RPartition
296# define PyUnicode_RSplit PyUnicodeUCS4_RSplit
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000297# define PyUnicode_Replace PyUnicodeUCS4_Replace
298# define PyUnicode_Resize PyUnicodeUCS4_Resize
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000299# define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000300# define PyUnicode_Split PyUnicodeUCS4_Split
301# define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines
302# define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch
303# define PyUnicode_Translate PyUnicodeUCS4_Translate
304# define PyUnicode_TranslateCharmap PyUnicodeUCS4_TranslateCharmap
305# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString
306# define _PyUnicode_Fini _PyUnicodeUCS4_Fini
307# define _PyUnicode_Init _PyUnicodeUCS4_Init
Victor Stinner71133ff2010-09-01 23:43:53 +0000308# define PyUnicode_strdup PyUnicodeUCS4_strdup
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000309
310#endif
311
Guido van Rossumd8225182000-03-10 22:33:05 +0000312/* --- Internal Unicode Operations ---------------------------------------- */
313
Benjamin Peterson960cf0f2009-01-09 04:11:44 +0000314/* Since splitting on whitespace is an important use case, and
315 whitespace in most situations is solely ASCII whitespace, we
316 optimize for the common case by using a quick look-up table
317 _Py_ascii_whitespace (see below) with an inlined check.
Christian Heimes190d79e2008-01-30 11:58:22 +0000318
Benjamin Peterson960cf0f2009-01-09 04:11:44 +0000319 */
/* Whitespace test for a single character value.  Values below 128 are
   answered from the _Py_ascii_whitespace lookup table; every other value
   is passed to _PyUnicode_IsWhitespace().  `ch` is expanded more than
   once, so the argument must not have side effects. */
#define Py_UNICODE_ISSPACE(ch) \
    (((ch) >= 128U) ? _PyUnicode_IsWhitespace(ch) : _Py_ascii_whitespace[(ch)])
Guido van Rossumd8225182000-03-10 22:33:05 +0000322
323#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
324#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
325#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
326#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
327
328#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
329#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
330#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
331
332#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
333#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
334#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
Georg Brandl559e5d72008-06-11 18:37:52 +0000335#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)
Guido van Rossumd8225182000-03-10 22:33:05 +0000336
337#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
338#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
339#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
340
Marc-André Lemburgf03e7412000-07-05 09:45:59 +0000341#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
Guido van Rossumd8225182000-03-10 22:33:05 +0000342
Marc-André Lemburga9c103b2000-07-03 10:52:13 +0000343#define Py_UNICODE_ISALNUM(ch) \
344 (Py_UNICODE_ISALPHA(ch) || \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000345 Py_UNICODE_ISDECIMAL(ch) || \
346 Py_UNICODE_ISDIGIT(ch) || \
347 Py_UNICODE_ISNUMERIC(ch))
Marc-André Lemburga9c103b2000-07-03 10:52:13 +0000348
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000349#define Py_UNICODE_COPY(target, source, length) \
350 Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE))
Guido van Rossumd8225182000-03-10 22:33:05 +0000351
/* Store `value` into each of the first `length` elements of the
   Py_UNICODE buffer `target`.

   Each argument is evaluated exactly once: `length` is captured in a
   local before the loop instead of being re-expanded in the loop
   condition, so an argument with side effects behaves predictably.
   A length of zero or less writes nothing. */
#define Py_UNICODE_FILL(target, value, length) \
    do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
        const Py_ssize_t len_ = (length);\
        for (i_ = 0; i_ < len_; i_++) t_[i_] = v_;\
    } while (0)
Guido van Rossumd8225182000-03-10 22:33:05 +0000356
Benjamin Peterson960cf0f2009-01-09 04:11:44 +0000357/* Check if substring matches at given offset. The offset must be
Thomas Wouters477c8d52006-05-27 19:21:47 +0000358 valid, and the substring must not be empty */
Benjamin Peterson960cf0f2009-01-09 04:11:44 +0000359
/* Non-zero when `substring` occurs in `string` starting at index `offset`.
   Both arguments are accessed through their ->str and ->length members.
   The first and the last code unit are compared before memcmp() runs over
   the whole span, so a mismatch at either end is rejected without the
   memcmp() call. */
#define Py_UNICODE_MATCH(string, offset, substring) \
    (((string)->str[(offset)] == (substring)->str[0]) && \
     (((string)->str + (offset))[(substring)->length - 1] == \
      (substring)->str[(substring)->length - 1]) && \
     memcmp((string)->str + (offset), (substring)->str, \
            (substring)->length * sizeof(Py_UNICODE)) == 0)
Guido van Rossumd8225182000-03-10 22:33:05 +0000364
Barry Warsaw51ac5802000-03-20 16:36:48 +0000365#ifdef __cplusplus
366extern "C" {
367#endif
368
Guido van Rossumd8225182000-03-10 22:33:05 +0000369/* --- Unicode Type ------------------------------------------------------- */
370
371typedef struct {
372 PyObject_HEAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000373 Py_ssize_t length; /* Length of raw Unicode data in buffer */
374 Py_UNICODE *str; /* Raw Unicode buffer */
375 long hash; /* Hash value; -1 if not set */
376 int state; /* != 0 if interned. In this case the two
377 * references from the dictionary to this object
378 * are *not* counted in ob_refcnt. */
379 PyObject *defenc; /* (Default) Encoded version as Python
380 string, or NULL; this is used for
381 implementing the buffer protocol */
Guido van Rossumd8225182000-03-10 22:33:05 +0000382} PyUnicodeObject;
383
Mark Hammond91a681d2002-08-12 07:21:58 +0000384PyAPI_DATA(PyTypeObject) PyUnicode_Type;
Christian Heimesa22e8bd2007-11-29 22:35:39 +0000385PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
Guido van Rossumd8225182000-03-10 22:33:05 +0000386
Walter Dörwald16807132007-05-25 13:52:07 +0000387#define SSTATE_NOT_INTERNED 0
388#define SSTATE_INTERNED_MORTAL 1
389#define SSTATE_INTERNED_IMMORTAL 2
390
Thomas Wouters27d517b2007-02-25 20:39:11 +0000391#define PyUnicode_Check(op) \
Christian Heimes90aa7642007-12-19 02:45:37 +0000392 PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
393#define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
Guido van Rossumd8225182000-03-10 22:33:05 +0000394
395/* Fast access macros */
396#define PyUnicode_GET_SIZE(op) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000397 (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length))
Guido van Rossumd8225182000-03-10 22:33:05 +0000398#define PyUnicode_GET_DATA_SIZE(op) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000399 (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE)))
Guido van Rossumd8225182000-03-10 22:33:05 +0000400#define PyUnicode_AS_UNICODE(op) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000401 (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->str))
Guido van Rossumd8225182000-03-10 22:33:05 +0000402#define PyUnicode_AS_DATA(op) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000403 (assert(PyUnicode_Check(op)),((const char *)((PyUnicodeObject *)(op))->str))
Guido van Rossumd8225182000-03-10 22:33:05 +0000404
405/* --- Constants ---------------------------------------------------------- */
406
407/* This Unicode character will be used as replacement character during
408 decoding if the errors argument is set to "replace". Note: the
409 Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
410 Unicode 3.0. */
411
412#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)
413
414/* === Public API ========================================================= */
415
416/* --- Plain Py_UNICODE --------------------------------------------------- */
417
418/* Create a Unicode Object from the Py_UNICODE buffer u of the given
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000419 size.
Marc-André Lemburg8155e0e2001-04-23 14:44:21 +0000420
421 u may be NULL which causes the contents to be undefined. It is the
422 user's responsibility to fill in the needed data afterwards. Note
423 that modifying the Unicode object contents after construction is
424 only allowed if u was set to NULL.
Guido van Rossumd8225182000-03-10 22:33:05 +0000425
426 The buffer is copied into the new object. */
427
Mark Hammond91a681d2002-08-12 07:21:58 +0000428PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
Guido van Rossumd8225182000-03-10 22:33:05 +0000429 const Py_UNICODE *u, /* Unicode buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000430 Py_ssize_t size /* size of buffer */
Guido van Rossumd8225182000-03-10 22:33:05 +0000431 );
432
Georg Brandl952867a2010-06-27 10:17:12 +0000433/* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */
Walter Dörwaldd2034312007-05-18 16:29:38 +0000434PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
435 const char *u, /* char buffer */
436 Py_ssize_t size /* size of buffer */
437 );
438
Walter Dörwaldacaa5a12007-05-05 12:00:46 +0000439/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
Georg Brandl952867a2010-06-27 10:17:12 +0000440 UTF-8 encoded bytes */
Walter Dörwaldacaa5a12007-05-05 12:00:46 +0000441PyAPI_FUNC(PyObject*) PyUnicode_FromString(
442 const char *u /* string */
443 );
444
Guido van Rossumd8225182000-03-10 22:33:05 +0000445/* Return a read-only pointer to the Unicode object's internal
446 Py_UNICODE buffer. */
447
Mark Hammond91a681d2002-08-12 07:21:58 +0000448PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000449 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +0000450 );
451
452/* Get the length of the Unicode object. */
453
Martin v. Löwis18e16552006-02-15 17:27:45 +0000454PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000455 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +0000456 );
457
Martin v. Löwisce9b5a52001-06-27 06:28:56 +0000458/* Get the maximum ordinal for a Unicode character. */
Mark Hammond91a681d2002-08-12 07:21:58 +0000459PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
Martin v. Löwisce9b5a52001-06-27 06:28:56 +0000460
Guido van Rossum52c23592000-04-10 13:41:41 +0000461/* Resize an already allocated Unicode object to the new length.
462
463 *unicode is modified to point to the new (resized) object and 0
464 returned on success.
465
466 This API may only be called by the function which also called the
467 Unicode constructor. The refcount on the object must be 1. Otherwise,
468 an error is returned.
469
470 Error handling is implemented as follows: an exception is set, -1
471 is returned and *unicode left untouched.
472
473*/
474
Mark Hammond91a681d2002-08-12 07:21:58 +0000475PyAPI_FUNC(int) PyUnicode_Resize(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000476 PyObject **unicode, /* Pointer to the Unicode object */
477 Py_ssize_t length /* New length */
Guido van Rossum52c23592000-04-10 13:41:41 +0000478 );
479
Guido van Rossumd8225182000-03-10 22:33:05 +0000480/* Coerce obj to an Unicode object and return a reference with
481 *incremented* refcount.
482
483 Coercion is done in the following way:
484
Georg Brandl952867a2010-06-27 10:17:12 +0000485 1. bytes, bytearray and other char buffer compatible objects are decoded
Fred Drakecb093fe2000-05-09 19:51:53 +0000486 under the assumptions that they contain data using the current
487 default encoding. Decoding is done in "strict" mode.
Guido van Rossumd8225182000-03-10 22:33:05 +0000488
Guido van Rossumb8c65bc2001-10-19 02:01:31 +0000489 2. All other objects (including Unicode objects) raise an
490 exception.
Guido van Rossumd8225182000-03-10 22:33:05 +0000491
492 The API returns NULL in case of an error. The caller is responsible
493 for decref'ing the returned objects.
494
495*/
496
Mark Hammond91a681d2002-08-12 07:21:58 +0000497PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000498 register PyObject *obj, /* Object */
Marc-André Lemburg5a5c81a2000-07-07 13:46:42 +0000499 const char *encoding, /* encoding */
500 const char *errors /* error handling */
501 );
502
Guido van Rossumb8c65bc2001-10-19 02:01:31 +0000503/* Coerce obj to a Unicode object and return a reference with
Marc-André Lemburg5a5c81a2000-07-07 13:46:42 +0000504 *incremented* refcount.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000505
Guido van Rossumb8c65bc2001-10-19 02:01:31 +0000506 Unicode objects are passed back as-is (subclasses are converted to
507 true Unicode objects), all other objects are delegated to
508 PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
Georg Brandl952867a2010-06-27 10:17:12 +0000509 using UTF-8 encoding as basis for decoding the object.
Marc-André Lemburg5a5c81a2000-07-07 13:46:42 +0000510
511 The API returns NULL in case of an error. The caller is responsible
512 for decref'ing the returned objects.
513
514*/
515
Mark Hammond91a681d2002-08-12 07:21:58 +0000516PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000517 register PyObject *obj /* Object */
Guido van Rossumd8225182000-03-10 22:33:05 +0000518 );
519
/* Build a Unicode object from the ASCII-encoded format string format,
   taking the arguments from the va_list vargs.
   NOTE(review): the set of supported conversion specifiers is not visible
   in this header -- confirm against the implementation. */
Victor Stinner1205f272010-09-11 00:54:47 +0000520PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(
521 const char *format, /* ASCII-encoded string */
522 va_list vargs
523 );
/* Variadic counterpart of PyUnicode_FromFormatV(): format is an
   ASCII-encoded string and the remaining arguments are passed directly.
   NOTE(review): presumably printf-like; the accepted conversion specifiers
   are not visible in this header -- confirm against the implementation. */
524PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
525 const char *format, /* ASCII-encoded string */
526 ...
527 );
Walter Dörwaldd2034312007-05-18 16:29:38 +0000528
Eric Smith4a7d76d2008-05-30 18:10:19 +0000529/* Format the object based on the format_spec, as defined in PEP 3101
530 (Advanced String Formatting). */
531PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj,
532 Py_UNICODE *format_spec,
533 Py_ssize_t format_spec_len);
534
Walter Dörwald16807132007-05-25 13:52:07 +0000535PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
536PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
537PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(const char *);
538PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void);
539
540/* Use only if you know it's a string: op is cast to PyUnicodeObject *
   without any type check, and the macro evaluates to that object's
   state field. */
541#define PyUnicode_CHECK_INTERNED(op) (((PyUnicodeObject *)(op))->state)
542
Guido van Rossumd8225182000-03-10 22:33:05 +0000543/* --- wchar_t support for platforms which support it --------------------- */
544
545#ifdef HAVE_WCHAR_H
546
Georg Brandl952867a2010-06-27 10:17:12 +0000547/* Create a Unicode Object from the wchar_t buffer w of the given
Guido van Rossumd8225182000-03-10 22:33:05 +0000548 size.
549
550 The buffer is copied into the new object. */
551
Mark Hammond91a681d2002-08-12 07:21:58 +0000552PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
Guido van Rossumd8225182000-03-10 22:33:05 +0000553 register const wchar_t *w, /* wchar_t buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000554 Py_ssize_t size /* size of buffer */
Guido van Rossumd8225182000-03-10 22:33:05 +0000555 );
556
Marc-André Lemburga9cadcd2004-11-22 13:02:31 +0000557/* Copies the Unicode Object contents into the wchar_t buffer w. At
Guido van Rossumd8225182000-03-10 22:33:05 +0000558 most size wchar_t characters are copied.
559
Marc-André Lemburga9cadcd2004-11-22 13:02:31 +0000560 Note that the resulting wchar_t string may or may not be
561 0-terminated. It is the responsibility of the caller to make sure
562 that the wchar_t string is 0-terminated in case this is required by
563 the application.
564
565 Returns the number of wchar_t characters copied (excluding a
566 possibly trailing 0-termination character) or -1 in case of an
Guido van Rossumd8225182000-03-10 22:33:05 +0000567 error. */
568
Martin v. Löwis18e16552006-02-15 17:27:45 +0000569PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
Guido van Rossumd8225182000-03-10 22:33:05 +0000570 PyUnicodeObject *unicode, /* Unicode object */
571 register wchar_t *w, /* wchar_t buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000572 Py_ssize_t size /* size of buffer */
Guido van Rossumd8225182000-03-10 22:33:05 +0000573 );
574
Victor Stinner137c34c2010-09-29 10:25:54 +0000575/* Convert the Unicode object to a wide character string. The output string
576 always ends with a nul character. If size is not NULL, write the number of
577 wide characters (including the nul character) into *size.
578
579 Returns a buffer allocated by PyMem_Alloc() (use PyMem_Free() to free it)
580 on success. On error, returns NULL, *size is undefined and raises a
581 MemoryError. */
582
583PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
Victor Stinnerbeb4135b2010-10-07 01:02:42 +0000584 PyObject *unicode, /* Unicode object */
Victor Stinner137c34c2010-09-29 10:25:54 +0000585 Py_ssize_t *size /* number of characters of the result */
586 );
587
Guido van Rossumd8225182000-03-10 22:33:05 +0000588#endif
589
Marc-André Lemburgcc8764c2002-08-11 12:23:04 +0000590/* --- Unicode ordinals --------------------------------------------------- */
591
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000592/* Create a Unicode Object from the given Unicode code point ordinal.
593
Marc-André Lemburgcc8764c2002-08-11 12:23:04 +0000594 The ordinal must be in range(0x10000) on narrow Python builds
595 (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
596 raised in case it is not.
597
598*/
599
Marc-André Lemburg9c329de2002-08-12 08:19:10 +0000600PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
Marc-André Lemburgcc8764c2002-08-11 12:23:04 +0000601
Benjamin Peterson960cf0f2009-01-09 04:11:44 +0000602/* --- Free-list management ----------------------------------------------- */
603
604/* Clear the free list used by the Unicode implementation.
605
606 This can be used to release memory used for objects on the free
607 list back to the Python memory allocator.
608
609*/
610
611PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
612
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000613/* === Builtin Codecs =====================================================
Guido van Rossumd8225182000-03-10 22:33:05 +0000614
615 Many of these APIs take two arguments encoding and errors. These
616 parameters encoding and errors have the same semantics as the ones
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000617 of the builtin str() API.
Guido van Rossumd8225182000-03-10 22:33:05 +0000618
Georg Brandl952867a2010-06-27 10:17:12 +0000619 Setting encoding to NULL causes the default encoding (UTF-8) to be used.
Guido van Rossumd8225182000-03-10 22:33:05 +0000620
621 Error handling is set by errors which may also be set to NULL
622 meaning to use the default handling defined for the codec. Default
623 error handling for all builtin codecs is "strict" (ValueErrors are
624 raised).
625
626 The codecs all use a similar interface. Only deviations from the
627 generic ones are documented.
628
629*/
630
Fred Drakecb093fe2000-05-09 19:51:53 +0000631/* --- Manage the default encoding ---------------------------------------- */
632
Jeremy Hylton3ce45382001-07-30 22:34:24 +0000633/* Return a Python string holding the default encoded value of the
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000634 Unicode object.
Jeremy Hylton3ce45382001-07-30 22:34:24 +0000635
636 The resulting string is cached in the Unicode object for subsequent
637 usage by this function. The cached version is needed to implement
638 the character buffer interface and will live (at least) as long as
639 the Unicode object itself.
640
641 The refcount of the string is *not* incremented.
642
643 *** Exported for internal use by the interpreter only !!! ***
644
645*/
646
Mark Hammond91a681d2002-08-12 07:21:58 +0000647PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000648 PyObject *unicode,
649 const char *errors);
Jeremy Hylton3ce45382001-07-30 22:34:24 +0000650
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000651/* Returns a pointer to the default encoding (normally, UTF-8) of the
652 Unicode object unicode and the size of the encoded representation
653 in bytes stored in *size.
Christian Heimes5894ba72007-11-04 11:43:14 +0000654
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000655 In case of an error, *size is not set.
Guido van Rossum7d1df6c2007-08-29 13:53:23 +0000656
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000657 *** This API is for interpreter INTERNAL USE ONLY and will likely
658 *** be removed or changed for Python 3.1.
659
660 *** If you need to access the Unicode object as UTF-8 bytes string,
661 *** please use PyUnicode_AsUTF8String() instead.
662
Martin v. Löwis5b222132007-06-10 09:51:05 +0000663*/
664
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000665PyAPI_FUNC(char *) _PyUnicode_AsStringAndSize(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000666 PyObject *unicode,
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000667 Py_ssize_t *size);
Guido van Rossum7d1df6c2007-08-29 13:53:23 +0000668
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000669/* Returns a pointer to the default encoding (normally, UTF-8) of the
670 Unicode object unicode.
Guido van Rossum7d1df6c2007-08-29 13:53:23 +0000671
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000672 Use of this API is DEPRECATED since no size information can be
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000673 extracted from the returned data.
674
675 *** This API is for interpreter INTERNAL USE ONLY and will likely
676 *** be removed or changed for Python 3.1.
677
678 *** If you need to access the Unicode object as UTF-8 bytes string,
679 *** please use PyUnicode_AsUTF8String() instead.
Guido van Rossum7d1df6c2007-08-29 13:53:23 +0000680
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000681*/
Martin v. Löwis5b222132007-06-10 09:51:05 +0000682
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000683PyAPI_FUNC(char *) _PyUnicode_AsString(PyObject *unicode);
Martin v. Löwis5b222132007-06-10 09:51:05 +0000684
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000685/* Returns the currently active default encoding.
Fred Drakecb093fe2000-05-09 19:51:53 +0000686
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000687 The default encoding is currently implemented as run-time settable
688 process global. This may change in future versions of the
689 interpreter to become a parameter which is managed on a per-thread
690 basis.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000691
Fred Drakecb093fe2000-05-09 19:51:53 +0000692 */
693
Mark Hammond91a681d2002-08-12 07:21:58 +0000694PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
Fred Drakecb093fe2000-05-09 19:51:53 +0000695
Guido van Rossumd8225182000-03-10 22:33:05 +0000696/* --- Generic Codecs ----------------------------------------------------- */
697
698/* Create a Unicode object by decoding the encoded string s of the
699 given size. */
700
Mark Hammond91a681d2002-08-12 07:21:58 +0000701PyAPI_FUNC(PyObject*) PyUnicode_Decode(
Guido van Rossumd8225182000-03-10 22:33:05 +0000702 const char *s, /* encoded string */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000703 Py_ssize_t size, /* size of buffer */
Guido van Rossumd8225182000-03-10 22:33:05 +0000704 const char *encoding, /* encoding */
705 const char *errors /* error handling */
706 );
707
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000708/* Decode a Unicode object unicode and return the result as Python
709 object. */
710
711PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000712 PyObject *unicode, /* Unicode object */
713 const char *encoding, /* encoding */
714 const char *errors /* error handling */
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000715 );
716
717/* Decode a Unicode object unicode and return the result as Unicode
718 object. */
719
720PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000721 PyObject *unicode, /* Unicode object */
722 const char *encoding, /* encoding */
723 const char *errors /* error handling */
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000724 );
725
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000726/* Encodes a Py_UNICODE buffer of the given size and returns a
Guido van Rossumd8225182000-03-10 22:33:05 +0000727 Python string object. */
728
Mark Hammond91a681d2002-08-12 07:21:58 +0000729PyAPI_FUNC(PyObject*) PyUnicode_Encode(
Guido van Rossumd8225182000-03-10 22:33:05 +0000730 const Py_UNICODE *s, /* Unicode char buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000731 Py_ssize_t size, /* number of Py_UNICODE chars to encode */
Guido van Rossumd8225182000-03-10 22:33:05 +0000732 const char *encoding, /* encoding */
733 const char *errors /* error handling */
734 );
735
Marc-André Lemburgd2d45982004-07-08 17:57:32 +0000736/* Encodes a Unicode object and returns the result as Python
737 object. */
738
739PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000740 PyObject *unicode, /* Unicode object */
741 const char *encoding, /* encoding */
742 const char *errors /* error handling */
Marc-André Lemburgd2d45982004-07-08 17:57:32 +0000743 );
744
Guido van Rossumd8225182000-03-10 22:33:05 +0000745/* Encodes a Unicode object and returns the result as Python string
746 object. */
747
Mark Hammond91a681d2002-08-12 07:21:58 +0000748PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000749 PyObject *unicode, /* Unicode object */
750 const char *encoding, /* encoding */
751 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +0000752 );
753
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000754/* Encodes a Unicode object and returns the result as Unicode
755 object. */
756
757PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000758 PyObject *unicode, /* Unicode object */
759 const char *encoding, /* encoding */
760 const char *errors /* error handling */
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000761 );
762
763/* Build an encoding map. */
764
Thomas Wouters73e5a5b2006-06-08 15:35:45 +0000765PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
766 PyObject* string /* 256 character map */
767 );
768
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000769/* --- UTF-7 Codecs ------------------------------------------------------- */
770
/* Decode the UTF-7 encoded buffer string of the given length.

   errors selects the error handling; as for the other builtin codecs it
   may be NULL to use the codec's default handling. */
Mark Hammond91a681d2002-08-12 07:21:58 +0000771PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000772 const char *string, /* UTF-7 encoded string */
773 Py_ssize_t length, /* size of string */
774 const char *errors /* error handling */
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000775 );
776
/* Stateful variant of PyUnicode_DecodeUTF7(): takes the same string,
   length and errors arguments plus consumed, through which the number of
   bytes consumed is reported.
   NOTE(review): behaviour when consumed is NULL is not visible in this
   header -- confirm against the implementation. */
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000777PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000778 const char *string, /* UTF-7 encoded string */
779 Py_ssize_t length, /* size of string */
780 const char *errors, /* error handling */
781 Py_ssize_t *consumed /* bytes consumed */
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000782 );
783
/* Encode length Py_UNICODE characters from the buffer data using UTF-7.

   base64SetO and base64WhiteSpace are flags controlling whether RFC 2152
   "Set O" characters and whitespace (sp, ht, nl, cr), respectively, are
   encoded in base64. errors selects the error handling. */
Mark Hammond91a681d2002-08-12 07:21:58 +0000784PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000785 const Py_UNICODE *data, /* Unicode char buffer */
786 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
787 int base64SetO, /* Encode RFC2152 Set O characters in base64 */
788 int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
789 const char *errors /* error handling */
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000790 );
791
Guido van Rossumd8225182000-03-10 22:33:05 +0000792/* --- UTF-8 Codecs ------------------------------------------------------- */
793
/* Decode the UTF-8 encoded buffer string of the given length.

   errors selects the error handling; as for the other builtin codecs it
   may be NULL to use the codec's default handling. */
Mark Hammond91a681d2002-08-12 07:21:58 +0000794PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000795 const char *string, /* UTF-8 encoded string */
796 Py_ssize_t length, /* size of string */
797 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +0000798 );
799
/* Stateful variant of PyUnicode_DecodeUTF8(): takes the same string,
   length and errors arguments plus consumed, through which the number of
   bytes consumed is reported.
   NOTE(review): behaviour when consumed is NULL is not visible in this
   header -- confirm against the implementation. */
Walter Dörwald69652032004-09-07 20:24:22 +0000800PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000801 const char *string, /* UTF-8 encoded string */
802 Py_ssize_t length, /* size of string */
803 const char *errors, /* error handling */
804 Py_ssize_t *consumed /* bytes consumed */
Walter Dörwald69652032004-09-07 20:24:22 +0000805 );
806
/* Encode the Unicode object unicode using UTF-8 and return the result as
   a new object. This is the API recommended elsewhere in this header for
   accessing a Unicode object as a UTF-8 bytes string. */
Mark Hammond91a681d2002-08-12 07:21:58 +0000807PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000808 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +0000809 );
810
/* Encode length Py_UNICODE characters from the buffer data using UTF-8.

   errors selects the error handling; as for the other builtin codecs it
   may be NULL to use the codec's default handling. */
Mark Hammond91a681d2002-08-12 07:21:58 +0000811PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000812 const Py_UNICODE *data, /* Unicode char buffer */
813 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
814 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +0000815 );
816
Walter Dörwald41980ca2007-08-16 21:55:45 +0000817/* --- UTF-32 Codecs ------------------------------------------------------ */
818
819/* Decodes length bytes from a UTF-32 encoded buffer string and returns
820 the corresponding Unicode object.
821
822 errors (if non-NULL) defines the error handling. It defaults
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000823 to "strict".
Walter Dörwald41980ca2007-08-16 21:55:45 +0000824
825 If byteorder is non-NULL, the decoder starts decoding using the
826 given byte order:
827
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000828 *byteorder == -1: little endian
829 *byteorder == 0: native order
830 *byteorder == 1: big endian
Walter Dörwald41980ca2007-08-16 21:55:45 +0000831
832 In native mode, the first four bytes of the stream are checked for a
833 BOM mark. If found, the BOM mark is analysed, the byte order
834 adjusted and the BOM skipped. In the other modes, no BOM mark
835 interpretation is done. After completion, *byteorder is set to the
836 current byte order at the end of input data.
837
838 If byteorder is NULL, the codec starts in native order mode.
839
840*/
841
842PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000843 const char *string, /* UTF-32 encoded string */
844 Py_ssize_t length, /* size of string */
845 const char *errors, /* error handling */
846 int *byteorder /* pointer to byteorder to use
847 0=native;-1=LE,1=BE; updated on
848 exit */
Walter Dörwald41980ca2007-08-16 21:55:45 +0000849 );
850
851PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000852 const char *string, /* UTF-32 encoded string */
853 Py_ssize_t length, /* size of string */
854 const char *errors, /* error handling */
855 int *byteorder, /* pointer to byteorder to use
856 0=native;-1=LE,1=BE; updated on
857 exit */
858 Py_ssize_t *consumed /* bytes consumed */
Walter Dörwald41980ca2007-08-16 21:55:45 +0000859 );
860
861/* Returns a Python string using the UTF-32 encoding in native byte
862 order. The string always starts with a BOM mark. */
863
864PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000865 PyObject *unicode /* Unicode object */
Walter Dörwald41980ca2007-08-16 21:55:45 +0000866 );
867
868/* Returns a Python string object holding the UTF-32 encoded value of
869 the Unicode data.
870
871 The output is written according to the value of byteorder, using the
872 following byte order:
873
874 byteorder == -1: little endian
875 byteorder == 0: native byte order (writes a BOM mark)
876 byteorder == 1: big endian
877
878 If byteorder is 0, the output string will always start with the
879 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
880 prepended.
881
882*/
883
884PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000885 const Py_UNICODE *data, /* Unicode char buffer */
886 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
887 const char *errors, /* error handling */
888 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
Walter Dörwald41980ca2007-08-16 21:55:45 +0000889 );
890
Guido van Rossumd8225182000-03-10 22:33:05 +0000891/* --- UTF-16 Codecs ------------------------------------------------------ */
892
Guido van Rossum9e896b32000-04-05 20:11:21 +0000893/* Decodes length bytes from a UTF-16 encoded buffer string and returns
Guido van Rossumd8225182000-03-10 22:33:05 +0000894 the corresponding Unicode object.
895
896 errors (if non-NULL) defines the error handling. It defaults
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000897 to "strict".
Guido van Rossumd8225182000-03-10 22:33:05 +0000898
899 If byteorder is non-NULL, the decoder starts decoding using the
900 given byte order:
901
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000902 *byteorder == -1: little endian
903 *byteorder == 0: native order
904 *byteorder == 1: big endian
Guido van Rossumd8225182000-03-10 22:33:05 +0000905
Marc-André Lemburg489b56e2001-05-21 20:30:15 +0000906 In native mode, the first two bytes of the stream are checked for a
907 BOM mark. If found, the BOM mark is analysed, the byte order
908 adjusted and the BOM skipped. In the other modes, no BOM mark
909 interpretation is done. After completion, *byteorder is set to the
910 current byte order at the end of input data.
Guido van Rossumd8225182000-03-10 22:33:05 +0000911
912 If byteorder is NULL, the codec starts in native order mode.
913
914*/
915
Mark Hammond91a681d2002-08-12 07:21:58 +0000916PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000917 const char *string, /* UTF-16 encoded string */
918 Py_ssize_t length, /* size of string */
919 const char *errors, /* error handling */
920 int *byteorder /* pointer to byteorder to use
921 0=native;-1=LE,1=BE; updated on
922 exit */
Guido van Rossumd8225182000-03-10 22:33:05 +0000923 );
924
Walter Dörwald69652032004-09-07 20:24:22 +0000925PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000926 const char *string, /* UTF-16 encoded string */
927 Py_ssize_t length, /* size of string */
928 const char *errors, /* error handling */
929 int *byteorder, /* pointer to byteorder to use
930 0=native;-1=LE,1=BE; updated on
931 exit */
932 Py_ssize_t *consumed /* bytes consumed */
Walter Dörwald69652032004-09-07 20:24:22 +0000933 );
934
Guido van Rossumd8225182000-03-10 22:33:05 +0000935/* Returns a Python string using the UTF-16 encoding in native byte
936 order. The string always starts with a BOM mark. */
937
Mark Hammond91a681d2002-08-12 07:21:58 +0000938PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000939 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +0000940 );
941
942/* Returns a Python string object holding the UTF-16 encoded value of
Guido van Rossum9e896b32000-04-05 20:11:21 +0000943 the Unicode data.
Guido van Rossumd8225182000-03-10 22:33:05 +0000944
945 The output is written according to the value of byteorder, using the
946 following byte order:
947
948 byteorder == -1: little endian
949 byteorder == 0: native byte order (writes a BOM mark)
950 byteorder == 1: big endian
951
952 If byteorder is 0, the output string will always start with the
953 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
954 prepended.
955
956 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
957 UCS-2. This trick makes it possible to add full UTF-16 capabilities
Thomas Wouters7e474022000-07-16 12:04:32 +0000958 at a later point without compromising the APIs.
Guido van Rossumd8225182000-03-10 22:33:05 +0000959
960*/
961
Mark Hammond91a681d2002-08-12 07:21:58 +0000962PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000963 const Py_UNICODE *data, /* Unicode char buffer */
964 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
965 const char *errors, /* error handling */
966 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
Guido van Rossumd8225182000-03-10 22:33:05 +0000967 );
968
969/* --- Unicode-Escape Codecs ---------------------------------------------- */
970
Mark Hammond91a681d2002-08-12 07:21:58 +0000971PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000972 const char *string, /* Unicode-Escape encoded string */
973 Py_ssize_t length, /* size of string */
974 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +0000975 );
976
Mark Hammond91a681d2002-08-12 07:21:58 +0000977PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000978 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +0000979 );
980
Mark Hammond91a681d2002-08-12 07:21:58 +0000981PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000982 const Py_UNICODE *data, /* Unicode char buffer */
983 Py_ssize_t length /* Number of Py_UNICODE chars to encode */
Guido van Rossumd8225182000-03-10 22:33:05 +0000984 );
985
986/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
987
Mark Hammond91a681d2002-08-12 07:21:58 +0000988PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000989 const char *string, /* Raw-Unicode-Escape encoded string */
990 Py_ssize_t length, /* size of string */
991 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +0000992 );
993
Mark Hammond91a681d2002-08-12 07:21:58 +0000994PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000995 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +0000996 );
997
Mark Hammond91a681d2002-08-12 07:21:58 +0000998PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000999 const Py_UNICODE *data, /* Unicode char buffer */
1000 Py_ssize_t length /* Number of Py_UNICODE chars to encode */
Guido van Rossumd8225182000-03-10 22:33:05 +00001001 );
1002
Walter Dörwalda47d1c02005-08-30 10:23:14 +00001003/* --- Unicode Internal Codec ---------------------------------------------
1004
1005 Only for internal use in _codecsmodule.c */
1006
1007PyObject *_PyUnicode_DecodeUnicodeInternal(
1008 const char *string,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001009 Py_ssize_t length,
Walter Dörwalda47d1c02005-08-30 10:23:14 +00001010 const char *errors
1011 );
1012
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001013/* --- Latin-1 Codecs -----------------------------------------------------
Guido van Rossumd8225182000-03-10 22:33:05 +00001014
1015 Note: Latin-1 corresponds to the first 256 Unicode ordinals.
1016
1017*/
1018
Mark Hammond91a681d2002-08-12 07:21:58 +00001019PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 const char *string, /* Latin-1 encoded string */
1021 Py_ssize_t length, /* size of string */
1022 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001023 );
1024
Mark Hammond91a681d2002-08-12 07:21:58 +00001025PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001026 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +00001027 );
1028
Mark Hammond91a681d2002-08-12 07:21:58 +00001029PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001030 const Py_UNICODE *data, /* Unicode char buffer */
1031 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
1032 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001033 );
1034
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001035/* --- ASCII Codecs -------------------------------------------------------
Guido van Rossumd8225182000-03-10 22:33:05 +00001036
1037 Only 7-bit ASCII data is accepted. All other codes generate errors.
1038
1039*/
1040
Mark Hammond91a681d2002-08-12 07:21:58 +00001041PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001042 const char *string, /* ASCII encoded string */
1043 Py_ssize_t length, /* size of string */
1044 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001045 );
1046
Mark Hammond91a681d2002-08-12 07:21:58 +00001047PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001048 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +00001049 );
1050
Mark Hammond91a681d2002-08-12 07:21:58 +00001051PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001052 const Py_UNICODE *data, /* Unicode char buffer */
1053 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
1054 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001055 );
1056
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001057/* --- Character Map Codecs -----------------------------------------------
Guido van Rossumd8225182000-03-10 22:33:05 +00001058
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001059 This codec uses mappings to encode and decode characters.
Guido van Rossumd8225182000-03-10 22:33:05 +00001060
1061 Decoding mappings must map single string characters to single
1062 Unicode characters, integers (which are then interpreted as Unicode
1063 ordinals) or None (meaning "undefined mapping" and causing an
1064 error).
1065
1066 Encoding mappings must map single Unicode characters to single
1067 string characters, integers (which are then interpreted as Latin-1
1068 ordinals) or None (meaning "undefined mapping" and causing an
1069 error).
1070
1071 If a character lookup fails with a LookupError, the character is
1072 copied as-is meaning that its ordinal value will be interpreted as
1073 Unicode or Latin-1 ordinal resp. Because of this mappings only need
1074 to contain those mappings which map characters to different code
1075 points.
1076
1077*/
1078
Mark Hammond91a681d2002-08-12 07:21:58 +00001079PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001080 const char *string, /* Encoded string */
1081 Py_ssize_t length, /* size of string */
1082 PyObject *mapping, /* character mapping
1083 (char ordinal -> unicode ordinal) */
1084 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001085 );
1086
Mark Hammond91a681d2002-08-12 07:21:58 +00001087PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001088 PyObject *unicode, /* Unicode object */
1089 PyObject *mapping /* character mapping
1090 (unicode ordinal -> char ordinal) */
Guido van Rossumd8225182000-03-10 22:33:05 +00001091 );
1092
Mark Hammond91a681d2002-08-12 07:21:58 +00001093PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001094 const Py_UNICODE *data, /* Unicode char buffer */
1095 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
1096 PyObject *mapping, /* character mapping
1097 (unicode ordinal -> char ordinal) */
1098 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001099 );
1100
1101/* Translate a Py_UNICODE buffer of the given length by applying a
1102 character mapping table to it and return the resulting Unicode
1103 object.
1104
1105 The mapping table must map Unicode ordinal integers to Unicode
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001106 ordinal integers or None (causing deletion of the character).
Guido van Rossumd8225182000-03-10 22:33:05 +00001107
1108 Mapping tables may be dictionaries or sequences. Unmapped character
1109 ordinals (ones which cause a LookupError) are left untouched and
1110 are copied as-is.
1111
1112*/
1113
Mark Hammond91a681d2002-08-12 07:21:58 +00001114PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001115 const Py_UNICODE *data, /* Unicode char buffer */
1116 Py_ssize_t length, /* Number of Py_UNICODE chars to translate */
1117 PyObject *table, /* Translate table */
1118 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001119 );
1120
Guido van Rossumefec1152000-03-28 02:01:15 +00001121#ifdef MS_WIN32
Guido van Rossum24bdb042000-03-28 20:29:59 +00001122
Guido van Rossumefec1152000-03-28 02:01:15 +00001123/* --- MBCS codecs for Windows -------------------------------------------- */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001124
Mark Hammond91a681d2002-08-12 07:21:58 +00001125PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
Guido van Rossumefec1152000-03-28 02:01:15 +00001126 const char *string, /* MBCS encoded string */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001127 Py_ssize_t length, /* size of string */
Guido van Rossumefec1152000-03-28 02:01:15 +00001128 const char *errors /* error handling */
1129 );
1130
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001131PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
1132 const char *string, /* MBCS encoded string */
1133 Py_ssize_t length, /* size of string */
1134 const char *errors, /* error handling */
1135 Py_ssize_t *consumed /* bytes consumed */
1136 );
1137
Mark Hammond91a681d2002-08-12 07:21:58 +00001138PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
Guido van Rossumefec1152000-03-28 02:01:15 +00001139 PyObject *unicode /* Unicode object */
1140 );
1141
Mark Hammond91a681d2002-08-12 07:21:58 +00001142PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
Guido van Rossumefec1152000-03-28 02:01:15 +00001143 const Py_UNICODE *data, /* Unicode char buffer */
Neal Norwitzd78f6cf2007-08-08 04:49:37 +00001144 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
Guido van Rossumefec1152000-03-28 02:01:15 +00001145 const char *errors /* error handling */
1146 );
1147
Guido van Rossumefec1152000-03-28 02:01:15 +00001148#endif /* MS_WIN32 */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001149
Guido van Rossum9e896b32000-04-05 20:11:21 +00001150/* --- Decimal Encoder ---------------------------------------------------- */
1151
1152/* Takes a Unicode string holding a decimal value and writes it into
1153 an output buffer using standard ASCII digit codes.
1154
1155 The output buffer has to provide at least length+1 bytes of storage
1156 area. The output string is 0-terminated.
1157
1158 The encoder converts whitespace to ' ', decimal characters to their
1159 corresponding ASCII digit and all other Latin-1 characters except
1160 \0 as-is. Characters outside this range (Unicode ordinals 1-255)
1161 are treated as errors. This includes embedded NUL characters.
1162
1163 Error handling is defined by the errors argument:
1164
1165 NULL or "strict": raise a ValueError
1166 "ignore": ignore the wrong characters (these are not copied to the
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001167 output buffer)
Guido van Rossum9e896b32000-04-05 20:11:21 +00001168 "replace": replaces illegal characters with '?'
1169
1170 Returns 0 on success, -1 on failure.
1171
1172*/
1173
Mark Hammond91a681d2002-08-12 07:21:58 +00001174PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001175 Py_UNICODE *s, /* Unicode buffer */
1176 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
1177 char *output, /* Output buffer; must have size >= length+1 */
1178 const char *errors /* error handling */
Guido van Rossum9e896b32000-04-05 20:11:21 +00001179 );
1180
Martin v. Löwis011e8422009-05-05 04:43:17 +00001181/* --- File system encoding ---------------------------------------------- */
1182
Victor Stinner47fcb5b2010-08-13 23:59:58 +00001183/* ParseTuple converter: encode str objects to bytes using
1184 PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */
Martin v. Löwis011e8422009-05-05 04:43:17 +00001185
1186PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
1187
Victor Stinner47fcb5b2010-08-13 23:59:58 +00001188/* ParseTuple converter: decode bytes objects to unicode using
1189 PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */
1190
1191PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
1192
Victor Stinner77c38622010-05-14 15:58:55 +00001193/* Decode a null-terminated string using Py_FileSystemDefaultEncoding
1194 and the "surrogateescape" error handler.
Martin v. Löwis011e8422009-05-05 04:43:17 +00001195
Victor Stinner77c38622010-05-14 15:58:55 +00001196 If Py_FileSystemDefaultEncoding is not set, fall back to UTF-8.
Martin v. Löwis011e8422009-05-05 04:43:17 +00001197
Benjamin Petersonccbd6942010-05-15 17:43:18 +00001198 Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known.
Martin v. Löwis011e8422009-05-05 04:43:17 +00001199*/
1200
1201PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
1202 const char *s /* encoded string */
1203 );
1204
Victor Stinner77c38622010-05-14 15:58:55 +00001205/* Decode a string using Py_FileSystemDefaultEncoding
1206 and the "surrogateescape" error handler.
1207
1208 If Py_FileSystemDefaultEncoding is not set, fall back to UTF-8.
1209*/
1210
Martin v. Löwis011e8422009-05-05 04:43:17 +00001211PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
1212 const char *s, /* encoded string */
1213 Py_ssize_t size /* size */
1214 );
1215
Victor Stinnerae6265f2010-05-15 16:27:27 +00001216/* Encode a Unicode object to Py_FileSystemDefaultEncoding with the
Benjamin Petersonccbd6942010-05-15 17:43:18 +00001217 "surrogateescape" error handler, and return bytes.
Victor Stinnerae6265f2010-05-15 16:27:27 +00001218
1219 If Py_FileSystemDefaultEncoding is not set, fall back to UTF-8.
1220*/
1221
1222PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
1223 PyObject *unicode
1224 );
1225
Guido van Rossumd8225182000-03-10 22:33:05 +00001226/* --- Methods & Slots ----------------------------------------------------
1227
1228 These are capable of handling Unicode objects and strings on input
1229 (we refer to them as strings in the descriptions) and return
1230 Unicode objects or integers as appropriate. */
1231
1232/* Concat two strings giving a new Unicode string. */
1233
Mark Hammond91a681d2002-08-12 07:21:58 +00001234PyAPI_FUNC(PyObject*) PyUnicode_Concat(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001235 PyObject *left, /* Left string */
1236 PyObject *right /* Right string */
Guido van Rossumd8225182000-03-10 22:33:05 +00001237 );
1238
Walter Dörwald1ab83302007-05-18 17:15:44 +00001239/* Concat two strings and put the result in *pleft
1240 (sets *pleft to NULL on error) */
1241
1242PyAPI_FUNC(void) PyUnicode_Append(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001243 PyObject **pleft, /* Pointer to left string */
1244 PyObject *right /* Right string */
Walter Dörwald1ab83302007-05-18 17:15:44 +00001245 );
1246
1247/* Concat two strings, put the result in *pleft and drop the right object
1248 (sets *pleft to NULL on error) */
1249
1250PyAPI_FUNC(void) PyUnicode_AppendAndDel(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001251 PyObject **pleft, /* Pointer to left string */
1252 PyObject *right /* Right string */
Walter Dörwald1ab83302007-05-18 17:15:44 +00001253 );
1254
Guido van Rossumd8225182000-03-10 22:33:05 +00001255/* Split a string giving a list of Unicode strings.
1256
1257 If sep is NULL, splitting will be done at all whitespace
1258 substrings. Otherwise, splits occur at the given separator.
1259
1260 At most maxsplit splits will be done. If negative, no limit is set.
1261
1262 Separators are not included in the resulting list.
1263
1264*/
1265
Mark Hammond91a681d2002-08-12 07:21:58 +00001266PyAPI_FUNC(PyObject*) PyUnicode_Split(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001267 PyObject *s, /* String to split */
1268 PyObject *sep, /* String separator */
1269 Py_ssize_t maxsplit /* Maxsplit count */
1270 );
Guido van Rossumd8225182000-03-10 22:33:05 +00001271
1272/* Ditto, but split at line breaks.
1273
1274 CRLF is considered to be one line break. Line breaks are not
1275 included in the resulting list unless keepends is true. */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001276
Mark Hammond91a681d2002-08-12 07:21:58 +00001277PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001278 PyObject *s, /* String to split */
1279 int keepends /* If true, line end markers are included */
1280 );
Guido van Rossumd8225182000-03-10 22:33:05 +00001281
Thomas Wouters477c8d52006-05-27 19:21:47 +00001282/* Partition a string using a given separator. */
1283
1284PyAPI_FUNC(PyObject*) PyUnicode_Partition(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001285 PyObject *s, /* String to partition */
1286 PyObject *sep /* String separator */
1287 );
Thomas Wouters477c8d52006-05-27 19:21:47 +00001288
1289/* Partition a string using a given separator, searching from the end of the
1290 string. */
1291
1292PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001293 PyObject *s, /* String to partition */
1294 PyObject *sep /* String separator */
1295 );
Thomas Wouters477c8d52006-05-27 19:21:47 +00001296
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001297/* Split a string giving a list of Unicode strings.
1298
1299 If sep is NULL, splitting will be done at all whitespace
1300 substrings. Otherwise, splits occur at the given separator.
1301
1302 At most maxsplit splits will be done; if maxsplit is negative, no
1303 limit is set. Unlike PyUnicode_Split, PyUnicode_RSplit splits from
1304 the end of the string.
1305
1306 Separators are not included in the resulting list.
1307
1308*/
1309
1310PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001311 PyObject *s, /* String to split */
1312 PyObject *sep, /* String separator */
1313 Py_ssize_t maxsplit /* Maxsplit count */
1314 );
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001315
Guido van Rossumd8225182000-03-10 22:33:05 +00001316/* Translate a string by applying a character mapping table to it and
1317 return the resulting Unicode object.
1318
1319 The mapping table must map Unicode ordinal integers to Unicode
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001320 ordinal integers or None (causing deletion of the character).
Guido van Rossumd8225182000-03-10 22:33:05 +00001321
1322 Mapping tables may be dictionaries or sequences. Unmapped character
1323 ordinals (ones which cause a LookupError) are left untouched and
1324 are copied as-is.
1325
1326*/
1327
Mark Hammond91a681d2002-08-12 07:21:58 +00001328PyAPI_FUNC(PyObject *) PyUnicode_Translate(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001329 PyObject *str, /* String */
1330 PyObject *table, /* Translate table */
1331 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001332 );
1333
1334/* Join a sequence of strings using the given separator and return
1335 the resulting Unicode string. */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001336
Mark Hammond91a681d2002-08-12 07:21:58 +00001337PyAPI_FUNC(PyObject*) PyUnicode_Join(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001338 PyObject *separator, /* Separator string */
1339 PyObject *seq /* Sequence object */
Guido van Rossumd8225182000-03-10 22:33:05 +00001340 );
1341
1342/* Return 1 if substr matches str[start:end] at the given tail end, 0
1343 otherwise. */
1344
Martin v. Löwis18e16552006-02-15 17:27:45 +00001345PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001346 PyObject *str, /* String */
1347 PyObject *substr, /* Prefix or Suffix string */
1348 Py_ssize_t start, /* Start index */
1349 Py_ssize_t end, /* Stop index */
1350 int direction /* Tail end: -1 prefix, +1 suffix */
Guido van Rossumd8225182000-03-10 22:33:05 +00001351 );
1352
1353/* Return the first position of substr in str[start:end] using the
Marc-André Lemburg4da6fd62002-05-29 11:33:13 +00001354 given search direction or -1 if not found. -2 is returned in case
1355 an error occurred and an exception is set. */
Guido van Rossumd8225182000-03-10 22:33:05 +00001356
Martin v. Löwis18e16552006-02-15 17:27:45 +00001357PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001358 PyObject *str, /* String */
1359 PyObject *substr, /* Substring to find */
1360 Py_ssize_t start, /* Start index */
1361 Py_ssize_t end, /* Stop index */
1362 int direction /* Find direction: +1 forward, -1 backward */
Guido van Rossumd8225182000-03-10 22:33:05 +00001363 );
1364
Barry Warsaw51ac5802000-03-20 16:36:48 +00001365/* Count the number of occurrences of substr in str[start:end]. */
Guido van Rossumd8225182000-03-10 22:33:05 +00001366
Martin v. Löwis18e16552006-02-15 17:27:45 +00001367PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001368 PyObject *str, /* String */
1369 PyObject *substr, /* Substring to count */
1370 Py_ssize_t start, /* Start index */
1371 Py_ssize_t end /* Stop index */
Guido van Rossumd8225182000-03-10 22:33:05 +00001372 );
1373
Barry Warsaw51ac5802000-03-20 16:36:48 +00001374/* Replace at most maxcount occurrences of substr in str with replstr
Guido van Rossumd8225182000-03-10 22:33:05 +00001375 and return the resulting Unicode object. */
1376
Mark Hammond91a681d2002-08-12 07:21:58 +00001377PyAPI_FUNC(PyObject *) PyUnicode_Replace(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001378 PyObject *str, /* String */
1379 PyObject *substr, /* Substring to find */
1380 PyObject *replstr, /* Substring to replace */
1381 Py_ssize_t maxcount /* Max. number of replacements to apply;
1382 -1 = all */
Guido van Rossumd8225182000-03-10 22:33:05 +00001383 );
1384
1385/* Compare two strings and return -1, 0, 1 for less than, equal,
1386 greater than resp. */
1387
Mark Hammond91a681d2002-08-12 07:21:58 +00001388PyAPI_FUNC(int) PyUnicode_Compare(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001389 PyObject *left, /* Left string */
1390 PyObject *right /* Right string */
Guido van Rossumd8225182000-03-10 22:33:05 +00001391 );
1392
Martin v. Löwis5b222132007-06-10 09:51:05 +00001393PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
1394 PyObject *left,
1395 const char *right
1396 );
1397
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001398/* Rich compare two strings and return one of the following:
1399
1400 - NULL in case an exception was raised
1401 - Py_True or Py_False for successful comparisons
1402 - Py_NotImplemented in case the type combination is unknown
1403
1404 Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
1405 case the conversion of the arguments to Unicode fails with a
1406 UnicodeDecodeError.
1407
1408 Possible values for op:
1409
1410 Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
1411
1412*/
1413
1414PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 PyObject *left, /* Left string */
1416 PyObject *right, /* Right string */
1417 int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001418 );
1419
Thomas Wouters7e474022000-07-16 12:04:32 +00001420/* Apply an argument tuple or dictionary to a format string and return
Guido van Rossumd8225182000-03-10 22:33:05 +00001421 the resulting Unicode string. */
1422
Mark Hammond91a681d2002-08-12 07:21:58 +00001423PyAPI_FUNC(PyObject *) PyUnicode_Format(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001424 PyObject *format, /* Format string */
1425 PyObject *args /* Argument tuple or dictionary */
Guido van Rossumd8225182000-03-10 22:33:05 +00001426 );
1427
Guido van Rossumd0d366b2000-03-13 23:22:24 +00001428/* Checks whether element is contained in container and return 1/0
1429 accordingly.
1430
1431 element has to coerce to a one-element Unicode string. -1 is
1432 returned in case of an error. */
1433
Mark Hammond91a681d2002-08-12 07:21:58 +00001434PyAPI_FUNC(int) PyUnicode_Contains(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001435 PyObject *container, /* Container string */
1436 PyObject *element /* Element string */
Guido van Rossumd0d366b2000-03-13 23:22:24 +00001437 );
1438
Martin v. Löwis47383402007-08-15 07:32:56 +00001439/* Checks whether argument is a valid identifier. */
1440
1441PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
1442
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001443/* Externally visible for str.strip(unicode) */
Mark Hammond91a681d2002-08-12 07:21:58 +00001444PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001445 PyUnicodeObject *self,
1446 int striptype,
1447 PyObject *sepobj
1448 );
1449
Eric Smith5807c412008-05-11 21:00:57 +00001450/* Using the current locale, insert the thousands grouping
1451 into the string pointed to by buffer. For the argument descriptions,
1452 see Objects/stringlib/localeutil.h */
1453
Eric Smith0923d1d2009-04-16 20:16:10 +00001454PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGroupingLocale(Py_UNICODE *buffer,
1455 Py_ssize_t n_buffer,
1456 Py_UNICODE *digits,
1457 Py_ssize_t n_digits,
1458 Py_ssize_t min_width);
Eric Smith5807c412008-05-11 21:00:57 +00001459
Eric Smitha3b1ac82009-04-03 14:45:06 +00001460/* Using explicit passed-in values, insert the thousands grouping
1461 into the string pointed to by buffer. For the argument descriptions,
1462 see Objects/stringlib/localeutil.h */
Eric Smith0923d1d2009-04-16 20:16:10 +00001463PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(Py_UNICODE *buffer,
1464 Py_ssize_t n_buffer,
1465 Py_UNICODE *digits,
1466 Py_ssize_t n_digits,
1467 Py_ssize_t min_width,
1468 const char *grouping,
1469 const char *thousands_sep);
Guido van Rossumd8225182000-03-10 22:33:05 +00001470/* === Characters Type APIs =============================================== */
1471
Benjamin Peterson960cf0f2009-01-09 04:11:44 +00001472/* Helper array used by Py_UNICODE_ISSPACE(). */
1473
1474PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
1475
Guido van Rossumd8225182000-03-10 22:33:05 +00001476/* These should not be used directly. Use the Py_UNICODE_IS* and
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001477 Py_UNICODE_TO* macros instead.
Guido van Rossumd8225182000-03-10 22:33:05 +00001478
1479 These APIs are implemented in Objects/unicodectype.c.
1480
1481*/
1482
Mark Hammond91a681d2002-08-12 07:21:58 +00001483PyAPI_FUNC(int) _PyUnicode_IsLowercase(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001484 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001485 );
1486
Mark Hammond91a681d2002-08-12 07:21:58 +00001487PyAPI_FUNC(int) _PyUnicode_IsUppercase(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001488 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001489 );
1490
Mark Hammond91a681d2002-08-12 07:21:58 +00001491PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001492 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001493 );
1494
Martin v. Löwis13c3e382007-08-14 22:37:03 +00001495PyAPI_FUNC(int) _PyUnicode_IsXidStart(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001496 Py_UCS4 ch /* Unicode character */
Martin v. Löwis13c3e382007-08-14 22:37:03 +00001497 );
1498
1499PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001500 Py_UCS4 ch /* Unicode character */
Martin v. Löwis13c3e382007-08-14 22:37:03 +00001501 );
1502
Mark Hammond91a681d2002-08-12 07:21:58 +00001503PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001504 const Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001505 );
1506
Mark Hammond91a681d2002-08-12 07:21:58 +00001507PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001508 const Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001509 );
1510
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001511PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
1512 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001513 );
1514
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001515PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
1516 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001517 );
1518
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001519PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
1520 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001521 );
1522
Mark Hammond91a681d2002-08-12 07:21:58 +00001523PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001524 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001525 );
1526
Mark Hammond91a681d2002-08-12 07:21:58 +00001527PyAPI_FUNC(int) _PyUnicode_ToDigit(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001528 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001529 );
1530
Mark Hammond91a681d2002-08-12 07:21:58 +00001531PyAPI_FUNC(double) _PyUnicode_ToNumeric(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001532 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001533 );
1534
Mark Hammond91a681d2002-08-12 07:21:58 +00001535PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001536 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001537 );
1538
Mark Hammond91a681d2002-08-12 07:21:58 +00001539PyAPI_FUNC(int) _PyUnicode_IsDigit(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001540 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001541 );
1542
Mark Hammond91a681d2002-08-12 07:21:58 +00001543PyAPI_FUNC(int) _PyUnicode_IsNumeric(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001544 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001545 );
1546
Georg Brandl559e5d72008-06-11 18:37:52 +00001547PyAPI_FUNC(int) _PyUnicode_IsPrintable(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001548 Py_UCS4 ch /* Unicode character */
Georg Brandl559e5d72008-06-11 18:37:52 +00001549 );
1550
Mark Hammond91a681d2002-08-12 07:21:58 +00001551PyAPI_FUNC(int) _PyUnicode_IsAlpha(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001552 Py_UCS4 ch /* Unicode character */
Marc-André Lemburgf03e7412000-07-05 09:45:59 +00001553 );
1554
Victor Stinneref8d95c2010-08-16 22:03:11 +00001555PyAPI_FUNC(size_t) Py_UNICODE_strlen(
1556 const Py_UNICODE *u
1557 );
Martin v. Löwis5b222132007-06-10 09:51:05 +00001558
1559PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
Victor Stinneref8d95c2010-08-16 22:03:11 +00001560 Py_UNICODE *s1,
1561 const Py_UNICODE *s2);
Martin v. Löwis5b222132007-06-10 09:51:05 +00001562
Victor Stinnerc4eb7652010-09-01 23:43:50 +00001563PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat(
1564 Py_UNICODE *s1, const Py_UNICODE *s2);
1565
Martin v. Löwis5b222132007-06-10 09:51:05 +00001566PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
Victor Stinneref8d95c2010-08-16 22:03:11 +00001567 Py_UNICODE *s1,
1568 const Py_UNICODE *s2,
1569 size_t n);
Martin v. Löwis5b222132007-06-10 09:51:05 +00001570
1571PyAPI_FUNC(int) Py_UNICODE_strcmp(
Victor Stinneref8d95c2010-08-16 22:03:11 +00001572 const Py_UNICODE *s1,
1573 const Py_UNICODE *s2
1574 );
1575
1576PyAPI_FUNC(int) Py_UNICODE_strncmp(
1577 const Py_UNICODE *s1,
1578 const Py_UNICODE *s2,
1579 size_t n
1580 );
Martin v. Löwis5b222132007-06-10 09:51:05 +00001581
1582PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
Victor Stinneref8d95c2010-08-16 22:03:11 +00001583 const Py_UNICODE *s,
1584 Py_UNICODE c
Martin v. Löwis5b222132007-06-10 09:51:05 +00001585 );
1586
Victor Stinner331ea922010-08-10 16:37:20 +00001587PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
Victor Stinneref8d95c2010-08-16 22:03:11 +00001588 const Py_UNICODE *s,
1589 Py_UNICODE c
Victor Stinner331ea922010-08-10 16:37:20 +00001590 );
1591
Victor Stinner71133ff2010-09-01 23:43:53 +00001592/* Create a copy of a unicode string ending with a nul character. Return NULL
1593 and raise a MemoryError exception on memory allocation failure, otherwise
1594 return a newly allocated buffer (use PyMem_Free() to free the buffer). */
1595
Victor Stinner46408602010-09-03 16:18:00 +00001596PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
Victor Stinner71133ff2010-09-01 23:43:53 +00001597 PyObject *unicode
1598 );
1599
Guido van Rossumd8225182000-03-10 22:33:05 +00001600#ifdef __cplusplus
1601}
1602#endif
Guido van Rossumd8225182000-03-10 22:33:05 +00001603#endif /* !Py_UNICODEOBJECT_H */