blob: 0a3cfc69ca1fddc68e21fd632cd89eb3f2cb1d8e [file] [log] [blame]
Guido van Rossumd8225182000-03-10 22:33:05 +00001#ifndef Py_UNICODEOBJECT_H
2#define Py_UNICODEOBJECT_H
Guido van Rossumd8225182000-03-10 22:33:05 +00003
Christian Heimesaf98da12008-01-27 15:18:18 +00004#include <stdarg.h>
5
Guido van Rossumd8225182000-03-10 22:33:05 +00006/*
7
8Unicode implementation based on original code by Fredrik Lundh,
9modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
10Unicode Integration Proposal (see file Misc/unicode.txt).
11
Guido van Rossum16b1ad92000-08-03 16:24:25 +000012Copyright (c) Corporation for National Research Initiatives.
Guido van Rossumd8225182000-03-10 22:33:05 +000013
14
15 Original header:
16 --------------------------------------------------------------------
17
18 * Yet another Unicode string type for Python. This type supports the
19 * 16-bit Basic Multilingual Plane (BMP) only.
20 *
21 * Written by Fredrik Lundh, January 1999.
22 *
23 * Copyright (c) 1999 by Secret Labs AB.
24 * Copyright (c) 1999 by Fredrik Lundh.
25 *
26 * fredrik@pythonware.com
27 * http://www.pythonware.com
28 *
29 * --------------------------------------------------------------------
30 * This Unicode String Type is
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000031 *
Guido van Rossumd8225182000-03-10 22:33:05 +000032 * Copyright (c) 1999 by Secret Labs AB
33 * Copyright (c) 1999 by Fredrik Lundh
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000034 *
Guido van Rossumd8225182000-03-10 22:33:05 +000035 * By obtaining, using, and/or copying this software and/or its
36 * associated documentation, you agree that you have read, understood,
37 * and will comply with the following terms and conditions:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000038 *
Guido van Rossumd8225182000-03-10 22:33:05 +000039 * Permission to use, copy, modify, and distribute this software and its
40 * associated documentation for any purpose and without fee is hereby
41 * granted, provided that the above copyright notice appears in all
42 * copies, and that both that copyright notice and this permission notice
43 * appear in supporting documentation, and that the name of Secret Labs
44 * AB or the author not be used in advertising or publicity pertaining to
45 * distribution of the software without specific, written prior
46 * permission.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000047 *
Guido van Rossumd8225182000-03-10 22:33:05 +000048 * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
49 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
50 * FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
51 * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
52 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
53 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
54 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
55 * -------------------------------------------------------------------- */
56
Marc-André Lemburg5e6007c2001-09-19 11:21:03 +000057#include <ctype.h>
Guido van Rossumd8225182000-03-10 22:33:05 +000058
59/* === Internal API ======================================================= */
60
61/* --- Internal Unicode Format -------------------------------------------- */
62
Christian Heimes0625e892008-01-07 21:04:21 +000063/* Python 3.x requires unicode */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000064#define Py_USING_UNICODE
Christian Heimes0625e892008-01-07 21:04:21 +000065
/* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is
   properly set, but the default rules below don't set it.  I'll
   sort this out some other day -- fredrik@pythonware.com */
69
70#ifndef Py_UNICODE_SIZE
71#error Must define Py_UNICODE_SIZE
72#endif
73
Fredrik Lundh8f455852001-06-27 18:59:43 +000074/* Setting Py_UNICODE_WIDE enables UCS-4 storage. Otherwise, Unicode
75 strings are stored as UCS-2 (with limited support for UTF-16) */
76
77#if Py_UNICODE_SIZE >= 4
78#define Py_UNICODE_WIDE
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +000079#endif
Fredrik Lundh1294ad02001-06-26 17:17:07 +000080
Amaury Forgeot d'Arcfeb73072010-09-12 22:42:57 +000081/* Set these flags if the platform has "wchar.h" and the
Guido van Rossumd8225182000-03-10 22:33:05 +000082 wchar_t type is a 16-bit unsigned type */
83/* #define HAVE_WCHAR_H */
84/* #define HAVE_USABLE_WCHAR_T */
85
86/* Defaults for various platforms */
/* Pick a storage type for Py_UNICODE unless the build configuration has
   already chosen one. */
#if !defined(PY_UNICODE_TYPE)

/* Windows has a usable wchar_t type (unless we're using UCS-4). */
#  if (Py_UNICODE_SIZE == 2) && defined(MS_WIN32)
#    define HAVE_USABLE_WCHAR_T
#    define PY_UNICODE_TYPE wchar_t
#  endif

/* Wide builds store every code unit as a Py_UCS4. */
#  ifdef Py_UNICODE_WIDE
#    define PY_UNICODE_TYPE Py_UCS4
#  endif

#endif /* !defined(PY_UNICODE_TYPE) */
100
101/* If the compiler provides a wchar_t type we try to support it
Victor Stinner137c34c2010-09-29 10:25:54 +0000102 through the interface functions PyUnicode_FromWideChar(),
103 PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */
Guido van Rossumd8225182000-03-10 22:33:05 +0000104
/* A usable wchar_t implies that <wchar.h> can be included. */
#if defined(HAVE_USABLE_WCHAR_T) && !defined(HAVE_WCHAR_H)
#  define HAVE_WCHAR_H
#endif

#if defined(HAVE_WCHAR_H)
/* BSDI 4.x ships a wchar.h with a cosmetic bug; pulling in <time.h>
   first works around it (thanks to Thomas Wouters). */
#  if defined(_HAVE_BSDI)
#    include <time.h>
#  endif
#  include <wchar.h>
#endif
118
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +0000119/*
120 * Use this typedef when you need to represent a UTF-16 surrogate pair
121 * as single unsigned integer.
122 */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000123#if SIZEOF_INT >= 4
124typedef unsigned int Py_UCS4;
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +0000125#elif SIZEOF_LONG >= 4
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000126typedef unsigned long Py_UCS4;
Guido van Rossumd8225182000-03-10 22:33:05 +0000127#endif
128
Benjamin Peterson960cf0f2009-01-09 04:11:44 +0000129/* Py_UNICODE is the native Unicode storage format (code unit) used by
130 Python and represents a single Unicode element in the Unicode
131 type. */
132
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +0000133typedef PY_UNICODE_TYPE Py_UNICODE;
Marc-André Lemburg43279102000-07-07 09:01:41 +0000134
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000135/* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */
136
/* Every Unicode API name below is redirected to a width-specific symbol
   (PyUnicodeUCS2_* for narrow builds, PyUnicodeUCS4_* for wide builds).
   Combining a Python interpreter and extensions that were compiled with
   different Unicode width assumptions therefore produces different
   external names and thus an import error. */

#ifdef Py_UNICODE_WIDE

/* Wide build: UCS-4 symbol names. */
# define PyUnicode_AsASCIIString PyUnicodeUCS4_AsASCIIString
# define PyUnicode_AsCharmapString PyUnicodeUCS4_AsCharmapString
# define PyUnicode_AsDecodedObject PyUnicodeUCS4_AsDecodedObject
# define PyUnicode_AsDecodedUnicode PyUnicodeUCS4_AsDecodedUnicode
# define PyUnicode_AsEncodedObject PyUnicodeUCS4_AsEncodedObject
# define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString
# define PyUnicode_AsEncodedUnicode PyUnicodeUCS4_AsEncodedUnicode
# define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String
# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString
# define PyUnicode_AsUTF32String PyUnicodeUCS4_AsUTF32String
# define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String
# define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String
# define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString
# define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar
# define PyUnicode_AsWideCharString PyUnicodeUCS4_AsWideCharString
# define PyUnicode_ClearFreeList PyUnicodeUCS4_ClearFreelist
# define PyUnicode_Compare PyUnicodeUCS4_Compare
# define PyUnicode_CompareWithASCIIString PyUnicodeUCS4_CompareWithASCIIString
# define PyUnicode_Concat PyUnicodeUCS4_Concat
# define PyUnicode_Append PyUnicodeUCS4_Append
# define PyUnicode_AppendAndDel PyUnicodeUCS4_AppendAndDel
# define PyUnicode_Contains PyUnicodeUCS4_Contains
# define PyUnicode_Count PyUnicodeUCS4_Count
# define PyUnicode_Decode PyUnicodeUCS4_Decode
# define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
# define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
# define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
# define PyUnicode_DecodeFSDefault PyUnicodeUCS4_DecodeFSDefault
# define PyUnicode_DecodeFSDefaultAndSize PyUnicodeUCS4_DecodeFSDefaultAndSize
# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
# define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32
# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful
# define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful
# define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful
# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape
# define PyUnicode_Encode PyUnicodeUCS4_Encode
# define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII
# define PyUnicode_EncodeCharmap PyUnicodeUCS4_EncodeCharmap
# define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal
# define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1
# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape
# define PyUnicode_EncodeUTF32 PyUnicodeUCS4_EncodeUTF32
# define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16
# define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8
# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape
# define PyUnicode_Find PyUnicodeUCS4_Find
# define PyUnicode_Format PyUnicodeUCS4_Format
# define PyUnicode_FromEncodedObject PyUnicodeUCS4_FromEncodedObject
# define PyUnicode_FromFormat PyUnicodeUCS4_FromFormat
# define PyUnicode_FromFormatV PyUnicodeUCS4_FromFormatV
# define PyUnicode_FromObject PyUnicodeUCS4_FromObject
# define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal
# define PyUnicode_FromString PyUnicodeUCS4_FromString
# define PyUnicode_FromStringAndSize PyUnicodeUCS4_FromStringAndSize
# define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
# define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
# define PyUnicode_FSConverter PyUnicodeUCS4_FSConverter
# define PyUnicode_FSDecoder PyUnicodeUCS4_FSDecoder
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
# define PyUnicode_GetMax PyUnicodeUCS4_GetMax
# define PyUnicode_GetSize PyUnicodeUCS4_GetSize
# define PyUnicode_IsIdentifier PyUnicodeUCS4_IsIdentifier
# define PyUnicode_Join PyUnicodeUCS4_Join
# define PyUnicode_Partition PyUnicodeUCS4_Partition
# define PyUnicode_RPartition PyUnicodeUCS4_RPartition
# define PyUnicode_RSplit PyUnicodeUCS4_RSplit
# define PyUnicode_Replace PyUnicodeUCS4_Replace
# define PyUnicode_Resize PyUnicodeUCS4_Resize
# define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare
# define PyUnicode_Split PyUnicodeUCS4_Split
# define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines
# define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch
# define PyUnicode_Translate PyUnicodeUCS4_Translate
# define PyUnicode_TranslateCharmap PyUnicodeUCS4_TranslateCharmap
# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString
# define _PyUnicode_Fini _PyUnicodeUCS4_Fini
# define _PyUnicode_Init _PyUnicodeUCS4_Init
# define PyUnicode_strdup PyUnicodeUCS4_strdup

#else /* !Py_UNICODE_WIDE */

/* Narrow build: UCS-2 symbol names. */
# define PyUnicode_AsASCIIString PyUnicodeUCS2_AsASCIIString
# define PyUnicode_AsCharmapString PyUnicodeUCS2_AsCharmapString
# define PyUnicode_AsDecodedObject PyUnicodeUCS2_AsDecodedObject
# define PyUnicode_AsDecodedUnicode PyUnicodeUCS2_AsDecodedUnicode
# define PyUnicode_AsEncodedObject PyUnicodeUCS2_AsEncodedObject
# define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString
# define PyUnicode_AsEncodedUnicode PyUnicodeUCS2_AsEncodedUnicode
# define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String
# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString
# define PyUnicode_AsUTF32String PyUnicodeUCS2_AsUTF32String
# define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String
# define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String
# define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString
# define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar
# define PyUnicode_AsWideCharString PyUnicodeUCS2_AsWideCharString
# define PyUnicode_ClearFreeList PyUnicodeUCS2_ClearFreelist
# define PyUnicode_Compare PyUnicodeUCS2_Compare
# define PyUnicode_CompareWithASCIIString PyUnicodeUCS2_CompareWithASCIIString
# define PyUnicode_Concat PyUnicodeUCS2_Concat
# define PyUnicode_Append PyUnicodeUCS2_Append
# define PyUnicode_AppendAndDel PyUnicodeUCS2_AppendAndDel
# define PyUnicode_Contains PyUnicodeUCS2_Contains
# define PyUnicode_Count PyUnicodeUCS2_Count
# define PyUnicode_Decode PyUnicodeUCS2_Decode
# define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
# define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
# define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
# define PyUnicode_DecodeFSDefault PyUnicodeUCS2_DecodeFSDefault
# define PyUnicode_DecodeFSDefaultAndSize PyUnicodeUCS2_DecodeFSDefaultAndSize
# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
# define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32
# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful
# define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful
# define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful
# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape
# define PyUnicode_Encode PyUnicodeUCS2_Encode
# define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII
# define PyUnicode_EncodeCharmap PyUnicodeUCS2_EncodeCharmap
# define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal
# define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1
# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape
# define PyUnicode_EncodeUTF32 PyUnicodeUCS2_EncodeUTF32
# define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16
# define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8
# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape
# define PyUnicode_Find PyUnicodeUCS2_Find
# define PyUnicode_Format PyUnicodeUCS2_Format
# define PyUnicode_FromEncodedObject PyUnicodeUCS2_FromEncodedObject
# define PyUnicode_FromFormat PyUnicodeUCS2_FromFormat
# define PyUnicode_FromFormatV PyUnicodeUCS2_FromFormatV
# define PyUnicode_FromObject PyUnicodeUCS2_FromObject
# define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal
# define PyUnicode_FromString PyUnicodeUCS2_FromString
# define PyUnicode_FromStringAndSize PyUnicodeUCS2_FromStringAndSize
# define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
# define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
# define PyUnicode_FSConverter PyUnicodeUCS2_FSConverter
# define PyUnicode_FSDecoder PyUnicodeUCS2_FSDecoder
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
# define PyUnicode_GetMax PyUnicodeUCS2_GetMax
# define PyUnicode_GetSize PyUnicodeUCS2_GetSize
# define PyUnicode_IsIdentifier PyUnicodeUCS2_IsIdentifier
# define PyUnicode_Join PyUnicodeUCS2_Join
# define PyUnicode_Partition PyUnicodeUCS2_Partition
# define PyUnicode_RPartition PyUnicodeUCS2_RPartition
# define PyUnicode_RSplit PyUnicodeUCS2_RSplit
# define PyUnicode_Replace PyUnicodeUCS2_Replace
# define PyUnicode_Resize PyUnicodeUCS2_Resize
# define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare
# define PyUnicode_Split PyUnicodeUCS2_Split
# define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines
# define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch
# define PyUnicode_Translate PyUnicodeUCS2_Translate
# define PyUnicode_TranslateCharmap PyUnicodeUCS2_TranslateCharmap
# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString
# define _PyUnicode_Fini _PyUnicodeUCS2_Fini
# define _PyUnicode_Init _PyUnicodeUCS2_Init
# define PyUnicode_strdup PyUnicodeUCS2_strdup

#endif /* Py_UNICODE_WIDE */
311
Guido van Rossumd8225182000-03-10 22:33:05 +0000312/* --- Internal Unicode Operations ---------------------------------------- */
313
/* Whitespace test with a fast path.  Splitting on whitespace is an
   important use case and the whitespace involved is, in most
   situations, plain ASCII; code points below 128 are therefore answered
   by an inlined look-up in the _Py_ascii_whitespace table, and only the
   remaining code points go through _PyUnicode_IsWhitespace().

   Note that ch is evaluated more than once. */
#define Py_UNICODE_ISSPACE(ch) \
    (((ch) >= 128U) ? _PyUnicode_IsWhitespace(ch) : _Py_ascii_whitespace[(ch)])
Guido van Rossumd8225182000-03-10 22:33:05 +0000322
/* Character property macros.  Each one forwards its argument unchanged
   to the _PyUnicode_* helper of the matching name. */

/* Predicates. */
#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)

/* Case conversions. */
#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)

/* Numeric value conversions. */
#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)

/* Alphanumeric means alphabetic, decimal, digit or numeric; the tests
   run in that order and stop at the first one that succeeds, so ch may
   be evaluated up to four times. */
#define Py_UNICODE_ISALNUM(ch)      \
    (Py_UNICODE_ISALPHA(ch)   ||    \
     Py_UNICODE_ISDECIMAL(ch) ||    \
     Py_UNICODE_ISDIGIT(ch)   ||    \
     Py_UNICODE_ISNUMERIC(ch))
Marc-André Lemburga9c103b2000-07-03 10:52:13 +0000348
/* Copy length Py_UNICODE code units from source to target through
   Py_MEMCPY; the byte count passed on is length * sizeof(Py_UNICODE). */
#define Py_UNICODE_COPY(target, source, length) \
    Py_MEMCPY((target), (source), sizeof(Py_UNICODE) * (length))
Guido van Rossumd8225182000-03-10 22:33:05 +0000351
/* Store value into the first length code units of target.

   Each macro argument is evaluated exactly once, in the order target,
   value, length, and captured in a local before the loop runs.  The
   length in particular is no longer re-evaluated on every iteration, so
   passing an expression that is costly or has side effects is safe.
   A length of zero or less stores nothing. */
#define Py_UNICODE_FILL(target, value, length) \
    do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
        Py_ssize_t n_ = (length);\
        for (i_ = 0; i_ < n_; i_++) t_[i_] = v_;\
    } while (0)
Guido van Rossumd8225182000-03-10 22:33:05 +0000356
/* Test whether substring occurs in string at the given offset.

   Preconditions: the offset must be valid, and the substring must not
   be empty.  The first and then the last code unit are compared before
   memcmp() is run over the whole substring.  Both arguments are
   pointers to objects with str and length members. */

#define Py_UNICODE_MATCH(string, offset, substring) \
    (((string)->str[(offset)] == (substring)->str[0]) && \
     (((string)->str + (offset))[(substring)->length - 1] == \
      (substring)->str[(substring)->length - 1]) && \
     (memcmp((string)->str + (offset), (substring)->str, \
             (substring)->length * sizeof(Py_UNICODE)) == 0))
Guido van Rossumd8225182000-03-10 22:33:05 +0000364
Barry Warsaw51ac5802000-03-20 16:36:48 +0000365#ifdef __cplusplus
366extern "C" {
367#endif
368
Guido van Rossumd8225182000-03-10 22:33:05 +0000369/* --- Unicode Type ------------------------------------------------------- */
370
/* Instance layout of a Unicode string object.  The fields follow the
   common object header contributed by PyObject_HEAD. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t length;          /* Length of raw Unicode data in buffer,
                                   counted in Py_UNICODE code units */
    Py_UNICODE *str;            /* Raw Unicode buffer */
    Py_hash_t hash;             /* Hash value; -1 if not set */
    int state;                  /* != 0 if interned. In this case the two
                                 * references from the dictionary to this object
                                 * are *not* counted in ob_refcnt.
                                 * Presumably one of the SSTATE_* values
                                 * defined below -- confirm. */
    PyObject *defenc;           /* (Default) Encoded version as Python
                                   string, or NULL; this is used for
                                   implementing the buffer protocol */
} PyUnicodeObject;

/* Type objects for Unicode strings and (judging by its name) for the
   Unicode string iterator. */
PyAPI_DATA(PyTypeObject) PyUnicode_Type;
PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
Guido van Rossumd8225182000-03-10 22:33:05 +0000386
/* Interning states.  SSTATE_NOT_INTERNED is the only zero value, which
   matches the "!= 0 if interned" convention of PyUnicodeObject.state. */
#define SSTATE_NOT_INTERNED 0
#define SSTATE_INTERNED_MORTAL 1
#define SSTATE_INTERNED_IMMORTAL 2

/* PyUnicode_Check(op) tests the Py_TPFLAGS_UNICODE_SUBCLASS flag on the
   type of op via PyType_FastSubclass(); PyUnicode_CheckExact(op) is true
   only when the type of op is PyUnicode_Type itself. */
#define PyUnicode_Check(op) \
                 PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
#define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
Guido van Rossumd8225182000-03-10 22:33:05 +0000394
/* Fast access macros.

   These read the PyUnicodeObject fields directly.  The type of op is
   verified with assert() only, so there is no check at all when
   assertions are compiled out, and op may be evaluated more than once.

     PyUnicode_GET_SIZE       length member (code units)
     PyUnicode_GET_DATA_SIZE  length multiplied by sizeof(Py_UNICODE)
     PyUnicode_AS_UNICODE     str member as Py_UNICODE *
     PyUnicode_AS_DATA        str member viewed as const char *  */
#define PyUnicode_GET_SIZE(op) \
    (assert(PyUnicode_Check(op)), ((PyUnicodeObject *)(op))->length)
#define PyUnicode_GET_DATA_SIZE(op) \
    (assert(PyUnicode_Check(op)), \
     (sizeof(Py_UNICODE) * ((PyUnicodeObject *)(op))->length))
#define PyUnicode_AS_UNICODE(op) \
    (assert(PyUnicode_Check(op)), ((PyUnicodeObject *)(op))->str)
#define PyUnicode_AS_DATA(op) \
    (assert(PyUnicode_Check(op)), \
     (const char *)(((PyUnicodeObject *)(op))->str))
Guido van Rossumd8225182000-03-10 22:33:05 +0000404
405/* --- Constants ---------------------------------------------------------- */
406
/* This Unicode character will be used as replacement character during
   decoding if the errors argument is set to "replace". Note: the
   Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
   Unicode 3.0.  The value is cast to Py_UNICODE so that it has the
   code unit type in expressions. */

#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)
413
414/* === Public API ========================================================= */
415
416/* --- Plain Py_UNICODE --------------------------------------------------- */
417
/* Create a Unicode Object from the Py_UNICODE buffer u of the given
   size.

   u may be NULL which causes the contents to be undefined. It is the
   user's responsibility to fill in the needed data afterwards. Note
   that modifying the Unicode object contents after construction is
   only allowed if u was set to NULL.

   The buffer is copied into the new object, so the caller keeps
   ownership of u. */

PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
    const Py_UNICODE *u,        /* Unicode buffer */
    Py_ssize_t size             /* size of buffer */
    );
432
/* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded
   bytes; size is the size of that char buffer (presumably a byte
   count -- confirm against the implementation). */
PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
    const char *u,        /* char buffer */
    Py_ssize_t size       /* size of buffer */
    );
438
/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
   UTF-8 encoded bytes, so no explicit size is passed. */
PyAPI_FUNC(PyObject*) PyUnicode_FromString(
    const char *u        /* string */
    );
444
/* Return a read-only pointer to the Unicode object's internal
   Py_UNICODE buffer.  Read-only is a usage contract: the return type
   itself is not const-qualified. */

PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
    PyObject *unicode           /* Unicode object */
    );
451
/* Get the length of the Unicode object.  The result is a Py_ssize_t;
   how errors are reported is not visible in this declaration. */

PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
    PyObject *unicode           /* Unicode object */
    );
457
/* Get the maximum ordinal for a Unicode character.  The value is
   returned as a Py_UNICODE and takes no arguments. */
PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
Martin v. Löwisce9b5a52001-06-27 06:28:56 +0000460
/* Resize an already allocated Unicode object to the new size length.

   *unicode is modified to point to the new (resized) object and 0
   returned on success.

   This API may only be called by the function which also called the
   Unicode constructor. The refcount on the object must be 1. Otherwise,
   an error is returned.

   Error handling is implemented as follows: an exception is set, -1
   is returned and *unicode left untouched.

   Because *unicode may be replaced, callers must continue with the
   pointer stored back through the first argument.

*/

PyAPI_FUNC(int) PyUnicode_Resize(
    PyObject **unicode,         /* Pointer to the Unicode object */
    Py_ssize_t length           /* New length */
    );
479
Guido van Rossumd8225182000-03-10 22:33:05 +0000480/* Coerce obj to an Unicode object and return a reference with
481 *incremented* refcount.
482
483 Coercion is done in the following way:
484
Georg Brandl952867a2010-06-27 10:17:12 +0000485 1. bytes, bytearray and other char buffer compatible objects are decoded
Fred Drakecb093fe2000-05-09 19:51:53 +0000486 under the assumptions that they contain data using the current
487 default encoding. Decoding is done in "strict" mode.
Guido van Rossumd8225182000-03-10 22:33:05 +0000488
Guido van Rossumb8c65bc2001-10-19 02:01:31 +0000489 2. All other objects (including Unicode objects) raise an
490 exception.
Guido van Rossumd8225182000-03-10 22:33:05 +0000491
492 The API returns NULL in case of an error. The caller is responsible
493 for decref'ing the returned objects.
494
495*/
496
Mark Hammond91a681d2002-08-12 07:21:58 +0000497PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000498 register PyObject *obj, /* Object */
Marc-André Lemburg5a5c81a2000-07-07 13:46:42 +0000499 const char *encoding, /* encoding */
500 const char *errors /* error handling */
501 );
502
Guido van Rossumb8c65bc2001-10-19 02:01:31 +0000503/* Coerce obj to a Unicode object and return a reference with
Marc-André Lemburg5a5c81a2000-07-07 13:46:42 +0000504 *incremented* refcount.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000505
Guido van Rossumb8c65bc2001-10-19 02:01:31 +0000506 Unicode objects are passed back as-is (subclasses are converted to
507 true Unicode objects), all other objects are delegated to
508 PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
Georg Brandl952867a2010-06-27 10:17:12 +0000509 using UTF-8 encoding as basis for decoding the object.
Marc-André Lemburg5a5c81a2000-07-07 13:46:42 +0000510
511 The API returns NULL in case of an error. The caller is responsible
512 for decref'ing the returned objects.
513
514*/
515
Mark Hammond91a681d2002-08-12 07:21:58 +0000516PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000517 register PyObject *obj /* Object */
Guido van Rossumd8225182000-03-10 22:33:05 +0000518 );
519
Victor Stinner1205f272010-09-11 00:54:47 +0000520PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(
521 const char *format, /* ASCII-encoded string */
522 va_list vargs
523 );
524PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
525 const char *format, /* ASCII-encoded string */
526 ...
527 );
Walter Dörwaldd2034312007-05-18 16:29:38 +0000528
Eric Smith4a7d76d2008-05-30 18:10:19 +0000529/* Format the object based on the format_spec, as defined in PEP 3101
530 (Advanced String Formatting). */
531PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj,
532 Py_UNICODE *format_spec,
533 Py_ssize_t format_spec_len);
534
Walter Dörwald16807132007-05-25 13:52:07 +0000535PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
536PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
537PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(const char *);
538PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void);
539
540/* Use only if you know it's a string */
541#define PyUnicode_CHECK_INTERNED(op) (((PyUnicodeObject *)(op))->state)
542
Guido van Rossumd8225182000-03-10 22:33:05 +0000543/* --- wchar_t support for platforms which support it --------------------- */
544
545#ifdef HAVE_WCHAR_H
546
Georg Brandl952867a2010-06-27 10:17:12 +0000547/* Create a Unicode Object from the wchar_t buffer w of the given
Guido van Rossumd8225182000-03-10 22:33:05 +0000548 size.
549
550 The buffer is copied into the new object. */
551
Mark Hammond91a681d2002-08-12 07:21:58 +0000552PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
Guido van Rossumd8225182000-03-10 22:33:05 +0000553 register const wchar_t *w, /* wchar_t buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000554 Py_ssize_t size /* size of buffer */
Guido van Rossumd8225182000-03-10 22:33:05 +0000555 );
556
Marc-André Lemburga9cadcd2004-11-22 13:02:31 +0000557/* Copies the Unicode Object contents into the wchar_t buffer w. At
Guido van Rossumd8225182000-03-10 22:33:05 +0000558 most size wchar_t characters are copied.
559
Marc-André Lemburga9cadcd2004-11-22 13:02:31 +0000560 Note that the resulting wchar_t string may or may not be
561 0-terminated. It is the responsibility of the caller to make sure
562 that the wchar_t string is 0-terminated in case this is required by
563 the application.
564
565 Returns the number of wchar_t characters copied (excluding a
566 possibly trailing 0-termination character) or -1 in case of an
Guido van Rossumd8225182000-03-10 22:33:05 +0000567 error. */
568
Martin v. Löwis18e16552006-02-15 17:27:45 +0000569PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
Guido van Rossumd8225182000-03-10 22:33:05 +0000570 PyUnicodeObject *unicode, /* Unicode object */
571 register wchar_t *w, /* wchar_t buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000572 Py_ssize_t size /* size of buffer */
Guido van Rossumd8225182000-03-10 22:33:05 +0000573 );
574
Victor Stinner137c34c2010-09-29 10:25:54 +0000575/* Convert the Unicode object to a wide character string. The output string
576 always ends with a nul character. If size is not NULL, write the number of
577 wide characters (including the nul character) into *size.
578
579 Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it)
580 on success. On error, returns NULL and raises a MemoryError; *size is
581 undefined. */
582
583PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
Victor Stinnerbeb4135b2010-10-07 01:02:42 +0000584 PyObject *unicode, /* Unicode object */
Victor Stinner137c34c2010-09-29 10:25:54 +0000585 Py_ssize_t *size /* number of characters of the result */
586 );
587
Guido van Rossumd8225182000-03-10 22:33:05 +0000588#endif
589
Marc-André Lemburgcc8764c2002-08-11 12:23:04 +0000590/* --- Unicode ordinals --------------------------------------------------- */
591
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000592/* Create a Unicode Object from the given Unicode code point ordinal.
593
Marc-André Lemburgcc8764c2002-08-11 12:23:04 +0000594 The ordinal must be in range(0x10000) on narrow Python builds
595 (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
596 raised in case it is not.
597
598*/
599
Marc-André Lemburg9c329de2002-08-12 08:19:10 +0000600PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
Marc-André Lemburgcc8764c2002-08-11 12:23:04 +0000601
Benjamin Peterson960cf0f2009-01-09 04:11:44 +0000602/* --- Free-list management ----------------------------------------------- */
603
604/* Clear the free list used by the Unicode implementation.
605
606 This can be used to release memory used for objects on the free
607 list back to the Python memory allocator.
608
609*/
610
611PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
612
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000613/* === Builtin Codecs =====================================================
Guido van Rossumd8225182000-03-10 22:33:05 +0000614
615 Many of these APIs take two arguments encoding and errors. These
616 parameters encoding and errors have the same semantics as the ones
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000617 of the builtin str() API.
Guido van Rossumd8225182000-03-10 22:33:05 +0000618
Georg Brandl952867a2010-06-27 10:17:12 +0000619 Setting encoding to NULL causes the default encoding (UTF-8) to be used.
Guido van Rossumd8225182000-03-10 22:33:05 +0000620
621 Error handling is set by errors which may also be set to NULL
622 meaning to use the default handling defined for the codec. Default
623 error handling for all builtin codecs is "strict" (ValueErrors are
624 raised).
625
626 The codecs all use a similar interface. Only deviations from the
627 generic ones are documented.
628
629*/
630
Fred Drakecb093fe2000-05-09 19:51:53 +0000631/* --- Manage the default encoding ---------------------------------------- */
632
Jeremy Hylton3ce45382001-07-30 22:34:24 +0000633/* Return a Python string holding the default encoded value of the
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000634 Unicode object.
Jeremy Hylton3ce45382001-07-30 22:34:24 +0000635
636 The resulting string is cached in the Unicode object for subsequent
637 usage by this function. The cached version is needed to implement
638 the character buffer interface and will live (at least) as long as
639 the Unicode object itself.
640
641 The refcount of the string is *not* incremented.
642
643 *** Exported for internal use by the interpreter only !!! ***
644
645*/
646
Mark Hammond91a681d2002-08-12 07:21:58 +0000647PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000648 PyObject *unicode,
649 const char *errors);
Jeremy Hylton3ce45382001-07-30 22:34:24 +0000650
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000651/* Returns a pointer to the default encoding (normally, UTF-8) of the
652 Unicode object unicode and the size of the encoded representation
653 in bytes stored in *size.
Christian Heimes5894ba72007-11-04 11:43:14 +0000654
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000655 In case of an error, *size is not set.
Guido van Rossum7d1df6c2007-08-29 13:53:23 +0000656
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000657 *** This API is for interpreter INTERNAL USE ONLY and will likely
658 *** be removed or changed for Python 3.1.
659
660 *** If you need to access the Unicode object as UTF-8 bytes string,
661 *** please use PyUnicode_AsUTF8String() instead.
662
Martin v. Löwis5b222132007-06-10 09:51:05 +0000663*/
664
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000665PyAPI_FUNC(char *) _PyUnicode_AsStringAndSize(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000666 PyObject *unicode,
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000667 Py_ssize_t *size);
Guido van Rossum7d1df6c2007-08-29 13:53:23 +0000668
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000669/* Returns a pointer to the default encoding (normally, UTF-8) of the
670 Unicode object unicode.
Guido van Rossum7d1df6c2007-08-29 13:53:23 +0000671
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000672 Use of this API is DEPRECATED since no size information can be
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000673 extracted from the returned data.
674
675 *** This API is for interpreter INTERNAL USE ONLY and will likely
676 *** be removed or changed for Python 3.1.
677
678 *** If you need to access the Unicode object as UTF-8 bytes string,
679 *** please use PyUnicode_AsUTF8String() instead.
Guido van Rossum7d1df6c2007-08-29 13:53:23 +0000680
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000681*/
Martin v. Löwis5b222132007-06-10 09:51:05 +0000682
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000683PyAPI_FUNC(char *) _PyUnicode_AsString(PyObject *unicode);
Martin v. Löwis5b222132007-06-10 09:51:05 +0000684
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000685/* Returns the currently active default encoding.
Fred Drakecb093fe2000-05-09 19:51:53 +0000686
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000687 The default encoding is currently implemented as run-time settable
688 process global. This may change in future versions of the
689 interpreter to become a parameter which is managed on a per-thread
690 basis.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000691
Fred Drakecb093fe2000-05-09 19:51:53 +0000692 */
693
Mark Hammond91a681d2002-08-12 07:21:58 +0000694PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
Fred Drakecb093fe2000-05-09 19:51:53 +0000695
Guido van Rossumd8225182000-03-10 22:33:05 +0000696/* --- Generic Codecs ----------------------------------------------------- */
697
698/* Create a Unicode object by decoding the encoded string s of the
699 given size. */
700
Mark Hammond91a681d2002-08-12 07:21:58 +0000701PyAPI_FUNC(PyObject*) PyUnicode_Decode(
Guido van Rossumd8225182000-03-10 22:33:05 +0000702 const char *s, /* encoded string */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000703 Py_ssize_t size, /* size of buffer */
Guido van Rossumd8225182000-03-10 22:33:05 +0000704 const char *encoding, /* encoding */
705 const char *errors /* error handling */
706 );
707
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000708/* Decode a Unicode object unicode and return the result as Python
709 object. */
710
711PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000712 PyObject *unicode, /* Unicode object */
713 const char *encoding, /* encoding */
714 const char *errors /* error handling */
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000715 );
716
717/* Decode a Unicode object unicode and return the result as Unicode
718 object. */
719
720PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000721 PyObject *unicode, /* Unicode object */
722 const char *encoding, /* encoding */
723 const char *errors /* error handling */
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000724 );
725
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000726/* Encodes a Py_UNICODE buffer of the given size and returns a
Guido van Rossumd8225182000-03-10 22:33:05 +0000727 Python string object. */
728
Mark Hammond91a681d2002-08-12 07:21:58 +0000729PyAPI_FUNC(PyObject*) PyUnicode_Encode(
Guido van Rossumd8225182000-03-10 22:33:05 +0000730 const Py_UNICODE *s, /* Unicode char buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000731 Py_ssize_t size, /* number of Py_UNICODE chars to encode */
Guido van Rossumd8225182000-03-10 22:33:05 +0000732 const char *encoding, /* encoding */
733 const char *errors /* error handling */
734 );
735
Marc-André Lemburgd2d45982004-07-08 17:57:32 +0000736/* Encodes a Unicode object and returns the result as Python
737 object. */
738
739PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000740 PyObject *unicode, /* Unicode object */
741 const char *encoding, /* encoding */
742 const char *errors /* error handling */
Marc-André Lemburgd2d45982004-07-08 17:57:32 +0000743 );
744
Guido van Rossumd8225182000-03-10 22:33:05 +0000745/* Encodes a Unicode object and returns the result as Python string
746 object. */
747
Mark Hammond91a681d2002-08-12 07:21:58 +0000748PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000749 PyObject *unicode, /* Unicode object */
750 const char *encoding, /* encoding */
751 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +0000752 );
753
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000754/* Encodes a Unicode object and returns the result as Unicode
755 object. */
756
757PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000758 PyObject *unicode, /* Unicode object */
759 const char *encoding, /* encoding */
760 const char *errors /* error handling */
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000761 );
762
763/* Build an encoding map. */
764
Thomas Wouters73e5a5b2006-06-08 15:35:45 +0000765PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
766 PyObject* string /* 256 character map */
767 );
768
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000769/* --- UTF-7 Codecs ------------------------------------------------------- */
770
Mark Hammond91a681d2002-08-12 07:21:58 +0000771PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000772 const char *string, /* UTF-7 encoded string */
773 Py_ssize_t length, /* size of string */
774 const char *errors /* error handling */
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000775 );
776
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000777PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000778 const char *string, /* UTF-7 encoded string */
779 Py_ssize_t length, /* size of string */
780 const char *errors, /* error handling */
781 Py_ssize_t *consumed /* bytes consumed */
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000782 );
783
Mark Hammond91a681d2002-08-12 07:21:58 +0000784PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000785 const Py_UNICODE *data, /* Unicode char buffer */
786 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
787 int base64SetO, /* Encode RFC2152 Set O characters in base64 */
788 int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
789 const char *errors /* error handling */
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000790 );
791
Guido van Rossumd8225182000-03-10 22:33:05 +0000792/* --- UTF-8 Codecs ------------------------------------------------------- */
793
Mark Hammond91a681d2002-08-12 07:21:58 +0000794PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000795 const char *string, /* UTF-8 encoded string */
796 Py_ssize_t length, /* size of string */
797 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +0000798 );
799
Walter Dörwald69652032004-09-07 20:24:22 +0000800PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000801 const char *string, /* UTF-8 encoded string */
802 Py_ssize_t length, /* size of string */
803 const char *errors, /* error handling */
804 Py_ssize_t *consumed /* bytes consumed */
Walter Dörwald69652032004-09-07 20:24:22 +0000805 );
806
Mark Hammond91a681d2002-08-12 07:21:58 +0000807PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000808 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +0000809 );
810
Mark Hammond91a681d2002-08-12 07:21:58 +0000811PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000812 const Py_UNICODE *data, /* Unicode char buffer */
813 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
814 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +0000815 );
816
Walter Dörwald41980ca2007-08-16 21:55:45 +0000817/* --- UTF-32 Codecs ------------------------------------------------------ */
818
819/* Decodes length bytes from a UTF-32 encoded buffer string and returns
820 the corresponding Unicode object.
821
822 errors (if non-NULL) defines the error handling. It defaults
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000823 to "strict".
Walter Dörwald41980ca2007-08-16 21:55:45 +0000824
825 If byteorder is non-NULL, the decoder starts decoding using the
826 given byte order:
827
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000828 *byteorder == -1: little endian
829 *byteorder == 0: native order
830 *byteorder == 1: big endian
Walter Dörwald41980ca2007-08-16 21:55:45 +0000831
832 In native mode, the first four bytes of the stream are checked for a
833 BOM mark. If found, the BOM mark is analysed, the byte order
834 adjusted and the BOM skipped. In the other modes, no BOM mark
835 interpretation is done. After completion, *byteorder is set to the
836 current byte order at the end of input data.
837
838 If byteorder is NULL, the codec starts in native order mode.
839
840*/
841
842PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000843 const char *string, /* UTF-32 encoded string */
844 Py_ssize_t length, /* size of string */
845 const char *errors, /* error handling */
846 int *byteorder /* pointer to byteorder to use
847 0=native;-1=LE,1=BE; updated on
848 exit */
Walter Dörwald41980ca2007-08-16 21:55:45 +0000849 );
850
851PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000852 const char *string, /* UTF-32 encoded string */
853 Py_ssize_t length, /* size of string */
854 const char *errors, /* error handling */
855 int *byteorder, /* pointer to byteorder to use
856 0=native;-1=LE,1=BE; updated on
857 exit */
858 Py_ssize_t *consumed /* bytes consumed */
Walter Dörwald41980ca2007-08-16 21:55:45 +0000859 );
860
861/* Returns a Python string using the UTF-32 encoding in native byte
862 order. The string always starts with a BOM mark. */
863
864PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000865 PyObject *unicode /* Unicode object */
Walter Dörwald41980ca2007-08-16 21:55:45 +0000866 );
867
868/* Returns a Python string object holding the UTF-32 encoded value of
869 the Unicode data.
870
871 If byteorder is not 0, output is written according to the following
872 byte order:
873
874 byteorder == -1: little endian
875 byteorder == 0: native byte order (writes a BOM mark)
876 byteorder == 1: big endian
877
878 If byteorder is 0, the output string will always start with the
879 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
880 prepended.
881
882*/
883
884PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000885 const Py_UNICODE *data, /* Unicode char buffer */
886 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
887 const char *errors, /* error handling */
888 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
Walter Dörwald41980ca2007-08-16 21:55:45 +0000889 );
890
Guido van Rossumd8225182000-03-10 22:33:05 +0000891/* --- UTF-16 Codecs ------------------------------------------------------ */
892
Guido van Rossum9e896b32000-04-05 20:11:21 +0000893/* Decodes length bytes from a UTF-16 encoded buffer string and returns
Guido van Rossumd8225182000-03-10 22:33:05 +0000894 the corresponding Unicode object.
895
896 errors (if non-NULL) defines the error handling. It defaults
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000897 to "strict".
Guido van Rossumd8225182000-03-10 22:33:05 +0000898
899 If byteorder is non-NULL, the decoder starts decoding using the
900 given byte order:
901
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000902 *byteorder == -1: little endian
903 *byteorder == 0: native order
904 *byteorder == 1: big endian
Guido van Rossumd8225182000-03-10 22:33:05 +0000905
Marc-André Lemburg489b56e2001-05-21 20:30:15 +0000906 In native mode, the first two bytes of the stream are checked for a
907 BOM mark. If found, the BOM mark is analysed, the byte order
908 adjusted and the BOM skipped. In the other modes, no BOM mark
909 interpretation is done. After completion, *byteorder is set to the
910 current byte order at the end of input data.
Guido van Rossumd8225182000-03-10 22:33:05 +0000911
912 If byteorder is NULL, the codec starts in native order mode.
913
914*/
915
Mark Hammond91a681d2002-08-12 07:21:58 +0000916PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000917 const char *string, /* UTF-16 encoded string */
918 Py_ssize_t length, /* size of string */
919 const char *errors, /* error handling */
920 int *byteorder /* pointer to byteorder to use
921 0=native;-1=LE,1=BE; updated on
922 exit */
Guido van Rossumd8225182000-03-10 22:33:05 +0000923 );
924
Walter Dörwald69652032004-09-07 20:24:22 +0000925PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000926 const char *string, /* UTF-16 encoded string */
927 Py_ssize_t length, /* size of string */
928 const char *errors, /* error handling */
929 int *byteorder, /* pointer to byteorder to use
930 0=native;-1=LE,1=BE; updated on
931 exit */
932 Py_ssize_t *consumed /* bytes consumed */
Walter Dörwald69652032004-09-07 20:24:22 +0000933 );
934
Guido van Rossumd8225182000-03-10 22:33:05 +0000935/* Returns a Python string using the UTF-16 encoding in native byte
936 order. The string always starts with a BOM mark. */
937
Mark Hammond91a681d2002-08-12 07:21:58 +0000938PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000939 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +0000940 );
941
942/* Returns a Python string object holding the UTF-16 encoded value of
Guido van Rossum9e896b32000-04-05 20:11:21 +0000943 the Unicode data.
Guido van Rossumd8225182000-03-10 22:33:05 +0000944
945 If byteorder is not 0, output is written according to the following
946 byte order:
947
948 byteorder == -1: little endian
949 byteorder == 0: native byte order (writes a BOM mark)
950 byteorder == 1: big endian
951
952 If byteorder is 0, the output string will always start with the
953 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
954 prepended.
955
956 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
957 UCS-2. This trick makes it possible to add full UTF-16 capabilities
Thomas Wouters7e474022000-07-16 12:04:32 +0000958 at a later point without compromising the APIs.
Guido van Rossumd8225182000-03-10 22:33:05 +0000959
960*/
961
Mark Hammond91a681d2002-08-12 07:21:58 +0000962PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000963 const Py_UNICODE *data, /* Unicode char buffer */
964 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
965 const char *errors, /* error handling */
966 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
Guido van Rossumd8225182000-03-10 22:33:05 +0000967 );
968
969/* --- Unicode-Escape Codecs ---------------------------------------------- */
970
Mark Hammond91a681d2002-08-12 07:21:58 +0000971PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000972 const char *string, /* Unicode-Escape encoded string */
973 Py_ssize_t length, /* size of string */
974 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +0000975 );
976
Mark Hammond91a681d2002-08-12 07:21:58 +0000977PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000978 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +0000979 );
980
Mark Hammond91a681d2002-08-12 07:21:58 +0000981PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000982 const Py_UNICODE *data, /* Unicode char buffer */
983 Py_ssize_t length /* Number of Py_UNICODE chars to encode */
Guido van Rossumd8225182000-03-10 22:33:05 +0000984 );
985
986/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
987
Mark Hammond91a681d2002-08-12 07:21:58 +0000988PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000989 const char *string, /* Raw-Unicode-Escape encoded string */
990 Py_ssize_t length, /* size of string */
991 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +0000992 );
993
Mark Hammond91a681d2002-08-12 07:21:58 +0000994PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000995 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +0000996 );
997
Mark Hammond91a681d2002-08-12 07:21:58 +0000998PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000999 const Py_UNICODE *data, /* Unicode char buffer */
1000 Py_ssize_t length /* Number of Py_UNICODE chars to encode */
Guido van Rossumd8225182000-03-10 22:33:05 +00001001 );
1002
Walter Dörwalda47d1c02005-08-30 10:23:14 +00001003/* --- Unicode Internal Codec ---------------------------------------------
1004
1005 Only for internal use in _codecsmodule.c */
1006
1007PyObject *_PyUnicode_DecodeUnicodeInternal(
1008 const char *string,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001009 Py_ssize_t length,
Walter Dörwalda47d1c02005-08-30 10:23:14 +00001010 const char *errors
1011 );
1012
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001013/* --- Latin-1 Codecs -----------------------------------------------------
Guido van Rossumd8225182000-03-10 22:33:05 +00001014
1015 Note: Latin-1 corresponds to the first 256 Unicode ordinals.
1016
1017*/
1018
Mark Hammond91a681d2002-08-12 07:21:58 +00001019PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 const char *string, /* Latin-1 encoded string */
1021 Py_ssize_t length, /* size of string */
1022 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001023 );
1024
Mark Hammond91a681d2002-08-12 07:21:58 +00001025PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001026 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +00001027 );
1028
Mark Hammond91a681d2002-08-12 07:21:58 +00001029PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001030 const Py_UNICODE *data, /* Unicode char buffer */
1031 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
1032 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001033 );
1034
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001035/* --- ASCII Codecs -------------------------------------------------------
Guido van Rossumd8225182000-03-10 22:33:05 +00001036
1037 Only 7-bit ASCII data is accepted. All other codes generate errors.
1038
1039*/
1040
Mark Hammond91a681d2002-08-12 07:21:58 +00001041PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001042 const char *string, /* ASCII encoded string */
1043 Py_ssize_t length, /* size of string */
1044 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001045 );
1046
Mark Hammond91a681d2002-08-12 07:21:58 +00001047PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001048 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +00001049 );
1050
Mark Hammond91a681d2002-08-12 07:21:58 +00001051PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001052 const Py_UNICODE *data, /* Unicode char buffer */
1053 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
1054 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001055 );
1056
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001057/* --- Character Map Codecs -----------------------------------------------
Guido van Rossumd8225182000-03-10 22:33:05 +00001058
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001059 This codec uses mappings to encode and decode characters.
Guido van Rossumd8225182000-03-10 22:33:05 +00001060
1061 Decoding mappings must map single string characters to single
1062 Unicode characters, integers (which are then interpreted as Unicode
1063 ordinals) or None (meaning "undefined mapping" and causing an
1064 error).
1065
1066 Encoding mappings must map single Unicode characters to single
1067 string characters, integers (which are then interpreted as Latin-1
1068 ordinals) or None (meaning "undefined mapping" and causing an
1069 error).
1070
1071 If a character lookup fails with a LookupError, the character is
1072 copied as-is meaning that its ordinal value will be interpreted as
1073 Unicode or Latin-1 ordinal, respectively. Because of this, mappings only need
1074 to contain those mappings which map characters to different code
1075 points.
1076
1077*/
1078
Mark Hammond91a681d2002-08-12 07:21:58 +00001079PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001080 const char *string, /* Encoded string */
1081 Py_ssize_t length, /* size of string */
1082 PyObject *mapping, /* character mapping
1083 (char ordinal -> unicode ordinal) */
1084 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001085 );
1086
Mark Hammond91a681d2002-08-12 07:21:58 +00001087PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001088 PyObject *unicode, /* Unicode object */
1089 PyObject *mapping /* character mapping
1090 (unicode ordinal -> char ordinal) */
Guido van Rossumd8225182000-03-10 22:33:05 +00001091 );
1092
Mark Hammond91a681d2002-08-12 07:21:58 +00001093PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001094 const Py_UNICODE *data, /* Unicode char buffer */
1095 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
1096 PyObject *mapping, /* character mapping
1097 (unicode ordinal -> char ordinal) */
1098 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001099 );
1100
1101/* Translate a Py_UNICODE buffer of the given length by applying a
1102 character mapping table to it and return the resulting Unicode
1103 object.
1104
1105 The mapping table must map Unicode ordinal integers to Unicode
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001106 ordinal integers or None (causing deletion of the character).
Guido van Rossumd8225182000-03-10 22:33:05 +00001107
1108 Mapping tables may be dictionaries or sequences. Unmapped character
1109 ordinals (ones which cause a LookupError) are left untouched and
1110 are copied as-is.
1111
1112*/
1113
Mark Hammond91a681d2002-08-12 07:21:58 +00001114PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001115 const Py_UNICODE *data, /* Unicode char buffer */
1116 Py_ssize_t length, /* Number of Py_UNICODE chars to translate */
1117 PyObject *table, /* Translate table */
1118 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001119 );
1120
Guido van Rossumefec1152000-03-28 02:01:15 +00001121#ifdef MS_WIN32
Guido van Rossum24bdb042000-03-28 20:29:59 +00001122
Guido van Rossumefec1152000-03-28 02:01:15 +00001123/* --- MBCS codecs for Windows -------------------------------------------- */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001124
Mark Hammond91a681d2002-08-12 07:21:58 +00001125PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
Guido van Rossumefec1152000-03-28 02:01:15 +00001126 const char *string, /* MBCS encoded string */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001127 Py_ssize_t length, /* size of string */
Guido van Rossumefec1152000-03-28 02:01:15 +00001128 const char *errors /* error handling */
1129 );
1130
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001131PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
1132 const char *string, /* MBCS encoded string */
1133 Py_ssize_t length, /* size of string */
1134 const char *errors, /* error handling */
1135 Py_ssize_t *consumed /* bytes consumed */
1136 );
1137
Mark Hammond91a681d2002-08-12 07:21:58 +00001138PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
Guido van Rossumefec1152000-03-28 02:01:15 +00001139 PyObject *unicode /* Unicode object */
1140 );
1141
Mark Hammond91a681d2002-08-12 07:21:58 +00001142PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
Guido van Rossumefec1152000-03-28 02:01:15 +00001143 const Py_UNICODE *data, /* Unicode char buffer */
Neal Norwitzd78f6cf2007-08-08 04:49:37 +00001144 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
Guido van Rossumefec1152000-03-28 02:01:15 +00001145 const char *errors /* error handling */
1146 );
1147
Guido van Rossumefec1152000-03-28 02:01:15 +00001148#endif /* MS_WIN32 */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001149
Guido van Rossum9e896b32000-04-05 20:11:21 +00001150/* --- Decimal Encoder ---------------------------------------------------- */
1151
1152/* Takes a Unicode string holding a decimal value and writes it into
1153 an output buffer using standard ASCII digit codes.
1154
1155 The output buffer has to provide at least length+1 bytes of storage
1156 area. The output string is 0-terminated.
1157
1158 The encoder converts whitespace to ' ', decimal characters to their
1159 corresponding ASCII digit and all other Latin-1 characters except
1160 \0 as-is. Characters outside this range (Unicode ordinals 1-255)
1161 are treated as errors. This includes embedded NULL bytes.
1162
1163 Error handling is defined by the errors argument:
1164
1165 NULL or "strict": raise a ValueError
1166 "ignore": ignore the wrong characters (these are not copied to the
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001167 output buffer)
Guido van Rossum9e896b32000-04-05 20:11:21 +00001168 "replace": replaces illegal characters with '?'
1169
1170 Returns 0 on success, -1 on failure.
1171
1172*/
1173
Mark Hammond91a681d2002-08-12 07:21:58 +00001174PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001175 Py_UNICODE *s, /* Unicode buffer */
1176 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
1177 char *output, /* Output buffer; must have size >= length+1 (room for the 0 terminator) */
1178 const char *errors /* error handling */
Guido van Rossum9e896b32000-04-05 20:11:21 +00001179 );
1180
Martin v. Löwis011e8422009-05-05 04:43:17 +00001181/* --- File system encoding ---------------------------------------------- */
1182
Victor Stinner47fcb5b2010-08-13 23:59:58 +00001183/* ParseTuple converter: encode str objects to bytes using
1184 PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */
Martin v. Löwis011e8422009-05-05 04:43:17 +00001185
1186PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
1187
Victor Stinner47fcb5b2010-08-13 23:59:58 +00001188/* ParseTuple converter: decode bytes objects to unicode using
1189 PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */
1190
1191PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
1192
Victor Stinner77c38622010-05-14 15:58:55 +00001193/* Decode a null-terminated string using Py_FileSystemDefaultEncoding
1194 and the "surrogateescape" error handler.
Martin v. Löwis011e8422009-05-05 04:43:17 +00001195
Victor Stinnerf3170cc2010-10-15 12:04:23 +00001196 If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1197 encoding.
Martin v. Löwis011e8422009-05-05 04:43:17 +00001198
Benjamin Petersonccbd6942010-05-15 17:43:18 +00001199 Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known.
Martin v. Löwis011e8422009-05-05 04:43:17 +00001200*/
1201
1202PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
1203 const char *s /* encoded string */
1204 );
1205
Victor Stinner77c38622010-05-14 15:58:55 +00001206/* Decode a string using Py_FileSystemDefaultEncoding
1207 and the "surrogateescape" error handler.
1208
Victor Stinnerf3170cc2010-10-15 12:04:23 +00001209 If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1210 encoding.
Victor Stinner77c38622010-05-14 15:58:55 +00001211*/
1212
Martin v. Löwis011e8422009-05-05 04:43:17 +00001213PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
1214 const char *s, /* encoded string */
1215 Py_ssize_t size /* size */
1216 );
1217
Victor Stinnerae6265f2010-05-15 16:27:27 +00001218/* Encode a Unicode object to Py_FileSystemDefaultEncoding with the
Benjamin Petersonccbd6942010-05-15 17:43:18 +00001219 "surrogateescape" error handler, and return bytes.
Victor Stinnerae6265f2010-05-15 16:27:27 +00001220
Victor Stinnerf3170cc2010-10-15 12:04:23 +00001221 If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1222 encoding.
Victor Stinnerae6265f2010-05-15 16:27:27 +00001223*/
1224
1225PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
1226 PyObject *unicode /* Unicode object to encode */
1227 );
1228
Guido van Rossumd8225182000-03-10 22:33:05 +00001229/* --- Methods & Slots ----------------------------------------------------
1230
1231 These are capable of handling Unicode objects and strings on input
1232 (we refer to them as strings in the descriptions) and return
1233 Unicode objects or integers as appropriate. */
1234
1235/* Concat two strings giving a new Unicode string. */
1236
Mark Hammond91a681d2002-08-12 07:21:58 +00001237PyAPI_FUNC(PyObject*) PyUnicode_Concat(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001238 PyObject *left, /* Left string */
1239 PyObject *right /* Right string */
Guido van Rossumd8225182000-03-10 22:33:05 +00001240 );
1241
Walter Dörwald1ab83302007-05-18 17:15:44 +00001242/* Concat two strings and put the result in *pleft
1243 (sets *pleft to NULL on error) */
1244
1245PyAPI_FUNC(void) PyUnicode_Append(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001246 PyObject **pleft, /* Pointer to left string */
1247 PyObject *right /* Right string */
Walter Dörwald1ab83302007-05-18 17:15:44 +00001248 );
1249
1250/* Concat two strings, put the result in *pleft and drop the right object
1251 (sets *pleft to NULL on error) */
1252
1253PyAPI_FUNC(void) PyUnicode_AppendAndDel(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001254 PyObject **pleft, /* Pointer to left string */
1255 PyObject *right /* Right string */
Walter Dörwald1ab83302007-05-18 17:15:44 +00001256 );
1257
Guido van Rossumd8225182000-03-10 22:33:05 +00001258/* Split a string giving a list of Unicode strings.
1259
1260 If sep is NULL, splitting will be done at all whitespace
1261 substrings. Otherwise, splits occur at the given separator.
1262
1263 At most maxsplit splits will be done. If negative, no limit is set.
1264
1265 Separators are not included in the resulting list.
1266
1267*/
1268
Mark Hammond91a681d2002-08-12 07:21:58 +00001269PyAPI_FUNC(PyObject*) PyUnicode_Split(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001270 PyObject *s, /* String to split */
1271 PyObject *sep, /* String separator */
1272 Py_ssize_t maxsplit /* Maxsplit count */
1273 );
Guido van Rossumd8225182000-03-10 22:33:05 +00001274
1275/* Ditto, but split at line breaks.
1276
1277 CRLF is considered to be one line break. Line breaks are not
1278 included in the resulting list unless keepends is true. */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001279
Mark Hammond91a681d2002-08-12 07:21:58 +00001280PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001281 PyObject *s, /* String to split */
1282 int keepends /* If true, line end markers are included */
1283 );
Guido van Rossumd8225182000-03-10 22:33:05 +00001284
Thomas Wouters477c8d52006-05-27 19:21:47 +00001285/* Partition a string using a given separator. */
1286
1287PyAPI_FUNC(PyObject*) PyUnicode_Partition(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001288 PyObject *s, /* String to partition */
1289 PyObject *sep /* String separator */
1290 );
Thomas Wouters477c8d52006-05-27 19:21:47 +00001291
1292/* Partition a string using a given separator, searching from the end of the
1293 string. */
1294
1295PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001296 PyObject *s, /* String to partition */
1297 PyObject *sep /* String separator */
1298 );
Thomas Wouters477c8d52006-05-27 19:21:47 +00001299
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001300/* Split a string giving a list of Unicode strings.
1301
1302 If sep is NULL, splitting will be done at all whitespace
1303 substrings. Otherwise, splits occur at the given separator.
1304
1305 At most maxsplit splits will be done. But unlike PyUnicode_Split
1306 PyUnicode_RSplit splits from the end of the string. If negative,
1307 no limit is set.
1308
1309 Separators are not included in the resulting list.
1310
1311*/
1312
1313PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001314 PyObject *s, /* String to split */
1315 PyObject *sep, /* String separator */
1316 Py_ssize_t maxsplit /* Maxsplit count */
1317 );
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001318
Guido van Rossumd8225182000-03-10 22:33:05 +00001319/* Translate a string by applying a character mapping table to it and
1320 return the resulting Unicode object.
1321
1322 The mapping table must map Unicode ordinal integers to Unicode
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001323 ordinal integers or None (causing deletion of the character).
Guido van Rossumd8225182000-03-10 22:33:05 +00001324
1325 Mapping tables may be dictionaries or sequences. Unmapped character
1326 ordinals (ones which cause a LookupError) are left untouched and
1327 are copied as-is.
1328
1329*/
1330
Mark Hammond91a681d2002-08-12 07:21:58 +00001331PyAPI_FUNC(PyObject *) PyUnicode_Translate(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001332 PyObject *str, /* String */
1333 PyObject *table, /* Translate table */
1334 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001335 );
1336
1337/* Join a sequence of strings using the given separator and return
1338 the resulting Unicode string. */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001339
Mark Hammond91a681d2002-08-12 07:21:58 +00001340PyAPI_FUNC(PyObject*) PyUnicode_Join(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001341 PyObject *separator, /* Separator string */
1342 PyObject *seq /* Sequence object */
Guido van Rossumd8225182000-03-10 22:33:05 +00001343 );
1344
1345/* Return 1 if substr matches str[start:end] at the given tail end, 0
1346 otherwise. */
1347
Martin v. Löwis18e16552006-02-15 17:27:45 +00001348PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349 PyObject *str, /* String */
1350 PyObject *substr, /* Prefix or Suffix string */
1351 Py_ssize_t start, /* Start index */
1352 Py_ssize_t end, /* Stop index */
1353 int direction /* Tail end: -1 prefix, +1 suffix */
Guido van Rossumd8225182000-03-10 22:33:05 +00001354 );
1355
1356/* Return the first position of substr in str[start:end] using the
Marc-André Lemburg4da6fd62002-05-29 11:33:13 +00001357 given search direction or -1 if not found. -2 is returned in case
1358 an error occurred and an exception is set. */
Guido van Rossumd8225182000-03-10 22:33:05 +00001359
Martin v. Löwis18e16552006-02-15 17:27:45 +00001360PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001361 PyObject *str, /* String */
1362 PyObject *substr, /* Substring to find */
1363 Py_ssize_t start, /* Start index */
1364 Py_ssize_t end, /* Stop index */
1365 int direction /* Find direction: +1 forward, -1 backward */
Guido van Rossumd8225182000-03-10 22:33:05 +00001366 );
1367
Barry Warsaw51ac5802000-03-20 16:36:48 +00001368/* Count the number of occurrences of substr in str[start:end]. */
Guido van Rossumd8225182000-03-10 22:33:05 +00001369
Martin v. Löwis18e16552006-02-15 17:27:45 +00001370PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001371 PyObject *str, /* String */
1372 PyObject *substr, /* Substring to count */
1373 Py_ssize_t start, /* Start index */
1374 Py_ssize_t end /* Stop index */
Guido van Rossumd8225182000-03-10 22:33:05 +00001375 );
1376
Barry Warsaw51ac5802000-03-20 16:36:48 +00001377/* Replace at most maxcount occurrences of substr in str with replstr
Guido van Rossumd8225182000-03-10 22:33:05 +00001378 and return the resulting Unicode object. */
1379
Mark Hammond91a681d2002-08-12 07:21:58 +00001380PyAPI_FUNC(PyObject *) PyUnicode_Replace(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001381 PyObject *str, /* String */
1382 PyObject *substr, /* Substring to find */
1383 PyObject *replstr, /* Substring to replace */
1384 Py_ssize_t maxcount /* Max. number of replacements to apply;
1385 -1 = all */
Guido van Rossumd8225182000-03-10 22:33:05 +00001386 );
1387
1388/* Compare two strings and return -1, 0, 1 for less than, equal,
1389 greater than resp. */
1390
Mark Hammond91a681d2002-08-12 07:21:58 +00001391PyAPI_FUNC(int) PyUnicode_Compare(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001392 PyObject *left, /* Left string */
1393 PyObject *right /* Right string */
Guido van Rossumd8225182000-03-10 22:33:05 +00001394 );
1395
Martin v. Löwis5b222132007-06-10 09:51:05 +00001396PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
1397 PyObject *left,
1398 const char *right
1399 );
1400
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001401/* Rich compare two strings and return one of the following:
1402
1403 - NULL in case an exception was raised
1404 - Py_True or Py_False for successful comparisons
1405 - Py_NotImplemented in case the type combination is unknown
1406
1407 Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
1408 case the conversion of the arguments to Unicode fails with a
1409 UnicodeDecodeError.
1410
1411 Possible values for op:
1412
1413 Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
1414
1415*/
1416
1417PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001418 PyObject *left, /* Left string */
1419 PyObject *right, /* Right string */
1420 int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001421 );
1422
Thomas Wouters7e474022000-07-16 12:04:32 +00001423/* Apply an argument tuple or dictionary to a format string and return
Guido van Rossumd8225182000-03-10 22:33:05 +00001424 the resulting Unicode string. */
1425
Mark Hammond91a681d2002-08-12 07:21:58 +00001426PyAPI_FUNC(PyObject *) PyUnicode_Format(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001427 PyObject *format, /* Format string */
1428 PyObject *args /* Argument tuple or dictionary */
Guido van Rossumd8225182000-03-10 22:33:05 +00001429 );
1430
Guido van Rossumd0d366b2000-03-13 23:22:24 +00001431/* Checks whether element is contained in container and return 1/0
1432 accordingly.
1433
1434 element has to coerce to a one-element Unicode string. -1 is
1435 returned in case of an error. */
1436
Mark Hammond91a681d2002-08-12 07:21:58 +00001437PyAPI_FUNC(int) PyUnicode_Contains(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001438 PyObject *container, /* Container string */
1439 PyObject *element /* Element string */
Guido van Rossumd0d366b2000-03-13 23:22:24 +00001440 );
1441
Martin v. Löwis47383402007-08-15 07:32:56 +00001442/* Checks whether argument is a valid identifier. */
1443
1444PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
1445
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001446/* Externally visible for str.strip(unicode) */
Mark Hammond91a681d2002-08-12 07:21:58 +00001447PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001448 PyUnicodeObject *self,
1449 int striptype,
1450 PyObject *sepobj
1451 );
1452
Eric Smith5807c412008-05-11 21:00:57 +00001453/* Using the current locale, insert the thousands grouping
1454 into the string pointed to by buffer. For the argument descriptions,
1455 see Objects/stringlib/localeutil.h */
1456
Eric Smith0923d1d2009-04-16 20:16:10 +00001457PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGroupingLocale(Py_UNICODE *buffer,
1458 Py_ssize_t n_buffer,
1459 Py_UNICODE *digits,
1460 Py_ssize_t n_digits,
1461 Py_ssize_t min_width);
Eric Smith5807c412008-05-11 21:00:57 +00001462
Eric Smitha3b1ac82009-04-03 14:45:06 +00001463/* Using explicit passed-in values, insert the thousands grouping
1464 into the string pointed to by buffer. For the argument descriptions,
1465 see Objects/stringlib/localeutil.h */
Eric Smith0923d1d2009-04-16 20:16:10 +00001466PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(Py_UNICODE *buffer,
1467 Py_ssize_t n_buffer,
1468 Py_UNICODE *digits,
1469 Py_ssize_t n_digits,
1470 Py_ssize_t min_width,
1471 const char *grouping,
1472 const char *thousands_sep);
Guido van Rossumd8225182000-03-10 22:33:05 +00001473/* === Characters Type APIs =============================================== */
1474
Benjamin Peterson960cf0f2009-01-09 04:11:44 +00001475/* Helper array used by Py_UNICODE_ISSPACE(). */
1476
1477PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
1478
Guido van Rossumd8225182000-03-10 22:33:05 +00001479/* These should not be used directly. Use the Py_UNICODE_IS* and
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001480 Py_UNICODE_TO* macros instead.
Guido van Rossumd8225182000-03-10 22:33:05 +00001481
1482 These APIs are implemented in Objects/unicodectype.c.
1483
1484*/
1485
Mark Hammond91a681d2002-08-12 07:21:58 +00001486PyAPI_FUNC(int) _PyUnicode_IsLowercase(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001487 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001488 );
1489
Mark Hammond91a681d2002-08-12 07:21:58 +00001490PyAPI_FUNC(int) _PyUnicode_IsUppercase(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001491 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001492 );
1493
Mark Hammond91a681d2002-08-12 07:21:58 +00001494PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001495 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001496 );
1497
Martin v. Löwis13c3e382007-08-14 22:37:03 +00001498PyAPI_FUNC(int) _PyUnicode_IsXidStart(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001499 Py_UCS4 ch /* Unicode character */
Martin v. Löwis13c3e382007-08-14 22:37:03 +00001500 );
1501
1502PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001503 Py_UCS4 ch /* Unicode character */
Martin v. Löwis13c3e382007-08-14 22:37:03 +00001504 );
1505
Mark Hammond91a681d2002-08-12 07:21:58 +00001506PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001507 const Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001508 );
1509
Mark Hammond91a681d2002-08-12 07:21:58 +00001510PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001511 const Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001512 );
1513
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001514PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
1515 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001516 );
1517
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001518PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
1519 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001520 );
1521
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001522PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
1523 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001524 );
1525
Mark Hammond91a681d2002-08-12 07:21:58 +00001526PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001527 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001528 );
1529
Mark Hammond91a681d2002-08-12 07:21:58 +00001530PyAPI_FUNC(int) _PyUnicode_ToDigit(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001531 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001532 );
1533
Mark Hammond91a681d2002-08-12 07:21:58 +00001534PyAPI_FUNC(double) _PyUnicode_ToNumeric(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001535 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001536 );
1537
Mark Hammond91a681d2002-08-12 07:21:58 +00001538PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001539 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001540 );
1541
Mark Hammond91a681d2002-08-12 07:21:58 +00001542PyAPI_FUNC(int) _PyUnicode_IsDigit(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001543 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001544 );
1545
Mark Hammond91a681d2002-08-12 07:21:58 +00001546PyAPI_FUNC(int) _PyUnicode_IsNumeric(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001547 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001548 );
1549
Georg Brandl559e5d72008-06-11 18:37:52 +00001550PyAPI_FUNC(int) _PyUnicode_IsPrintable(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001551 Py_UCS4 ch /* Unicode character */
Georg Brandl559e5d72008-06-11 18:37:52 +00001552 );
1553
Mark Hammond91a681d2002-08-12 07:21:58 +00001554PyAPI_FUNC(int) _PyUnicode_IsAlpha(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001555 Py_UCS4 ch /* Unicode character */
Marc-André Lemburgf03e7412000-07-05 09:45:59 +00001556 );
1557
Victor Stinneref8d95c2010-08-16 22:03:11 +00001558PyAPI_FUNC(size_t) Py_UNICODE_strlen(
1559 const Py_UNICODE *u
1560 );
Martin v. Löwis5b222132007-06-10 09:51:05 +00001561
1562PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
Victor Stinneref8d95c2010-08-16 22:03:11 +00001563 Py_UNICODE *s1,
1564 const Py_UNICODE *s2);
Martin v. Löwis5b222132007-06-10 09:51:05 +00001565
Victor Stinnerc4eb7652010-09-01 23:43:50 +00001566PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat(
1567 Py_UNICODE *s1, const Py_UNICODE *s2);
1568
Martin v. Löwis5b222132007-06-10 09:51:05 +00001569PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
Victor Stinneref8d95c2010-08-16 22:03:11 +00001570 Py_UNICODE *s1,
1571 const Py_UNICODE *s2,
1572 size_t n);
Martin v. Löwis5b222132007-06-10 09:51:05 +00001573
1574PyAPI_FUNC(int) Py_UNICODE_strcmp(
Victor Stinneref8d95c2010-08-16 22:03:11 +00001575 const Py_UNICODE *s1,
1576 const Py_UNICODE *s2
1577 );
1578
1579PyAPI_FUNC(int) Py_UNICODE_strncmp(
1580 const Py_UNICODE *s1,
1581 const Py_UNICODE *s2,
1582 size_t n
1583 );
Martin v. Löwis5b222132007-06-10 09:51:05 +00001584
1585PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
Victor Stinneref8d95c2010-08-16 22:03:11 +00001586 const Py_UNICODE *s,
1587 Py_UNICODE c
Martin v. Löwis5b222132007-06-10 09:51:05 +00001588 );
1589
Victor Stinner331ea922010-08-10 16:37:20 +00001590PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
Victor Stinneref8d95c2010-08-16 22:03:11 +00001591 const Py_UNICODE *s,
1592 Py_UNICODE c
Victor Stinner331ea922010-08-10 16:37:20 +00001593 );
1594
Victor Stinner71133ff2010-09-01 23:43:53 +00001595/* Create a copy of a unicode string ending with a nul character. Return NULL
1596 and raise a MemoryError exception on memory allocation failure, otherwise
1597 return a newly allocated buffer (use PyMem_Free() to free the buffer). */
1598
Victor Stinner46408602010-09-03 16:18:00 +00001599PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
Victor Stinner71133ff2010-09-01 23:43:53 +00001600 PyObject *unicode
1601 );
1602
Guido van Rossumd8225182000-03-10 22:33:05 +00001603#ifdef __cplusplus
1604}
1605#endif
Guido van Rossumd8225182000-03-10 22:33:05 +00001606#endif /* !Py_UNICODEOBJECT_H */