blob: 50efe72c1c70fb42bea6ad5c47dce3cbcf2a2ca8 [file] [log] [blame]
Guido van Rossumd8225182000-03-10 22:33:05 +00001#ifndef Py_UNICODEOBJECT_H
2#define Py_UNICODEOBJECT_H
Guido van Rossumd8225182000-03-10 22:33:05 +00003
Christian Heimesaf98da12008-01-27 15:18:18 +00004#include <stdarg.h>
5
Guido van Rossumd8225182000-03-10 22:33:05 +00006/*
7
8Unicode implementation based on original code by Fredrik Lundh,
9modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
Alexander Belopolsky83283c22010-11-16 14:29:01 +000010Unicode Integration Proposal. (See
11http://www.egenix.com/files/python/unicode-proposal.txt).
Guido van Rossumd8225182000-03-10 22:33:05 +000012
Guido van Rossum16b1ad92000-08-03 16:24:25 +000013Copyright (c) Corporation for National Research Initiatives.
Guido van Rossumd8225182000-03-10 22:33:05 +000014
15
16 Original header:
17 --------------------------------------------------------------------
18
19 * Yet another Unicode string type for Python. This type supports the
20 * 16-bit Basic Multilingual Plane (BMP) only.
21 *
22 * Written by Fredrik Lundh, January 1999.
23 *
24 * Copyright (c) 1999 by Secret Labs AB.
25 * Copyright (c) 1999 by Fredrik Lundh.
26 *
27 * fredrik@pythonware.com
28 * http://www.pythonware.com
29 *
30 * --------------------------------------------------------------------
31 * This Unicode String Type is
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000032 *
Guido van Rossumd8225182000-03-10 22:33:05 +000033 * Copyright (c) 1999 by Secret Labs AB
34 * Copyright (c) 1999 by Fredrik Lundh
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000035 *
Guido van Rossumd8225182000-03-10 22:33:05 +000036 * By obtaining, using, and/or copying this software and/or its
37 * associated documentation, you agree that you have read, understood,
38 * and will comply with the following terms and conditions:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000039 *
Guido van Rossumd8225182000-03-10 22:33:05 +000040 * Permission to use, copy, modify, and distribute this software and its
41 * associated documentation for any purpose and without fee is hereby
42 * granted, provided that the above copyright notice appears in all
43 * copies, and that both that copyright notice and this permission notice
44 * appear in supporting documentation, and that the name of Secret Labs
45 * AB or the author not be used in advertising or publicity pertaining to
46 * distribution of the software without specific, written prior
47 * permission.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000048 *
Guido van Rossumd8225182000-03-10 22:33:05 +000049 * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
50 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
51 * FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
52 * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
53 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
54 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
55 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
56 * -------------------------------------------------------------------- */
57
Marc-André Lemburg5e6007c2001-09-19 11:21:03 +000058#include <ctype.h>
Guido van Rossumd8225182000-03-10 22:33:05 +000059
60/* === Internal API ======================================================= */
61
62/* --- Internal Unicode Format -------------------------------------------- */
63
Christian Heimes0625e892008-01-07 21:04:21 +000064/* Python 3.x requires unicode */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065#define Py_USING_UNICODE
Christian Heimes0625e892008-01-07 21:04:21 +000066
/* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is
   properly set, but the default rules below don't set it.  I'll
   sort this out some other day -- fredrik@pythonware.com */
70
71#ifndef Py_UNICODE_SIZE
72#error Must define Py_UNICODE_SIZE
73#endif
74
Fredrik Lundh8f455852001-06-27 18:59:43 +000075/* Setting Py_UNICODE_WIDE enables UCS-4 storage. Otherwise, Unicode
76 strings are stored as UCS-2 (with limited support for UTF-16) */
77
78#if Py_UNICODE_SIZE >= 4
79#define Py_UNICODE_WIDE
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +000080#endif
Fredrik Lundh1294ad02001-06-26 17:17:07 +000081
Amaury Forgeot d'Arcfeb73072010-09-12 22:42:57 +000082/* Set these flags if the platform has "wchar.h" and the
Guido van Rossumd8225182000-03-10 22:33:05 +000083 wchar_t type is a 16-bit unsigned type */
84/* #define HAVE_WCHAR_H */
85/* #define HAVE_USABLE_WCHAR_T */
86
87/* Defaults for various platforms */
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +000088#ifndef PY_UNICODE_TYPE
Guido van Rossumd8225182000-03-10 22:33:05 +000089
Fredrik Lundh1294ad02001-06-26 17:17:07 +000090/* Windows has a usable wchar_t type (unless we're using UCS-4) */
Fredrik Lundh8f455852001-06-27 18:59:43 +000091# if defined(MS_WIN32) && Py_UNICODE_SIZE == 2
Guido van Rossumd8225182000-03-10 22:33:05 +000092# define HAVE_USABLE_WCHAR_T
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +000093# define PY_UNICODE_TYPE wchar_t
94# endif
95
Fredrik Lundh8f455852001-06-27 18:59:43 +000096# if defined(Py_UNICODE_WIDE)
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +000097# define PY_UNICODE_TYPE Py_UCS4
Guido van Rossumd8225182000-03-10 22:33:05 +000098# endif
99
100#endif
101
102/* If the compiler provides a wchar_t type we try to support it
Victor Stinner137c34c2010-09-29 10:25:54 +0000103 through the interface functions PyUnicode_FromWideChar(),
104 PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */
Guido van Rossumd8225182000-03-10 22:33:05 +0000105
106#ifdef HAVE_USABLE_WCHAR_T
Marc-André Lemburg1a731c62000-08-11 11:43:10 +0000107# ifndef HAVE_WCHAR_H
108# define HAVE_WCHAR_H
109# endif
Guido van Rossumd8225182000-03-10 22:33:05 +0000110#endif
111
Victor Stinner99b95382011-07-04 14:23:54 +0200112#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
113# define HAVE_MBCS
114#endif
115
Guido van Rossumd8225182000-03-10 22:33:05 +0000116#ifdef HAVE_WCHAR_H
Guido van Rossum24bdb042000-03-28 20:29:59 +0000117/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
118# ifdef _HAVE_BSDI
119# include <time.h>
120# endif
Marc-André Lemburg5e6007c2001-09-19 11:21:03 +0000121# include <wchar.h>
Guido van Rossumd8225182000-03-10 22:33:05 +0000122#endif
123
/*
 * Use this typedef when you need to represent a UTF-16 surrogate pair
 * as a single unsigned integer.
 */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000128#if SIZEOF_INT >= 4
129typedef unsigned int Py_UCS4;
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +0000130#elif SIZEOF_LONG >= 4
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000131typedef unsigned long Py_UCS4;
Guido van Rossumd8225182000-03-10 22:33:05 +0000132#endif
133
Benjamin Peterson960cf0f2009-01-09 04:11:44 +0000134/* Py_UNICODE is the native Unicode storage format (code unit) used by
135 Python and represents a single Unicode element in the Unicode
136 type. */
137
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000138#ifndef Py_LIMITED_API
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +0000139typedef PY_UNICODE_TYPE Py_UNICODE;
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000140#endif
Marc-André Lemburg43279102000-07-07 09:01:41 +0000141
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000142/* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */
143
/* Unicode API names are mangled to ensure that UCS-2 and UCS-4 builds
   produce different external names, so that combining an interpreter
   and an extension compiled with different Unicode width assumptions
   fails with an import error. */
148
149#ifndef Py_UNICODE_WIDE
150
151# define PyUnicode_AsASCIIString PyUnicodeUCS2_AsASCIIString
152# define PyUnicode_AsCharmapString PyUnicodeUCS2_AsCharmapString
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000153# define PyUnicode_AsDecodedObject PyUnicodeUCS2_AsDecodedObject
154# define PyUnicode_AsDecodedUnicode PyUnicodeUCS2_AsDecodedUnicode
Marc-André Lemburgd2d45982004-07-08 17:57:32 +0000155# define PyUnicode_AsEncodedObject PyUnicodeUCS2_AsEncodedObject
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000156# define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000157# define PyUnicode_AsEncodedUnicode PyUnicodeUCS2_AsEncodedUnicode
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000158# define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String
159# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString
Walter Dörwald41980ca2007-08-16 21:55:45 +0000160# define PyUnicode_AsUTF32String PyUnicodeUCS2_AsUTF32String
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000161# define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String
162# define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String
163# define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
164# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString
165# define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar
Victor Stinner137c34c2010-09-29 10:25:54 +0000166# define PyUnicode_AsWideCharString PyUnicodeUCS2_AsWideCharString
Alexandre Vassalotti15fafbe2008-12-28 02:13:22 +0000167# define PyUnicode_ClearFreeList PyUnicodeUCS2_ClearFreelist
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000168# define PyUnicode_Compare PyUnicodeUCS2_Compare
Victor Stinner09f24bb2010-10-24 20:38:25 +0000169# define PyUnicode_CompareWithASCIIString PyUnicodeUCS2_CompareWithASCIIString
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000170# define PyUnicode_Concat PyUnicodeUCS2_Concat
Walter Dörwald1ab83302007-05-18 17:15:44 +0000171# define PyUnicode_Append PyUnicodeUCS2_Append
172# define PyUnicode_AppendAndDel PyUnicodeUCS2_AppendAndDel
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000173# define PyUnicode_Contains PyUnicodeUCS2_Contains
174# define PyUnicode_Count PyUnicodeUCS2_Count
175# define PyUnicode_Decode PyUnicodeUCS2_Decode
176# define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
177# define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
178# define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
Guido van Rossum00bc0e02007-10-15 02:52:41 +0000179# define PyUnicode_DecodeFSDefault PyUnicodeUCS2_DecodeFSDefault
Christian Heimes5894ba72007-11-04 11:43:14 +0000180# define PyUnicode_DecodeFSDefaultAndSize PyUnicodeUCS2_DecodeFSDefaultAndSize
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000181# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
Walter Dörwald41980ca2007-08-16 21:55:45 +0000182# define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32
183# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000184# define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
Walter Dörwald69652032004-09-07 20:24:22 +0000185# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000186# define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
Walter Dörwald69652032004-09-07 20:24:22 +0000187# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000188# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape
189# define PyUnicode_Encode PyUnicodeUCS2_Encode
190# define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII
191# define PyUnicode_EncodeCharmap PyUnicodeUCS2_EncodeCharmap
192# define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal
193# define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1
194# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape
Walter Dörwald41980ca2007-08-16 21:55:45 +0000195# define PyUnicode_EncodeUTF32 PyUnicodeUCS2_EncodeUTF32
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000196# define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16
197# define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8
198# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape
199# define PyUnicode_Find PyUnicodeUCS2_Find
200# define PyUnicode_Format PyUnicodeUCS2_Format
201# define PyUnicode_FromEncodedObject PyUnicodeUCS2_FromEncodedObject
Alexandre Vassalotti15fafbe2008-12-28 02:13:22 +0000202# define PyUnicode_FromFormat PyUnicodeUCS2_FromFormat
203# define PyUnicode_FromFormatV PyUnicodeUCS2_FromFormatV
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000204# define PyUnicode_FromObject PyUnicodeUCS2_FromObject
Marc-André Lemburg9c329de2002-08-12 08:19:10 +0000205# define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal
Walter Dörwaldacaa5a12007-05-05 12:00:46 +0000206# define PyUnicode_FromString PyUnicodeUCS2_FromString
Walter Dörwaldd2034312007-05-18 16:29:38 +0000207# define PyUnicode_FromStringAndSize PyUnicodeUCS2_FromStringAndSize
Alexandre Vassalotti15fafbe2008-12-28 02:13:22 +0000208# define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
Walter Dörwald14176a52007-05-18 17:04:42 +0000209# define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
Martin v. Löwis011e8422009-05-05 04:43:17 +0000210# define PyUnicode_FSConverter PyUnicodeUCS2_FSConverter
Victor Stinner47fcb5b2010-08-13 23:59:58 +0000211# define PyUnicode_FSDecoder PyUnicodeUCS2_FSDecoder
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000212# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
213# define PyUnicode_GetMax PyUnicodeUCS2_GetMax
214# define PyUnicode_GetSize PyUnicodeUCS2_GetSize
Martin v. Löwis47383402007-08-15 07:32:56 +0000215# define PyUnicode_IsIdentifier PyUnicodeUCS2_IsIdentifier
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000216# define PyUnicode_Join PyUnicodeUCS2_Join
Thomas Wouters477c8d52006-05-27 19:21:47 +0000217# define PyUnicode_Partition PyUnicodeUCS2_Partition
218# define PyUnicode_RPartition PyUnicodeUCS2_RPartition
219# define PyUnicode_RSplit PyUnicodeUCS2_RSplit
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000220# define PyUnicode_Replace PyUnicodeUCS2_Replace
221# define PyUnicode_Resize PyUnicodeUCS2_Resize
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000222# define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000223# define PyUnicode_Split PyUnicodeUCS2_Split
224# define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines
225# define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch
226# define PyUnicode_Translate PyUnicodeUCS2_Translate
227# define PyUnicode_TranslateCharmap PyUnicodeUCS2_TranslateCharmap
228# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString
229# define _PyUnicode_Fini _PyUnicodeUCS2_Fini
230# define _PyUnicode_Init _PyUnicodeUCS2_Init
Victor Stinner71133ff2010-09-01 23:43:53 +0000231# define PyUnicode_strdup PyUnicodeUCS2_strdup
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000232
233#else
234
235# define PyUnicode_AsASCIIString PyUnicodeUCS4_AsASCIIString
236# define PyUnicode_AsCharmapString PyUnicodeUCS4_AsCharmapString
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000237# define PyUnicode_AsDecodedObject PyUnicodeUCS4_AsDecodedObject
238# define PyUnicode_AsDecodedUnicode PyUnicodeUCS4_AsDecodedUnicode
Marc-André Lemburgd2d45982004-07-08 17:57:32 +0000239# define PyUnicode_AsEncodedObject PyUnicodeUCS4_AsEncodedObject
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000240# define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000241# define PyUnicode_AsEncodedUnicode PyUnicodeUCS4_AsEncodedUnicode
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000242# define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String
243# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString
Walter Dörwald41980ca2007-08-16 21:55:45 +0000244# define PyUnicode_AsUTF32String PyUnicodeUCS4_AsUTF32String
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000245# define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String
246# define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String
247# define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
248# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString
249# define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar
Victor Stinner137c34c2010-09-29 10:25:54 +0000250# define PyUnicode_AsWideCharString PyUnicodeUCS4_AsWideCharString
Alexandre Vassalotti15fafbe2008-12-28 02:13:22 +0000251# define PyUnicode_ClearFreeList PyUnicodeUCS4_ClearFreelist
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000252# define PyUnicode_Compare PyUnicodeUCS4_Compare
Victor Stinner09f24bb2010-10-24 20:38:25 +0000253# define PyUnicode_CompareWithASCIIString PyUnicodeUCS4_CompareWithASCIIString
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000254# define PyUnicode_Concat PyUnicodeUCS4_Concat
Walter Dörwald1ab83302007-05-18 17:15:44 +0000255# define PyUnicode_Append PyUnicodeUCS4_Append
256# define PyUnicode_AppendAndDel PyUnicodeUCS4_AppendAndDel
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000257# define PyUnicode_Contains PyUnicodeUCS4_Contains
258# define PyUnicode_Count PyUnicodeUCS4_Count
259# define PyUnicode_Decode PyUnicodeUCS4_Decode
260# define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
261# define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
262# define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
Guido van Rossum00bc0e02007-10-15 02:52:41 +0000263# define PyUnicode_DecodeFSDefault PyUnicodeUCS4_DecodeFSDefault
Christian Heimes5894ba72007-11-04 11:43:14 +0000264# define PyUnicode_DecodeFSDefaultAndSize PyUnicodeUCS4_DecodeFSDefaultAndSize
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000265# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
Walter Dörwald41980ca2007-08-16 21:55:45 +0000266# define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32
267# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000268# define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
Walter Dörwald69652032004-09-07 20:24:22 +0000269# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000270# define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
Walter Dörwald69652032004-09-07 20:24:22 +0000271# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000272# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape
273# define PyUnicode_Encode PyUnicodeUCS4_Encode
274# define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII
275# define PyUnicode_EncodeCharmap PyUnicodeUCS4_EncodeCharmap
276# define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal
277# define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1
278# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape
Walter Dörwald41980ca2007-08-16 21:55:45 +0000279# define PyUnicode_EncodeUTF32 PyUnicodeUCS4_EncodeUTF32
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000280# define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16
281# define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8
282# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape
283# define PyUnicode_Find PyUnicodeUCS4_Find
284# define PyUnicode_Format PyUnicodeUCS4_Format
285# define PyUnicode_FromEncodedObject PyUnicodeUCS4_FromEncodedObject
Alexandre Vassalotti15fafbe2008-12-28 02:13:22 +0000286# define PyUnicode_FromFormat PyUnicodeUCS4_FromFormat
287# define PyUnicode_FromFormatV PyUnicodeUCS4_FromFormatV
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000288# define PyUnicode_FromObject PyUnicodeUCS4_FromObject
Marc-André Lemburg9c329de2002-08-12 08:19:10 +0000289# define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal
Walter Dörwaldacaa5a12007-05-05 12:00:46 +0000290# define PyUnicode_FromString PyUnicodeUCS4_FromString
Walter Dörwaldd2034312007-05-18 16:29:38 +0000291# define PyUnicode_FromStringAndSize PyUnicodeUCS4_FromStringAndSize
Alexandre Vassalotti15fafbe2008-12-28 02:13:22 +0000292# define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000293# define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
Martin v. Löwis011e8422009-05-05 04:43:17 +0000294# define PyUnicode_FSConverter PyUnicodeUCS4_FSConverter
Victor Stinner47fcb5b2010-08-13 23:59:58 +0000295# define PyUnicode_FSDecoder PyUnicodeUCS4_FSDecoder
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000296# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
297# define PyUnicode_GetMax PyUnicodeUCS4_GetMax
298# define PyUnicode_GetSize PyUnicodeUCS4_GetSize
Martin v. Löwis47383402007-08-15 07:32:56 +0000299# define PyUnicode_IsIdentifier PyUnicodeUCS4_IsIdentifier
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000300# define PyUnicode_Join PyUnicodeUCS4_Join
Thomas Wouters477c8d52006-05-27 19:21:47 +0000301# define PyUnicode_Partition PyUnicodeUCS4_Partition
302# define PyUnicode_RPartition PyUnicodeUCS4_RPartition
303# define PyUnicode_RSplit PyUnicodeUCS4_RSplit
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000304# define PyUnicode_Replace PyUnicodeUCS4_Replace
305# define PyUnicode_Resize PyUnicodeUCS4_Resize
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000306# define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000307# define PyUnicode_Split PyUnicodeUCS4_Split
308# define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines
309# define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch
310# define PyUnicode_Translate PyUnicodeUCS4_Translate
311# define PyUnicode_TranslateCharmap PyUnicodeUCS4_TranslateCharmap
312# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString
313# define _PyUnicode_Fini _PyUnicodeUCS4_Fini
314# define _PyUnicode_Init _PyUnicodeUCS4_Init
Victor Stinner71133ff2010-09-01 23:43:53 +0000315# define PyUnicode_strdup PyUnicodeUCS4_strdup
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000316
317#endif
318
Guido van Rossumd8225182000-03-10 22:33:05 +0000319/* --- Internal Unicode Operations ---------------------------------------- */
320
Benjamin Peterson960cf0f2009-01-09 04:11:44 +0000321/* Since splitting on whitespace is an important use case, and
322 whitespace in most situations is solely ASCII whitespace, we
323 optimize for the common case by using a quick look-up table
324 _Py_ascii_whitespace (see below) with an inlined check.
Christian Heimes190d79e2008-01-30 11:58:22 +0000325
Benjamin Peterson960cf0f2009-01-09 04:11:44 +0000326 */
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000327#ifndef Py_LIMITED_API
Christian Heimes190d79e2008-01-30 11:58:22 +0000328#define Py_UNICODE_ISSPACE(ch) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000329 ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
Guido van Rossumd8225182000-03-10 22:33:05 +0000330
331#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
332#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
333#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
334#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
335
336#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
337#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
338#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
339
340#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
341#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
342#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
Georg Brandl559e5d72008-06-11 18:37:52 +0000343#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)
Guido van Rossumd8225182000-03-10 22:33:05 +0000344
345#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
346#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
347#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
348
Marc-André Lemburgf03e7412000-07-05 09:45:59 +0000349#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
Guido van Rossumd8225182000-03-10 22:33:05 +0000350
/* Non-zero if ch is alphabetic, a decimal digit, a digit or numeric, as
   reported by the Py_UNICODE_ISALPHA / ISDECIMAL / ISDIGIT / ISNUMERIC
   macros, tested in that order with short-circuiting.  ch appears once
   per test, so it may be evaluated up to four times: do not pass an
   expression with side effects. */
Marc-André Lemburga9c103b2000-07-03 10:52:13 +0000351#define Py_UNICODE_ISALNUM(ch) \
352 (Py_UNICODE_ISALPHA(ch) || \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000353 Py_UNICODE_ISDECIMAL(ch) || \
354 Py_UNICODE_ISDIGIT(ch) || \
355 Py_UNICODE_ISNUMERIC(ch))
Marc-André Lemburga9c103b2000-07-03 10:52:13 +0000356
/* Copy `length` Py_UNICODE code units from `source` to `target` via
   Py_MEMCPY; the byte count passed on is length * sizeof(Py_UNICODE).
   NOTE(review): assuming Py_MEMCPY has memcpy() semantics, the two
   buffers must not overlap -- confirm against its definition. */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000357#define Py_UNICODE_COPY(target, source, length) \
358 Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE))
Guido van Rossumd8225182000-03-10 22:33:05 +0000359
/* Store `value` into the first `length` Py_UNICODE code units of the
   buffer starting at `target`.

   target -- pointer to the first code unit to overwrite
   value  -- code unit to store (converted to Py_UNICODE)
   length -- number of code units to write; nothing is written when it
             is zero or negative

   Each argument is evaluated exactly once, in the order target, value,
   length.  `length` is captured in a local so that an argument with
   side effects is not re-evaluated on every loop iteration.  The
   do/while(0) wrapper makes the expansion usable as a single statement. */
#define Py_UNICODE_FILL(target, value, length) \
    do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
        Py_ssize_t n_ = (length);\
        for (i_ = 0; i_ < n_; i_++) t_[i_] = v_;\
    } while (0)
Guido van Rossumd8225182000-03-10 22:33:05 +0000364
/* Macros to work with UTF-16 surrogates.

   Each predicate is non-zero when the code unit `ch` lies in the given
   range: 0xD800-0xDFFF (any surrogate), 0xD800-0xDBFF (high surrogate)
   or 0xDC00-0xDFFF (low surrogate).  `ch` is parenthesized in the
   expansion so that an argument such as `c & 0xFFFF` is tested as a
   whole instead of being re-associated with the comparison operators.
   `ch` may still be evaluated twice, so it must not have side effects. */
#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF)
#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF)
/* Join two surrogate characters and return a single Py_UCS4 value: the
   low ten bits of `high` and of `low` are concatenated and 0x10000 is
   added.  The arguments are not checked for actually being surrogates. */
#define Py_UNICODE_JOIN_SURROGATES(high, low)  \
    (((((Py_UCS4)(high) & 0x03FF) << 10) |      \
      ((Py_UCS4)(low) & 0x03FF)) + 0x10000)
373
Alexander Belopolsky83283c22010-11-16 14:29:01 +0000374/* Check if substring matches at given offset. The offset must be
375 valid, and the substring must not be empty. */
Benjamin Peterson960cf0f2009-01-09 04:11:44 +0000376
/* Non-zero if `substring` occurs in `string` at index `offset`.
   Both arguments are dereferenced with ->str and ->length, so they must
   be pointers to objects with the PyUnicodeObject layout.  The first
   and the last code unit are compared before the memcmp() over the
   whole substring, which rejects most mismatches cheaply; the
   length-1 index is why the substring must not be empty.  Arguments
   are expanded several times, so they must not have side effects. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000377#define Py_UNICODE_MATCH(string, offset, substring) \
378 ((*((string)->str + (offset)) == *((substring)->str)) && \
379 ((*((string)->str + (offset) + (substring)->length-1) == *((substring)->str + (substring)->length-1))) && \
380 !memcmp((string)->str + (offset), (substring)->str, (substring)->length*sizeof(Py_UNICODE)))
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000381#endif /* Py_LIMITED_API */
Guido van Rossumd8225182000-03-10 22:33:05 +0000382
Barry Warsaw51ac5802000-03-20 16:36:48 +0000383#ifdef __cplusplus
384extern "C" {
385#endif
386
Guido van Rossumd8225182000-03-10 22:33:05 +0000387/* --- Unicode Type ------------------------------------------------------- */
388
/* In-memory layout of a Unicode object: the common object header
   followed by the length (counted in Py_UNICODE code units), the buffer
   pointer, the cached hash, the interning state and the cached
   default-encoded form.  The definition is hidden when Py_LIMITED_API
   is defined. */
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000389#ifndef Py_LIMITED_API
Guido van Rossumd8225182000-03-10 22:33:05 +0000390typedef struct {
391 PyObject_HEAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000392 Py_ssize_t length; /* Length of raw Unicode data in buffer */
393 Py_UNICODE *str; /* Raw Unicode buffer */
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000394 Py_hash_t hash; /* Hash value; -1 if not set */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000395 int state; /* != 0 if interned. In this case the two
396 * references from the dictionary to this object
397 * are *not* counted in ob_refcnt. */
398 PyObject *defenc; /* (Default) Encoded version as Python
399 string, or NULL; this is used for
400 implementing the buffer protocol */
Guido van Rossumd8225182000-03-10 22:33:05 +0000401} PyUnicodeObject;
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000402#endif
Guido van Rossumd8225182000-03-10 22:33:05 +0000403
Mark Hammond91a681d2002-08-12 07:21:58 +0000404PyAPI_DATA(PyTypeObject) PyUnicode_Type;
Christian Heimesa22e8bd2007-11-29 22:35:39 +0000405PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
Guido van Rossumd8225182000-03-10 22:33:05 +0000406
/* Interning states.
   NOTE(review): presumably the values stored in PyUnicodeObject.state
   (0 agrees with that field's "!= 0 if interned" comment) -- confirm
   against the implementation. */
Walter Dörwald16807132007-05-25 13:52:07 +0000407#define SSTATE_NOT_INTERNED 0
408#define SSTATE_INTERNED_MORTAL 1
409#define SSTATE_INTERNED_IMMORTAL 2
410
/* Type checks.
   PyUnicode_Check(op) tests the type of op with PyType_FastSubclass()
   and the Py_TPFLAGS_UNICODE_SUBCLASS flag (presumably true for
   PyUnicode_Type and its subclasses -- confirm against the flag's
   definition).
   PyUnicode_CheckExact(op) is true only when the type of op is
   PyUnicode_Type itself. */
Thomas Wouters27d517b2007-02-25 20:39:11 +0000411#define PyUnicode_Check(op) \
Christian Heimes90aa7642007-12-19 02:45:37 +0000412 PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
413#define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
Guido van Rossumd8225182000-03-10 22:33:05 +0000414
415/* Fast access macros */
/* Each macro below asserts PyUnicode_Check(op), then casts op to
   PyUnicodeObject * and reads a field directly.  There is no error
   return: with assert() compiled out nothing checks the type of op.
   op appears both in the assertion and in the access, so it must not
   have side effects.  Not available when Py_LIMITED_API is defined. */
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000416#ifndef Py_LIMITED_API
/* Length of the string, in Py_UNICODE code units. */
Guido van Rossumd8225182000-03-10 22:33:05 +0000417#define PyUnicode_GET_SIZE(op) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000418 (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length))
/* Size of the buffer contents in bytes: length * sizeof(Py_UNICODE). */
Guido van Rossumd8225182000-03-10 22:33:05 +0000419#define PyUnicode_GET_DATA_SIZE(op) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000420 (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE)))
/* The object's Py_UNICODE buffer (the str field). */
Guido van Rossumd8225182000-03-10 22:33:05 +0000421#define PyUnicode_AS_UNICODE(op) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000422 (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->str))
/* The same buffer, viewed as const char *. */
Guido van Rossumd8225182000-03-10 22:33:05 +0000423#define PyUnicode_AS_DATA(op) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000424 (assert(PyUnicode_Check(op)),((const char *)((PyUnicodeObject *)(op))->str))
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000425#endif
Guido van Rossumd8225182000-03-10 22:33:05 +0000426
427/* --- Constants ---------------------------------------------------------- */
428
429/* This Unicode character will be used as replacement character during
430 decoding if the errors argument is set to "replace". Note: the
431 Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
432 Unicode 3.0. */
433
434#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)
435
436/* === Public API ========================================================= */
437
438/* --- Plain Py_UNICODE --------------------------------------------------- */
439
440/* Create a Unicode Object from the Py_UNICODE buffer u of the given
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000441 size.
Marc-André Lemburg8155e0e2001-04-23 14:44:21 +0000442
443 u may be NULL which causes the contents to be undefined. It is the
444 user's responsibility to fill in the needed data afterwards. Note
445 that modifying the Unicode object contents after construction is
446 only allowed if u was set to NULL.
Guido van Rossumd8225182000-03-10 22:33:05 +0000447
448 The buffer is copied into the new object. */
449
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000450#ifndef Py_LIMITED_API
Mark Hammond91a681d2002-08-12 07:21:58 +0000451PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
Guido van Rossumd8225182000-03-10 22:33:05 +0000452 const Py_UNICODE *u, /* Unicode buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000453 Py_ssize_t size /* size of buffer */
Guido van Rossumd8225182000-03-10 22:33:05 +0000454 );
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000455#endif
Guido van Rossumd8225182000-03-10 22:33:05 +0000456
Georg Brandl952867a2010-06-27 10:17:12 +0000457/* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */
Walter Dörwaldd2034312007-05-18 16:29:38 +0000458PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
Victor Stinner0d711162010-12-27 02:39:20 +0000459 const char *u, /* UTF-8 encoded string */
Victor Stinnerdc2081f2010-12-27 01:49:29 +0000460 Py_ssize_t size /* size of buffer */
Walter Dörwaldd2034312007-05-18 16:29:38 +0000461 );
462
Walter Dörwaldacaa5a12007-05-05 12:00:46 +0000463/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
Georg Brandl952867a2010-06-27 10:17:12 +0000464 UTF-8 encoded bytes */
Walter Dörwaldacaa5a12007-05-05 12:00:46 +0000465PyAPI_FUNC(PyObject*) PyUnicode_FromString(
Victor Stinnerdc2081f2010-12-27 01:49:29 +0000466 const char *u /* UTF-8 encoded string */
Walter Dörwaldacaa5a12007-05-05 12:00:46 +0000467 );
468
Guido van Rossumd8225182000-03-10 22:33:05 +0000469/* Return a read-only pointer to the Unicode object's internal
470 Py_UNICODE buffer. */
471
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000472#ifndef Py_LIMITED_API
Mark Hammond91a681d2002-08-12 07:21:58 +0000473PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000474 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +0000475 );
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000476#endif
Guido van Rossumd8225182000-03-10 22:33:05 +0000477
478/* Get the length of the Unicode object. */
479
Martin v. Löwis18e16552006-02-15 17:27:45 +0000480PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000481 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +0000482 );
483
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000484#ifndef Py_LIMITED_API
Martin v. Löwisce9b5a52001-06-27 06:28:56 +0000485/* Get the maximum ordinal for a Unicode character. */
Mark Hammond91a681d2002-08-12 07:21:58 +0000486PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000487#endif
Martin v. Löwisce9b5a52001-06-27 06:28:56 +0000488
Guido van Rossum52c23592000-04-10 13:41:41 +0000489/* Resize an already allocated Unicode object to the given new length.
490
491 *unicode is modified to point to the new (resized) object and 0
492 is returned on success.
493
494 This API may only be called by the function which also called the
495 Unicode constructor. The refcount on the object must be 1. Otherwise,
496 an error is returned.
497
498 Error handling is implemented as follows: an exception is set, -1
499 is returned and *unicode left untouched.
500
501*/
502
Mark Hammond91a681d2002-08-12 07:21:58 +0000503PyAPI_FUNC(int) PyUnicode_Resize(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000504 PyObject **unicode, /* Pointer to the Unicode object */
505 Py_ssize_t length /* New length */
Guido van Rossum52c23592000-04-10 13:41:41 +0000506 );
507
Guido van Rossumd8225182000-03-10 22:33:05 +0000508/* Coerce obj to a Unicode object and return a reference with
509 *incremented* refcount.
510
511 Coercion is done in the following way:
512
Georg Brandl952867a2010-06-27 10:17:12 +0000513 1. bytes, bytearray and other char buffer compatible objects are decoded
Alexander Belopolsky83283c22010-11-16 14:29:01 +0000514 under the assumptions that they contain data using the UTF-8
515 encoding. Decoding is done in "strict" mode.
Guido van Rossumd8225182000-03-10 22:33:05 +0000516
Guido van Rossumb8c65bc2001-10-19 02:01:31 +0000517 2. All other objects (including Unicode objects) raise an
518 exception.
Guido van Rossumd8225182000-03-10 22:33:05 +0000519
520 The API returns NULL in case of an error. The caller is responsible
521 for decref'ing the returned objects.
522
523*/
524
Mark Hammond91a681d2002-08-12 07:21:58 +0000525PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000526 register PyObject *obj, /* Object */
Marc-André Lemburg5a5c81a2000-07-07 13:46:42 +0000527 const char *encoding, /* encoding */
528 const char *errors /* error handling */
529 );
530
Guido van Rossumb8c65bc2001-10-19 02:01:31 +0000531/* Coerce obj to a Unicode object and return a reference with
Marc-André Lemburg5a5c81a2000-07-07 13:46:42 +0000532 *incremented* refcount.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000533
Guido van Rossumb8c65bc2001-10-19 02:01:31 +0000534 Unicode objects are passed back as-is (subclasses are converted to
535 true Unicode objects), all other objects are delegated to
536 PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
Georg Brandl952867a2010-06-27 10:17:12 +0000537 using UTF-8 encoding as basis for decoding the object.
Marc-André Lemburg5a5c81a2000-07-07 13:46:42 +0000538
539 The API returns NULL in case of an error. The caller is responsible
540 for decref'ing the returned objects.
541
542*/
543
Mark Hammond91a681d2002-08-12 07:21:58 +0000544PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000545 register PyObject *obj /* Object */
Guido van Rossumd8225182000-03-10 22:33:05 +0000546 );
547
Victor Stinner1205f272010-09-11 00:54:47 +0000548PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(
549 const char *format, /* ASCII-encoded string */
550 va_list vargs
551 );
552PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
553 const char *format, /* ASCII-encoded string */
554 ...
555 );
Walter Dörwaldd2034312007-05-18 16:29:38 +0000556
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000557#ifndef Py_LIMITED_API
Eric Smith4a7d76d2008-05-30 18:10:19 +0000558/* Format the object based on the format_spec, as defined in PEP 3101
559 (Advanced String Formatting). */
560PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj,
561 Py_UNICODE *format_spec,
562 Py_ssize_t format_spec_len);
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000563#endif
Eric Smith4a7d76d2008-05-30 18:10:19 +0000564
Walter Dörwald16807132007-05-25 13:52:07 +0000565PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
566PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
Victor Stinnerdc2081f2010-12-27 01:49:29 +0000567PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
568 const char *u /* UTF-8 encoded string */
569 );
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000570#ifndef Py_LIMITED_API
Walter Dörwald16807132007-05-25 13:52:07 +0000571PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void);
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000572#endif
Walter Dörwald16807132007-05-25 13:52:07 +0000573
574/* Evaluates to the `state` field of op, read through an unchecked cast to
   PyUnicodeObject *. Use only if you know it's a string: the macro performs
   no type check on op. */
575#define PyUnicode_CHECK_INTERNED(op) (((PyUnicodeObject *)(op))->state)
576
Guido van Rossumd8225182000-03-10 22:33:05 +0000577/* --- wchar_t support for platforms which support it --------------------- */
578
579#ifdef HAVE_WCHAR_H
580
Georg Brandl952867a2010-06-27 10:17:12 +0000581/* Create a Unicode Object from the wchar_t buffer w of the given
Guido van Rossumd8225182000-03-10 22:33:05 +0000582 size.
583
584 The buffer is copied into the new object. */
585
Mark Hammond91a681d2002-08-12 07:21:58 +0000586PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
Guido van Rossumd8225182000-03-10 22:33:05 +0000587 register const wchar_t *w, /* wchar_t buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000588 Py_ssize_t size /* size of buffer */
Guido van Rossumd8225182000-03-10 22:33:05 +0000589 );
590
Marc-André Lemburga9cadcd2004-11-22 13:02:31 +0000591/* Copies the Unicode Object contents into the wchar_t buffer w. At
Guido van Rossumd8225182000-03-10 22:33:05 +0000592 most size wchar_t characters are copied.
593
Marc-André Lemburga9cadcd2004-11-22 13:02:31 +0000594 Note that the resulting wchar_t string may or may not be
595 0-terminated. It is the responsibility of the caller to make sure
596 that the wchar_t string is 0-terminated in case this is required by
597 the application.
598
599 Returns the number of wchar_t characters copied (excluding a
600 possibly trailing 0-termination character) or -1 in case of an
Guido van Rossumd8225182000-03-10 22:33:05 +0000601 error. */
602
Martin v. Löwis18e16552006-02-15 17:27:45 +0000603PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000604 PyObject *unicode, /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +0000605 register wchar_t *w, /* wchar_t buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000606 Py_ssize_t size /* size of buffer */
Guido van Rossumd8225182000-03-10 22:33:05 +0000607 );
608
Victor Stinner137c34c2010-09-29 10:25:54 +0000609/* Convert the Unicode object to a wide character string. The output string
610 always ends with a nul character. If size is not NULL, write the number of
Victor Stinnerd88d9832011-09-06 02:00:05 +0200611 wide characters (excluding the null character) into *size.
Victor Stinner137c34c2010-09-29 10:25:54 +0000612
613 Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it)
614 on success. On error, raises a MemoryError and returns NULL; *size is
615 undefined. */
616
617PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
Victor Stinnerbeb4135b2010-10-07 01:02:42 +0000618 PyObject *unicode, /* Unicode object */
Victor Stinner137c34c2010-09-29 10:25:54 +0000619 Py_ssize_t *size /* number of characters of the result */
620 );
621
Guido van Rossumd8225182000-03-10 22:33:05 +0000622#endif
623
Marc-André Lemburgcc8764c2002-08-11 12:23:04 +0000624/* --- Unicode ordinals --------------------------------------------------- */
625
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000626/* Create a Unicode Object from the given Unicode code point ordinal.
627
Marc-André Lemburgcc8764c2002-08-11 12:23:04 +0000628 The ordinal must be in range(0x10000) on narrow Python builds
629 (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
630 raised in case it is not.
631
632*/
633
Marc-André Lemburg9c329de2002-08-12 08:19:10 +0000634PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
Marc-André Lemburgcc8764c2002-08-11 12:23:04 +0000635
Benjamin Peterson960cf0f2009-01-09 04:11:44 +0000636/* --- Free-list management ----------------------------------------------- */
637
638/* Clear the free list used by the Unicode implementation.
639
640 This can be used to release memory used for objects on the free
641 list back to the Python memory allocator.
642
643*/
644
645PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
646
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000647/* === Builtin Codecs =====================================================
Guido van Rossumd8225182000-03-10 22:33:05 +0000648
649 Many of these APIs take two arguments encoding and errors. These
650 parameters encoding and errors have the same semantics as the ones
Alexander Belopolsky83283c22010-11-16 14:29:01 +0000651 of the builtin str() API.
Guido van Rossumd8225182000-03-10 22:33:05 +0000652
Georg Brandl952867a2010-06-27 10:17:12 +0000653 Setting encoding to NULL causes the default encoding (UTF-8) to be used.
Guido van Rossumd8225182000-03-10 22:33:05 +0000654
655 Error handling is set by errors which may also be set to NULL
656 meaning to use the default handling defined for the codec. Default
657 error handling for all builtin codecs is "strict" (ValueErrors are
658 raised).
659
660 The codecs all use a similar interface. Only deviations from the
661 generic interface are documented.
662
663*/
664
Fred Drakecb093fe2000-05-09 19:51:53 +0000665/* --- Manage the default encoding ---------------------------------------- */
666
Jeremy Hylton3ce45382001-07-30 22:34:24 +0000667/* Return a Python string holding the default encoded value of the
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000668 Unicode object.
Jeremy Hylton3ce45382001-07-30 22:34:24 +0000669
Alexander Belopolsky83283c22010-11-16 14:29:01 +0000670 Same as PyUnicode_AsUTF8String() except
671 the resulting string is cached in the Unicode object for subsequent
Jeremy Hylton3ce45382001-07-30 22:34:24 +0000672 usage by this function. The cached version is needed to implement
673 the character buffer interface and will live (at least) as long as
674 the Unicode object itself.
675
676 The refcount of the string is *not* incremented.
677
678 *** Exported for internal use by the interpreter only !!! ***
679
680*/
681
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000682#ifndef Py_LIMITED_API
Mark Hammond91a681d2002-08-12 07:21:58 +0000683PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
Victor Stinnerf3fd7332011-03-02 01:03:11 +0000684 PyObject *unicode);
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000685#endif
Jeremy Hylton3ce45382001-07-30 22:34:24 +0000686
Alexander Belopolsky83283c22010-11-16 14:29:01 +0000687/* Returns a pointer to the default encoding (UTF-8) of the
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000688 Unicode object unicode and the size of the encoded representation
689 in bytes stored in *size.
Christian Heimes5894ba72007-11-04 11:43:14 +0000690
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000691 In case of an error, *size is not set.
Guido van Rossum7d1df6c2007-08-29 13:53:23 +0000692
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000693 *** This API is for interpreter INTERNAL USE ONLY and will likely
Alexander Belopolsky83283c22010-11-16 14:29:01 +0000694 *** be removed or changed in the future.
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000695
696 *** If you need to access the Unicode object as UTF-8 bytes string,
697 *** please use PyUnicode_AsUTF8String() instead.
698
Martin v. Löwis5b222132007-06-10 09:51:05 +0000699*/
700
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000701#ifndef Py_LIMITED_API
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000702PyAPI_FUNC(char *) _PyUnicode_AsStringAndSize(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000703 PyObject *unicode,
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000704 Py_ssize_t *size);
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000705#endif
Guido van Rossum7d1df6c2007-08-29 13:53:23 +0000706
Alexander Belopolsky83283c22010-11-16 14:29:01 +0000707/* Returns a pointer to the default encoding (UTF-8) of the
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000708 Unicode object unicode.
Guido van Rossum7d1df6c2007-08-29 13:53:23 +0000709
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000710 Use of this API is DEPRECATED since no size information can be
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000711 extracted from the returned data.
712
713 *** This API is for interpreter INTERNAL USE ONLY and will likely
714 *** be removed or changed in the future.
715
716 *** If you need to access the Unicode object as UTF-8 bytes string,
717 *** please use PyUnicode_AsUTF8String() instead.
Guido van Rossum7d1df6c2007-08-29 13:53:23 +0000718
Marc-André Lemburg9155aa72008-04-29 11:14:08 +0000719*/
Martin v. Löwis5b222132007-06-10 09:51:05 +0000720
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000721#ifndef Py_LIMITED_API
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000722PyAPI_FUNC(char *) _PyUnicode_AsString(PyObject *unicode);
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000723#endif
Martin v. Löwis5b222132007-06-10 09:51:05 +0000724
Alexander Belopolsky83283c22010-11-16 14:29:01 +0000725/* Returns "utf-8". */
Fred Drakecb093fe2000-05-09 19:51:53 +0000726
Mark Hammond91a681d2002-08-12 07:21:58 +0000727PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
Fred Drakecb093fe2000-05-09 19:51:53 +0000728
Guido van Rossumd8225182000-03-10 22:33:05 +0000729/* --- Generic Codecs ----------------------------------------------------- */
730
731/* Create a Unicode object by decoding the encoded string s of the
732 given size. */
733
Mark Hammond91a681d2002-08-12 07:21:58 +0000734PyAPI_FUNC(PyObject*) PyUnicode_Decode(
Guido van Rossumd8225182000-03-10 22:33:05 +0000735 const char *s, /* encoded string */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000736 Py_ssize_t size, /* size of buffer */
Guido van Rossumd8225182000-03-10 22:33:05 +0000737 const char *encoding, /* encoding */
738 const char *errors /* error handling */
739 );
740
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000741/* Decode a Unicode object unicode and return the result as Python
742 object. */
743
744PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000745 PyObject *unicode, /* Unicode object */
746 const char *encoding, /* encoding */
747 const char *errors /* error handling */
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000748 );
749
750/* Decode a Unicode object unicode and return the result as Unicode
751 object. */
752
753PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000754 PyObject *unicode, /* Unicode object */
755 const char *encoding, /* encoding */
756 const char *errors /* error handling */
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000757 );
758
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000759/* Encodes a Py_UNICODE buffer of the given size and returns a
Guido van Rossumd8225182000-03-10 22:33:05 +0000760 Python string object. */
761
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000762#ifndef Py_LIMITED_API
Mark Hammond91a681d2002-08-12 07:21:58 +0000763PyAPI_FUNC(PyObject*) PyUnicode_Encode(
Guido van Rossumd8225182000-03-10 22:33:05 +0000764 const Py_UNICODE *s, /* Unicode char buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000765 Py_ssize_t size, /* number of Py_UNICODE chars to encode */
Guido van Rossumd8225182000-03-10 22:33:05 +0000766 const char *encoding, /* encoding */
767 const char *errors /* error handling */
768 );
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000769#endif
Guido van Rossumd8225182000-03-10 22:33:05 +0000770
Marc-André Lemburgd2d45982004-07-08 17:57:32 +0000771/* Encodes a Unicode object and returns the result as Python
772 object. */
773
774PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000775 PyObject *unicode, /* Unicode object */
776 const char *encoding, /* encoding */
777 const char *errors /* error handling */
Marc-André Lemburgd2d45982004-07-08 17:57:32 +0000778 );
779
Guido van Rossumd8225182000-03-10 22:33:05 +0000780/* Encodes a Unicode object and returns the result as Python string
781 object. */
782
Mark Hammond91a681d2002-08-12 07:21:58 +0000783PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000784 PyObject *unicode, /* Unicode object */
785 const char *encoding, /* encoding */
786 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +0000787 );
788
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000789/* Encodes a Unicode object and returns the result as Unicode
790 object. */
791
792PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000793 PyObject *unicode, /* Unicode object */
794 const char *encoding, /* encoding */
795 const char *errors /* error handling */
Marc-André Lemburgb2750b52008-06-06 12:18:17 +0000796 );
797
798/* Build an encoding map. */
799
Thomas Wouters73e5a5b2006-06-08 15:35:45 +0000800PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
801 PyObject* string /* 256 character map */
802 );
803
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000804/* --- UTF-7 Codecs ------------------------------------------------------- */
805
Mark Hammond91a681d2002-08-12 07:21:58 +0000806PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000807 const char *string, /* UTF-7 encoded string */
808 Py_ssize_t length, /* size of string */
809 const char *errors /* error handling */
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000810 );
811
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000812PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000813 const char *string, /* UTF-7 encoded string */
814 Py_ssize_t length, /* size of string */
815 const char *errors, /* error handling */
816 Py_ssize_t *consumed /* bytes consumed */
Christian Heimes5d14c2b2007-11-20 23:38:09 +0000817 );
818
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000819#ifndef Py_LIMITED_API
Mark Hammond91a681d2002-08-12 07:21:58 +0000820PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000821 const Py_UNICODE *data, /* Unicode char buffer */
822 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
823 int base64SetO, /* Encode RFC2152 Set O characters in base64 */
824 int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
825 const char *errors /* error handling */
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000826 );
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000827#endif
Marc-André Lemburgc60e6f72001-09-20 10:35:46 +0000828
Guido van Rossumd8225182000-03-10 22:33:05 +0000829/* --- UTF-8 Codecs ------------------------------------------------------- */
830
Mark Hammond91a681d2002-08-12 07:21:58 +0000831PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000832 const char *string, /* UTF-8 encoded string */
833 Py_ssize_t length, /* size of string */
834 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +0000835 );
836
Walter Dörwald69652032004-09-07 20:24:22 +0000837PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000838 const char *string, /* UTF-8 encoded string */
839 Py_ssize_t length, /* size of string */
840 const char *errors, /* error handling */
841 Py_ssize_t *consumed /* bytes consumed */
Walter Dörwald69652032004-09-07 20:24:22 +0000842 );
843
Mark Hammond91a681d2002-08-12 07:21:58 +0000844PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000845 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +0000846 );
847
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000848#ifndef Py_LIMITED_API
Mark Hammond91a681d2002-08-12 07:21:58 +0000849PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000850 const Py_UNICODE *data, /* Unicode char buffer */
851 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
852 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +0000853 );
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000854#endif
Guido van Rossumd8225182000-03-10 22:33:05 +0000855
Walter Dörwald41980ca2007-08-16 21:55:45 +0000856/* --- UTF-32 Codecs ------------------------------------------------------ */
857
858/* Decodes length bytes from a UTF-32 encoded buffer string and returns
859 the corresponding Unicode object.
860
861 errors (if non-NULL) defines the error handling. It defaults
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000862 to "strict".
Walter Dörwald41980ca2007-08-16 21:55:45 +0000863
864 If byteorder is non-NULL, the decoder starts decoding using the
865 given byte order:
866
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000867 *byteorder == -1: little endian
868 *byteorder == 0: native order
869 *byteorder == 1: big endian
Walter Dörwald41980ca2007-08-16 21:55:45 +0000870
871 In native mode, the first four bytes of the stream are checked for a
872 BOM mark. If found, the BOM mark is analysed, the byte order
873 adjusted and the BOM skipped. In the other modes, no BOM mark
874 interpretation is done. After completion, *byteorder is set to the
875 current byte order at the end of input data.
876
877 If byteorder is NULL, the codec starts in native order mode.
878
879*/
880
881PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000882 const char *string, /* UTF-32 encoded string */
883 Py_ssize_t length, /* size of string */
884 const char *errors, /* error handling */
885 int *byteorder /* pointer to byteorder to use
886 0=native;-1=LE,1=BE; updated on
887 exit */
Walter Dörwald41980ca2007-08-16 21:55:45 +0000888 );
889
890PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000891 const char *string, /* UTF-32 encoded string */
892 Py_ssize_t length, /* size of string */
893 const char *errors, /* error handling */
894 int *byteorder, /* pointer to byteorder to use
895 0=native;-1=LE,1=BE; updated on
896 exit */
897 Py_ssize_t *consumed /* bytes consumed */
Walter Dörwald41980ca2007-08-16 21:55:45 +0000898 );
899
900/* Returns a Python string using the UTF-32 encoding in native byte
901 order. The string always starts with a BOM mark. */
902
903PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000904 PyObject *unicode /* Unicode object */
Walter Dörwald41980ca2007-08-16 21:55:45 +0000905 );
906
907/* Returns a Python string object holding the UTF-32 encoded value of
908 the Unicode data.
909
910 If byteorder is not 0, output is written according to the following
911 byte order:
912
913 byteorder == -1: little endian
914 byteorder == 0: native byte order (writes a BOM mark)
915 byteorder == 1: big endian
916
917 If byteorder is 0, the output string will always start with the
918 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
919 prepended.
920
921*/
922
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000923#ifndef Py_LIMITED_API
Walter Dörwald41980ca2007-08-16 21:55:45 +0000924PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000925 const Py_UNICODE *data, /* Unicode char buffer */
926 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
927 const char *errors, /* error handling */
928 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
Walter Dörwald41980ca2007-08-16 21:55:45 +0000929 );
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000930#endif
Walter Dörwald41980ca2007-08-16 21:55:45 +0000931
Guido van Rossumd8225182000-03-10 22:33:05 +0000932/* --- UTF-16 Codecs ------------------------------------------------------ */
933
Guido van Rossum9e896b32000-04-05 20:11:21 +0000934/* Decodes length bytes from a UTF-16 encoded buffer string and returns
Guido van Rossumd8225182000-03-10 22:33:05 +0000935 the corresponding Unicode object.
936
937 errors (if non-NULL) defines the error handling. It defaults
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000938 to "strict".
Guido van Rossumd8225182000-03-10 22:33:05 +0000939
940 If byteorder is non-NULL, the decoder starts decoding using the
941 given byte order:
942
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000943 *byteorder == -1: little endian
944 *byteorder == 0: native order
945 *byteorder == 1: big endian
Guido van Rossumd8225182000-03-10 22:33:05 +0000946
Marc-André Lemburg489b56e2001-05-21 20:30:15 +0000947 In native mode, the first two bytes of the stream are checked for a
948 BOM mark. If found, the BOM mark is analysed, the byte order
949 adjusted and the BOM skipped. In the other modes, no BOM mark
950 interpretation is done. After completion, *byteorder is set to the
951 current byte order at the end of input data.
Guido van Rossumd8225182000-03-10 22:33:05 +0000952
953 If byteorder is NULL, the codec starts in native order mode.
954
955*/
956
Mark Hammond91a681d2002-08-12 07:21:58 +0000957PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000958 const char *string, /* UTF-16 encoded string */
959 Py_ssize_t length, /* size of string */
960 const char *errors, /* error handling */
961 int *byteorder /* pointer to byteorder to use
962 0=native;-1=LE,1=BE; updated on
963 exit */
Guido van Rossumd8225182000-03-10 22:33:05 +0000964 );
965
Walter Dörwald69652032004-09-07 20:24:22 +0000966PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000967 const char *string, /* UTF-16 encoded string */
968 Py_ssize_t length, /* size of string */
969 const char *errors, /* error handling */
970 int *byteorder, /* pointer to byteorder to use
971 0=native;-1=LE,1=BE; updated on
972 exit */
973 Py_ssize_t *consumed /* bytes consumed */
Walter Dörwald69652032004-09-07 20:24:22 +0000974 );
975
Guido van Rossumd8225182000-03-10 22:33:05 +0000976/* Returns a Python string using the UTF-16 encoding in native byte
977 order. The string always starts with a BOM mark. */
978
Mark Hammond91a681d2002-08-12 07:21:58 +0000979PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000980 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +0000981 );
982
983/* Returns a Python string object holding the UTF-16 encoded value of
Guido van Rossum9e896b32000-04-05 20:11:21 +0000984 the Unicode data.
Guido van Rossumd8225182000-03-10 22:33:05 +0000985
986 If byteorder is not 0, output is written according to the following
987 byte order:
988
989 byteorder == -1: little endian
990 byteorder == 0: native byte order (writes a BOM mark)
991 byteorder == 1: big endian
992
993 If byteorder is 0, the output string will always start with the
994 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
995 prepended.
996
997 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
998 UCS-2. This trick makes it possible to add full UTF-16 capabilities
Thomas Wouters7e474022000-07-16 12:04:32 +0000999 at a later point without compromising the APIs.
Guido van Rossumd8225182000-03-10 22:33:05 +00001000
1001*/
1002
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001003#ifndef Py_LIMITED_API
Mark Hammond91a681d2002-08-12 07:21:58 +00001004PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001005 const Py_UNICODE *data, /* Unicode char buffer */
1006 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
1007 const char *errors, /* error handling */
1008 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
Guido van Rossumd8225182000-03-10 22:33:05 +00001009 );
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001010#endif
Guido van Rossumd8225182000-03-10 22:33:05 +00001011
1012/* --- Unicode-Escape Codecs ---------------------------------------------- */
1013
Mark Hammond91a681d2002-08-12 07:21:58 +00001014PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001015 const char *string, /* Unicode-Escape encoded string */
1016 Py_ssize_t length, /* size of string */
1017 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001018 );
1019
Mark Hammond91a681d2002-08-12 07:21:58 +00001020PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001021 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +00001022 );
1023
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001024#ifndef Py_LIMITED_API
Mark Hammond91a681d2002-08-12 07:21:58 +00001025PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001026 const Py_UNICODE *data, /* Unicode char buffer */
1027 Py_ssize_t length /* Number of Py_UNICODE chars to encode */
Guido van Rossumd8225182000-03-10 22:33:05 +00001028 );
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001029#endif
Guido van Rossumd8225182000-03-10 22:33:05 +00001030
1031/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
1032
Mark Hammond91a681d2002-08-12 07:21:58 +00001033PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001034 const char *string, /* Raw-Unicode-Escape encoded string */
1035 Py_ssize_t length, /* size of string */
1036 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001037 );
1038
Mark Hammond91a681d2002-08-12 07:21:58 +00001039PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001040 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +00001041 );
1042
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001043#ifndef Py_LIMITED_API
Mark Hammond91a681d2002-08-12 07:21:58 +00001044PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001045 const Py_UNICODE *data, /* Unicode char buffer */
1046 Py_ssize_t length /* Number of Py_UNICODE chars to encode */
Guido van Rossumd8225182000-03-10 22:33:05 +00001047 );
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001048#endif
Guido van Rossumd8225182000-03-10 22:33:05 +00001049
Walter Dörwalda47d1c02005-08-30 10:23:14 +00001050/* --- Unicode Internal Codec ---------------------------------------------
1051
1052 Only for internal use in _codecsmodule.c */
1053
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001054#ifndef Py_LIMITED_API
Walter Dörwalda47d1c02005-08-30 10:23:14 +00001055PyObject *_PyUnicode_DecodeUnicodeInternal(
1056 const char *string,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001057 Py_ssize_t length,
Walter Dörwalda47d1c02005-08-30 10:23:14 +00001058 const char *errors
1059 );
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001060#endif
Walter Dörwalda47d1c02005-08-30 10:23:14 +00001061
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001062/* --- Latin-1 Codecs -----------------------------------------------------
Guido van Rossumd8225182000-03-10 22:33:05 +00001063
1064 Note: Latin-1 corresponds to the first 256 Unicode ordinals.
1065
1066*/
1067
Mark Hammond91a681d2002-08-12 07:21:58 +00001068PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001069 const char *string, /* Latin-1 encoded string */
1070 Py_ssize_t length, /* size of string */
1071 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001072 );
1073
Mark Hammond91a681d2002-08-12 07:21:58 +00001074PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001075 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +00001076 );
1077
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001078#ifndef Py_LIMITED_API
Mark Hammond91a681d2002-08-12 07:21:58 +00001079PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001080 const Py_UNICODE *data, /* Unicode char buffer */
1081 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
1082 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001083 );
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001084#endif
Guido van Rossumd8225182000-03-10 22:33:05 +00001085
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086/* --- ASCII Codecs -------------------------------------------------------
Guido van Rossumd8225182000-03-10 22:33:05 +00001087
1088 Only 7-bit ASCII data is accepted. All other codes generate errors.
1089
1090*/
1091
Mark Hammond91a681d2002-08-12 07:21:58 +00001092PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001093 const char *string, /* ASCII encoded string */
1094 Py_ssize_t length, /* size of string */
1095 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001096 );
1097
Mark Hammond91a681d2002-08-12 07:21:58 +00001098PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001099 PyObject *unicode /* Unicode object */
Guido van Rossumd8225182000-03-10 22:33:05 +00001100 );
1101
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001102#ifndef Py_LIMITED_API
Mark Hammond91a681d2002-08-12 07:21:58 +00001103PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001104 const Py_UNICODE *data, /* Unicode char buffer */
1105 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
1106 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001107 );
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001108#endif
Guido van Rossumd8225182000-03-10 22:33:05 +00001109
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001110/* --- Character Map Codecs -----------------------------------------------
Guido van Rossumd8225182000-03-10 22:33:05 +00001111
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001112 This codec uses mappings to encode and decode characters.
Guido van Rossumd8225182000-03-10 22:33:05 +00001113
1114 Decoding mappings must map single string characters to single
1115 Unicode characters, integers (which are then interpreted as Unicode
1116 ordinals) or None (meaning "undefined mapping" and causing an
1117 error).
1118
1119 Encoding mappings must map single Unicode characters to single
1120 string characters, integers (which are then interpreted as Latin-1
1121 ordinals) or None (meaning "undefined mapping" and causing an
1122 error).
1123
1124 If a character lookup fails with a LookupError, the character is
1125 copied as-is meaning that its ordinal value will be interpreted as
1126 Unicode or Latin-1 ordinal resp. Because of this, mappings only need
1127 to contain those mappings which map characters to different code
1128 points.
1129
1130*/
1131
Mark Hammond91a681d2002-08-12 07:21:58 +00001132PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001133 const char *string, /* Encoded string */
1134 Py_ssize_t length, /* size of string */
1135 PyObject *mapping, /* character mapping
1136 (char ordinal -> unicode ordinal) */
1137 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001138 );
1139
Mark Hammond91a681d2002-08-12 07:21:58 +00001140PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001141 PyObject *unicode, /* Unicode object */
1142 PyObject *mapping /* character mapping
1143 (unicode ordinal -> char ordinal) */
Guido van Rossumd8225182000-03-10 22:33:05 +00001144 );
1145
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001146#ifndef Py_LIMITED_API
Mark Hammond91a681d2002-08-12 07:21:58 +00001147PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001148 const Py_UNICODE *data, /* Unicode char buffer */
1149 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
1150 PyObject *mapping, /* character mapping
1151 (unicode ordinal -> char ordinal) */
1152 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001153 );
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001154#endif
Guido van Rossumd8225182000-03-10 22:33:05 +00001155
1156/* Translate a Py_UNICODE buffer of the given length by applying a
1157 character mapping table to it and return the resulting Unicode
1158 object.
1159
1160 The mapping table must map Unicode ordinal integers to Unicode
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001161 ordinal integers or None (causing deletion of the character).
Guido van Rossumd8225182000-03-10 22:33:05 +00001162
1163 Mapping tables may be dictionaries or sequences. Unmapped character
1164 ordinals (ones which cause a LookupError) are left untouched and
1165 are copied as-is.
1166
1167*/
1168
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001169#ifndef Py_LIMITED_API
Mark Hammond91a681d2002-08-12 07:21:58 +00001170PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001171 const Py_UNICODE *data, /* Unicode char buffer */
1172 Py_ssize_t length, /* Number of Py_UNICODE chars to translate */
1173 PyObject *table, /* Translate table */
1174 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001175 );
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001176#endif
Guido van Rossumd8225182000-03-10 22:33:05 +00001177
Victor Stinner99b95382011-07-04 14:23:54 +02001178#ifdef HAVE_MBCS
Guido van Rossum24bdb042000-03-28 20:29:59 +00001179
Guido van Rossumefec1152000-03-28 02:01:15 +00001180/* --- MBCS codecs for Windows -------------------------------------------- */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001181
Mark Hammond91a681d2002-08-12 07:21:58 +00001182PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
Guido van Rossumefec1152000-03-28 02:01:15 +00001183 const char *string, /* MBCS encoded string */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001184 Py_ssize_t length, /* size of string */
Guido van Rossumefec1152000-03-28 02:01:15 +00001185 const char *errors /* error handling */
1186 );
1187
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001188PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
1189 const char *string, /* MBCS encoded string */
1190 Py_ssize_t length, /* size of string */
1191 const char *errors, /* error handling */
1192 Py_ssize_t *consumed /* bytes consumed */
1193 );
1194
Mark Hammond91a681d2002-08-12 07:21:58 +00001195PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
Guido van Rossumefec1152000-03-28 02:01:15 +00001196 PyObject *unicode /* Unicode object */
1197 );
1198
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001199#ifndef Py_LIMITED_API
Mark Hammond91a681d2002-08-12 07:21:58 +00001200PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
Guido van Rossumefec1152000-03-28 02:01:15 +00001201 const Py_UNICODE *data, /* Unicode char buffer */
Neal Norwitzd78f6cf2007-08-08 04:49:37 +00001202 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
Guido van Rossumefec1152000-03-28 02:01:15 +00001203 const char *errors /* error handling */
1204 );
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001205#endif
Guido van Rossumefec1152000-03-28 02:01:15 +00001206
Victor Stinner99b95382011-07-04 14:23:54 +02001207#endif /* HAVE_MBCS */
Guido van Rossum24bdb042000-03-28 20:29:59 +00001208
Guido van Rossum9e896b32000-04-05 20:11:21 +00001209/* --- Decimal Encoder ---------------------------------------------------- */
1210
1211/* Takes a Unicode string holding a decimal value and writes it into
1212 an output buffer using standard ASCII digit codes.
1213
1214 The output buffer has to provide at least length+1 bytes of storage
1215 area. The output string is 0-terminated.
1216
1217 The encoder converts whitespace to ' ', decimal characters to their
1218 corresponding ASCII digit and all other Latin-1 characters except
1219 \0 as-is. Characters outside this range (Unicode ordinals 1-255)
1220 are treated as errors. This includes embedded NULL bytes.
1221
1222 Error handling is defined by the errors argument:
1223
1224 NULL or "strict": raise a ValueError
1225 "ignore": ignore the wrong characters (these are not copied to the
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001226 output buffer)
Guido van Rossum9e896b32000-04-05 20:11:21 +00001227 "replace": replaces illegal characters with '?'
1228
1229 Returns 0 on success, -1 on failure.
1230
1231*/
1232
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001233#ifndef Py_LIMITED_API
Mark Hammond91a681d2002-08-12 07:21:58 +00001234PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001235 Py_UNICODE *s, /* Unicode buffer */
1236 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
1237 char *output, /* Output buffer; must have size >= length+1 (0-terminated) */
1238 const char *errors /* error handling */
Guido van Rossum9e896b32000-04-05 20:11:21 +00001239 );
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001240#endif
Guido van Rossum9e896b32000-04-05 20:11:21 +00001241
Alexander Belopolsky942af5a2010-12-04 03:38:46 +00001242/* Transforms code points that have decimal digit property to the
1243 corresponding ASCII digit code points.
1244
1245 Returns a new Unicode string on success, NULL on failure.
1246*/
1247
Georg Brandlb5503082010-12-05 11:40:48 +00001248#ifndef Py_LIMITED_API
Alexander Belopolsky942af5a2010-12-04 03:38:46 +00001249PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
1250 Py_UNICODE *s, /* Unicode buffer */
1251 Py_ssize_t length /* Number of Py_UNICODE chars to transform */
1252 );
Georg Brandlb5503082010-12-05 11:40:48 +00001253#endif
Alexander Belopolsky942af5a2010-12-04 03:38:46 +00001254
Martin v. Löwis011e8422009-05-05 04:43:17 +00001255/* --- File system encoding ---------------------------------------------- */
1256
Victor Stinner47fcb5b2010-08-13 23:59:58 +00001257/* ParseTuple converter: encode str objects to bytes using
1258 PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */
Martin v. Löwis011e8422009-05-05 04:43:17 +00001259
1260PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
1261
Victor Stinner47fcb5b2010-08-13 23:59:58 +00001262/* ParseTuple converter: decode bytes objects to unicode using
1263 PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */
1264
1265PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
1266
Victor Stinner77c38622010-05-14 15:58:55 +00001267/* Decode a null-terminated string using Py_FileSystemDefaultEncoding
1268 and the "surrogateescape" error handler.
Martin v. Löwis011e8422009-05-05 04:43:17 +00001269
Victor Stinnerf3170cc2010-10-15 12:04:23 +00001270 If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1271 encoding.
Martin v. Löwis011e8422009-05-05 04:43:17 +00001272
Benjamin Petersonccbd6942010-05-15 17:43:18 +00001273 Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known.
Martin v. Löwis011e8422009-05-05 04:43:17 +00001274*/
1275
1276PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
1277 const char *s /* encoded string */
1278 );
1279
Victor Stinner77c38622010-05-14 15:58:55 +00001280/* Decode a string using Py_FileSystemDefaultEncoding
1281 and the "surrogateescape" error handler.
1282
Victor Stinnerf3170cc2010-10-15 12:04:23 +00001283 If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1284 encoding.
Victor Stinner77c38622010-05-14 15:58:55 +00001285*/
1286
Martin v. Löwis011e8422009-05-05 04:43:17 +00001287PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
1288 const char *s, /* encoded string */
1289 Py_ssize_t size /* size */
1290 );
1291
Victor Stinnerae6265f2010-05-15 16:27:27 +00001292/* Encode a Unicode object to Py_FileSystemDefaultEncoding with the
Benjamin Petersonccbd6942010-05-15 17:43:18 +00001293 "surrogateescape" error handler, and return bytes.
Victor Stinnerae6265f2010-05-15 16:27:27 +00001294
Victor Stinnerf3170cc2010-10-15 12:04:23 +00001295 If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1296 encoding.
Victor Stinnerae6265f2010-05-15 16:27:27 +00001297*/
1298
1299PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
1300 PyObject *unicode
1301 );
1302
Guido van Rossumd8225182000-03-10 22:33:05 +00001303/* --- Methods & Slots ----------------------------------------------------
1304
1305 These are capable of handling Unicode objects and strings on input
1306 (we refer to them as strings in the descriptions) and return
1307 Unicode objects or integers as appropriate. */
1308
1309/* Concat two strings giving a new Unicode string. */
1310
Mark Hammond91a681d2002-08-12 07:21:58 +00001311PyAPI_FUNC(PyObject*) PyUnicode_Concat(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001312 PyObject *left, /* Left string */
1313 PyObject *right /* Right string */
Guido van Rossumd8225182000-03-10 22:33:05 +00001314 );
1315
Walter Dörwald1ab83302007-05-18 17:15:44 +00001316/* Concat two strings and put the result in *pleft
1317 (sets *pleft to NULL on error) */
1318
1319PyAPI_FUNC(void) PyUnicode_Append(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001320 PyObject **pleft, /* Pointer to left string */
1321 PyObject *right /* Right string */
Walter Dörwald1ab83302007-05-18 17:15:44 +00001322 );
1323
1324/* Concat two strings, put the result in *pleft and drop the right object
1325 (sets *pleft to NULL on error) */
1326
1327PyAPI_FUNC(void) PyUnicode_AppendAndDel(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001328 PyObject **pleft, /* Pointer to left string */
1329 PyObject *right /* Right string */
Walter Dörwald1ab83302007-05-18 17:15:44 +00001330 );
1331
Guido van Rossumd8225182000-03-10 22:33:05 +00001332/* Split a string giving a list of Unicode strings.
1333
1334 If sep is NULL, splitting will be done at all whitespace
1335 substrings. Otherwise, splits occur at the given separator.
1336
1337 At most maxsplit splits will be done. If negative, no limit is set.
1338
1339 Separators are not included in the resulting list.
1340
1341*/
1342
Mark Hammond91a681d2002-08-12 07:21:58 +00001343PyAPI_FUNC(PyObject*) PyUnicode_Split(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001344 PyObject *s, /* String to split */
1345 PyObject *sep, /* String separator */
1346 Py_ssize_t maxsplit /* Maxsplit count */
1347 );
Guido van Rossumd8225182000-03-10 22:33:05 +00001348
1349/* Ditto, but split at line breaks.
1350
1351 CRLF is considered to be one line break. Line breaks are not
1352 included in the resulting list unless keepends is true. */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001353
Mark Hammond91a681d2002-08-12 07:21:58 +00001354PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001355 PyObject *s, /* String to split */
1356 int keepends /* If true, line end markers are included */
1357 );
Guido van Rossumd8225182000-03-10 22:33:05 +00001358
Thomas Wouters477c8d52006-05-27 19:21:47 +00001359/* Partition a string using a given separator. */
1360
1361PyAPI_FUNC(PyObject*) PyUnicode_Partition(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001362 PyObject *s, /* String to partition */
1363 PyObject *sep /* String separator */
1364 );
Thomas Wouters477c8d52006-05-27 19:21:47 +00001365
1366/* Partition a string using a given separator, searching from the end of the
1367 string. */
1368
1369PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001370 PyObject *s, /* String to partition */
1371 PyObject *sep /* String separator */
1372 );
Thomas Wouters477c8d52006-05-27 19:21:47 +00001373
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001374/* Split a string giving a list of Unicode strings.
1375
1376 If sep is NULL, splitting will be done at all whitespace
1377 substrings. Otherwise, splits occur at the given separator.
1378
1379 At most maxsplit splits will be done; if maxsplit is negative, no
1380 limit is set. Unlike PyUnicode_Split, PyUnicode_RSplit splits from
1381 the end of the string.
1382
1383 Separators are not included in the resulting list.
1384
1385*/
1386
1387PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001388 PyObject *s, /* String to split */
1389 PyObject *sep, /* String separator */
1390 Py_ssize_t maxsplit /* Maxsplit count */
1391 );
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001392
Guido van Rossumd8225182000-03-10 22:33:05 +00001393/* Translate a string by applying a character mapping table to it and
1394 return the resulting Unicode object.
1395
1396 The mapping table must map Unicode ordinal integers to Unicode
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001397 ordinal integers or None (causing deletion of the character).
Guido van Rossumd8225182000-03-10 22:33:05 +00001398
1399 Mapping tables may be dictionaries or sequences. Unmapped character
1400 ordinals (ones which cause a LookupError) are left untouched and
1401 are copied as-is.
1402
1403*/
1404
Mark Hammond91a681d2002-08-12 07:21:58 +00001405PyAPI_FUNC(PyObject *) PyUnicode_Translate(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001406 PyObject *str, /* String */
1407 PyObject *table, /* Translate table */
1408 const char *errors /* error handling */
Guido van Rossumd8225182000-03-10 22:33:05 +00001409 );
1410
1411/* Join a sequence of strings using the given separator and return
1412 the resulting Unicode string. */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001413
Mark Hammond91a681d2002-08-12 07:21:58 +00001414PyAPI_FUNC(PyObject*) PyUnicode_Join(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 PyObject *separator, /* Separator string */
1416 PyObject *seq /* Sequence object */
Guido van Rossumd8225182000-03-10 22:33:05 +00001417 );
1418
1419/* Return 1 if substr matches str[start:end] at the given tail end, 0
1420 otherwise. */
1421
Martin v. Löwis18e16552006-02-15 17:27:45 +00001422PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001423 PyObject *str, /* String */
1424 PyObject *substr, /* Prefix or Suffix string */
1425 Py_ssize_t start, /* Start index */
1426 Py_ssize_t end, /* Stop index */
1427 int direction /* Tail end: -1 prefix, +1 suffix */
Guido van Rossumd8225182000-03-10 22:33:05 +00001428 );
1429
1430/* Return the first position of substr in str[start:end] using the
Marc-André Lemburg4da6fd62002-05-29 11:33:13 +00001431 given search direction or -1 if not found. -2 is returned in case
1432 an error occurred and an exception is set. */
Guido van Rossumd8225182000-03-10 22:33:05 +00001433
Martin v. Löwis18e16552006-02-15 17:27:45 +00001434PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001435 PyObject *str, /* String */
1436 PyObject *substr, /* Substring to find */
1437 Py_ssize_t start, /* Start index */
1438 Py_ssize_t end, /* Stop index */
1439 int direction /* Find direction: +1 forward, -1 backward */
Guido van Rossumd8225182000-03-10 22:33:05 +00001440 );
1441
Barry Warsaw51ac5802000-03-20 16:36:48 +00001442/* Count the number of occurrences of substr in str[start:end]. */
Guido van Rossumd8225182000-03-10 22:33:05 +00001443
Martin v. Löwis18e16552006-02-15 17:27:45 +00001444PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001445 PyObject *str, /* String */
1446 PyObject *substr, /* Substring to count */
1447 Py_ssize_t start, /* Start index */
1448 Py_ssize_t end /* Stop index */
Guido van Rossumd8225182000-03-10 22:33:05 +00001449 );
1450
Barry Warsaw51ac5802000-03-20 16:36:48 +00001451/* Replace at most maxcount occurrences of substr in str with replstr
Guido van Rossumd8225182000-03-10 22:33:05 +00001452 and return the resulting Unicode object. */
1453
Mark Hammond91a681d2002-08-12 07:21:58 +00001454PyAPI_FUNC(PyObject *) PyUnicode_Replace(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001455 PyObject *str, /* String */
1456 PyObject *substr, /* Substring to find */
1457 PyObject *replstr, /* Substring to replace */
1458 Py_ssize_t maxcount /* Max. number of replacements to apply;
1459 -1 = all */
Guido van Rossumd8225182000-03-10 22:33:05 +00001460 );
1461
1462/* Compare two strings and return -1, 0, 1 for less than, equal,
1463 greater than resp. */
1464
Mark Hammond91a681d2002-08-12 07:21:58 +00001465PyAPI_FUNC(int) PyUnicode_Compare(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001466 PyObject *left, /* Left string */
1467 PyObject *right /* Right string */
Guido van Rossumd8225182000-03-10 22:33:05 +00001468 );
1469
Martin v. Löwis5b222132007-06-10 09:51:05 +00001470PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
1471 PyObject *left,
Victor Stinnerdc2081f2010-12-27 01:49:29 +00001472 const char *right /* ASCII-encoded string */
Martin v. Löwis5b222132007-06-10 09:51:05 +00001473 );
1474
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001475/* Rich compare two strings and return one of the following:
1476
1477 - NULL in case an exception was raised
1478 - Py_True or Py_False for successful comparisons
1479 - Py_NotImplemented in case the type combination is unknown
1480
1481 Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
1482 case the conversion of the arguments to Unicode fails with a
1483 UnicodeDecodeError.
1484
1485 Possible values for op:
1486
1487 Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
1488
1489*/
1490
1491PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001492 PyObject *left, /* Left string */
1493 PyObject *right, /* Right string */
1494 int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001495 );
1496
Thomas Wouters7e474022000-07-16 12:04:32 +00001497/* Apply an argument tuple or dictionary to a format string and return
Guido van Rossumd8225182000-03-10 22:33:05 +00001498 the resulting Unicode string. */
1499
Mark Hammond91a681d2002-08-12 07:21:58 +00001500PyAPI_FUNC(PyObject *) PyUnicode_Format(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001501 PyObject *format, /* Format string */
1502 PyObject *args /* Argument tuple or dictionary */
Guido van Rossumd8225182000-03-10 22:33:05 +00001503 );
1504
Guido van Rossumd0d366b2000-03-13 23:22:24 +00001505/* Checks whether element is contained in container and return 1/0
1506 accordingly.
1507
1508 element has to coerce to a one-element Unicode string. -1 is
1509 returned in case of an error. */
1510
Mark Hammond91a681d2002-08-12 07:21:58 +00001511PyAPI_FUNC(int) PyUnicode_Contains(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001512 PyObject *container, /* Container string */
1513 PyObject *element /* Element string */
Guido van Rossumd0d366b2000-03-13 23:22:24 +00001514 );
1515
Martin v. Löwis47383402007-08-15 07:32:56 +00001516/* Checks whether argument is a valid identifier. */
1517
1518PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
1519
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001520#ifndef Py_LIMITED_API
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001521/* Externally visible for str.strip(unicode) */
Mark Hammond91a681d2002-08-12 07:21:58 +00001522PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001523 PyUnicodeObject *self,
1524 int striptype,
1525 PyObject *sepobj
1526 );
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001527#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001528
Eric Smith5807c412008-05-11 21:00:57 +00001529/* Using the current locale, insert the thousands grouping
1530 into the string pointed to by buffer. For the argument descriptions,
1531 see Objects/stringlib/localeutil.h */
1532
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001533#ifndef Py_LIMITED_API
Eric Smith0923d1d2009-04-16 20:16:10 +00001534PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGroupingLocale(Py_UNICODE *buffer,
1535 Py_ssize_t n_buffer,
1536 Py_UNICODE *digits,
1537 Py_ssize_t n_digits,
1538 Py_ssize_t min_width);
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001539#endif
Eric Smith5807c412008-05-11 21:00:57 +00001540
Eric Smitha3b1ac82009-04-03 14:45:06 +00001541/* Using explicit passed-in values, insert the thousands grouping
1542 into the string pointed to by buffer. For the argument descriptions,
1543 see Objects/stringlib/localeutil.h */
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001544#ifndef Py_LIMITED_API
Eric Smith0923d1d2009-04-16 20:16:10 +00001545PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(Py_UNICODE *buffer,
1546 Py_ssize_t n_buffer,
1547 Py_UNICODE *digits,
1548 Py_ssize_t n_digits,
1549 Py_ssize_t min_width,
1550 const char *grouping,
1551 const char *thousands_sep);
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001552#endif
Guido van Rossumd8225182000-03-10 22:33:05 +00001553/* === Characters Type APIs =============================================== */
1554
Benjamin Peterson960cf0f2009-01-09 04:11:44 +00001555/* Helper array used by Py_UNICODE_ISSPACE(). */
1556
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001557#ifndef Py_LIMITED_API
Benjamin Peterson960cf0f2009-01-09 04:11:44 +00001558PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
1559
Guido van Rossumd8225182000-03-10 22:33:05 +00001560/* These should not be used directly. Use the Py_UNICODE_IS* and
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001561 Py_UNICODE_TO* macros instead.
Guido van Rossumd8225182000-03-10 22:33:05 +00001562
1563 These APIs are implemented in Objects/unicodectype.c.
1564
1565*/
1566
Mark Hammond91a681d2002-08-12 07:21:58 +00001567PyAPI_FUNC(int) _PyUnicode_IsLowercase(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001568 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001569 );
1570
Mark Hammond91a681d2002-08-12 07:21:58 +00001571PyAPI_FUNC(int) _PyUnicode_IsUppercase(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001572 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001573 );
1574
Mark Hammond91a681d2002-08-12 07:21:58 +00001575PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001576 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001577 );
1578
Martin v. Löwis13c3e382007-08-14 22:37:03 +00001579PyAPI_FUNC(int) _PyUnicode_IsXidStart(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001580 Py_UCS4 ch /* Unicode character */
Martin v. Löwis13c3e382007-08-14 22:37:03 +00001581 );
1582
1583PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001584 Py_UCS4 ch /* Unicode character */
Martin v. Löwis13c3e382007-08-14 22:37:03 +00001585 );
1586
Mark Hammond91a681d2002-08-12 07:21:58 +00001587PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001588 const Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001589 );
1590
Mark Hammond91a681d2002-08-12 07:21:58 +00001591PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001592 const Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001593 );
1594
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001595PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
1596 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001597 );
1598
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001599PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
1600 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001601 );
1602
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001603PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
1604 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001605 );
1606
Mark Hammond91a681d2002-08-12 07:21:58 +00001607PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001608 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001609 );
1610
Mark Hammond91a681d2002-08-12 07:21:58 +00001611PyAPI_FUNC(int) _PyUnicode_ToDigit(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001612 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001613 );
1614
Mark Hammond91a681d2002-08-12 07:21:58 +00001615PyAPI_FUNC(double) _PyUnicode_ToNumeric(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001616 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001617 );
1618
Mark Hammond91a681d2002-08-12 07:21:58 +00001619PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001620 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001621 );
1622
Mark Hammond91a681d2002-08-12 07:21:58 +00001623PyAPI_FUNC(int) _PyUnicode_IsDigit(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001624 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001625 );
1626
Mark Hammond91a681d2002-08-12 07:21:58 +00001627PyAPI_FUNC(int) _PyUnicode_IsNumeric(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001628 Py_UCS4 ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001629 );
1630
Georg Brandl559e5d72008-06-11 18:37:52 +00001631PyAPI_FUNC(int) _PyUnicode_IsPrintable(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001632 Py_UCS4 ch /* Unicode character */
Georg Brandl559e5d72008-06-11 18:37:52 +00001633 );
1634
Mark Hammond91a681d2002-08-12 07:21:58 +00001635PyAPI_FUNC(int) _PyUnicode_IsAlpha(
Amaury Forgeot d'Arc324ac652010-08-18 20:44:58 +00001636 Py_UCS4 ch /* Unicode character */
Marc-André Lemburgf03e7412000-07-05 09:45:59 +00001637 );
1638
Victor Stinneref8d95c2010-08-16 22:03:11 +00001639PyAPI_FUNC(size_t) Py_UNICODE_strlen(
1640 const Py_UNICODE *u
1641 );
Martin v. Löwis5b222132007-06-10 09:51:05 +00001642
1643PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
Victor Stinneref8d95c2010-08-16 22:03:11 +00001644 Py_UNICODE *s1,
1645 const Py_UNICODE *s2);
Martin v. Löwis5b222132007-06-10 09:51:05 +00001646
Victor Stinnerc4eb7652010-09-01 23:43:50 +00001647PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat(
1648 Py_UNICODE *s1, const Py_UNICODE *s2);
1649
Martin v. Löwis5b222132007-06-10 09:51:05 +00001650PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
Victor Stinneref8d95c2010-08-16 22:03:11 +00001651 Py_UNICODE *s1,
1652 const Py_UNICODE *s2,
1653 size_t n);
Martin v. Löwis5b222132007-06-10 09:51:05 +00001654
1655PyAPI_FUNC(int) Py_UNICODE_strcmp(
Victor Stinneref8d95c2010-08-16 22:03:11 +00001656 const Py_UNICODE *s1,
1657 const Py_UNICODE *s2
1658 );
1659
1660PyAPI_FUNC(int) Py_UNICODE_strncmp(
1661 const Py_UNICODE *s1,
1662 const Py_UNICODE *s2,
1663 size_t n
1664 );
Martin v. Löwis5b222132007-06-10 09:51:05 +00001665
1666PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
Victor Stinneref8d95c2010-08-16 22:03:11 +00001667 const Py_UNICODE *s,
1668 Py_UNICODE c
Martin v. Löwis5b222132007-06-10 09:51:05 +00001669 );
1670
Victor Stinner331ea922010-08-10 16:37:20 +00001671PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
Victor Stinneref8d95c2010-08-16 22:03:11 +00001672 const Py_UNICODE *s,
1673 Py_UNICODE c
Victor Stinner331ea922010-08-10 16:37:20 +00001674 );
1675
Victor Stinner71133ff2010-09-01 23:43:53 +00001676/* Create a copy of a unicode string ending with a nul character. Return NULL
1677 and raise a MemoryError exception on memory allocation failure, otherwise
1678 return a new allocated buffer (use PyMem_Free() to free the buffer). */
1679
Victor Stinner46408602010-09-03 16:18:00 +00001680PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
Victor Stinner71133ff2010-09-01 23:43:53 +00001681 PyObject *unicode
1682 );
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001683#endif /* Py_LIMITED_API */
Victor Stinner71133ff2010-09-01 23:43:53 +00001684
Guido van Rossumd8225182000-03-10 22:33:05 +00001685#ifdef __cplusplus
1686}
1687#endif
Guido van Rossumd8225182000-03-10 22:33:05 +00001688#endif /* !Py_UNICODEOBJECT_H */