Blame - Include/unicodeobject.h - platform/external/python/cpython3

blob: 203dcef09fd9abf87d6ebd0cc562f9586920ba76 [file] [log] [blame]

Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1	#ifndef Py_UNICODEOBJECT_H
				2	#define Py_UNICODEOBJECT_H
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	3
				4	/*
				5
				6	Unicode implementation based on original code by Fredrik Lundh,
				7	modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
				8	Unicode Integration Proposal (see file Misc/unicode.txt).
				9
Guido van Rossum	16b1ad9	2000-08-03 16:24:25 +0000	[diff] [blame]	10	Copyright (c) Corporation for National Research Initiatives.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	11
				12
				13	Original header:
				14	--------------------------------------------------------------------
				15
				16	* Yet another Unicode string type for Python. This type supports the
				17	* 16-bit Basic Multilingual Plane (BMP) only.
				18	*
				19	* Written by Fredrik Lundh, January 1999.
				20	*
				21	* Copyright (c) 1999 by Secret Labs AB.
				22	* Copyright (c) 1999 by Fredrik Lundh.
				23	*
				24	* fredrik@pythonware.com
				25	* http://www.pythonware.com
				26	*
				27	* --------------------------------------------------------------------
				28	* This Unicode String Type is
				29	*
				30	* Copyright (c) 1999 by Secret Labs AB
				31	* Copyright (c) 1999 by Fredrik Lundh
				32	*
				33	* By obtaining, using, and/or copying this software and/or its
				34	* associated documentation, you agree that you have read, understood,
				35	* and will comply with the following terms and conditions:
				36	*
				37	* Permission to use, copy, modify, and distribute this software and its
				38	* associated documentation for any purpose and without fee is hereby
				39	* granted, provided that the above copyright notice appears in all
				40	* copies, and that both that copyright notice and this permission notice
				41	* appear in supporting documentation, and that the name of Secret Labs
				42	* AB or the author not be used in advertising or publicity pertaining to
				43	* distribution of the software without specific, written prior
				44	* permission.
				45	*
				46	* SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
				47	* THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
				48	* FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
				49	* ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
				50	* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
				51	* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
				52	* OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
				53	* -------------------------------------------------------------------- */
				54
Marc-André Lemburg	5e6007c	2001-09-19 11:21:03 +0000	[diff] [blame]	55	#include <ctype.h>
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	56
				57	/* === Internal API ======================================================= */
				58
				59	/* --- Internal Unicode Format -------------------------------------------- */
				60
Fredrik Lundh	9b14ab3	2001-06-26 22:59:49 +0000	[diff] [blame]	61	/* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is
				62	properly set, but the default rules below don't set it. I'll
				63	sort this out some other day -- fredrik@pythonware.com */
				64
				65	#ifndef Py_UNICODE_SIZE
				66	#error Must define Py_UNICODE_SIZE
				67	#endif
				68
Fredrik Lundh	8f45585	2001-06-27 18:59:43 +0000	[diff] [blame]	69	/* Setting Py_UNICODE_WIDE enables UCS-4 storage. Otherwise, Unicode
				70	strings are stored as UCS-2 (with limited support for UTF-16) */
				71
				72	#if Py_UNICODE_SIZE >= 4
				73	#define Py_UNICODE_WIDE
Martin v. Löwis	0ba70cc	2001-06-26 22:22:37 +0000	[diff] [blame]	74	#endif
Fredrik Lundh	1294ad0	2001-06-26 17:17:07 +0000	[diff] [blame]	75
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	76	/* Set these flags if the platform has "wchar.h", "wctype.h" and the
				77	wchar_t type is a 16-bit unsigned type */
				78	/* #define HAVE_WCHAR_H */
				79	/* #define HAVE_USABLE_WCHAR_T */
				80
				81	/* Defaults for various platforms */
Martin v. Löwis	0ba70cc	2001-06-26 22:22:37 +0000	[diff] [blame]	82	#ifndef PY_UNICODE_TYPE
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	83
Fredrik Lundh	1294ad0	2001-06-26 17:17:07 +0000	[diff] [blame]	84	/* Windows has a usable wchar_t type (unless we're using UCS-4) */
Fredrik Lundh	8f45585	2001-06-27 18:59:43 +0000	[diff] [blame]	85	# if defined(MS_WIN32) && Py_UNICODE_SIZE == 2
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	86	# define HAVE_USABLE_WCHAR_T
Martin v. Löwis	0ba70cc	2001-06-26 22:22:37 +0000	[diff] [blame]	87	# define PY_UNICODE_TYPE wchar_t
				88	# endif
				89
Fredrik Lundh	8f45585	2001-06-27 18:59:43 +0000	[diff] [blame]	90	# if defined(Py_UNICODE_WIDE)
Martin v. Löwis	0ba70cc	2001-06-26 22:22:37 +0000	[diff] [blame]	91	# define PY_UNICODE_TYPE Py_UCS4
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	92	# endif
				93
				94	#endif
				95
				96	/* If the compiler provides a wchar_t type we try to support it
				97	through the interface functions PyUnicode_FromWideChar() and
				98	PyUnicode_AsWideChar(). */
				99
				100	#ifdef HAVE_USABLE_WCHAR_T
Marc-André Lemburg	1a731c6	2000-08-11 11:43:10 +0000	[diff] [blame]	101	# ifndef HAVE_WCHAR_H
				102	# define HAVE_WCHAR_H
				103	# endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	104	#endif
				105
				106	#ifdef HAVE_WCHAR_H
Guido van Rossum	24bdb04	2000-03-28 20:29:59 +0000	[diff] [blame]	107	/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
				108	# ifdef _HAVE_BSDI
				109	# include <time.h>
				110	# endif
Marc-André Lemburg	5e6007c	2001-09-19 11:21:03 +0000	[diff] [blame]	111	# include <wchar.h>
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	112	#endif
				113
Martin v. Löwis	0ba70cc	2001-06-26 22:22:37 +0000	[diff] [blame]	114	/*
				115	* Use this typedef when you need to represent a UTF-16 surrogate pair
				116	* as single unsigned integer.
				117	*/
				118	#if SIZEOF_INT >= 4
				119	typedef unsigned int Py_UCS4;
				120	#elif SIZEOF_LONG >= 4
				121	typedef unsigned long Py_UCS4;
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	122	#endif
				123
Martin v. Löwis	0ba70cc	2001-06-26 22:22:37 +0000	[diff] [blame]	124	typedef PY_UNICODE_TYPE Py_UNICODE;
Marc-André Lemburg	4327910	2000-07-07 09:01:41 +0000	[diff] [blame]	125
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	126	/* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */
				127
				128	/* Unicode API names are mangled to assure that UCS-2 and UCS-4 builds
				129	produce different external names and thus cause import errors in
				130	case Python interpreters and extensions with mixed compiled in
				131	Unicode width assumptions are combined. */
				132
				133	#ifndef Py_UNICODE_WIDE
				134
				135	# define PyUnicode_AsASCIIString PyUnicodeUCS2_AsASCIIString
				136	# define PyUnicode_AsCharmapString PyUnicodeUCS2_AsCharmapString
Marc-André Lemburg	d2d4598	2004-07-08 17:57:32 +0000	[diff] [blame]	137	# define PyUnicode_AsEncodedObject PyUnicodeUCS2_AsEncodedObject
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	138	# define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString
				139	# define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String
				140	# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	141	# define PyUnicode_AsUTF32String PyUnicodeUCS2_AsUTF32String
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	142	# define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String
				143	# define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String
				144	# define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
				145	# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString
				146	# define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar
				147	# define PyUnicode_Compare PyUnicodeUCS2_Compare
				148	# define PyUnicode_Concat PyUnicodeUCS2_Concat
Walter Dörwald	1ab8330	2007-05-18 17:15:44 +0000	[diff] [blame]	149	# define PyUnicode_Append PyUnicodeUCS2_Append
				150	# define PyUnicode_AppendAndDel PyUnicodeUCS2_AppendAndDel
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	151	# define PyUnicode_Contains PyUnicodeUCS2_Contains
				152	# define PyUnicode_Count PyUnicodeUCS2_Count
				153	# define PyUnicode_Decode PyUnicodeUCS2_Decode
				154	# define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
				155	# define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
				156	# define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
Guido van Rossum	00bc0e0	2007-10-15 02:52:41 +0000	[diff] [blame]	157	# define PyUnicode_DecodeFSDefault PyUnicodeUCS2_DecodeFSDefault
Christian Heimes	5894ba7	2007-11-04 11:43:14 +0000	[diff] [blame]	158	# define PyUnicode_DecodeFSDefaultAndSize PyUnicodeUCS2_DecodeFSDefaultAndSize
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	159	# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	160	# define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32
				161	# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	162	# define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
Walter Dörwald	6965203	2004-09-07 20:24:22 +0000	[diff] [blame]	163	# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	164	# define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
Walter Dörwald	6965203	2004-09-07 20:24:22 +0000	[diff] [blame]	165	# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	166	# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape
				167	# define PyUnicode_Encode PyUnicodeUCS2_Encode
				168	# define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII
				169	# define PyUnicode_EncodeCharmap PyUnicodeUCS2_EncodeCharmap
				170	# define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal
				171	# define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1
				172	# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	173	# define PyUnicode_EncodeUTF32 PyUnicodeUCS2_EncodeUTF32
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	174	# define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16
				175	# define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8
				176	# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape
				177	# define PyUnicode_Find PyUnicodeUCS2_Find
				178	# define PyUnicode_Format PyUnicodeUCS2_Format
				179	# define PyUnicode_FromEncodedObject PyUnicodeUCS2_FromEncodedObject
				180	# define PyUnicode_FromObject PyUnicodeUCS2_FromObject
Marc-André Lemburg	9c329de	2002-08-12 08:19:10 +0000	[diff] [blame]	181	# define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	182	# define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
Walter Dörwald	acaa5a1	2007-05-05 12:00:46 +0000	[diff] [blame]	183	# define PyUnicode_FromString PyUnicodeUCS2_FromString
Walter Dörwald	d203431	2007-05-18 16:29:38 +0000	[diff] [blame]	184	# define PyUnicode_FromStringAndSize PyUnicodeUCS2_FromStringAndSize
				185	# define PyUnicode_FromFormatV PyUnicodeUCS2_FromFormatV
				186	# define PyUnicode_FromFormat PyUnicodeUCS2_FromFormat
Walter Dörwald	14176a5	2007-05-18 17:04:42 +0000	[diff] [blame]	187	# define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	188	# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
				189	# define PyUnicode_GetMax PyUnicodeUCS2_GetMax
				190	# define PyUnicode_GetSize PyUnicodeUCS2_GetSize
Martin v. Löwis	4738340	2007-08-15 07:32:56 +0000	[diff] [blame]	191	# define PyUnicode_IsIdentifier PyUnicodeUCS2_IsIdentifier
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	192	# define PyUnicode_Join PyUnicodeUCS2_Join
Thomas Wouters	477c8d5	2006-05-27 19:21:47 +0000	[diff] [blame]	193	# define PyUnicode_Partition PyUnicodeUCS2_Partition
				194	# define PyUnicode_RPartition PyUnicodeUCS2_RPartition
				195	# define PyUnicode_RSplit PyUnicodeUCS2_RSplit
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	196	# define PyUnicode_Replace PyUnicodeUCS2_Replace
				197	# define PyUnicode_Resize PyUnicodeUCS2_Resize
Thomas Wouters	00ee7ba	2006-08-21 19:07:27 +0000	[diff] [blame]	198	# define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	199	# define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding
				200	# define PyUnicode_Split PyUnicodeUCS2_Split
				201	# define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines
				202	# define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch
				203	# define PyUnicode_Translate PyUnicodeUCS2_Translate
				204	# define PyUnicode_TranslateCharmap PyUnicodeUCS2_TranslateCharmap
				205	# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString
				206	# define _PyUnicode_Fini _PyUnicodeUCS2_Fini
				207	# define _PyUnicode_Init _PyUnicodeUCS2_Init
				208	# define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha
				209	# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit
				210	# define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit
				211	# define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak
				212	# define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase
				213	# define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric
				214	# define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase
Martin v. Löwis	13c3e38	2007-08-14 22:37:03 +0000	[diff] [blame]	215	# define _PyUnicode_IsXidStart _PyUnicodeUCS2_IsXidStart
				216	# define _PyUnicode_IsXidContinue _PyUnicodeUCS2_IsXidContinue
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	217	# define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase
				218	# define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace
				219	# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit
				220	# define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit
				221	# define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase
				222	# define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric
				223	# define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase
				224	# define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase
				225
				226	#else
				227
				228	# define PyUnicode_AsASCIIString PyUnicodeUCS4_AsASCIIString
				229	# define PyUnicode_AsCharmapString PyUnicodeUCS4_AsCharmapString
Marc-André Lemburg	d2d4598	2004-07-08 17:57:32 +0000	[diff] [blame]	230	# define PyUnicode_AsEncodedObject PyUnicodeUCS4_AsEncodedObject
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	231	# define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString
				232	# define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String
				233	# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	234	# define PyUnicode_AsUTF32String PyUnicodeUCS4_AsUTF32String
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	235	# define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String
				236	# define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String
				237	# define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
				238	# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString
				239	# define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar
				240	# define PyUnicode_Compare PyUnicodeUCS4_Compare
				241	# define PyUnicode_Concat PyUnicodeUCS4_Concat
Walter Dörwald	1ab8330	2007-05-18 17:15:44 +0000	[diff] [blame]	242	# define PyUnicode_Append PyUnicodeUCS4_Append
				243	# define PyUnicode_AppendAndDel PyUnicodeUCS4_AppendAndDel
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	244	# define PyUnicode_Contains PyUnicodeUCS4_Contains
				245	# define PyUnicode_Count PyUnicodeUCS4_Count
				246	# define PyUnicode_Decode PyUnicodeUCS4_Decode
				247	# define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
				248	# define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
				249	# define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
Guido van Rossum	00bc0e0	2007-10-15 02:52:41 +0000	[diff] [blame]	250	# define PyUnicode_DecodeFSDefault PyUnicodeUCS4_DecodeFSDefault
Christian Heimes	5894ba7	2007-11-04 11:43:14 +0000	[diff] [blame]	251	# define PyUnicode_DecodeFSDefaultAndSize PyUnicodeUCS4_DecodeFSDefaultAndSize
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	252	# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	253	# define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32
				254	# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	255	# define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
Walter Dörwald	6965203	2004-09-07 20:24:22 +0000	[diff] [blame]	256	# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	257	# define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
Walter Dörwald	6965203	2004-09-07 20:24:22 +0000	[diff] [blame]	258	# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	259	# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape
				260	# define PyUnicode_Encode PyUnicodeUCS4_Encode
				261	# define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII
				262	# define PyUnicode_EncodeCharmap PyUnicodeUCS4_EncodeCharmap
				263	# define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal
				264	# define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1
				265	# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	266	# define PyUnicode_EncodeUTF32 PyUnicodeUCS4_EncodeUTF32
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	267	# define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16
				268	# define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8
				269	# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape
				270	# define PyUnicode_Find PyUnicodeUCS4_Find
				271	# define PyUnicode_Format PyUnicodeUCS4_Format
				272	# define PyUnicode_FromEncodedObject PyUnicodeUCS4_FromEncodedObject
				273	# define PyUnicode_FromObject PyUnicodeUCS4_FromObject
Marc-André Lemburg	9c329de	2002-08-12 08:19:10 +0000	[diff] [blame]	274	# define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	275	# define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
Walter Dörwald	acaa5a1	2007-05-05 12:00:46 +0000	[diff] [blame]	276	# define PyUnicode_FromString PyUnicodeUCS4_FromString
Walter Dörwald	d203431	2007-05-18 16:29:38 +0000	[diff] [blame]	277	# define PyUnicode_FromStringAndSize PyUnicodeUCS4_FromStringAndSize
				278	# define PyUnicode_FromFormatV PyUnicodeUCS4_FromFormatV
				279	# define PyUnicode_FromFormat PyUnicodeUCS4_FromFormat
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	280	# define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
				281	# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
				282	# define PyUnicode_GetMax PyUnicodeUCS4_GetMax
				283	# define PyUnicode_GetSize PyUnicodeUCS4_GetSize
Martin v. Löwis	4738340	2007-08-15 07:32:56 +0000	[diff] [blame]	284	# define PyUnicode_IsIdentifier PyUnicodeUCS4_IsIdentifier
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	285	# define PyUnicode_Join PyUnicodeUCS4_Join
Thomas Wouters	477c8d5	2006-05-27 19:21:47 +0000	[diff] [blame]	286	# define PyUnicode_Partition PyUnicodeUCS4_Partition
				287	# define PyUnicode_RPartition PyUnicodeUCS4_RPartition
				288	# define PyUnicode_RSplit PyUnicodeUCS4_RSplit
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	289	# define PyUnicode_Replace PyUnicodeUCS4_Replace
				290	# define PyUnicode_Resize PyUnicodeUCS4_Resize
Thomas Wouters	00ee7ba	2006-08-21 19:07:27 +0000	[diff] [blame]	291	# define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	292	# define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding
				293	# define PyUnicode_Split PyUnicodeUCS4_Split
				294	# define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines
				295	# define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch
				296	# define PyUnicode_Translate PyUnicodeUCS4_Translate
				297	# define PyUnicode_TranslateCharmap PyUnicodeUCS4_TranslateCharmap
				298	# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString
				299	# define _PyUnicode_Fini _PyUnicodeUCS4_Fini
				300	# define _PyUnicode_Init _PyUnicodeUCS4_Init
				301	# define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha
				302	# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit
				303	# define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit
				304	# define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak
				305	# define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase
				306	# define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric
				307	# define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase
Martin v. Löwis	13c3e38	2007-08-14 22:37:03 +0000	[diff] [blame]	308	# define _PyUnicode_IsXidStart _PyUnicodeUCS4_IsXidStart
				309	# define _PyUnicode_IsXidContinue _PyUnicodeUCS4_IsXidContinue
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	310	# define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase
				311	# define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace
				312	# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit
				313	# define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit
				314	# define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase
				315	# define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric
				316	# define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase
				317	# define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase
				318
				319
				320	#endif
				321
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	322	/* --- Internal Unicode Operations ---------------------------------------- */
				323
				324	/* If you want Python to use the compiler's wctype.h functions instead
Barry Warsaw	51ac580	2000-03-20 16:36:48 +0000	[diff] [blame]	325	of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS or
Raymond Hettinger	57341c3	2004-10-31 05:46:59 +0000	[diff] [blame]	326	configure Python using --with-wctype-functions. This reduces the
Barry Warsaw	51ac580	2000-03-20 16:36:48 +0000	[diff] [blame]	327	interpreter's code size. */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	328
				329	#if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)
				330
Marc-André Lemburg	5e6007c	2001-09-19 11:21:03 +0000	[diff] [blame]	331	#include <wctype.h>
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	332
				333	#define Py_UNICODE_ISSPACE(ch) iswspace(ch)
				334
				335	#define Py_UNICODE_ISLOWER(ch) iswlower(ch)
				336	#define Py_UNICODE_ISUPPER(ch) iswupper(ch)
				337	#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
				338	#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
				339
				340	#define Py_UNICODE_TOLOWER(ch) towlower(ch)
				341	#define Py_UNICODE_TOUPPER(ch) towupper(ch)
				342	#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
				343
				344	#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
				345	#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
				346	#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
				347
				348	#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
				349	#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
				350	#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
				351
Marc-André Lemburg	f03e741	2000-07-05 09:45:59 +0000	[diff] [blame]	352	#define Py_UNICODE_ISALPHA(ch) iswalpha(ch)
				353
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	354	#else
				355
				356	#define Py_UNICODE_ISSPACE(ch) _PyUnicode_IsWhitespace(ch)
				357
				358	#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
				359	#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
				360	#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
				361	#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
				362
				363	#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
				364	#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
				365	#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
				366
				367	#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
				368	#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
				369	#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
				370
				371	#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
				372	#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
				373	#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
				374
Marc-André Lemburg	f03e741	2000-07-05 09:45:59 +0000	[diff] [blame]	375	#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	376
Marc-André Lemburg	f03e741	2000-07-05 09:45:59 +0000	[diff] [blame]	377	#endif
Marc-André Lemburg	a9c103b	2000-07-03 10:52:13 +0000	[diff] [blame]	378
				379	#define Py_UNICODE_ISALNUM(ch) \
				380	(Py_UNICODE_ISALPHA(ch) || \
				381	Py_UNICODE_ISDECIMAL(ch) || \
				382	Py_UNICODE_ISDIGIT(ch) || \
				383	Py_UNICODE_ISNUMERIC(ch))
				384
Thomas Wouters	4d70c3d	2006-06-08 14:42:34 +0000	[diff] [blame]	385	#define Py_UNICODE_COPY(target, source, length) \
				386	Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE))
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	387
				388	#define Py_UNICODE_FILL(target, value, length) do\
Thomas Wouters	477c8d5	2006-05-27 19:21:47 +0000	[diff] [blame]	389	{Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
				390	for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\
				391	} while (0)
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	392
Thomas Wouters	477c8d5	2006-05-27 19:21:47 +0000	[diff] [blame]	393	/* check if substring matches at given offset. the offset must be
				394	valid, and the substring must not be empty */
				395	#define Py_UNICODE_MATCH(string, offset, substring) \
				396	((*((string)->str + (offset)) == *((substring)->str)) && \
				397	((*((string)->str + (offset) + (substring)->length-1) == *((substring)->str + (substring)->length-1))) && \
				398	!memcmp((string)->str + (offset), (substring)->str, (substring)->length*sizeof(Py_UNICODE)))
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	399
Barry Warsaw	51ac580	2000-03-20 16:36:48 +0000	[diff] [blame]	400	#ifdef __cplusplus
				401	extern "C" {
				402	#endif
				403
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	404	/* --- Unicode Type ------------------------------------------------------- */
				405
				406	typedef struct {
				407	PyObject_HEAD
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	408	Py_ssize_t length; /* Length of raw Unicode data in buffer */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	409	Py_UNICODE *str; /* Raw Unicode buffer */
				410	long hash; /* Hash value; -1 if not set */
Walter Dörwald	1680713	2007-05-25 13:52:07 +0000	[diff] [blame]	411	int state; /* != 0 if interned. In this case the two
				412	* references from the dictionary to this object
				413	* are not counted in ob_refcnt. */
Marc-André Lemburg	bff879c	2000-08-03 18:46:08 +0000	[diff] [blame]	414	PyObject *defenc; /* (Default) Encoded version as Python
				415	string, or NULL; this is used for
				416	implementing the buffer protocol */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	417	} PyUnicodeObject;
				418
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	419	PyAPI_DATA(PyTypeObject) PyUnicode_Type;
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	420
Walter Dörwald	1680713	2007-05-25 13:52:07 +0000	[diff] [blame]	421	#define SSTATE_NOT_INTERNED 0
				422	#define SSTATE_INTERNED_MORTAL 1
				423	#define SSTATE_INTERNED_IMMORTAL 2
				424
Thomas Wouters	27d517b	2007-02-25 20:39:11 +0000	[diff] [blame]	425	#define PyUnicode_Check(op) \
Martin v. Löwis	9f2e346	2007-07-21 17:22:18 +0000	[diff] [blame]	426	PyType_FastSubclass(Py_Type(op), Py_TPFLAGS_UNICODE_SUBCLASS)
				427	#define PyUnicode_CheckExact(op) (Py_Type(op) == &PyUnicode_Type)
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	428
				429	/* Fast access macros */
				430	#define PyUnicode_GET_SIZE(op) \
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	431	(assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length))
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	432	#define PyUnicode_GET_DATA_SIZE(op) \
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	433	(assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE)))
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	434	#define PyUnicode_AS_UNICODE(op) \
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	435	(assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->str))
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	436	#define PyUnicode_AS_DATA(op) \
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	437	(assert(PyUnicode_Check(op)),((const char *)((PyUnicodeObject *)(op))->str))
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	438
				439	/* --- Constants ---------------------------------------------------------- */
				440
				441	/* This Unicode character will be used as replacement character during
				442	decoding if the errors argument is set to "replace". Note: the
				443	Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
				444	Unicode 3.0. */
				445
				446	#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)
				447
				448	/* === Public API ========================================================= */
				449
				450	/* --- Plain Py_UNICODE --------------------------------------------------- */
				451
				452	/* Create a Unicode Object from the Py_UNICODE buffer u of the given
Marc-André Lemburg	8155e0e	2001-04-23 14:44:21 +0000	[diff] [blame]	453	size.
				454
				455	u may be NULL which causes the contents to be undefined. It is the
				456	user's responsibility to fill in the needed data afterwards. Note
				457	that modifying the Unicode object contents after construction is
				458	only allowed if u was set to NULL.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	459
				460	The buffer is copied into the new object. */
				461
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	462	PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	463	const Py_UNICODE *u, /* Unicode buffer */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	464	Py_ssize_t size /* size of buffer */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	465	);
				466
Walter Dörwald	d203431	2007-05-18 16:29:38 +0000	[diff] [blame]	467	/* Similar to PyUnicode_FromUnicode(), but u points to Latin-1 encoded bytes */
				468	PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
				469	const char *u, /* char buffer */
				470	Py_ssize_t size /* size of buffer */
				471	);
				472
Walter Dörwald	acaa5a1	2007-05-05 12:00:46 +0000	[diff] [blame]	473	/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
				474	Latin-1 encoded bytes */
				475	PyAPI_FUNC(PyObject*) PyUnicode_FromString(
				476	const char *u /* string */
				477	);
				478
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	479	/* Return a read-only pointer to the Unicode object's internal
				480	Py_UNICODE buffer. */
				481
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	482	PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	483	PyObject *unicode /* Unicode object */
				484	);
				485
				486	/* Get the length of the Unicode object. */
				487
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	488	PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	489	PyObject *unicode /* Unicode object */
				490	);
				491
Martin v. Löwis	ce9b5a5	2001-06-27 06:28:56 +0000	[diff] [blame]	492	/* Get the maximum ordinal for a Unicode character. */
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	493	PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
Martin v. Löwis	ce9b5a5	2001-06-27 06:28:56 +0000	[diff] [blame]	494
Guido van Rossum	52c2359	2000-04-10 13:41:41 +0000	[diff] [blame]	495	/* Resize an already allocated Unicode object to the new size length.
				496
				497	*unicode is modified to point to the new (resized) object and 0
				498	returned on success.
				499
				500	This API may only be called by the function which also called the
				501	Unicode constructor. The refcount on the object must be 1. Otherwise,
				502	an error is returned.
				503
				504	Error handling is implemented as follows: an exception is set, -1
				505	is returned and *unicode left untouched.
				506
				507	*/
				508
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	509	PyAPI_FUNC(int) PyUnicode_Resize(
Guido van Rossum	52c2359	2000-04-10 13:41:41 +0000	[diff] [blame]	510	PyObject **unicode, /* Pointer to the Unicode object */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	511	Py_ssize_t length /* New length */
Guido van Rossum	52c2359	2000-04-10 13:41:41 +0000	[diff] [blame]	512	);
				513
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	514	/* Coerce obj to a Unicode object and return a reference with
				515	incremented refcount.
				516
				517	Coercion is done in the following way:
				518
Guido van Rossum	b8c65bc	2001-10-19 02:01:31 +0000	[diff] [blame]	519	1. String and other char buffer compatible objects are decoded
Fred Drake	cb093fe	2000-05-09 19:51:53 +0000	[diff] [blame]	520	under the assumptions that they contain data using the current
				521	default encoding. Decoding is done in "strict" mode.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	522
Guido van Rossum	b8c65bc	2001-10-19 02:01:31 +0000	[diff] [blame]	523	2. All other objects (including Unicode objects) raise an
				524	exception.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	525
				526	The API returns NULL in case of an error. The caller is responsible
				527	for decref'ing the returned objects.
				528
				529	*/
				530
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	531	PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
Marc-André Lemburg	5a5c81a	2000-07-07 13:46:42 +0000	[diff] [blame]	532	register PyObject *obj, /* Object */
				533	const char *encoding, /* encoding */
				534	const char *errors /* error handling */
				535	);
				536
Guido van Rossum	b8c65bc	2001-10-19 02:01:31 +0000	[diff] [blame]	537	/* Coerce obj to a Unicode object and return a reference with
Marc-André Lemburg	5a5c81a	2000-07-07 13:46:42 +0000	[diff] [blame]	538	incremented refcount.
Guido van Rossum	b8c65bc	2001-10-19 02:01:31 +0000	[diff] [blame]	539
				540	Unicode objects are passed back as-is (subclasses are converted to
				541	true Unicode objects), all other objects are delegated to
				542	PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
				543	using the default encoding as basis for decoding the object.
Marc-André Lemburg	5a5c81a	2000-07-07 13:46:42 +0000	[diff] [blame]	544
				545	The API returns NULL in case of an error. The caller is responsible
				546	for decref'ing the returned objects.
				547
				548	*/
				549
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	550	PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	551	register PyObject *obj /* Object */
				552	);
				553
Walter Dörwald	d203431	2007-05-18 16:29:38 +0000	[diff] [blame]	554	PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(const char *, va_list);
				555	PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(const char *, ...);
				556
Walter Dörwald	1680713	2007-05-25 13:52:07 +0000	[diff] [blame]	557	PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
				558	PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
				559	PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(const char *);
				560	PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void);
				561
				562	/* Use only if you know it's a string */
				563	#define PyUnicode_CHECK_INTERNED(op) (((PyUnicodeObject *)(op))->state)
				564
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	565	/* --- wchar_t support for platforms which support it --------------------- */
				566
				567	#ifdef HAVE_WCHAR_H
				568
				569	/* Create a Unicode Object from the wchar_t buffer w of the given
				570	size.
				571
				572	The buffer is copied into the new object. */
				573
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	574	PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	575	register const wchar_t *w, /* wchar_t buffer */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	576	Py_ssize_t size /* size of buffer */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	577	);
				578
Marc-André Lemburg	a9cadcd	2004-11-22 13:02:31 +0000	[diff] [blame]	579	/* Copies the Unicode Object contents into the wchar_t buffer w. At
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	580	most size wchar_t characters are copied.
				581
Marc-André Lemburg	a9cadcd	2004-11-22 13:02:31 +0000	[diff] [blame]	582	Note that the resulting wchar_t string may or may not be
				583	0-terminated. It is the responsibility of the caller to make sure
				584	that the wchar_t string is 0-terminated in case this is required by
				585	the application.
				586
				587	Returns the number of wchar_t characters copied (excluding a
				588	possibly trailing 0-termination character) or -1 in case of an
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	589	error. */
				590
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	591	PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	592	PyUnicodeObject *unicode, /* Unicode object */
				593	register wchar_t *w, /* wchar_t buffer */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	594	Py_ssize_t size /* size of buffer */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	595	);
				596
				597	#endif
				598
Marc-André Lemburg	cc8764c	2002-08-11 12:23:04 +0000	[diff] [blame]	599	/* --- Unicode ordinals --------------------------------------------------- */
				600
				601	/* Create a Unicode Object from the given Unicode code point ordinal.
				602
				603	The ordinal must be in range(0x10000) on narrow Python builds
				604	(UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
				605	raised in case it is not.
				606
				607	*/
				608
Marc-André Lemburg	9c329de	2002-08-12 08:19:10 +0000	[diff] [blame]	609	PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
Marc-André Lemburg	cc8764c	2002-08-11 12:23:04 +0000	[diff] [blame]	610
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	611	/* === Builtin Codecs =====================================================
				612
				613	Many of these APIs take two arguments encoding and errors. These
				614	parameters encoding and errors have the same semantics as the ones
				615	of the builtin unicode() API.
				616
Fred Drake	cb093fe	2000-05-09 19:51:53 +0000	[diff] [blame]	617	Setting encoding to NULL causes the default encoding to be used.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	618
				619	Error handling is set by errors which may also be set to NULL
				620	meaning to use the default handling defined for the codec. Default
				621	error handling for all builtin codecs is "strict" (ValueErrors are
				622	raised).
				623
				624	The codecs all use a similar interface. Only deviations from the
				625	generic ones are documented.
				626
				627	*/
				628
Fred Drake	cb093fe	2000-05-09 19:51:53 +0000	[diff] [blame]	629	/* --- Manage the default encoding ---------------------------------------- */
				630
Jeremy Hylton	3ce4538	2001-07-30 22:34:24 +0000	[diff] [blame]	631	/* Return a Python string holding the default encoded value of the
				632	Unicode object.
				633
				634	The resulting string is cached in the Unicode object for subsequent
				635	usage by this function. The cached version is needed to implement
				636	the character buffer interface and will live (at least) as long as
				637	the Unicode object itself.
				638
				639	The refcount of the string is not incremented.
				640
				641	* Exported for internal use by the interpreter only !!! *
				642
				643	*/
				644
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	645	PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
Jeremy Hylton	3ce4538	2001-07-30 22:34:24 +0000	[diff] [blame]	646	PyObject *, const char *);
				647
Guido van Rossum	00bc0e0	2007-10-15 02:52:41 +0000	[diff] [blame]	648	/* Decode a null-terminated string using Py_FileSystemDefaultEncoding.
				649
				650	The encoding is used if it is supported by one of the built-in codecs
				651	(i.e., UTF-8, UTF-16, UTF-32, Latin-1 or MBCS); otherwise the function
				652	falls back to UTF-8 and replaces invalid characters with '?'.
				653
				654	The function is intended to be used for paths and file names only
				655	during bootstrapping process where the codecs are not set up.
				656	*/
				657
				658	PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
				659	const char *s /* encoded string */
				660	);
				661
Christian Heimes	5894ba7	2007-11-04 11:43:14 +0000	[diff] [blame]	662	PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
				663	const char *s, /* encoded string */
				664	Py_ssize_t size /* size */
				665	);
				666
				667
Guido van Rossum	7d1df6c	2007-08-29 13:53:23 +0000	[diff] [blame]	668	/* Return a char* holding the UTF-8 encoded value of the
				669	Unicode object.
				670
				671	DEPRECATED: use PyUnicode_AsStringAndSize() instead.
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	672	*/
				673
Guido van Rossum	7d1df6c	2007-08-29 13:53:23 +0000	[diff] [blame]	674	PyAPI_FUNC(char *) PyUnicode_AsStringAndSize(PyObject *, Py_ssize_t *);
				675
				676	/* Returns the UTF-8 encoding, and its size.
				677
				678	If the output argument is NULL, no size is stored.
				679	*/
				680
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	681	PyAPI_FUNC(char *) PyUnicode_AsString(PyObject *);
				682
Guido van Rossum	7d1df6c	2007-08-29 13:53:23 +0000	[diff] [blame]	683	/* Returns the UTF-8 encoding.
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	684
Guido van Rossum	7d1df6c	2007-08-29 13:53:23 +0000	[diff] [blame]	685	This is equivalent to PyUnicode_AsStringAndSize(x, NULL).
Fred Drake	cb093fe	2000-05-09 19:51:53 +0000	[diff] [blame]	686
Fred Drake	cb093fe	2000-05-09 19:51:53 +0000	[diff] [blame]	687	*/
				688
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	689	PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
Fred Drake	cb093fe	2000-05-09 19:51:53 +0000	[diff] [blame]	690
				691	/* Sets the currently active default encoding.
				692
				693	Returns 0 on success, -1 in case of an error.
				694
				695	*/
				696
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	697	PyAPI_FUNC(int) PyUnicode_SetDefaultEncoding(
Fred Drake	cb093fe	2000-05-09 19:51:53 +0000	[diff] [blame]	698	const char *encoding /* Encoding name in standard form */
				699	);
				700
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	701	/* --- Generic Codecs ----------------------------------------------------- */
				702
				703	/* Create a Unicode object by decoding the encoded string s of the
				704	given size. */
				705
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	706	PyAPI_FUNC(PyObject*) PyUnicode_Decode(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	707	const char *s, /* encoded string */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	708	Py_ssize_t size, /* size of buffer */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	709	const char *encoding, /* encoding */
				710	const char *errors /* error handling */
				711	);
				712
				713	/* Encodes a Py_UNICODE buffer of the given size and returns a
				714	Python string object. */
				715
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	716	PyAPI_FUNC(PyObject*) PyUnicode_Encode(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	717	const Py_UNICODE *s, /* Unicode char buffer */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	718	Py_ssize_t size, /* number of Py_UNICODE chars to encode */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	719	const char *encoding, /* encoding */
				720	const char *errors /* error handling */
				721	);
				722
Marc-André Lemburg	d2d4598	2004-07-08 17:57:32 +0000	[diff] [blame]	723	/* Encodes a Unicode object and returns the result as Python
				724	object. */
				725
				726	PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
				727	PyObject *unicode, /* Unicode object */
				728	const char *encoding, /* encoding */
				729	const char *errors /* error handling */
				730	);
				731
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	732	/* Encodes a Unicode object and returns the result as Python string
				733	object. */
				734
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	735	PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	736	PyObject *unicode, /* Unicode object */
				737	const char *encoding, /* encoding */
				738	const char *errors /* error handling */
				739	);
				740
Thomas Wouters	73e5a5b	2006-06-08 15:35:45 +0000	[diff] [blame]	741	PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
				742	PyObject* string /* 256 character map */
				743	);
				744
				745
Marc-André Lemburg	c60e6f7	2001-09-20 10:35:46 +0000	[diff] [blame]	746	/* --- UTF-7 Codecs ------------------------------------------------------- */
				747
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	748	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
Marc-André Lemburg	c60e6f7	2001-09-20 10:35:46 +0000	[diff] [blame]	749	const char *string, /* UTF-7 encoded string */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	750	Py_ssize_t length, /* size of string */
Marc-André Lemburg	c60e6f7	2001-09-20 10:35:46 +0000	[diff] [blame]	751	const char *errors /* error handling */
				752	);
				753
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	754	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
Marc-André Lemburg	c60e6f7	2001-09-20 10:35:46 +0000	[diff] [blame]	755	const Py_UNICODE *data, /* Unicode char buffer */
Neal Norwitz	d78f6cf	2007-08-08 04:49:37 +0000	[diff] [blame]	756	Py_ssize_t length, /* number of Py_UNICODE chars to encode */
Marc-André Lemburg	c60e6f7	2001-09-20 10:35:46 +0000	[diff] [blame]	757	int encodeSetO, /* force the encoder to encode characters in
				758	Set O, as described in RFC2152 */
				759	int encodeWhiteSpace, /* force the encoder to encode space, tab,
				760	carriage return and linefeed characters */
				761	const char *errors /* error handling */
				762	);
				763
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	764	/* --- UTF-8 Codecs ------------------------------------------------------- */
				765
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	766	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	767	const char *string, /* UTF-8 encoded string */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	768	Py_ssize_t length, /* size of string */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	769	const char *errors /* error handling */
				770	);
				771
Walter Dörwald	6965203	2004-09-07 20:24:22 +0000	[diff] [blame]	772	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
				773	const char *string, /* UTF-8 encoded string */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	774	Py_ssize_t length, /* size of string */
Walter Dörwald	6965203	2004-09-07 20:24:22 +0000	[diff] [blame]	775	const char *errors, /* error handling */
Neal Norwitz	d78f6cf	2007-08-08 04:49:37 +0000	[diff] [blame]	776	Py_ssize_t *consumed /* bytes consumed */
Walter Dörwald	6965203	2004-09-07 20:24:22 +0000	[diff] [blame]	777	);
				778
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	779	PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	780	PyObject *unicode /* Unicode object */
				781	);
				782
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	783	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	784	const Py_UNICODE *data, /* Unicode char buffer */
Neal Norwitz	d78f6cf	2007-08-08 04:49:37 +0000	[diff] [blame]	785	Py_ssize_t length, /* number of Py_UNICODE chars to encode */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	786	const char *errors /* error handling */
				787	);
				788
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	789	/* --- UTF-32 Codecs ------------------------------------------------------ */
				790
				791	/* Decodes length bytes from a UTF-32 encoded buffer string and returns
				792	the corresponding Unicode object.
				793
				794	errors (if non-NULL) defines the error handling. It defaults
				795	to "strict".
				796
				797	If byteorder is non-NULL, the decoder starts decoding using the
				798	given byte order:
				799
				800	*byteorder == -1: little endian
				801	*byteorder == 0: native order
				802	*byteorder == 1: big endian
				803
				804	In native mode, the first four bytes of the stream are checked for a
				805	BOM mark. If found, the BOM mark is analysed, the byte order
				806	adjusted and the BOM skipped. In the other modes, no BOM mark
				807	interpretation is done. After completion, *byteorder is set to the
				808	current byte order at the end of input data.
				809
				810	If byteorder is NULL, the codec starts in native order mode.
				811
				812	*/
				813
				814	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
				815	const char *string, /* UTF-32 encoded string */
				816	Py_ssize_t length, /* size of string */
				817	const char *errors, /* error handling */
				818	int *byteorder /* pointer to byteorder to use
				819	0=native;-1=LE,1=BE; updated on
				820	exit */
				821	);
				822
				823	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
				824	const char *string, /* UTF-32 encoded string */
				825	Py_ssize_t length, /* size of string */
				826	const char *errors, /* error handling */
				827	int *byteorder, /* pointer to byteorder to use
				828	0=native;-1=LE,1=BE; updated on
				829	exit */
				830	Py_ssize_t *consumed /* bytes consumed */
				831	);
				832
				833	/* Returns a Python string using the UTF-32 encoding in native byte
				834	order. The string always starts with a BOM mark. */
				835
				836	PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
				837	PyObject *unicode /* Unicode object */
				838	);
				839
				840	/* Returns a Python string object holding the UTF-32 encoded value of
				841	the Unicode data.
				842
				843	If byteorder is not 0, output is written according to the following
				844	byte order:
				845
				846	byteorder == -1: little endian
				847	byteorder == 0: native byte order (writes a BOM mark)
				848	byteorder == 1: big endian
				849
				850	If byteorder is 0, the output string will always start with the
				851	Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
				852	prepended.
				853
				854	*/
				855
				856	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
				857	const Py_UNICODE *data, /* Unicode char buffer */
				858	Py_ssize_t length, /* number of Py_UNICODE chars to encode */
				859	const char *errors, /* error handling */
				860	int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
				861	);
				862
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	863	/* --- UTF-16 Codecs ------------------------------------------------------ */
				864
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	865	/* Decodes length bytes from a UTF-16 encoded buffer string and returns
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	866	the corresponding Unicode object.
				867
				868	errors (if non-NULL) defines the error handling. It defaults
				869	to "strict".
				870
				871	If byteorder is non-NULL, the decoder starts decoding using the
				872	given byte order:
				873
				874	*byteorder == -1: little endian
				875	*byteorder == 0: native order
				876	*byteorder == 1: big endian
				877
Marc-André Lemburg	489b56e	2001-05-21 20:30:15 +0000	[diff] [blame]	878	In native mode, the first two bytes of the stream are checked for a
				879	BOM mark. If found, the BOM mark is analysed, the byte order
				880	adjusted and the BOM skipped. In the other modes, no BOM mark
				881	interpretation is done. After completion, *byteorder is set to the
				882	current byte order at the end of input data.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	883
				884	If byteorder is NULL, the codec starts in native order mode.
				885
				886	*/
				887
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	888	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	889	const char *string, /* UTF-16 encoded string */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	890	Py_ssize_t length, /* size of string */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	891	const char *errors, /* error handling */
				892	int *byteorder /* pointer to byteorder to use
				893	0=native;-1=LE,1=BE; updated on
				894	exit */
				895	);
				896
Walter Dörwald	6965203	2004-09-07 20:24:22 +0000	[diff] [blame]	897	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
				898	const char *string, /* UTF-16 encoded string */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	899	Py_ssize_t length, /* size of string */
Walter Dörwald	6965203	2004-09-07 20:24:22 +0000	[diff] [blame]	900	const char *errors, /* error handling */
				901	int *byteorder, /* pointer to byteorder to use
				902	0=native;-1=LE,1=BE; updated on
				903	exit */
Neal Norwitz	d78f6cf	2007-08-08 04:49:37 +0000	[diff] [blame]	904	Py_ssize_t *consumed /* bytes consumed */
Walter Dörwald	6965203	2004-09-07 20:24:22 +0000	[diff] [blame]	905	);
				906
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	907	/* Returns a Python string using the UTF-16 encoding in native byte
				908	order. The string always starts with a BOM mark. */
				909
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	910	PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	911	PyObject *unicode /* Unicode object */
				912	);
				913
				914	/* Returns a Python string object holding the UTF-16 encoded value of
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	915	the Unicode data.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	916
				917	If byteorder is not 0, output is written according to the following
				918	byte order:
				919
				920	byteorder == -1: little endian
				921	byteorder == 0: native byte order (writes a BOM mark)
				922	byteorder == 1: big endian
				923
				924	If byteorder is 0, the output string will always start with the
				925	Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
				926	prepended.
				927
				928	Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
				929	UCS-2. This trick makes it possible to add full UTF-16 capabilities
Thomas Wouters	7e47402	2000-07-16 12:04:32 +0000	[diff] [blame]	930	at a later point without compromising the APIs.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	931
				932	*/
				933
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	934	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	935	const Py_UNICODE *data, /* Unicode char buffer */
Neal Norwitz	d78f6cf	2007-08-08 04:49:37 +0000	[diff] [blame]	936	Py_ssize_t length, /* number of Py_UNICODE chars to encode */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	937	const char *errors, /* error handling */
				938	int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
				939	);
				940
				941	/* --- Unicode-Escape Codecs ---------------------------------------------- */
				942
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	943	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	944	const char *string, /* Unicode-Escape encoded string */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	945	Py_ssize_t length, /* size of string */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	946	const char *errors /* error handling */
				947	);
				948
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	949	PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	950	PyObject *unicode /* Unicode object */
				951	);
				952
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	953	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	954	const Py_UNICODE *data, /* Unicode char buffer */
Neal Norwitz	d78f6cf	2007-08-08 04:49:37 +0000	[diff] [blame]	955	Py_ssize_t length /* Number of Py_UNICODE chars to encode */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	956	);
				957
				958	/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
				959
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	960	PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	961	const char *string, /* Raw-Unicode-Escape encoded string */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	962	Py_ssize_t length, /* size of string */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	963	const char *errors /* error handling */
				964	);
				965
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	966	PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	967	PyObject *unicode /* Unicode object */
				968	);
				969
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	970	PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	971	const Py_UNICODE *data, /* Unicode char buffer */
Neal Norwitz	d78f6cf	2007-08-08 04:49:37 +0000	[diff] [blame]	972	Py_ssize_t length /* Number of Py_UNICODE chars to encode */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	973	);
				974
Walter Dörwald	a47d1c0	2005-08-30 10:23:14 +0000	[diff] [blame]	975	/* --- Unicode Internal Codec ---------------------------------------------
				976
				977	Only for internal use in _codecsmodule.c */
				978
				979	PyObject *_PyUnicode_DecodeUnicodeInternal(
				980	const char *string,
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	981	Py_ssize_t length,
Walter Dörwald	a47d1c0	2005-08-30 10:23:14 +0000	[diff] [blame]	982	const char *errors
				983	);
				984
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	985	/* --- Latin-1 Codecs -----------------------------------------------------
				986
				987	Note: Latin-1 corresponds to the first 256 Unicode ordinals.
				988
				989	*/
				990
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	991	PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	992	const char *string, /* Latin-1 encoded string */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	993	Py_ssize_t length, /* size of string */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	994	const char *errors /* error handling */
				995	);
				996
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	997	PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	998	PyObject *unicode /* Unicode object */
				999	);
				1000
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1001	PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1002	const Py_UNICODE *data, /* Unicode char buffer */
Neal Norwitz	d78f6cf	2007-08-08 04:49:37 +0000	[diff] [blame]	1003	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1004	const char *errors /* error handling */
				1005	);
				1006
				1007	/* --- ASCII Codecs -------------------------------------------------------
				1008
				1009	Only 7-bit ASCII data is accepted. All other codes generate errors.
				1010
				1011	*/
				1012
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1013	PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1014	const char *string, /* ASCII encoded string */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1015	Py_ssize_t length, /* size of string */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1016	const char *errors /* error handling */
				1017	);
				1018
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1019	PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1020	PyObject *unicode /* Unicode object */
				1021	);
				1022
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1023	PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1024	const Py_UNICODE *data, /* Unicode char buffer */
Neal Norwitz	d78f6cf	2007-08-08 04:49:37 +0000	[diff] [blame]	1025	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1026	const char *errors /* error handling */
				1027	);
				1028
				1029	/* --- Character Map Codecs -----------------------------------------------
				1030
				1031	This codec uses mappings to encode and decode characters.
				1032
				1033	Decoding mappings must map single string characters to single
				1034	Unicode characters, integers (which are then interpreted as Unicode
				1035	ordinals) or None (meaning "undefined mapping" and causing an
				1036	error).
				1037
				1038	Encoding mappings must map single Unicode characters to single
				1039	string characters, integers (which are then interpreted as Latin-1
				1040	ordinals) or None (meaning "undefined mapping" and causing an
				1041	error).
				1042
				1043	If a character lookup fails with a LookupError, the character is
				1044	copied as-is meaning that its ordinal value will be interpreted as
				1045	Unicode or Latin-1 ordinal resp. Because of this, mappings only need
				1046	to contain those mappings which map characters to different code
				1047	points.
				1048
				1049	*/
				1050
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1051	PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1052	const char *string, /* Encoded string */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1053	Py_ssize_t length, /* size of string */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1054	PyObject *mapping, /* character mapping
				1055	(char ordinal -> unicode ordinal) */
				1056	const char *errors /* error handling */
				1057	);
				1058
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1059	PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1060	PyObject *unicode, /* Unicode object */
				1061	PyObject *mapping /* character mapping
				1062	(unicode ordinal -> char ordinal) */
				1063	);
				1064
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1065	PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1066	const Py_UNICODE *data, /* Unicode char buffer */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1067	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1068	PyObject *mapping, /* character mapping
				1069	(unicode ordinal -> char ordinal) */
				1070	const char *errors /* error handling */
				1071	);
				1072
				1073	/* Translate a Py_UNICODE buffer of the given length by applying a
				1074	character mapping table to it and return the resulting Unicode
				1075	object.
				1076
				1077	The mapping table must map Unicode ordinal integers to Unicode
				1078	ordinal integers or None (causing deletion of the character).
				1079
				1080	Mapping tables may be dictionaries or sequences. Unmapped character
				1081	ordinals (ones which cause a LookupError) are left untouched and
				1082	are copied as-is.
				1083
				1084	*/
				1085
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1086	PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1087	const Py_UNICODE *data, /* Unicode char buffer */
Neal Norwitz	d78f6cf	2007-08-08 04:49:37 +0000	[diff] [blame]	1088	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1089	PyObject *table, /* Translate table */
				1090	const char *errors /* error handling */
				1091	);
				1092
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1093	#ifdef MS_WIN32
Guido van Rossum	24bdb04	2000-03-28 20:29:59 +0000	[diff] [blame]	1094
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1095	/* --- MBCS codecs for Windows -------------------------------------------- */
Guido van Rossum	24bdb04	2000-03-28 20:29:59 +0000	[diff] [blame]	1096
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1097	PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1098	const char *string, /* MBCS encoded string */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1099	Py_ssize_t length, /* size of string */
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1100	const char *errors /* error handling */
				1101	);
				1102
Thomas Wouters	0e3f591	2006-08-11 14:57:12 +0000	[diff] [blame]	1103	PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
				1104	const char *string, /* MBCS encoded string */
				1105	Py_ssize_t length, /* size of string */
				1106	const char *errors, /* error handling */
				1107	Py_ssize_t *consumed /* bytes consumed */
				1108	);
				1109
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1110	PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1111	PyObject *unicode /* Unicode object */
				1112	);
				1113
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1114	PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1115	const Py_UNICODE *data, /* Unicode char buffer */
Neal Norwitz	d78f6cf	2007-08-08 04:49:37 +0000	[diff] [blame]	1116	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1117	const char *errors /* error handling */
				1118	);
				1119
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1120	#endif /* MS_WIN32 */
Guido van Rossum	24bdb04	2000-03-28 20:29:59 +0000	[diff] [blame]	1121
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	1122	/* --- Decimal Encoder ---------------------------------------------------- */
				1123
				1124	/* Takes a Unicode string holding a decimal value and writes it into
				1125	an output buffer using standard ASCII digit codes.
				1126
				1127	The output buffer has to provide at least length+1 bytes of storage
				1128	area. The output string is 0-terminated.
				1129
				1130	The encoder converts whitespace to ' ', decimal characters to their
				1131	corresponding ASCII digit and all other Latin-1 characters except
				1132	\0 as-is. Characters outside this range (Unicode ordinals 1-256)
				1133	are treated as errors. This includes embedded NULL bytes.
				1134
				1135	Error handling is defined by the errors argument:
				1136
				1137	NULL or "strict": raise a ValueError
				1138	"ignore": ignore the wrong characters (these are not copied to the
				1139	output buffer)
				1140	"replace": replaces illegal characters with '?'
				1141
				1142	Returns 0 on success, -1 on failure.
				1143
				1144	*/
				1145
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1146	PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	1147	Py_UNICODE *s, /* Unicode buffer */
Neal Norwitz	d78f6cf	2007-08-08 04:49:37 +0000	[diff] [blame]	1148	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	1149	char *output, /* Output buffer; must have size >= length */
				1150	const char *errors /* error handling */
				1151	);
				1152
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1153	/* --- Methods & Slots ----------------------------------------------------
				1154
				1155	These are capable of handling Unicode objects and strings on input
				1156	(we refer to them as strings in the descriptions) and return
				1157	Unicode objects or integers as appropriate. */
				1158
				1159	/* Concat two strings giving a new Unicode string. */
				1160
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1161	PyAPI_FUNC(PyObject*) PyUnicode_Concat(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1162	PyObject *left, /* Left string */
				1163	PyObject *right /* Right string */
				1164	);
				1165
Walter Dörwald	1ab8330	2007-05-18 17:15:44 +0000	[diff] [blame]	1166	/* Concat two strings and put the result in *pleft
				1167	(sets *pleft to NULL on error) */
				1168
				1169	PyAPI_FUNC(void) PyUnicode_Append(
				1170	PyObject **pleft, /* Pointer to left string */
				1171	PyObject *right /* Right string */
				1172	);
				1173
				1174	/* Concat two strings, put the result in *pleft and drop the right object
				1175	(sets *pleft to NULL on error) */
				1176
				1177	PyAPI_FUNC(void) PyUnicode_AppendAndDel(
				1178	PyObject **pleft, /* Pointer to left string */
				1179	PyObject *right /* Right string */
				1180	);
				1181
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1182	/* Split a string giving a list of Unicode strings.
				1183
				1184	If sep is NULL, splitting will be done at all whitespace
				1185	substrings. Otherwise, splits occur at the given separator.
				1186
				1187	At most maxsplit splits will be done. If negative, no limit is set.
				1188
				1189	Separators are not included in the resulting list.
				1190
				1191	*/
				1192
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1193	PyAPI_FUNC(PyObject*) PyUnicode_Split(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1194	PyObject *s, /* String to split */
				1195	PyObject *sep, /* String separator */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1196	Py_ssize_t maxsplit /* Maxsplit count */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1197	);
				1198
				1199	/* Ditto, but split at line breaks.
				1200
				1201	CRLF is considered to be one line break. Line breaks are not
				1202	included in the resulting list. */
				1203
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1204	PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1205	PyObject *s, /* String to split */
Guido van Rossum	004d64f	2000-04-11 15:39:46 +0000	[diff] [blame]	1206	int keepends /* If true, line end markers are included */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1207	);
				1208
Thomas Wouters	477c8d5	2006-05-27 19:21:47 +0000	[diff] [blame]	1209	/* Partition a string using a given separator. */
				1210
				1211	PyAPI_FUNC(PyObject*) PyUnicode_Partition(
				1212	PyObject *s, /* String to partition */
				1213	PyObject *sep /* String separator */
				1214	);
				1215
				1216	/* Partition a string using a given separator, searching from the end of the
				1217	string. */
				1218
				1219	PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
				1220	PyObject *s, /* String to partition */
				1221	PyObject *sep /* String separator */
				1222	);
				1223
Hye-Shik Chang	3ae811b	2003-12-15 18:49:53 +0000	[diff] [blame]	1224	/* Split a string giving a list of Unicode strings.
				1225
				1226	If sep is NULL, splitting will be done at all whitespace
				1227	substrings. Otherwise, splits occur at the given separator.
				1228
				1229	At most maxsplit splits will be done. But unlike PyUnicode_Split
				1230	PyUnicode_RSplit splits from the end of the string. If negative,
				1231	no limit is set.
				1232
				1233	Separators are not included in the resulting list.
				1234
				1235	*/
				1236
				1237	PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
				1238	PyObject *s, /* String to split */
				1239	PyObject *sep, /* String separator */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1240	Py_ssize_t maxsplit /* Maxsplit count */
Hye-Shik Chang	3ae811b	2003-12-15 18:49:53 +0000	[diff] [blame]	1241	);
				1242
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1243	/* Translate a string by applying a character mapping table to it and
				1244	return the resulting Unicode object.
				1245
				1246	The mapping table must map Unicode ordinal integers to Unicode
				1247	ordinal integers or None (causing deletion of the character).
				1248
				1249	Mapping tables may be dictionaries or sequences. Unmapped character
				1250	ordinals (ones which cause a LookupError) are left untouched and
				1251	are copied as-is.
				1252
				1253	*/
				1254
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1255	PyAPI_FUNC(PyObject *) PyUnicode_Translate(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1256	PyObject *str, /* String */
				1257	PyObject *table, /* Translate table */
				1258	const char *errors /* error handling */
				1259	);
				1260
				1261	/* Join a sequence of strings using the given separator and return
				1262	the resulting Unicode string. */
				1263
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1264	PyAPI_FUNC(PyObject*) PyUnicode_Join(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1265	PyObject *separator, /* Separator string */
				1266	PyObject *seq /* Sequence object */
				1267	);
				1268
				1269	/* Return 1 if substr matches str[start:end] at the given tail end, 0
				1270	otherwise. */
				1271
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1272	PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1273	PyObject *str, /* String */
				1274	PyObject *substr, /* Prefix or Suffix string */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1275	Py_ssize_t start, /* Start index */
				1276	Py_ssize_t end, /* Stop index */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1277	int direction /* Tail end: -1 prefix, +1 suffix */
				1278	);
				1279
				1280	/* Return the first position of substr in str[start:end] using the
Marc-André Lemburg	4da6fd6	2002-05-29 11:33:13 +0000	[diff] [blame]	1281	given search direction or -1 if not found. -2 is returned in case
				1282	an error occurred and an exception is set. */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1283
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1284	PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1285	PyObject *str, /* String */
				1286	PyObject *substr, /* Substring to find */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1287	Py_ssize_t start, /* Start index */
				1288	Py_ssize_t end, /* Stop index */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1289	int direction /* Find direction: +1 forward, -1 backward */
				1290	);
				1291
Barry Warsaw	51ac580	2000-03-20 16:36:48 +0000	[diff] [blame]	1292	/* Count the number of occurrences of substr in str[start:end]. */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1293
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1294	PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1295	PyObject *str, /* String */
				1296	PyObject *substr, /* Substring to count */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1297	Py_ssize_t start, /* Start index */
				1298	Py_ssize_t end /* Stop index */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1299	);
				1300
Barry Warsaw	51ac580	2000-03-20 16:36:48 +0000	[diff] [blame]	1301	/* Replace at most maxcount occurrences of substr in str with replstr
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1302	and return the resulting Unicode object. */
				1303
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1304	PyAPI_FUNC(PyObject *) PyUnicode_Replace(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1305	PyObject *str, /* String */
				1306	PyObject *substr, /* Substring to find */
				1307	PyObject *replstr, /* Substring to replace */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1308	Py_ssize_t maxcount /* Max. number of replacements to apply;
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1309	-1 = all */
				1310	);
				1311
				1312	/* Compare two strings and return -1, 0, 1 for less than, equal,
				1313	greater than resp. */
				1314
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1315	PyAPI_FUNC(int) PyUnicode_Compare(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1316	PyObject *left, /* Left string */
				1317	PyObject *right /* Right string */
				1318	);
				1319
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	1320	PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
				1321	PyObject *left,
				1322	const char *right
				1323	);
				1324
Thomas Wouters	00ee7ba	2006-08-21 19:07:27 +0000	[diff] [blame]	1325	/* Rich compare two strings and return one of the following:
				1326
				1327	- NULL in case an exception was raised
				1328	- Py_True or Py_False for successful comparisons
				1329	- Py_NotImplemented in case the type combination is unknown
				1330
				1331	Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
				1332	case the conversion of the arguments to Unicode fails with a
				1333	UnicodeDecodeError.
				1334
				1335	Possible values for op:
				1336
				1337	Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
				1338
				1339	*/
				1340
				1341	PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
				1342	PyObject *left, /* Left string */
				1343	PyObject *right, /* Right string */
				1344	int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
				1345	);
				1346
Thomas Wouters	7e47402	2000-07-16 12:04:32 +0000	[diff] [blame]	1347	/* Apply an argument tuple or dictionary to a format string and return
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1348	the resulting Unicode string. */
				1349
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1350	PyAPI_FUNC(PyObject *) PyUnicode_Format(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1351	PyObject *format, /* Format string */
				1352	PyObject *args /* Argument tuple or dictionary */
				1353	);
				1354
Guido van Rossum	d0d366b	2000-03-13 23:22:24 +0000	[diff] [blame]	1355	/* Checks whether element is contained in container and return 1/0
				1356	accordingly.
				1357
				1358	element has to coerce to a one-element Unicode string. -1 is
				1359	returned in case of an error. */
				1360
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1361	PyAPI_FUNC(int) PyUnicode_Contains(
Guido van Rossum	d0d366b	2000-03-13 23:22:24 +0000	[diff] [blame]	1362	PyObject *container, /* Container string */
				1363	PyObject *element /* Element string */
				1364	);
				1365
Martin v. Löwis	4738340	2007-08-15 07:32:56 +0000	[diff] [blame]	1366	/* Checks whether argument is a valid identifier. */
				1367
				1368	PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
				1369
Walter Dörwald	de02bcb	2002-04-22 17:42:37 +0000	[diff] [blame]	1370	/* Externally visible for str.strip(unicode) */
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1371	PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
Walter Dörwald	de02bcb	2002-04-22 17:42:37 +0000	[diff] [blame]	1372	PyUnicodeObject *self,
				1373	int striptype,
				1374	PyObject *sepobj
				1375	);
				1376
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1377	/* === Characters Type APIs =============================================== */
				1378
				1379	/* These should not be used directly. Use the Py_UNICODE_IS* and
				1380	Py_UNICODE_TO* macros instead.
				1381
				1382	These APIs are implemented in Objects/unicodectype.c.
				1383
				1384	*/
				1385
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1386	PyAPI_FUNC(int) _PyUnicode_IsLowercase(
Fredrik Lundh	72b0685	2001-06-27 22:08:26 +0000	[diff] [blame]	1387	Py_UNICODE ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1388	);
				1389
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1390	PyAPI_FUNC(int) _PyUnicode_IsUppercase(
Fredrik Lundh	72b0685	2001-06-27 22:08:26 +0000	[diff] [blame]	1391	Py_UNICODE ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1392	);
				1393
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1394	PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
Fredrik Lundh	72b0685	2001-06-27 22:08:26 +0000	[diff] [blame]	1395	Py_UNICODE ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1396	);
				1397
Martin v. Löwis	13c3e38	2007-08-14 22:37:03 +0000	[diff] [blame]	1398	PyAPI_FUNC(int) _PyUnicode_IsXidStart(
				1399	Py_UNICODE ch /* Unicode character */
				1400	);
				1401
				1402	PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
				1403	Py_UNICODE ch /* Unicode character */
				1404	);
				1405
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1406	PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
Tim Peters	2576c97	2005-10-29 02:33:18 +0000	[diff] [blame]	1407	const Py_UNICODE ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1408	);
				1409
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1410	PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
Tim Peters	2576c97	2005-10-29 02:33:18 +0000	[diff] [blame]	1411	const Py_UNICODE ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1412	);
				1413
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1414	PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase(
Fredrik Lundh	72b0685	2001-06-27 22:08:26 +0000	[diff] [blame]	1415	Py_UNICODE ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1416	);
				1417
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1418	PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToUppercase(
Fredrik Lundh	72b0685	2001-06-27 22:08:26 +0000	[diff] [blame]	1419	Py_UNICODE ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1420	);
				1421
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1422	PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToTitlecase(
Fredrik Lundh	72b0685	2001-06-27 22:08:26 +0000	[diff] [blame]	1423	Py_UNICODE ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1424	);
				1425
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1426	PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
Fredrik Lundh	72b0685	2001-06-27 22:08:26 +0000	[diff] [blame]	1427	Py_UNICODE ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1428	);
				1429
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1430	PyAPI_FUNC(int) _PyUnicode_ToDigit(
Fredrik Lundh	72b0685	2001-06-27 22:08:26 +0000	[diff] [blame]	1431	Py_UNICODE ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1432	);
				1433
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1434	PyAPI_FUNC(double) _PyUnicode_ToNumeric(
Fredrik Lundh	72b0685	2001-06-27 22:08:26 +0000	[diff] [blame]	1435	Py_UNICODE ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1436	);
				1437
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1438	PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
Fredrik Lundh	72b0685	2001-06-27 22:08:26 +0000	[diff] [blame]	1439	Py_UNICODE ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1440	);
				1441
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1442	PyAPI_FUNC(int) _PyUnicode_IsDigit(
Fredrik Lundh	72b0685	2001-06-27 22:08:26 +0000	[diff] [blame]	1443	Py_UNICODE ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1444	);
				1445
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1446	PyAPI_FUNC(int) _PyUnicode_IsNumeric(
Fredrik Lundh	72b0685	2001-06-27 22:08:26 +0000	[diff] [blame]	1447	Py_UNICODE ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1448	);
				1449
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1450	PyAPI_FUNC(int) _PyUnicode_IsAlpha(
Fredrik Lundh	72b0685	2001-06-27 22:08:26 +0000	[diff] [blame]	1451	Py_UNICODE ch /* Unicode character */
Marc-André Lemburg	f03e741	2000-07-05 09:45:59 +0000	[diff] [blame]	1452	);
				1453
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	1454	PyAPI_FUNC(size_t) Py_UNICODE_strlen(const Py_UNICODE *u);
				1455
				1456	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
				1457	Py_UNICODE *s1, const Py_UNICODE *s2);
				1458
				1459	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
				1460	Py_UNICODE *s1, const Py_UNICODE *s2, size_t n);
				1461
				1462	PyAPI_FUNC(int) Py_UNICODE_strcmp(
				1463	const Py_UNICODE *s1, const Py_UNICODE *s2);
				1464
				1465	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
				1466	const Py_UNICODE *s, Py_UNICODE c
				1467	);
				1468
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1469	#ifdef __cplusplus
				1470	}
				1471	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1472	#endif /* !Py_UNICODEOBJECT_H */