Blame - Include/unicodeobject.h - platform/external/python/cpython2

blob: 3c691c1c1c104d544749d1596cd92016a2e0fdee [file] [log] [blame]

Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1	#ifndef Py_UNICODEOBJECT_H
				2	#define Py_UNICODEOBJECT_H
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	3
Christian Heimes	af98da1	2008-01-27 15:18:18 +0000	[diff] [blame]	4	#include <stdarg.h>
				5
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	6	/*
				7
				8	Unicode implementation based on original code by Fredrik Lundh,
				9	modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
Alexander Belopolsky	83283c2	2010-11-16 14:29:01 +0000	[diff] [blame]	10	Unicode Integration Proposal. (See
				11	http://www.egenix.com/files/python/unicode-proposal.txt).
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	12
Guido van Rossum	16b1ad9	2000-08-03 16:24:25 +0000	[diff] [blame]	13	Copyright (c) Corporation for National Research Initiatives.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	14
				15
				16	Original header:
				17	--------------------------------------------------------------------
				18
				19	* Yet another Unicode string type for Python. This type supports the
				20	* 16-bit Basic Multilingual Plane (BMP) only.
				21	*
				22	* Written by Fredrik Lundh, January 1999.
				23	*
				24	* Copyright (c) 1999 by Secret Labs AB.
				25	* Copyright (c) 1999 by Fredrik Lundh.
				26	*
				27	* fredrik@pythonware.com
				28	* http://www.pythonware.com
				29	*
				30	* --------------------------------------------------------------------
				31	* This Unicode String Type is
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	32	*
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	33	* Copyright (c) 1999 by Secret Labs AB
				34	* Copyright (c) 1999 by Fredrik Lundh
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	35	*
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	36	* By obtaining, using, and/or copying this software and/or its
				37	* associated documentation, you agree that you have read, understood,
				38	* and will comply with the following terms and conditions:
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	39	*
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	40	* Permission to use, copy, modify, and distribute this software and its
				41	* associated documentation for any purpose and without fee is hereby
				42	* granted, provided that the above copyright notice appears in all
				43	* copies, and that both that copyright notice and this permission notice
				44	* appear in supporting documentation, and that the name of Secret Labs
				45	* AB or the author not be used in advertising or publicity pertaining to
				46	* distribution of the software without specific, written prior
				47	* permission.
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	48	*
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	49	* SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
				50	* THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
				51	* FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
				52	* ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
				53	* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
				54	* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
				55	* OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
				56	* -------------------------------------------------------------------- */
				57
Marc-André Lemburg	5e6007c	2001-09-19 11:21:03 +0000	[diff] [blame]	58	#include <ctype.h>
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	59
				60	/* === Internal API ======================================================= */
				61
				62	/* --- Internal Unicode Format -------------------------------------------- */
				63
Christian Heimes	0625e89	2008-01-07 21:04:21 +0000	[diff] [blame]	64	/* Python 3.x requires unicode */
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	65	#define Py_USING_UNICODE
Christian Heimes	0625e89	2008-01-07 21:04:21 +0000	[diff] [blame]	66
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	67	#ifndef SIZEOF_WCHAR_T
				68	#error Must define SIZEOF_WCHAR_T
Fredrik Lundh	9b14ab3	2001-06-26 22:59:49 +0000	[diff] [blame]	69	#endif
				70
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	71	#define Py_UNICODE_SIZE SIZEOF_WCHAR_T
				72
				73	/* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE.
				74	Otherwise, Unicode strings are stored as UCS-2 (with limited support
				75	for UTF-16) */
Fredrik Lundh	8f45585	2001-06-27 18:59:43 +0000	[diff] [blame]	76
				77	#if Py_UNICODE_SIZE >= 4
				78	#define Py_UNICODE_WIDE
Martin v. Löwis	0ba70cc	2001-06-26 22:22:37 +0000	[diff] [blame]	79	#endif
Fredrik Lundh	1294ad0	2001-06-26 17:17:07 +0000	[diff] [blame]	80
Amaury Forgeot d'Arc	feb7307	2010-09-12 22:42:57 +0000	[diff] [blame]	81	/* Set these flags if the platform has "wchar.h" and the
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	82	wchar_t type is a 16-bit unsigned type */
				83	/* #define HAVE_WCHAR_H */
				84	/* #define HAVE_USABLE_WCHAR_T */
				85
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	86	/* Py_UNICODE was the native Unicode storage format (code unit) used by
				87	Python and represents a single Unicode element in the Unicode type.
Georg Brandl	c6bc4c6	2011-10-05 16:23:09 +0200	[diff] [blame]	88	With PEP 393, Py_UNICODE is deprecated and replaced with a
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	89	typedef to wchar_t. */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	90
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	91	#ifndef Py_LIMITED_API
				92	#define PY_UNICODE_TYPE wchar_t
				93	typedef wchar_t Py_UNICODE;
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	94	#endif
				95
				96	/* If the compiler provides a wchar_t type we try to support it
Victor Stinner	137c34c	2010-09-29 10:25:54 +0000	[diff] [blame]	97	through the interface functions PyUnicode_FromWideChar(),
				98	PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	99
				100	#ifdef HAVE_USABLE_WCHAR_T
Marc-André Lemburg	1a731c6	2000-08-11 11:43:10 +0000	[diff] [blame]	101	# ifndef HAVE_WCHAR_H
				102	# define HAVE_WCHAR_H
				103	# endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	104	#endif
				105
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	106	#if defined(MS_WINDOWS)
Victor Stinner	99b9538	2011-07-04 14:23:54 +0200	[diff] [blame]	107	# define HAVE_MBCS
				108	#endif
				109
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	110	#ifdef HAVE_WCHAR_H
Guido van Rossum	24bdb04	2000-03-28 20:29:59 +0000	[diff] [blame]	111	/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
				112	# ifdef _HAVE_BSDI
				113	# include <time.h>
				114	# endif
Marc-André Lemburg	5e6007c	2001-09-19 11:21:03 +0000	[diff] [blame]	115	# include <wchar.h>
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	116	#endif
				117
Georg Brandl	c6bc4c6	2011-10-05 16:23:09 +0200	[diff] [blame]	118	/* Py_UCS4 and Py_UCS2 are typedefs for the respective
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	119	unicode representations. */
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	120	#if SIZEOF_INT >= 4
				121	typedef unsigned int Py_UCS4;
Martin v. Löwis	0ba70cc	2001-06-26 22:22:37 +0000	[diff] [blame]	122	#elif SIZEOF_LONG >= 4
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	123	typedef unsigned long Py_UCS4;
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	124	#else
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	125	#error "Could not find a proper typedef for Py_UCS4"
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	126	#endif
				127
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	128	typedef unsigned short Py_UCS2;
				129	typedef unsigned char Py_UCS1;
				130
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	131	/* --- Internal Unicode Operations ---------------------------------------- */
				132
Benjamin Peterson	960cf0f	2009-01-09 04:11:44 +0000	[diff] [blame]	133	/* Since splitting on whitespace is an important use case, and
				134	whitespace in most situations is solely ASCII whitespace, we
				135	optimize for the common case by using a quick look-up table
				136	_Py_ascii_whitespace (see below) with an inlined check.
Christian Heimes	190d79e	2008-01-30 11:58:22 +0000	[diff] [blame]	137
Benjamin Peterson	960cf0f	2009-01-09 04:11:44 +0000	[diff] [blame]	138	*/
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	139	#ifndef Py_LIMITED_API
Christian Heimes	190d79e	2008-01-30 11:58:22 +0000	[diff] [blame]	140	#define Py_UNICODE_ISSPACE(ch) \
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	141	((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	142
				143	#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
				144	#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
				145	#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
				146	#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
				147
				148	#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
				149	#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
				150	#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
				151
				152	#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
				153	#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
				154	#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
Georg Brandl	559e5d7	2008-06-11 18:37:52 +0000	[diff] [blame]	155	#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	156
				157	#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
				158	#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
				159	#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
				160
Marc-André Lemburg	f03e741	2000-07-05 09:45:59 +0000	[diff] [blame]	161	#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	162
Marc-André Lemburg	a9c103b	2000-07-03 10:52:13 +0000	[diff] [blame]	163	#define Py_UNICODE_ISALNUM(ch) \
				164	(Py_UNICODE_ISALPHA(ch) || \
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	165	Py_UNICODE_ISDECIMAL(ch) || \
				166	Py_UNICODE_ISDIGIT(ch) || \
				167	Py_UNICODE_ISNUMERIC(ch))
Marc-André Lemburg	a9c103b	2000-07-03 10:52:13 +0000	[diff] [blame]	168
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	169	#define Py_UNICODE_COPY(target, source, length) \
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	170	Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE))
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	171
Benjamin Peterson	960cf0f	2009-01-09 04:11:44 +0000	[diff] [blame]	172	#define Py_UNICODE_FILL(target, value, length) \
				173	do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	174	for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\
Thomas Wouters	477c8d5	2006-05-27 19:21:47 +0000	[diff] [blame]	175	} while (0)
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	176
Ezio Melotti	8c9375b	2011-08-22 20:03:25 +0300	[diff] [blame]	177	/* macros to work with surrogates */
				178	#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= ch && ch <= 0xDFFF)
				179	#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= ch && ch <= 0xDBFF)
				180	#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= ch && ch <= 0xDFFF)
				181	/* Join two surrogate characters and return a single Py_UCS4 value. */
				182	#define Py_UNICODE_JOIN_SURROGATES(high, low) \
				183	(((((Py_UCS4)(high) & 0x03FF) << 10) | \
				184	((Py_UCS4)(low) & 0x03FF)) + 0x10000)
				185
Alexander Belopolsky	83283c2	2010-11-16 14:29:01 +0000	[diff] [blame]	186	/* Check if substring matches at given offset. The offset must be
				187	valid, and the substring must not be empty. */
Benjamin Peterson	960cf0f	2009-01-09 04:11:44 +0000	[diff] [blame]	188
Thomas Wouters	477c8d5	2006-05-27 19:21:47 +0000	[diff] [blame]	189	#define Py_UNICODE_MATCH(string, offset, substring) \
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	190	((*((string)->wstr + (offset)) == *((substring)->wstr)) && \
				191	((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \
				192	!memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE)))
				193
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	194	#endif /* Py_LIMITED_API */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	195
Barry Warsaw	51ac580	2000-03-20 16:36:48 +0000	[diff] [blame]	196	#ifdef __cplusplus
				197	extern "C" {
				198	#endif
				199
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	200	/* --- Unicode Type ------------------------------------------------------- */
				201
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	202	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	203
				204	/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
				205	structure. state.ascii and state.compact are set, and the data
				206	immediately follow the structure. utf8_length and wstr_length can be found
				207	in the length field; the utf8 pointer is equal to the data pointer. */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	208	typedef struct {
Victor Stinner	8cfcbed	2011-10-03 23:19:21 +0200	[diff] [blame]	209	/* There are 4 forms of Unicode strings:
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	210
				211	- compact ascii:
				212
				213	* structure = PyASCIIObject
				214	* kind = PyUnicode_1BYTE_KIND
				215	* compact = 1
				216	* ascii = 1
				217	* ready = 1
Victor Stinner	30134f5	2011-10-04 01:32:45 +0200	[diff] [blame]	218	* (length is the length of the utf8 and wstr strings)
				219	* (data starts just after the structure)
				220	* (since ASCII is decoded from UTF-8, the utf8 string is the data)
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	221
				222	- compact:
				223
				224	* structure = PyCompactUnicodeObject
				225	* kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
				226	PyUnicode_4BYTE_KIND
				227	* compact = 1
				228	* ready = 1
Victor Stinner	a3b334d	2011-10-03 13:53:37 +0200	[diff] [blame]	229	* ascii = 0
Victor Stinner	30134f5	2011-10-04 01:32:45 +0200	[diff] [blame]	230	* utf8 is not shared with data
Victor Stinner	a41463c	2011-10-04 01:05:08 +0200	[diff] [blame]	231	* utf8_length = 0 if utf8 is NULL
				232	* wstr is shared with data and wstr_length=length
				233	if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
				234	or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
				235	* wstr_length = 0 if wstr is NULL
Victor Stinner	30134f5	2011-10-04 01:32:45 +0200	[diff] [blame]	236	* (data starts just after the structure)
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	237
Victor Stinner	8cfcbed	2011-10-03 23:19:21 +0200	[diff] [blame]	238	- legacy string, not ready:
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	239
				240	* structure = PyUnicodeObject
				241	* kind = PyUnicode_WCHAR_KIND
				242	* compact = 0
Victor Stinner	30134f5	2011-10-04 01:32:45 +0200	[diff] [blame]	243	* ascii = 0
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	244	* ready = 0
				245	* wstr is not NULL
				246	* data.any is NULL
				247	* utf8 is NULL
Victor Stinner	a41463c	2011-10-04 01:05:08 +0200	[diff] [blame]	248	* utf8_length = 0
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	249	* interned = SSTATE_NOT_INTERNED
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	250
Victor Stinner	8cfcbed	2011-10-03 23:19:21 +0200	[diff] [blame]	251	- legacy string, ready:
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	252
				253	* structure = PyUnicodeObject structure
				254	* kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
				255	PyUnicode_4BYTE_KIND
				256	* compact = 0
				257	* ready = 1
				258	* data.any is not NULL
Victor Stinner	a41463c	2011-10-04 01:05:08 +0200	[diff] [blame]	259	* utf8 is shared and utf8_length = length with data.any if ascii = 1
				260	* utf8_length = 0 if utf8 is NULL
				261	* wstr is shared and wstr_length = length with data.any
				262	if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
				263	or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
				264	* wstr_length = 0 if wstr is NULL
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	265
Victor Stinner	8cfcbed	2011-10-03 23:19:21 +0200	[diff] [blame]	266	Compact strings use only one memory block (structure + characters),
				267	whereas legacy strings use one block for the structure and one block
				268	for characters.
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	269
Victor Stinner	8cfcbed	2011-10-03 23:19:21 +0200	[diff] [blame]	270	Legacy strings are created by PyUnicode_FromUnicode() and
				271	PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
				272	when PyUnicode_READY() is called.
				273
				274	See also _PyUnicode_CheckConsistency().
				275	*/
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	276	PyObject_HEAD
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	277	Py_ssize_t length; /* Number of code points in the string */
Benjamin Peterson	8f67d08	2010-10-17 20:54:53 +0000	[diff] [blame]	278	Py_hash_t hash; /* Hash value; -1 if not set */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	279	struct {
				280	/*
				281	SSTATE_NOT_INTERNED (0)
				282	SSTATE_INTERNED_MORTAL (1)
				283	SSTATE_INTERNED_IMMORTAL (2)
				284
				285	If interned != SSTATE_NOT_INTERNED, the two references from the
				286	dictionary to this object are not counted in ob_refcnt.
				287	*/
				288	unsigned int interned:2;
				289	/* Character size:
				290
Victor Stinner	4d0d54b	2011-10-05 01:31:05 +0200	[diff] [blame]	291	- PyUnicode_WCHAR_KIND (0):
				292
				293	* character type = wchar_t (16 or 32 bits, depending on the
				294	platform)
				295
				296	- PyUnicode_1BYTE_KIND (1):
				297
				298	* character type = Py_UCS1 (8 bits, unsigned)
Victor Stinner	1d4b35f	2011-10-06 01:51:19 +0200	[diff] [blame^]	299	* if ascii is set, all characters must be in range
				300	U+0000-U+007F, otherwise at least one character must be in range
				301	U+0080-U+00FF
Victor Stinner	4d0d54b	2011-10-05 01:31:05 +0200	[diff] [blame]	302
				303	- PyUnicode_2BYTE_KIND (2):
				304
				305	* character type = Py_UCS2 (16 bits, unsigned)
Victor Stinner	1d4b35f	2011-10-06 01:51:19 +0200	[diff] [blame^]	306	* at least one character must be in range U+0100-U+FFFF
Victor Stinner	4d0d54b	2011-10-05 01:31:05 +0200	[diff] [blame]	307
				308	- PyUnicode_4BYTE_KIND (3):
				309
				310	* character type = Py_UCS4 (32 bits, unsigned)
				311	* at least one character must be in range U+10000-U+10FFFF
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	312	*/
				313	unsigned int kind:2;
				314	/* Compact is with respect to the allocation scheme. Compact unicode
				315	objects only require one memory block while non-compact objects use
				316	one block for the PyUnicodeObject struct and another for its data
				317	buffer. */
				318	unsigned int compact:1;
Victor Stinner	1d4b35f	2011-10-06 01:51:19 +0200	[diff] [blame^]	319	/* The string only contains characters in range U+0000-U+007F (ASCII)
				320	and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
				321	set, use the PyASCIIObject structure. */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	322	unsigned int ascii:1;
				323	/* The ready flag indicates whether the object layout is initialized
				324	completely. This means that this is either a compact object, or
				325	the data pointer is filled out. The bit is redundant, and helps
				326	to minimize the test in PyUnicode_IS_READY(). */
				327	unsigned int ready:1;
				328	} state;
				329	wchar_t *wstr; /* wchar_t representation (null-terminated) */
				330	} PyASCIIObject;
				331
				332	/* Non-ASCII strings allocated through PyUnicode_New use the
Georg Brandl	c6bc4c6	2011-10-05 16:23:09 +0200	[diff] [blame]	333	PyCompactUnicodeObject structure. state.compact is set, and the data
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	334	immediately follow the structure. */
				335	typedef struct {
				336	PyASCIIObject _base;
				337	Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the
				338	* terminating \0. */
				339	char *utf8; /* UTF-8 representation (null-terminated) */
				340	Py_ssize_t wstr_length; /* Number of code points in wstr, possible
				341	* surrogates count as two code points. */
				342	} PyCompactUnicodeObject;
				343
				344	/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
				345	PyUnicodeObject structure. The actual string data is initially in the wstr
Victor Stinner	a3b334d	2011-10-03 13:53:37 +0200	[diff] [blame]	346	block, and copied into the data block using _PyUnicode_Ready. */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	347	typedef struct {
				348	PyCompactUnicodeObject _base;
				349	union {
				350	void *any;
				351	Py_UCS1 *latin1;
				352	Py_UCS2 *ucs2;
				353	Py_UCS4 *ucs4;
				354	} data; /* Canonical, smallest-form Unicode buffer */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	355	} PyUnicodeObject;
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	356	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	357
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	358	PyAPI_DATA(PyTypeObject) PyUnicode_Type;
Christian Heimes	a22e8bd	2007-11-29 22:35:39 +0000	[diff] [blame]	359	PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	360
Thomas Wouters	27d517b	2007-02-25 20:39:11 +0000	[diff] [blame]	361	#define PyUnicode_Check(op) \
Christian Heimes	90aa764	2007-12-19 02:45:37 +0000	[diff] [blame]	362	PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
				363	#define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	364
				365	/* Fast access macros */
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	366	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	367
				368	#define PyUnicode_WSTR_LENGTH(op) \
Victor Stinner	a3b334d	2011-10-03 13:53:37 +0200	[diff] [blame]	369	(PyUnicode_IS_COMPACT_ASCII(op) ? \
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	370	((PyASCIIObject*)op)->length : \
				371	((PyCompactUnicodeObject*)op)->wstr_length)
				372
				373	/* Returns the deprecated Py_UNICODE representation's size in code units
				374	(this includes surrogate pairs as 2 units).
				375	If the Py_UNICODE representation is not available, it will be computed
				376	on request. Use PyUnicode_GET_LENGTH() for the length in code points. */
				377
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	378	#define PyUnicode_GET_SIZE(op) \
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	379	(assert(PyUnicode_Check(op)), \
				380	(((PyASCIIObject *)(op))->wstr) ? \
				381	PyUnicode_WSTR_LENGTH(op) : \
				382	((void)PyUnicode_AsUnicode((PyObject *)(op)), \
				383	PyUnicode_WSTR_LENGTH(op)))
				384
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	385	#define PyUnicode_GET_DATA_SIZE(op) \
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	386	(PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE)
				387
				388	/* Alias for PyUnicode_AsUnicode(). This will create a wchar_t/Py_UNICODE
				389	representation on demand. Using this macro is very inefficient now,
				390	try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
				391	use PyUnicode_WRITE() and PyUnicode_READ(). */
				392
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	393	#define PyUnicode_AS_UNICODE(op) \
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	394	(assert(PyUnicode_Check(op)), \
				395	(((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \
				396	PyUnicode_AsUnicode((PyObject *)(op)))
				397
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	398	#define PyUnicode_AS_DATA(op) \
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	399	((const char *)(PyUnicode_AS_UNICODE(op)))
				400
				401
Georg Brandl	c6bc4c6	2011-10-05 16:23:09 +0200	[diff] [blame]	402	/* --- Flexible String Representation Helper Macros (PEP 393) -------------- */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	403
				404	/* Values for PyUnicodeObject.state: */
				405
				406	/* Interning state. */
				407	#define SSTATE_NOT_INTERNED 0
				408	#define SSTATE_INTERNED_MORTAL 1
				409	#define SSTATE_INTERNED_IMMORTAL 2
				410
Victor Stinner	a3b334d	2011-10-03 13:53:37 +0200	[diff] [blame]	411	/* Return true if the string contains only ASCII characters, or 0 if not. The
				412	string may be compact (PyUnicode_IS_COMPACT_ASCII) or not. No type checks
				413	or Ready calls are performed. */
				414	#define PyUnicode_IS_ASCII(op) \
				415	(((PyASCIIObject*)op)->state.ascii)
				416
				417	/* Return true if the string is compact or 0 if not.
				418	No type checks or Ready calls are performed. */
				419	#define PyUnicode_IS_COMPACT(op) \
				420	(((PyASCIIObject*)(op))->state.compact)
				421
				422	/* Return true if the string is a compact ASCII string (use PyASCIIObject
				423	structure), or 0 if not. No type checks or Ready calls are performed. */
				424	#define PyUnicode_IS_COMPACT_ASCII(op) \
				425	(PyUnicode_IS_ASCII(op) && PyUnicode_IS_COMPACT(op))
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	426
				427	/* String contains only wstr byte characters. This is only possible
Victor Stinner	a3b334d	2011-10-03 13:53:37 +0200	[diff] [blame]	428	when the string was created with a legacy API and _PyUnicode_Ready()
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	429	has not been called yet. */
				430	#define PyUnicode_WCHAR_KIND 0
				431
				432	/* Return values of the PyUnicode_KIND() macro: */
				433
				434	#define PyUnicode_1BYTE_KIND 1
				435	#define PyUnicode_2BYTE_KIND 2
				436	#define PyUnicode_4BYTE_KIND 3
				437
				438
				439	/* Return the number of bytes the string uses to represent single characters,
Victor Stinner	4584a5b	2011-10-01 02:39:37 +0200	[diff] [blame]	440	this can be 1, 2 or 4.
				441
				442	See also PyUnicode_KIND_SIZE(). */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	443	#define PyUnicode_CHARACTER_SIZE(op) \
				444	(1 << (PyUnicode_KIND(op) - 1))
				445
Georg Brandl	4975a9b	2011-10-05 16:12:21 +0200	[diff] [blame]	446	/* Return pointers to the canonical representation cast to unsigned char,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	447	Py_UCS2, or Py_UCS4 for direct character access.
				448	No checks are performed, use PyUnicode_CHARACTER_SIZE or
				449	PyUnicode_KIND() before to ensure these will work correctly. */
				450
				451	#define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op))
				452	#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op))
				453	#define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op))
				454
Victor Stinner	157f83f	2011-09-28 21:41:31 +0200	[diff] [blame]	455	/* Return one of the PyUnicode_*_KIND values defined above. */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	456	#define PyUnicode_KIND(op) \
				457	(assert(PyUnicode_Check(op)), \
				458	assert(PyUnicode_IS_READY(op)), \
				459	((PyASCIIObject *)(op))->state.kind)
				460
Victor Stinner	157f83f	2011-09-28 21:41:31 +0200	[diff] [blame]	461	/* Return a void pointer to the raw unicode buffer. */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	462	#define _PyUnicode_COMPACT_DATA(op) \
				463	(PyUnicode_IS_COMPACT_ASCII(op) ? \
				464	((void*)((PyASCIIObject*)(op) + 1)) : \
				465	((void*)((PyCompactUnicodeObject*)(op) + 1)))
				466
				467	#define _PyUnicode_NONCOMPACT_DATA(op) \
				468	(assert(((PyUnicodeObject*)(op))->data.any), \
				469	((((PyUnicodeObject *)(op))->data.any)))
				470
				471	#define PyUnicode_DATA(op) \
				472	(assert(PyUnicode_Check(op)), \
				473	PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) : \
				474	_PyUnicode_NONCOMPACT_DATA(op))
				475
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	476	/* Compute (index * char_size) where char_size is 2 ** (kind - 1).
Victor Stinner	4584a5b	2011-10-01 02:39:37 +0200	[diff] [blame]	477	The index is a character index, the result is a size in bytes.
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	478
Victor Stinner	4584a5b	2011-10-01 02:39:37 +0200	[diff] [blame]	479	See also PyUnicode_CHARACTER_SIZE(). */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	480	#define PyUnicode_KIND_SIZE(kind, index) ((index) << ((kind) - 1))
				481
				482	/* In the access macros below, "kind" may be evaluated more than once.
				483	All other macro parameters are evaluated exactly once, so it is safe
				484	to put side effects into them (such as increasing the index). */
				485
				486	/* Write into the canonical representation, this macro does not do any sanity
				487	checks and is intended for usage in loops. The caller should cache the
Georg Brandl	07de325	2011-10-05 16:47:38 +0200	[diff] [blame]	488	kind and data pointers obtained from other macro calls.
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	489	index is the index in the string (starts at 0) and value is the new
Georg Brandl	c6bc4c6	2011-10-05 16:23:09 +0200	[diff] [blame]	490	code point value which should be written to that location. */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	491	#define PyUnicode_WRITE(kind, data, index, value) \
				492	do { \
				493	switch ((kind)) { \
				494	case PyUnicode_1BYTE_KIND: { \
				495	((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \
				496	break; \
				497	} \
				498	case PyUnicode_2BYTE_KIND: { \
				499	((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \
				500	break; \
				501	} \
				502	default: { \
				503	assert((kind) == PyUnicode_4BYTE_KIND); \
				504	((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \
				505	} \
				506	} \
				507	} while (0)
				508
Georg Brandl	07de325	2011-10-05 16:47:38 +0200	[diff] [blame]	509	/* Read a code point from the string's canonical representation. No checks
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	510	or ready calls are performed. The element is read at the width selected by kind (anything that is not the 1-byte or 2-byte kind is read as Py_UCS4) and the result is converted to Py_UCS4. */
				511	#define PyUnicode_READ(kind, data, index) \
				512	((Py_UCS4) \
				513	((kind) == PyUnicode_1BYTE_KIND ? \
Victor Stinner	7a48ff7	2011-10-02 00:55:25 +0200	[diff] [blame]	514	((const Py_UCS1 *)(data))[(index)] : \
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	515	((kind) == PyUnicode_2BYTE_KIND ? \
				516	((const Py_UCS2 *)(data))[(index)] : \
				517	((const Py_UCS4 *)(data))[(index)] \
				518	) \
				519	))
				520
				521	/* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it
				522	calls PyUnicode_KIND() and might call it twice. For single reads, use
				523	PyUnicode_READ_CHAR; for multiple consecutive reads callers should
				524	cache kind and use PyUnicode_READ instead. The unicode argument appears several times in the expansion, so it must not have side effects; index is used in exactly one selected branch. */
				525	#define PyUnicode_READ_CHAR(unicode, index) \
Victor Stinner	3794376	2011-10-02 20:33:18 +0200	[diff] [blame]	526	(assert(PyUnicode_Check(unicode)), \
				527	assert(PyUnicode_IS_READY(unicode)), \
				528	(Py_UCS4) \
				529	(PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \
				530	((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \
				531	(PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \
				532	((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \
				533	((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \
				534	) \
				535	))
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	536
				537	/* Returns the length of the unicode string. The caller has to make sure that
				538	the string has its canonical representation set before calling
				539	this macro. Call PyUnicode_(FAST_)Ready to ensure that. The value is read from the length field of the object viewed as a PyASCIIObject. */
				540	#define PyUnicode_GET_LENGTH(op) \
				541	(assert(PyUnicode_Check(op)), \
				542	assert(PyUnicode_IS_READY(op)), \
				543	((PyASCIIObject *)(op))->length)
				544
				545
				546	/* Fast check to determine whether an object is ready. Equivalent to
				547	PyUnicode_IS_COMPACT(op) \|\| ((PyUnicodeObject)(op))->data.any) /
				548
				549	#define PyUnicode_IS_READY(op) (((PyASCIIObject*)op)->state.ready)
				550
Victor Stinner	a3b334d	2011-10-03 13:53:37 +0200	[diff] [blame]	551	/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	552	case. If the canonical representation is not yet set, it will still call
Victor Stinner	a3b334d	2011-10-03 13:53:37 +0200	[diff] [blame]	553	_PyUnicode_Ready().
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	554	Returns 0 on success and -1 on errors. When PyUnicode_IS_READY(op) is already true the macro evaluates to 0 without calling _PyUnicode_Ready(); otherwise its value is the result of that call. */
				555	#define PyUnicode_READY(op) \
				556	(assert(PyUnicode_Check(op)), \
				557	(PyUnicode_IS_READY(op) ? \
Victor Stinner	d8f6510	2011-09-29 19:43:17 +0200	[diff] [blame]	558	0 : _PyUnicode_Ready((PyObject *)(op))))
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	559
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	560	/* Return a maximum character value which is suitable for creating another
				561	string based on op. This is always an approximation but more efficient
Georg Brandl	c6bc4c6	2011-10-05 16:23:09 +0200	[diff] [blame]	562	than iterating over the string. The result is 0x7f for a compact ASCII string; for the 1-byte kind it is 0x7f when the data pointer equals the object's utf8 pointer and 0xff otherwise; it is 0xffff for the 2-byte kind and 0x10ffff for anything else. */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	563	#define PyUnicode_MAX_CHAR_VALUE(op) \
				564	(assert(PyUnicode_IS_READY(op)), \
				565	(PyUnicode_IS_COMPACT_ASCII(op) ? 0x7f: \
				566	(PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ? \
				567	(PyUnicode_DATA(op) == (((PyCompactUnicodeObject *)(op))->utf8) ? \
				568	(0x7fU) : (0xffU) \
				569	) : \
				570	(PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ? \
				571	(0xffffU) : (0x10ffffU) \
				572	))))
				573
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	574	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	575
				576	/* --- Constants ---------------------------------------------------------- */
				577
				578	/* This Unicode character will be used as replacement character during
				579	decoding if the errors argument is set to "replace". Note: the
				580	Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
				581	Unicode 3.0. */
				582
Victor Stinner	5ce1b0d	2011-09-28 20:29:27 +0200	[diff] [blame]	583	#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD)
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	584
				585	/* === Public API ========================================================= */
				586
				587	/* --- Plain Py_UNICODE --------------------------------------------------- */
				588
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	589	/* With PEP 393, this is the recommended way to allocate a new unicode object.
				590	This function will allocate the object and its buffer in a single memory
				591	block. Objects created using this function are not resizable. */
				592	#ifndef Py_LIMITED_API
				593	PyAPI_FUNC(PyObject*) PyUnicode_New(
				594	Py_ssize_t size, /* Number of code points in the new string */
				595	Py_UCS4 maxchar /* maximum code point value in the string */
				596	);
				597	#endif
				598
Victor Stinner	d8f6510	2011-09-29 19:43:17 +0200	[diff] [blame]	599	/* Initializes the canonical string representation from the deprecated
				600	wstr/Py_UNICODE representation. This function is used to convert Unicode
				601	objects which were created using the old API to the new flexible format
				602	introduced with PEP 393.
				603
				604	Don't call this function directly, use the public PyUnicode_READY() macro
				605	instead. */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	606	#ifndef Py_LIMITED_API
				607	PyAPI_FUNC(int) _PyUnicode_Ready(
Victor Stinner	d8f6510	2011-09-29 19:43:17 +0200	[diff] [blame]	608	PyObject *unicode /* Unicode object */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	609	);
				610	#endif
				611
Victor Stinner	034f6cf	2011-09-30 02:26:44 +0200	[diff] [blame]	612	/* Get a copy of a Unicode string. */
				613	PyAPI_FUNC(PyObject*) PyUnicode_Copy(
				614	PyObject *unicode
				615	);
				616
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	617	/* Copy characters from one unicode object into another. This function performs
Victor Stinner	be78eaf	2011-09-28 21:37:03 +0200	[diff] [blame]	618	character conversion when necessary and falls back to memcpy if possible.
				619
Victor Stinner	a0702ab	2011-09-29 14:14:38 +0200	[diff] [blame]	620	Fail if to is too small (smaller than how_many or smaller than
				621	len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
				622	kind(to), or if to has more than 1 reference.
Victor Stinner	be78eaf	2011-09-28 21:37:03 +0200	[diff] [blame]	623
				624	Return the number of written characters, or return -1 and raise an exception
				625	on error.
				626
				627	Pseudo-code:
				628
				629	how_many = min(how_many, len(from) - from_start)
				630	to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
				631	return how_many
Victor Stinner	a0702ab	2011-09-29 14:14:38 +0200	[diff] [blame]	632
				633	Note: The function doesn't write a terminating null character.
Victor Stinner	be78eaf	2011-09-28 21:37:03 +0200	[diff] [blame]	634	*/
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	635	#ifndef Py_LIMITED_API
Victor Stinner	be78eaf	2011-09-28 21:37:03 +0200	[diff] [blame]	636	PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	637	PyObject *to,
				638	Py_ssize_t to_start,
				639	PyObject *from,
				640	Py_ssize_t from_start,
				641	Py_ssize_t how_many
				642	);
				643	#endif
				644
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	645	/* Create a Unicode Object from the Py_UNICODE buffer u of the given
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	646	size.
Marc-André Lemburg	8155e0e	2001-04-23 14:44:21 +0000	[diff] [blame]	647
				648	u may be NULL which causes the contents to be undefined. It is the
				649	user's responsibility to fill in the needed data afterwards. Note
				650	that modifying the Unicode object contents after construction is
				651	only allowed if u was set to NULL.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	652
				653	The buffer is copied into the new object. */
				654
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	655	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	656	PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	657	const Py_UNICODE *u, /* Unicode buffer; may be NULL */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	658	Py_ssize_t size /* size of buffer */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	659	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	660	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	661
Georg Brandl	952867a	2010-06-27 10:17:12 +0000	[diff] [blame]	662	/* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */
Walter Dörwald	d203431	2007-05-18 16:29:38 +0000	[diff] [blame]	663	PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
Victor Stinner	0d71116	2010-12-27 02:39:20 +0000	[diff] [blame]	664	const char *u, /* UTF-8 encoded string */
Victor Stinner	dc2081f	2010-12-27 01:49:29 +0000	[diff] [blame]	665	Py_ssize_t size /* size of buffer */
Walter Dörwald	d203431	2007-05-18 16:29:38 +0000	[diff] [blame]	666	);
				667
Walter Dörwald	acaa5a1	2007-05-05 12:00:46 +0000	[diff] [blame]	668	/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	669	UTF-8 encoded bytes. The size is determined with strlen(). */
Walter Dörwald	acaa5a1	2007-05-05 12:00:46 +0000	[diff] [blame]	670	PyAPI_FUNC(PyObject*) PyUnicode_FromString(
Victor Stinner	dc2081f	2010-12-27 01:49:29 +0000	[diff] [blame]	671	const char *u /* null-terminated UTF-8 encoded string */
Walter Dörwald	acaa5a1	2007-05-05 12:00:46 +0000	[diff] [blame]	672	);
				673
Victor Stinner	b9275c1	2011-10-05 14:01:42 +0200	[diff] [blame]	674	/* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.
				675	Scan the string to find the maximum character. */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	676	#ifndef Py_LIMITED_API
				677	PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
				678	int kind,
				679	const void *buffer,
				680	Py_ssize_t size);
				681	#endif
				682
				683	PyAPI_FUNC(PyObject*) PyUnicode_Substring(
				684	PyObject *str, /* NOTE(review): presumably the Unicode object to take the substring from -- confirm */
				685	Py_ssize_t start, /* NOTE(review): presumably the first index of the substring -- confirm */
				686	Py_ssize_t end); /* NOTE(review): presumably the end index; whether it is exclusive is not stated here -- confirm */
				687
				688	/* Copy the string into a UCS4 buffer including the null character if copy_null
				689	is set. Return NULL and raise an exception on error. Raise a ValueError if
				690	the buffer is smaller than the string. Return buffer on success.
				691
				692	buflen is the length of the buffer in (Py_UCS4) characters. */
				693	PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4(
				694	PyObject *unicode,
				695	Py_UCS4* buffer,
				696	Py_ssize_t buflen,
				697	int copy_null);
				698
				699	/* Copy the string into a UCS4 buffer. A new buffer is allocated using
				700	PyMem_Malloc; if this fails, NULL is returned with a memory error
				701	exception set. */
				702	PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode);
				703
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	704	/* Return a read-only pointer to the Unicode object's internal
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	705	Py_UNICODE buffer.
				706	If the wchar_t/Py_UNICODE representation is not yet available, this
				707	function will calculate it. */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	708
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	709	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	710	PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	711	PyObject *unicode /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	712	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	713	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	714
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	715	/* Return a read-only pointer to the Unicode object's internal
				716	Py_UNICODE buffer and save the length at size.
				717	If the wchar_t/Py_UNICODE representation is not yet available, this
				718	function will calculate it. */
				719
				720	#ifndef Py_LIMITED_API
				721	PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
				722	PyObject *unicode, /* Unicode object */
				723	Py_ssize_t *size /* location where to save the length */
				724	);
				725	#endif
				726
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	727	/* Get the length of the Unicode object. */
				728
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	729	PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength(
				730	PyObject *unicode
				731	);
				732
Victor Stinner	157f83f	2011-09-28 21:41:31 +0200	[diff] [blame]	733	/* Get the number of Py_UNICODE units in the
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	734	string representation. */
				735
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	736	PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	737	PyObject *unicode /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	738	);
				739
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	740	/* Read a character from the string. */
				741
				742	PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar(
				743	PyObject *unicode,
				744	Py_ssize_t index
				745	);
				746
				747	/* Write a character to the string. The string must have been created through
Victor Stinner	cd9950f	2011-10-02 00:34:53 +0200	[diff] [blame]	748	PyUnicode_New, must not be shared, and must not have been hashed yet.
				749
				750	Return 0 on success, -1 on error. */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	751
				752	PyAPI_FUNC(int) PyUnicode_WriteChar(
				753	PyObject *unicode,
				754	Py_ssize_t index,
				755	Py_UCS4 character
				756	);
				757
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	758	#ifndef Py_LIMITED_API
Martin v. Löwis	ce9b5a5	2001-06-27 06:28:56 +0000	[diff] [blame]	759	/* Get the maximum ordinal for a Unicode character. */
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	760	PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	761	#endif
Martin v. Löwis	ce9b5a5	2001-06-27 06:28:56 +0000	[diff] [blame]	762
Victor Stinner	8cfcbed	2011-10-03 23:19:21 +0200	[diff] [blame]	763	/* Resize a Unicode object allocated by the legacy API (e.g.
				764	PyUnicode_FromUnicode). Unicode objects allocated by the new API (e.g.
				765	PyUnicode_New) cannot be resized by this function.
				766
				767	The length is a number of Py_UNICODE characters (and not the number of code
				768	points).
Guido van Rossum	52c2359	2000-04-10 13:41:41 +0000	[diff] [blame]	769
				770	*unicode is modified to point to the new (resized) object and 0
				771	returned on success.
				772
Victor Stinner	8cfcbed	2011-10-03 23:19:21 +0200	[diff] [blame]	773	If the refcount on the object is 1, the function resizes the string in
				774	place, which is usually faster than allocating a new string (and copying
				775	characters).
Guido van Rossum	52c2359	2000-04-10 13:41:41 +0000	[diff] [blame]	776
				777	Error handling is implemented as follows: an exception is set, -1
Victor Stinner	8cfcbed	2011-10-03 23:19:21 +0200	[diff] [blame]	778	is returned and *unicode left untouched. */
Guido van Rossum	52c2359	2000-04-10 13:41:41 +0000	[diff] [blame]	779
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	780	PyAPI_FUNC(int) PyUnicode_Resize(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	781	PyObject **unicode, /* Pointer to the Unicode object */
				782	Py_ssize_t length /* New length */
Guido van Rossum	52c2359	2000-04-10 13:41:41 +0000	[diff] [blame]	783	);
				784
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	785	/* Coerce obj to a Unicode object and return a reference with
				786	incremented refcount.
				787
				788	Coercion is done in the following way:
				789
Georg Brandl	952867a	2010-06-27 10:17:12 +0000	[diff] [blame]	790	1. bytes, bytearray and other char buffer compatible objects are decoded
Alexander Belopolsky	83283c2	2010-11-16 14:29:01 +0000	[diff] [blame]	791	under the assumptions that they contain data using the UTF-8
				792	encoding. Decoding is done in "strict" mode.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	793
Guido van Rossum	b8c65bc	2001-10-19 02:01:31 +0000	[diff] [blame]	794	2. All other objects (including Unicode objects) raise an
				795	exception.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	796
				797	The API returns NULL in case of an error. The caller is responsible
				798	for decref'ing the returned objects.
				799
				800	*/
				801
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	802	PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	803	register PyObject *obj, /* Object */
Marc-André Lemburg	5a5c81a	2000-07-07 13:46:42 +0000	[diff] [blame]	804	const char *encoding, /* encoding */
				805	const char *errors /* error handling */
				806	);
				807
Guido van Rossum	b8c65bc	2001-10-19 02:01:31 +0000	[diff] [blame]	808	/* Coerce obj to a Unicode object and return a reference with
Marc-André Lemburg	5a5c81a	2000-07-07 13:46:42 +0000	[diff] [blame]	809	incremented refcount.
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	810
Guido van Rossum	b8c65bc	2001-10-19 02:01:31 +0000	[diff] [blame]	811	Unicode objects are passed back as-is (subclasses are converted to
				812	true Unicode objects), all other objects are delegated to
				813	PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
Georg Brandl	952867a	2010-06-27 10:17:12 +0000	[diff] [blame]	814	using UTF-8 encoding as basis for decoding the object.
Marc-André Lemburg	5a5c81a	2000-07-07 13:46:42 +0000	[diff] [blame]	815
				816	The API returns NULL in case of an error. The caller is responsible
				817	for decref'ing the returned objects.
				818
				819	*/
				820
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	821	PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	822	register PyObject *obj /* Object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	823	);
				824
Victor Stinner	1205f27	2010-09-11 00:54:47 +0000	[diff] [blame]	825	PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(
				826	const char *format, /* ASCII-encoded string */
				827	va_list vargs
				828	);
				829	PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
				830	const char *format, /* ASCII-encoded string */
				831	...
				832	);
Walter Dörwald	d203431	2007-05-18 16:29:38 +0000	[diff] [blame]	833
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	834	#ifndef Py_LIMITED_API
Eric Smith	4a7d76d	2008-05-30 18:10:19 +0000	[diff] [blame]	835	/* Format the object based on the format_spec, as defined in PEP 3101
				836	(Advanced String Formatting). */
				837	PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	838	PyObject *format_spec,
				839	Py_ssize_t start,
				840	Py_ssize_t end);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	841	#endif
Eric Smith	4a7d76d	2008-05-30 18:10:19 +0000	[diff] [blame]	842
Walter Dörwald	1680713	2007-05-25 13:52:07 +0000	[diff] [blame]	843	PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
				844	PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
Victor Stinner	dc2081f	2010-12-27 01:49:29 +0000	[diff] [blame]	845	PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
				846	const char *u /* UTF-8 encoded string */
				847	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	848	#ifndef Py_LIMITED_API
Walter Dörwald	1680713	2007-05-25 13:52:07 +0000	[diff] [blame]	849	PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	850	#endif
Walter Dörwald	1680713	2007-05-25 13:52:07 +0000	[diff] [blame]	851
				852	/* Use only if you know it's a string */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	853	#define PyUnicode_CHECK_INTERNED(op) \
				854	(((PyASCIIObject *)(op))->state.interned)
Walter Dörwald	1680713	2007-05-25 13:52:07 +0000	[diff] [blame]	855
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	856	/* --- wchar_t support for platforms which support it --------------------- */
				857
				858	#ifdef HAVE_WCHAR_H
				859
Georg Brandl	952867a	2010-06-27 10:17:12 +0000	[diff] [blame]	860	/* Create a Unicode Object from the wchar_t buffer w of the given
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	861	size.
				862
				863	The buffer is copied into the new object. */
				864
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	865	PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	866	register const wchar_t *w, /* wchar_t buffer */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	867	Py_ssize_t size /* size of buffer */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	868	);
				869
Marc-André Lemburg	a9cadcd	2004-11-22 13:02:31 +0000	[diff] [blame]	870	/* Copies the Unicode Object contents into the wchar_t buffer w. At
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	871	most size wchar_t characters are copied.
				872
Marc-André Lemburg	a9cadcd	2004-11-22 13:02:31 +0000	[diff] [blame]	873	Note that the resulting wchar_t string may or may not be
				874	0-terminated. It is the responsibility of the caller to make sure
				875	that the wchar_t string is 0-terminated in case this is required by
				876	the application.
				877
				878	Returns the number of wchar_t characters copied (excluding a
				879	possibly trailing 0-termination character) or -1 in case of an
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	880	error. */
				881
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	882	PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	883	PyObject *unicode, /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	884	register wchar_t *w, /* wchar_t buffer */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	885	Py_ssize_t size /* size of buffer */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	886	);
				887
Victor Stinner	137c34c	2010-09-29 10:25:54 +0000	[diff] [blame]	888	/* Convert the Unicode object to a wide character string. The output string
				889	always ends with a nul character. If size is not NULL, write the number of
Victor Stinner	d88d983	2011-09-06 02:00:05 +0200	[diff] [blame]	890	wide characters (excluding the null character) into *size.
Victor Stinner	137c34c	2010-09-29 10:25:54 +0000	[diff] [blame]	891
				892	Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it)
				893	on success. On error, returns NULL, *size is undefined and raises a
				894	MemoryError. */
				895
				896	PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
Victor Stinner	beb4135b	2010-10-07 01:02:42 +0000	[diff] [blame]	897	PyObject *unicode, /* Unicode object */
Victor Stinner	137c34c	2010-09-29 10:25:54 +0000	[diff] [blame]	898	Py_ssize_t *size /* number of characters of the result; may be NULL */
				899	);
				900
Victor Stinner	9f789e7	2011-10-01 03:57:28 +0200	[diff] [blame]	901	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	902	PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);
Victor Stinner	9f789e7	2011-10-01 03:57:28 +0200	[diff] [blame]	903	#endif
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	904
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	905	#endif
				906
Marc-André Lemburg	cc8764c	2002-08-11 12:23:04 +0000	[diff] [blame]	907	/* --- Unicode ordinals --------------------------------------------------- */
				908
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	909	/* Create a Unicode Object from the given Unicode code point ordinal.
				910
Marc-André Lemburg	cc8764c	2002-08-11 12:23:04 +0000	[diff] [blame]	911	The ordinal must be in range(0x10000) on narrow Python builds
				912	(UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
				913	raised in case it is not.
				914
				915	*/
				916
Marc-André Lemburg	9c329de	2002-08-12 08:19:10 +0000	[diff] [blame]	917	PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
Marc-André Lemburg	cc8764c	2002-08-11 12:23:04 +0000	[diff] [blame]	918
Benjamin Peterson	960cf0f	2009-01-09 04:11:44 +0000	[diff] [blame]	919	/* --- Free-list management ----------------------------------------------- */
				920
				921	/* Clear the free list used by the Unicode implementation.
				922
				923	This can be used to release memory used for objects on the free
				924	list back to the Python memory allocator.
				925
				926	*/
				927
				928	PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
				929
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	930	/* === Builtin Codecs =====================================================
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	931
				932	Many of these APIs take two arguments encoding and errors. These
				933	parameters encoding and errors have the same semantics as the ones
Alexander Belopolsky	83283c2	2010-11-16 14:29:01 +0000	[diff] [blame]	934	of the builtin str() API.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	935
Georg Brandl	952867a	2010-06-27 10:17:12 +0000	[diff] [blame]	936	Setting encoding to NULL causes the default encoding (UTF-8) to be used.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	937
				938	Error handling is set by errors which may also be set to NULL
				939	meaning to use the default handling defined for the codec. Default
				940	error handling for all builtin codecs is "strict" (ValueErrors are
				941	raised).
				942
				943	The codecs all use a similar interface. Only deviations from the
				944	generic ones are documented.
				945
				946	*/
				947
Fred Drake	cb093fe	2000-05-09 19:51:53 +0000	[diff] [blame]	948	/* --- Manage the default encoding ---------------------------------------- */
				949
Alexander Belopolsky	83283c2	2010-11-16 14:29:01 +0000	[diff] [blame]	950	/* Returns a pointer to the default encoding (UTF-8) of the
Marc-André Lemburg	9155aa7	2008-04-29 11:14:08 +0000	[diff] [blame]	951	Unicode object unicode and the size of the encoded representation
				952	in bytes stored in *size.
Christian Heimes	5894ba7	2007-11-04 11:43:14 +0000	[diff] [blame]	953
Marc-André Lemburg	9155aa7	2008-04-29 11:14:08 +0000	[diff] [blame]	954	In case of an error, *size is not set.
Guido van Rossum	7d1df6c	2007-08-29 13:53:23 +0000	[diff] [blame]	955
Georg Brandl	c6bc4c6	2011-10-05 16:23:09 +0200	[diff] [blame]	956	This function caches the UTF-8 encoded string in the unicodeobject
				957	and subsequent calls will return the same string. The memory is released
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	958	when the unicodeobject is deallocated.
				959
				960	_PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to
				961	support the previous internal function with the same behaviour.
				962
Marc-André Lemburg	4cc0f24	2008-08-07 18:54:33 +0000	[diff] [blame]	963	*** This API is for interpreter INTERNAL USE ONLY and will likely
Alexander Belopolsky	83283c2	2010-11-16 14:29:01 +0000	[diff] [blame]	964	*** be removed or changed in the future.
Marc-André Lemburg	4cc0f24	2008-08-07 18:54:33 +0000	[diff] [blame]	965
				966	*** If you need to access the Unicode object as UTF-8 bytes string,
				967	*** please use PyUnicode_AsUTF8String() instead.
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	968	*/
				969
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	970	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	971	PyAPI_FUNC(char *) PyUnicode_AsUTF8AndSize(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	972	PyObject *unicode,
Marc-André Lemburg	9155aa7	2008-04-29 11:14:08 +0000	[diff] [blame]	973	Py_ssize_t *size);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	974	#define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	975	#endif
Guido van Rossum	7d1df6c	2007-08-29 13:53:23 +0000	[diff] [blame]	976
Alexander Belopolsky	83283c2	2010-11-16 14:29:01 +0000	[diff] [blame]	977	/* Returns a pointer to the default encoding (UTF-8) of the
Marc-André Lemburg	9155aa7	2008-04-29 11:14:08 +0000	[diff] [blame]	978	Unicode object unicode.
Guido van Rossum	7d1df6c	2007-08-29 13:53:23 +0000	[diff] [blame]	979
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	980	Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
				981	in the unicodeobject.
				982
				983	_PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
				984	support the previous internal function with the same behaviour.
				985
Marc-André Lemburg	9155aa7	2008-04-29 11:14:08 +0000	[diff] [blame]	986	Use of this API is DEPRECATED since no size information can be
Marc-André Lemburg	4cc0f24	2008-08-07 18:54:33 +0000	[diff] [blame]	987	extracted from the returned data.
				988
				989	*** This API is for interpreter INTERNAL USE ONLY and will likely
				990	*** be removed or changed for Python 3.1.
				991
				992	*** If you need to access the Unicode object as UTF-8 bytes string,
				993	*** please use PyUnicode_AsUTF8String() instead.
Guido van Rossum	7d1df6c	2007-08-29 13:53:23 +0000	[diff] [blame]	994
Marc-André Lemburg	9155aa7	2008-04-29 11:14:08 +0000	[diff] [blame]	995	*/
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	996
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	997	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	998	PyAPI_FUNC(char *) PyUnicode_AsUTF8(PyObject *unicode);
				999	#define _PyUnicode_AsString PyUnicode_AsUTF8
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1000	#endif
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	1001
Alexander Belopolsky	83283c2	2010-11-16 14:29:01 +0000	[diff] [blame]	1002	/* Returns "utf-8". */
Fred Drake	cb093fe	2000-05-09 19:51:53 +0000	[diff] [blame]	1003
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1004	PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
Fred Drake	cb093fe	2000-05-09 19:51:53 +0000	[diff] [blame]	1005
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1006	/* --- Generic Codecs ----------------------------------------------------- */
				1007
				1008	/* Create a Unicode object by decoding the encoded string s of the
				1009	given size. */
				1010
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1011	PyAPI_FUNC(PyObject*) PyUnicode_Decode(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1012	const char *s, /* encoded string */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1013	Py_ssize_t size, /* size of buffer */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1014	const char *encoding, /* encoding */
				1015	const char *errors /* error handling */
				1016	);
				1017
Marc-André Lemburg	b2750b5	2008-06-06 12:18:17 +0000	[diff] [blame]	1018	/* Decode a Unicode object unicode and return the result as Python
				1019	object. */
				1020
				1021	PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1022	PyObject *unicode, /* Unicode object */
				1023	const char *encoding, /* encoding */
				1024	const char *errors /* error handling */
Marc-André Lemburg	b2750b5	2008-06-06 12:18:17 +0000	[diff] [blame]	1025	);
				1026
				1027	/* Decode a Unicode object unicode and return the result as Unicode
				1028	object. */
				1029
				1030	PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1031	PyObject *unicode, /* Unicode object */
				1032	const char *encoding, /* encoding */
				1033	const char *errors /* error handling */
Marc-André Lemburg	b2750b5	2008-06-06 12:18:17 +0000	[diff] [blame]	1034	);
				1035
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1036	/* Encodes a Py_UNICODE buffer of the given size and returns a
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1037	Python string object. */
				1038
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1039	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1040	PyAPI_FUNC(PyObject*) PyUnicode_Encode(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1041	const Py_UNICODE *s, /* Unicode char buffer */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1042	Py_ssize_t size, /* number of Py_UNICODE chars to encode */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1043	const char *encoding, /* encoding */
				1044	const char *errors /* error handling */
				1045	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1046	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1047
Marc-André Lemburg	d2d4598	2004-07-08 17:57:32 +0000	[diff] [blame]	1048	/* Encodes a Unicode object and returns the result as Python
				1049	object. */
				1050
				1051	PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1052	PyObject *unicode, /* Unicode object */
				1053	const char *encoding, /* encoding */
				1054	const char *errors /* error handling */
Marc-André Lemburg	d2d4598	2004-07-08 17:57:32 +0000	[diff] [blame]	1055	);
				1056
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1057	/* Encodes a Unicode object and returns the result as Python string
				1058	object. */
				1059
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1060	PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1061	PyObject *unicode, /* Unicode object */
				1062	const char *encoding, /* encoding */
				1063	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1064	);
				1065
Marc-André Lemburg	b2750b5	2008-06-06 12:18:17 +0000	[diff] [blame]	1066	/* Encodes a Unicode object and returns the result as Unicode
				1067	object. */
				1068
				1069	PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1070	PyObject *unicode, /* Unicode object */
				1071	const char *encoding, /* encoding */
				1072	const char *errors /* error handling */
Marc-André Lemburg	b2750b5	2008-06-06 12:18:17 +0000	[diff] [blame]	1073	);
				1074
				1075	/* Build an encoding map. */
				1076
Thomas Wouters	73e5a5b	2006-06-08 15:35:45 +0000	[diff] [blame]	1077	PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
				1078	PyObject* string /* 256 character map */
				1079	);
				1080
Marc-André Lemburg	c60e6f7	2001-09-20 10:35:46 +0000	[diff] [blame]	1081	/* --- UTF-7 Codecs ------------------------------------------------------- */
				1082
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1083	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1084	const char *string, /* UTF-7 encoded string */
				1085	Py_ssize_t length, /* size of string */
				1086	const char *errors /* error handling */
Marc-André Lemburg	c60e6f7	2001-09-20 10:35:46 +0000	[diff] [blame]	1087	);
				1088
Christian Heimes	5d14c2b	2007-11-20 23:38:09 +0000	[diff] [blame]	1089	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1090	const char *string, /* UTF-7 encoded string */
				1091	Py_ssize_t length, /* size of string */
				1092	const char *errors, /* error handling */
				1093	Py_ssize_t *consumed /* bytes consumed */
Christian Heimes	5d14c2b	2007-11-20 23:38:09 +0000	[diff] [blame]	1094	);
				1095
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1096	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1097	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1098	const Py_UNICODE *data, /* Unicode char buffer */
				1099	Py_ssize_t length, /* number of Py_UNICODE chars to encode */
				1100	int base64SetO, /* Encode RFC2152 Set O characters in base64 */
				1101	int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
				1102	const char *errors /* error handling */
Marc-André Lemburg	c60e6f7	2001-09-20 10:35:46 +0000	[diff] [blame]	1103	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1104	#endif
Marc-André Lemburg	c60e6f7	2001-09-20 10:35:46 +0000	[diff] [blame]	1105
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1106	/* --- UTF-8 Codecs ------------------------------------------------------- */
				1107
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1108	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1109	const char *string, /* UTF-8 encoded string */
				1110	Py_ssize_t length, /* size of string */
				1111	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1112	);
				1113
Walter Dörwald	6965203	2004-09-07 20:24:22 +0000	[diff] [blame]	1114	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1115	const char *string, /* UTF-8 encoded string */
				1116	Py_ssize_t length, /* size of string */
				1117	const char *errors, /* error handling */
				1118	Py_ssize_t *consumed /* bytes consumed */
Walter Dörwald	6965203	2004-09-07 20:24:22 +0000	[diff] [blame]	1119	);
				1120
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1121	PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1122	PyObject *unicode /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1123	);
				1124
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1125	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1126	PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
				1127	PyObject *unicode,
				1128	const char *errors);
				1129
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1130	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1131	const Py_UNICODE *data, /* Unicode char buffer */
				1132	Py_ssize_t length, /* number of Py_UNICODE chars to encode */
				1133	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1134	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1135	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1136
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1137	/* --- UTF-32 Codecs ------------------------------------------------------ */
				1138
				1139	/* Decodes length bytes from a UTF-32 encoded buffer string and returns
				1140	the corresponding Unicode object.
				1141
				1142	errors (if non-NULL) defines the error handling. It defaults
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1143	to "strict".
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1144
				1145	If byteorder is non-NULL, the decoder starts decoding using the
				1146	given byte order:
				1147
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1148	*byteorder == -1: little endian
				1149	*byteorder == 0: native order
				1150	*byteorder == 1: big endian
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1151
				1152	In native mode, the first four bytes of the stream are checked for a
				1153	BOM mark. If found, the BOM mark is analysed, the byte order
				1154	adjusted and the BOM skipped. In the other modes, no BOM mark
				1155	interpretation is done. After completion, *byteorder is set to the
				1156	current byte order at the end of input data.
				1157
				1158	If byteorder is NULL, the codec starts in native order mode.
				1159
				1160	*/
				1161
				1162	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1163	const char *string, /* UTF-32 encoded string */
				1164	Py_ssize_t length, /* size of string */
				1165	const char *errors, /* error handling */
				1166	int *byteorder /* pointer to byteorder to use
				1167	0=native;-1=LE,1=BE; updated on
				1168	exit */
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1169	);
				1170
				1171	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1172	const char *string, /* UTF-32 encoded string */
				1173	Py_ssize_t length, /* size of string */
				1174	const char *errors, /* error handling */
				1175	int *byteorder, /* pointer to byteorder to use
				1176	0=native;-1=LE,1=BE; updated on
				1177	exit */
				1178	Py_ssize_t *consumed /* bytes consumed */
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1179	);
				1180
				1181	/* Returns a Python string using the UTF-32 encoding in native byte
				1182	order. The string always starts with a BOM mark. */
				1183
				1184	PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1185	PyObject *unicode /* Unicode object */
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1186	);
				1187
				1188	/* Returns a Python string object holding the UTF-32 encoded value of
				1189	the Unicode data.
				1190
				1191	If byteorder is not 0, output is written according to the following
				1192	byte order:
				1193
				1194	byteorder == -1: little endian
				1195	byteorder == 0: native byte order (writes a BOM mark)
				1196	byteorder == 1: big endian
				1197
				1198	If byteorder is 0, the output string will always start with the
				1199	Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
				1200	prepended.
				1201
				1202	*/
				1203
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1204	#ifndef Py_LIMITED_API
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1205	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1206	const Py_UNICODE *data, /* Unicode char buffer */
				1207	Py_ssize_t length, /* number of Py_UNICODE chars to encode */
				1208	const char *errors, /* error handling */
				1209	int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1210	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1211	#endif
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1212
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1213	/* --- UTF-16 Codecs ------------------------------------------------------ */
				1214
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	1215	/* Decodes length bytes from a UTF-16 encoded buffer string and returns
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1216	the corresponding Unicode object.
				1217
				1218	errors (if non-NULL) defines the error handling. It defaults
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1219	to "strict".
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1220
				1221	If byteorder is non-NULL, the decoder starts decoding using the
				1222	given byte order:
				1223
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1224	*byteorder == -1: little endian
				1225	*byteorder == 0: native order
				1226	*byteorder == 1: big endian
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1227
Marc-André Lemburg	489b56e	2001-05-21 20:30:15 +0000	[diff] [blame]	1228	In native mode, the first two bytes of the stream are checked for a
				1229	BOM mark. If found, the BOM mark is analysed, the byte order
				1230	adjusted and the BOM skipped. In the other modes, no BOM mark
				1231	interpretation is done. After completion, *byteorder is set to the
				1232	current byte order at the end of input data.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1233
				1234	If byteorder is NULL, the codec starts in native order mode.
				1235
				1236	*/
				1237
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1238	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1239	const char *string, /* UTF-16 encoded string */
				1240	Py_ssize_t length, /* size of string */
				1241	const char *errors, /* error handling */
				1242	int *byteorder /* pointer to byteorder to use
				1243	0=native;-1=LE,1=BE; updated on
				1244	exit */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1245	);
				1246
Walter Dörwald	6965203	2004-09-07 20:24:22 +0000	[diff] [blame]	1247	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1248	const char *string, /* UTF-16 encoded string */
				1249	Py_ssize_t length, /* size of string */
				1250	const char *errors, /* error handling */
				1251	int *byteorder, /* pointer to byteorder to use
				1252	0=native;-1=LE,1=BE; updated on
				1253	exit */
				1254	Py_ssize_t *consumed /* bytes consumed */
Walter Dörwald	6965203	2004-09-07 20:24:22 +0000	[diff] [blame]	1255	);
				1256
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1257	/* Returns a Python string using the UTF-16 encoding in native byte
				1258	order. The string always starts with a BOM mark. */
				1259
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1260	PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1261	PyObject *unicode /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1262	);
				1263
				1264	/* Returns a Python string object holding the UTF-16 encoded value of
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	1265	the Unicode data.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1266
				1267	If byteorder is not 0, output is written according to the following
				1268	byte order:
				1269
				1270	byteorder == -1: little endian
				1271	byteorder == 0: native byte order (writes a BOM mark)
				1272	byteorder == 1: big endian
				1273
				1274	If byteorder is 0, the output string will always start with the
				1275	Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
				1276	prepended.
				1277
				1278	Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
				1279	UCS-2. This trick makes it possible to add full UTF-16 capabilities
Thomas Wouters	7e47402	2000-07-16 12:04:32 +0000	[diff] [blame]	1280	at a later point without compromising the APIs.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1281
				1282	*/
				1283
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1284	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1285	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1286	const Py_UNICODE *data, /* Unicode char buffer */
				1287	Py_ssize_t length, /* number of Py_UNICODE chars to encode */
				1288	const char *errors, /* error handling */
				1289	int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1290	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1291	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1292
				1293	/* --- Unicode-Escape Codecs ---------------------------------------------- */
				1294
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1295	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1296	const char *string, /* Unicode-Escape encoded string */
				1297	Py_ssize_t length, /* size of string */
				1298	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1299	);
				1300
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1301	PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1302	PyObject *unicode /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1303	);
				1304
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1305	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1306	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1307	const Py_UNICODE *data, /* Unicode char buffer */
				1308	Py_ssize_t length /* Number of Py_UNICODE chars to encode */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1309	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1310	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1311
				1312	/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
				1313
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1314	PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1315	const char *string, /* Raw-Unicode-Escape encoded string */
				1316	Py_ssize_t length, /* size of string */
				1317	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1318	);
				1319
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1320	PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1321	PyObject *unicode /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1322	);
				1323
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1324	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1325	PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1326	const Py_UNICODE *data, /* Unicode char buffer */
				1327	Py_ssize_t length /* Number of Py_UNICODE chars to encode */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1328	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1329	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1330
Walter Dörwald	a47d1c0	2005-08-30 10:23:14 +0000	[diff] [blame]	1331	/* --- Unicode Internal Codec ---------------------------------------------
				1332
				1333	Only for internal use in _codecsmodule.c */
				1334
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1335	#ifndef Py_LIMITED_API
Walter Dörwald	a47d1c0	2005-08-30 10:23:14 +0000	[diff] [blame]	1336	PyObject *_PyUnicode_DecodeUnicodeInternal(
				1337	const char *string,
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1338	Py_ssize_t length,
Walter Dörwald	a47d1c0	2005-08-30 10:23:14 +0000	[diff] [blame]	1339	const char *errors
				1340	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1341	#endif
Walter Dörwald	a47d1c0	2005-08-30 10:23:14 +0000	[diff] [blame]	1342
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1343	/* --- Latin-1 Codecs -----------------------------------------------------
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1344
				1345	Note: Latin-1 corresponds to the first 256 Unicode ordinals.
				1346
				1347	*/
				1348
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1349	PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1350	const char *string, /* Latin-1 encoded string */
				1351	Py_ssize_t length, /* size of string */
				1352	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1353	);
				1354
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1355	PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1356	PyObject *unicode /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1357	);
				1358
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1359	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1360	PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
				1361	PyObject* unicode,
				1362	const char* errors);
				1363
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1364	PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1365	const Py_UNICODE *data, /* Unicode char buffer */
				1366	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
				1367	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1368	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1369	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1370
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1371	/* --- ASCII Codecs -------------------------------------------------------
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1372
				1373	Only 7-bit ASCII data is accepted. All other codes generate errors.
				1374
				1375	*/
				1376
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1377	PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1378	const char *string, /* ASCII encoded string */
				1379	Py_ssize_t length, /* size of string */
				1380	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1381	);
				1382
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1383	PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1384	PyObject *unicode /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1385	);
				1386
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1387	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1388	PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
				1389	PyObject* unicode,
				1390	const char* errors);
				1391
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1392	PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1393	const Py_UNICODE *data, /* Unicode char buffer */
				1394	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
				1395	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1396	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1397	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1398
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1399	/* --- Character Map Codecs -----------------------------------------------
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1400
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1401	This codec uses mappings to encode and decode characters.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1402
				1403	Decoding mappings must map single string characters to single
				1404	Unicode characters, integers (which are then interpreted as Unicode
				1405	ordinals) or None (meaning "undefined mapping" and causing an
				1406	error).
				1407
				1408	Encoding mappings must map single Unicode characters to single
				1409	string characters, integers (which are then interpreted as Latin-1
				1410	ordinals) or None (meaning "undefined mapping" and causing an
				1411	error).
				1412
				1413	If a character lookup fails with a LookupError, the character is
				1414	copied as-is meaning that its ordinal value will be interpreted as
				1415	Unicode or Latin-1 ordinal resp. Because of this mappings only need
				1416	to contain those mappings which map characters to different code
				1417	points.
				1418
				1419	*/
				1420
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1421	PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1422	const char *string, /* Encoded string */
				1423	Py_ssize_t length, /* size of string */
				1424	PyObject *mapping, /* character mapping
				1425	(char ordinal -> unicode ordinal) */
				1426	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1427	);
				1428
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1429	PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1430	PyObject *unicode, /* Unicode object */
				1431	PyObject *mapping /* character mapping
				1432	(unicode ordinal -> char ordinal) */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1433	);
				1434
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1435	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1436	PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1437	const Py_UNICODE *data, /* Unicode char buffer */
				1438	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
				1439	PyObject *mapping, /* character mapping
				1440	(unicode ordinal -> char ordinal) */
				1441	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1442	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1443	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1444
				1445	/* Translate a Py_UNICODE buffer of the given length by applying a
				1446	character mapping table to it and return the resulting Unicode
				1447	object.
				1448
				1449	The mapping table must map Unicode ordinal integers to Unicode
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1450	ordinal integers or None (causing deletion of the character).
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1451
				1452	Mapping tables may be dictionaries or sequences. Unmapped character
				1453	ordinals (ones which cause a LookupError) are left untouched and
				1454	are copied as-is.
				1455
				1456	*/
				1457
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1458	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1459	PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1460	const Py_UNICODE *data, /* Unicode char buffer */
				1461	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
				1462	PyObject *table, /* Translate table */
				1463	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1464	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1465	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1466
Victor Stinner	99b9538	2011-07-04 14:23:54 +0200	[diff] [blame]	1467	#ifdef HAVE_MBCS
Guido van Rossum	24bdb04	2000-03-28 20:29:59 +0000	[diff] [blame]	1468
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1469	/* --- MBCS codecs for Windows -------------------------------------------- */
Guido van Rossum	24bdb04	2000-03-28 20:29:59 +0000	[diff] [blame]	1470
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1471	PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1472	const char *string, /* MBCS encoded string */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1473	Py_ssize_t length, /* size of string */
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1474	const char *errors /* error handling */
				1475	);
				1476
Thomas Wouters	0e3f591	2006-08-11 14:57:12 +0000	[diff] [blame]	1477	PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
				1478	const char *string, /* MBCS encoded string */
				1479	Py_ssize_t length, /* size of string */
				1480	const char *errors, /* error handling */
				1481	Py_ssize_t *consumed /* bytes consumed */
				1482	);
				1483
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1484	PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1485	PyObject *unicode /* Unicode object */
				1486	);
				1487
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1488	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1489	PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1490	const Py_UNICODE *data, /* Unicode char buffer */
Neal Norwitz	d78f6cf	2007-08-08 04:49:37 +0000	[diff] [blame]	1491	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1492	const char *errors /* error handling */
				1493	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1494	#endif
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1495
Victor Stinner	99b9538	2011-07-04 14:23:54 +0200	[diff] [blame]	1496	#endif /* HAVE_MBCS */
Guido van Rossum	24bdb04	2000-03-28 20:29:59 +0000	[diff] [blame]	1497
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	1498	/* --- Decimal Encoder ---------------------------------------------------- */
				1499
				1500	/* Takes a Unicode string holding a decimal value and writes it into
				1501	an output buffer using standard ASCII digit codes.
				1502
				1503	The output buffer has to provide at least length+1 bytes of storage
				1504	area. The output string is 0-terminated.
				1505
				1506	The encoder converts whitespace to ' ', decimal characters to their
				1507	corresponding ASCII digit and all other Latin-1 characters except
				1508	\0 as-is. Characters outside this range (Unicode ordinals 1-255)
				1509	are treated as errors. This includes embedded NULL bytes.
				1510
				1511	Error handling is defined by the errors argument:
				1512
				1513	NULL or "strict": raise a ValueError
				1514	"ignore": ignore the wrong characters (these are not copied to the
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1515	output buffer)
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	1516	"replace": replaces illegal characters with '?'
				1517
				1518	Returns 0 on success, -1 on failure.
				1519
				1520	*/
				1521
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1522	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1523	PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1524	Py_UNICODE *s, /* Unicode buffer */
				1525	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
				1526	char *output, /* Output buffer; must have size >= length + 1 */
				1527	const char *errors /* error handling */
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	1528	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1529	#endif
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	1530
Alexander Belopolsky	942af5a	2010-12-04 03:38:46 +0000	[diff] [blame]	1531	/* Transforms code points that have decimal digit property to the
				1532	corresponding ASCII digit code points.
				1533
				1534	Returns a new Unicode string on success, NULL on failure.
				1535	*/
				1536
Georg Brandl	b550308	2010-12-05 11:40:48 +0000	[diff] [blame]	1537	#ifndef Py_LIMITED_API
Alexander Belopolsky	942af5a	2010-12-04 03:38:46 +0000	[diff] [blame]	1538	PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
				1539	Py_UNICODE *s, /* Unicode buffer */
				1540	Py_ssize_t length /* Number of Py_UNICODE chars to transform */
				1541	);
Georg Brandl	b550308	2010-12-05 11:40:48 +0000	[diff] [blame]	1542	#endif
Alexander Belopolsky	942af5a	2010-12-04 03:38:46 +0000	[diff] [blame]	1543
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1544	/* Similar to PyUnicode_TransformDecimalToASCII(), but takes a PyUnicodeObject
				1545	as argument instead of a raw buffer and length. This function additionally
				1546	transforms spaces to ASCII because this is what the callers in longobject,
				1547	floatobject, and complexobject did anyways. */
				1548
				1549	#ifndef Py_LIMITED_API
				1550	PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
				1551	PyObject *unicode /* Unicode object */
				1552	);
				1553	#endif
				1554
Martin v. Löwis	011e842	2009-05-05 04:43:17 +0000	[diff] [blame]	1555	/* --- File system encoding ---------------------------------------------- */
				1556
Victor Stinner	47fcb5b	2010-08-13 23:59:58 +0000	[diff] [blame]	1557	/* ParseTuple converter: encode str objects to bytes using
				1558	PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */
Martin v. Löwis	011e842	2009-05-05 04:43:17 +0000	[diff] [blame]	1559
				1560	PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
				1561
Victor Stinner	47fcb5b	2010-08-13 23:59:58 +0000	[diff] [blame]	1562	/* ParseTuple converter: decode bytes objects to unicode using
				1563	PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */
				1564
				1565	PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
				1566
Victor Stinner	77c3862	2010-05-14 15:58:55 +0000	[diff] [blame]	1567	/* Decode a null-terminated string using Py_FileSystemDefaultEncoding
				1568	and the "surrogateescape" error handler.
Martin v. Löwis	011e842	2009-05-05 04:43:17 +0000	[diff] [blame]	1569
Victor Stinner	f3170cc	2010-10-15 12:04:23 +0000	[diff] [blame]	1570	If Py_FileSystemDefaultEncoding is not set, fall back to the locale
				1571	encoding.
Martin v. Löwis	011e842	2009-05-05 04:43:17 +0000	[diff] [blame]	1572
Benjamin Peterson	ccbd694	2010-05-15 17:43:18 +0000	[diff] [blame]	1573	Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known.
Martin v. Löwis	011e842	2009-05-05 04:43:17 +0000	[diff] [blame]	1574	*/
				1575
				1576	PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
				1577	const char *s /* encoded string */
				1578	);
				1579
Victor Stinner	77c3862	2010-05-14 15:58:55 +0000	[diff] [blame]	1580	/* Decode a string using Py_FileSystemDefaultEncoding
				1581	and the "surrogateescape" error handler.
				1582
Victor Stinner	f3170cc	2010-10-15 12:04:23 +0000	[diff] [blame]	1583	If Py_FileSystemDefaultEncoding is not set, fall back to the locale
				1584	encoding.
Victor Stinner	77c3862	2010-05-14 15:58:55 +0000	[diff] [blame]	1585	*/
				1586
Martin v. Löwis	011e842	2009-05-05 04:43:17 +0000	[diff] [blame]	1587	PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
				1588	const char *s, /* encoded string */
				1589	Py_ssize_t size /* size */
				1590	);
				1591
Victor Stinner	ae6265f	2010-05-15 16:27:27 +0000	[diff] [blame]	1592	/* Encode a Unicode object to Py_FileSystemDefaultEncoding with the
Benjamin Peterson	ccbd694	2010-05-15 17:43:18 +0000	[diff] [blame]	1593	"surrogateescape" error handler, and return bytes.
Victor Stinner	ae6265f	2010-05-15 16:27:27 +0000	[diff] [blame]	1594
Victor Stinner	f3170cc	2010-10-15 12:04:23 +0000	[diff] [blame]	1595	If Py_FileSystemDefaultEncoding is not set, fall back to the locale
				1596	encoding.
Victor Stinner	ae6265f	2010-05-15 16:27:27 +0000	[diff] [blame]	1597	*/
				1598
				1599	PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
				1600	PyObject *unicode
				1601	);
				1602
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1603	/* --- Methods & Slots ----------------------------------------------------
				1604
				1605	These are capable of handling Unicode objects and strings on input
				1606	(we refer to them as strings in the descriptions) and return
Georg Brandl	c6bc4c6	2011-10-05 16:23:09 +0200	[diff] [blame]	1607	Unicode objects or integers as appropriate. */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1608
				1609	/* Concat two strings giving a new Unicode string. */
				1610
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1611	PyAPI_FUNC(PyObject*) PyUnicode_Concat(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1612	PyObject *left, /* Left string */
				1613	PyObject *right /* Right string */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1614	);
				1615
Walter Dörwald	1ab8330	2007-05-18 17:15:44 +0000	[diff] [blame]	1616	/* Concat two strings and put the result in *pleft
				1617	(sets *pleft to NULL on error) */
				1618
				1619	PyAPI_FUNC(void) PyUnicode_Append(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1620	PyObject **pleft, /* Pointer to left string */
				1621	PyObject *right /* Right string */
Walter Dörwald	1ab8330	2007-05-18 17:15:44 +0000	[diff] [blame]	1622	);
				1623
				1624	/* Concat two strings, put the result in *pleft and drop the right object
				1625	(sets *pleft to NULL on error) */
				1626
				1627	PyAPI_FUNC(void) PyUnicode_AppendAndDel(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1628	PyObject **pleft, /* Pointer to left string */
				1629	PyObject *right /* Right string */
Walter Dörwald	1ab8330	2007-05-18 17:15:44 +0000	[diff] [blame]	1630	);
				1631
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1632	/* Split a string giving a list of Unicode strings.
				1633
				1634	If sep is NULL, splitting will be done at all whitespace
				1635	substrings. Otherwise, splits occur at the given separator.
				1636
				1637	At most maxsplit splits will be done. If negative, no limit is set.
				1638
				1639	Separators are not included in the resulting list.
				1640
				1641	*/
				1642
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1643	PyAPI_FUNC(PyObject*) PyUnicode_Split(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1644	PyObject *s, /* String to split */
				1645	PyObject *sep, /* String separator */
				1646	Py_ssize_t maxsplit /* Maxsplit count */
				1647	);
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1648
				1649	/* Ditto, but split at line breaks.
				1650
				1651	CRLF is considered to be one line break. Line breaks are not
				1652	included in the resulting list. */
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1653
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1654	PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1655	PyObject *s, /* String to split */
				1656	int keepends /* If true, line end markers are included */
				1657	);
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1658
Thomas Wouters	477c8d5	2006-05-27 19:21:47 +0000	[diff] [blame]	1659	/* Partition a string using a given separator. */
				1660
				1661	PyAPI_FUNC(PyObject*) PyUnicode_Partition(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1662	PyObject *s, /* String to partition */
				1663	PyObject *sep /* String separator */
				1664	);
Thomas Wouters	477c8d5	2006-05-27 19:21:47 +0000	[diff] [blame]	1665
				1666	/* Partition a string using a given separator, searching from the end of the
				1667	string. */
				1668
				1669	PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1670	PyObject *s, /* String to partition */
				1671	PyObject *sep /* String separator */
				1672	);
Thomas Wouters	477c8d5	2006-05-27 19:21:47 +0000	[diff] [blame]	1673
Hye-Shik Chang	3ae811b	2003-12-15 18:49:53 +0000	[diff] [blame]	1674	/* Split a string giving a list of Unicode strings.
				1675
				1676	If sep is NULL, splitting will be done at all whitespace
				1677	substrings. Otherwise, splits occur at the given separator.
				1678
				1679	At most maxsplit splits will be done. But unlike PyUnicode_Split
				1680	PyUnicode_RSplit splits from the end of the string. If negative,
				1681	no limit is set.
				1682
				1683	Separators are not included in the resulting list.
				1684
				1685	*/
				1686
				1687	PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1688	PyObject *s, /* String to split */
				1689	PyObject *sep, /* String separator */
				1690	Py_ssize_t maxsplit /* Maxsplit count */
				1691	);
Hye-Shik Chang	3ae811b	2003-12-15 18:49:53 +0000	[diff] [blame]	1692
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1693	/* Translate a string by applying a character mapping table to it and
				1694	return the resulting Unicode object.
				1695
				1696	The mapping table must map Unicode ordinal integers to Unicode
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1697	ordinal integers or None (causing deletion of the character).
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1698
				1699	Mapping tables may be dictionaries or sequences. Unmapped character
				1700	ordinals (ones which cause a LookupError) are left untouched and
				1701	are copied as-is.
				1702
				1703	*/
				1704
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1705	PyAPI_FUNC(PyObject *) PyUnicode_Translate(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1706	PyObject *str, /* String */
				1707	PyObject *table, /* Translate table */
				1708	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1709	);
				1710
				1711	/* Join a sequence of strings using the given separator and return
				1712	the resulting Unicode string. */
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1713
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1714	PyAPI_FUNC(PyObject*) PyUnicode_Join(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1715	PyObject *separator, /* Separator string */
				1716	PyObject *seq /* Sequence object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1717	);
				1718
				1719	/* Return 1 if substr matches str[start:end] at the given tail end, 0
				1720	otherwise. */
				1721
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1722	PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1723	PyObject *str, /* String */
				1724	PyObject *substr, /* Prefix or Suffix string */
				1725	Py_ssize_t start, /* Start index */
				1726	Py_ssize_t end, /* Stop index */
				1727	int direction /* Tail end: -1 prefix, +1 suffix */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1728	);
				1729
				1730	/* Return the first position of substr in str[start:end] using the
Marc-André Lemburg	4da6fd6	2002-05-29 11:33:13 +0000	[diff] [blame]	1731	given search direction or -1 if not found. -2 is returned in case
				1732	an error occurred and an exception is set. */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1733
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1734	PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1735	PyObject *str, /* String */
				1736	PyObject *substr, /* Substring to find */
				1737	Py_ssize_t start, /* Start index */
				1738	Py_ssize_t end, /* Stop index */
				1739	int direction /* Find direction: +1 forward, -1 backward */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1740	);
				1741
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1742	/* Like PyUnicode_Find, but search for single character only. */
				1743	PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar(
				1744	PyObject *str,
				1745	Py_UCS4 ch,
				1746	Py_ssize_t start,
				1747	Py_ssize_t end,
				1748	int direction
				1749	);
				1750
Barry Warsaw	51ac580	2000-03-20 16:36:48 +0000	[diff] [blame]	1751	/* Count the number of occurrences of substr in str[start:end]. */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1752
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1753	PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1754	PyObject *str, /* String */
				1755	PyObject *substr, /* Substring to count */
				1756	Py_ssize_t start, /* Start index */
				1757	Py_ssize_t end /* Stop index */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1758	);
				1759
Barry Warsaw	51ac580	2000-03-20 16:36:48 +0000	[diff] [blame]	1760	/* Replace at most maxcount occurrences of substr in str with replstr
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1761	and return the resulting Unicode object. */
				1762
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1763	PyAPI_FUNC(PyObject *) PyUnicode_Replace(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1764	PyObject *str, /* String */
				1765	PyObject *substr, /* Substring to find */
				1766	PyObject *replstr, /* Substring to replace */
				1767	Py_ssize_t maxcount /* Max. number of replacements to apply;
				1768	-1 = all */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1769	);
				1770
				1771	/* Compare two strings and return -1, 0, 1 for less than, equal,
				1772	greater than resp. */
				1773
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1774	PyAPI_FUNC(int) PyUnicode_Compare(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1775	PyObject *left, /* Left string */
				1776	PyObject *right /* Right string */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1777	);
				1778
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	1779	PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
				1780	PyObject *left,
Victor Stinner	dc2081f	2010-12-27 01:49:29 +0000	[diff] [blame]	1781	const char *right /* ASCII-encoded string */
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	1782	);
				1783
Thomas Wouters	00ee7ba	2006-08-21 19:07:27 +0000	[diff] [blame]	1784	/* Rich compare two strings and return one of the following:
				1785
				1786	- NULL in case an exception was raised
Georg Brandl	c6bc4c6	2011-10-05 16:23:09 +0200	[diff] [blame]	1787	- Py_True or Py_False for successful comparisons
Thomas Wouters	00ee7ba	2006-08-21 19:07:27 +0000	[diff] [blame]	1788	- Py_NotImplemented in case the type combination is unknown
				1789
				1790	Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
				1791	case the conversion of the arguments to Unicode fails with a
				1792	UnicodeDecodeError.
				1793
				1794	Possible values for op:
				1795
				1796	Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
				1797
				1798	*/
				1799
				1800	PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1801	PyObject *left, /* Left string */
				1802	PyObject *right, /* Right string */
				1803	int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
Thomas Wouters	00ee7ba	2006-08-21 19:07:27 +0000	[diff] [blame]	1804	);
				1805
Thomas Wouters	7e47402	2000-07-16 12:04:32 +0000	[diff] [blame]	1806	/* Apply an argument tuple or dictionary to a format string and return
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1807	the resulting Unicode string. */
				1808
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1809	PyAPI_FUNC(PyObject *) PyUnicode_Format(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1810	PyObject *format, /* Format string */
				1811	PyObject *args /* Argument tuple or dictionary */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1812	);
				1813
Guido van Rossum	d0d366b	2000-03-13 23:22:24 +0000	[diff] [blame]	1814	/* Checks whether element is contained in container and returns 1/0
				1815	accordingly.
				1816
				1817	element has to coerce to a one-element Unicode string. -1 is
				1818	returned in case of an error. */
				1819
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1820	PyAPI_FUNC(int) PyUnicode_Contains(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1821	PyObject *container, /* Container string */
				1822	PyObject *element /* Element string */
Guido van Rossum	d0d366b	2000-03-13 23:22:24 +0000	[diff] [blame]	1823	);
				1824
Martin v. Löwis	4738340	2007-08-15 07:32:56 +0000	[diff] [blame]	1825	/* Checks whether argument is a valid identifier. */
				1826
				1827	PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
				1828
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1829	#ifndef Py_LIMITED_API
Walter Dörwald	de02bcb	2002-04-22 17:42:37 +0000	[diff] [blame]	1830	/* Externally visible for str.strip(unicode) */
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1831	PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
Walter Dörwald	de02bcb	2002-04-22 17:42:37 +0000	[diff] [blame]	1832	PyUnicodeObject *self,
				1833	int striptype,
				1834	PyObject *sepobj
				1835	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1836	#endif
Walter Dörwald	de02bcb	2002-04-22 17:42:37 +0000	[diff] [blame]	1837
Eric Smith	5807c41	2008-05-11 21:00:57 +0000	[diff] [blame]	1838	/* Using the current locale, insert the thousands grouping
				1839	into the string pointed to by buffer. For the argument descriptions,
				1840	see Objects/stringlib/localeutil.h */
				1841
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1842	#ifndef Py_LIMITED_API
Eric Smith	0923d1d	2009-04-16 20:16:10 +0000	[diff] [blame]	1843	PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGroupingLocale(Py_UNICODE *buffer,
				1844	Py_ssize_t n_buffer,
				1845	Py_UNICODE *digits,
				1846	Py_ssize_t n_digits,
				1847	Py_ssize_t min_width);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1848	#endif
Eric Smith	5807c41	2008-05-11 21:00:57 +0000	[diff] [blame]	1849
Eric Smith	a3b1ac8	2009-04-03 14:45:06 +0000	[diff] [blame]	1850	/* Using explicit passed-in values, insert the thousands grouping
				1851	into the string pointed to by buffer. For the argument descriptions,
				1852	see Objects/stringlib/localeutil.h */
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1853	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1854	PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
Victor Stinner	c3cec78	2011-10-05 21:24:08 +0200	[diff] [blame]	1855	PyObject *unicode,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1856	int kind,
				1857	void *buffer,
				1858	Py_ssize_t n_buffer,
				1859	void *digits,
				1860	Py_ssize_t n_digits,
				1861	Py_ssize_t min_width,
				1862	const char *grouping,
				1863	const char *thousands_sep);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1864	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1865	/* === Characters Type APIs =============================================== */
				1866
Benjamin Peterson	960cf0f	2009-01-09 04:11:44 +0000	[diff] [blame]	1867	/* Helper array used by Py_UNICODE_ISSPACE(). */
				1868
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1869	#ifndef Py_LIMITED_API
Benjamin Peterson	960cf0f	2009-01-09 04:11:44 +0000	[diff] [blame]	1870	PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
				1871
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1872	/* These should not be used directly. Use the Py_UNICODE_IS* and
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1873	Py_UNICODE_TO* macros instead.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1874
				1875	These APIs are implemented in Objects/unicodectype.c.
				1876
				1877	*/
				1878
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1879	PyAPI_FUNC(int) _PyUnicode_IsLowercase(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1880	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1881	);
				1882
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1883	PyAPI_FUNC(int) _PyUnicode_IsUppercase(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1884	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1885	);
				1886
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1887	PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1888	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1889	);
				1890
Martin v. Löwis	13c3e38	2007-08-14 22:37:03 +0000	[diff] [blame]	1891	PyAPI_FUNC(int) _PyUnicode_IsXidStart(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1892	Py_UCS4 ch /* Unicode character */
Martin v. Löwis	13c3e38	2007-08-14 22:37:03 +0000	[diff] [blame]	1893	);
				1894
				1895	PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1896	Py_UCS4 ch /* Unicode character */
Martin v. Löwis	13c3e38	2007-08-14 22:37:03 +0000	[diff] [blame]	1897	);
				1898
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1899	PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1900	const Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1901	);
				1902
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1903	PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1904	const Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1905	);
				1906
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1907	PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
				1908	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1909	);
				1910
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1911	PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
				1912	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1913	);
				1914
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1915	PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
				1916	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1917	);
				1918
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1919	PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1920	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1921	);
				1922
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1923	PyAPI_FUNC(int) _PyUnicode_ToDigit(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1924	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1925	);
				1926
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1927	PyAPI_FUNC(double) _PyUnicode_ToNumeric(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1928	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1929	);
				1930
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1931	PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1932	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1933	);
				1934
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1935	PyAPI_FUNC(int) _PyUnicode_IsDigit(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1936	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1937	);
				1938
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1939	PyAPI_FUNC(int) _PyUnicode_IsNumeric(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1940	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1941	);
				1942
Georg Brandl	559e5d7	2008-06-11 18:37:52 +0000	[diff] [blame]	1943	PyAPI_FUNC(int) _PyUnicode_IsPrintable(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1944	Py_UCS4 ch /* Unicode character */
Georg Brandl	559e5d7	2008-06-11 18:37:52 +0000	[diff] [blame]	1945	);
				1946
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1947	PyAPI_FUNC(int) _PyUnicode_IsAlpha(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1948	Py_UCS4 ch /* Unicode character */
Marc-André Lemburg	f03e741	2000-07-05 09:45:59 +0000	[diff] [blame]	1949	);
				1950
Victor Stinner	ef8d95c	2010-08-16 22:03:11 +0000	[diff] [blame]	1951	PyAPI_FUNC(size_t) Py_UNICODE_strlen(
				1952	const Py_UNICODE *u
				1953	);
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	1954
				1955	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
Victor Stinner	ef8d95c	2010-08-16 22:03:11 +0000	[diff] [blame]	1956	Py_UNICODE *s1,
				1957	const Py_UNICODE *s2);
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	1958
Victor Stinner	c4eb765	2010-09-01 23:43:50 +0000	[diff] [blame]	1959	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat(
				1960	Py_UNICODE *s1, const Py_UNICODE *s2);
				1961
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	1962	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
Victor Stinner	ef8d95c	2010-08-16 22:03:11 +0000	[diff] [blame]	1963	Py_UNICODE *s1,
				1964	const Py_UNICODE *s2,
				1965	size_t n);
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	1966
				1967	PyAPI_FUNC(int) Py_UNICODE_strcmp(
Victor Stinner	ef8d95c	2010-08-16 22:03:11 +0000	[diff] [blame]	1968	const Py_UNICODE *s1,
				1969	const Py_UNICODE *s2
				1970	);
				1971
				1972	PyAPI_FUNC(int) Py_UNICODE_strncmp(
				1973	const Py_UNICODE *s1,
				1974	const Py_UNICODE *s2,
				1975	size_t n
				1976	);
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	1977
				1978	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
Victor Stinner	ef8d95c	2010-08-16 22:03:11 +0000	[diff] [blame]	1979	const Py_UNICODE *s,
				1980	Py_UNICODE c
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	1981	);
				1982
Victor Stinner	331ea92	2010-08-10 16:37:20 +0000	[diff] [blame]	1983	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
Victor Stinner	ef8d95c	2010-08-16 22:03:11 +0000	[diff] [blame]	1984	const Py_UNICODE *s,
				1985	Py_UNICODE c
Victor Stinner	331ea92	2010-08-10 16:37:20 +0000	[diff] [blame]	1986	);
				1987
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1988	PyAPI_FUNC(size_t) Py_UCS4_strlen(
				1989	const Py_UCS4 *u
				1990	);
				1991
				1992	PyAPI_FUNC(Py_UCS4*) Py_UCS4_strcpy(
				1993	Py_UCS4 *s1,
				1994	const Py_UCS4 *s2);
				1995
				1996	PyAPI_FUNC(Py_UCS4*) Py_UCS4_strcat(
				1997	Py_UCS4 *s1, const Py_UCS4 *s2);
				1998
				1999	PyAPI_FUNC(Py_UCS4*) Py_UCS4_strncpy(
				2000	Py_UCS4 *s1,
				2001	const Py_UCS4 *s2,
				2002	size_t n);
				2003
				2004	PyAPI_FUNC(int) Py_UCS4_strcmp(
				2005	const Py_UCS4 *s1,
				2006	const Py_UCS4 *s2
				2007	);
				2008
				2009	PyAPI_FUNC(int) Py_UCS4_strncmp(
				2010	const Py_UCS4 *s1,
				2011	const Py_UCS4 *s2,
				2012	size_t n
				2013	);
				2014
				2015	PyAPI_FUNC(Py_UCS4*) Py_UCS4_strchr(
				2016	const Py_UCS4 *s,
				2017	Py_UCS4 c
				2018	);
				2019
				2020	PyAPI_FUNC(Py_UCS4*) Py_UCS4_strrchr(
				2021	const Py_UCS4 *s,
				2022	Py_UCS4 c
				2023	);
				2024
Victor Stinner	71133ff	2010-09-01 23:43:53 +0000	[diff] [blame]	2025	/* Create a copy of a unicode string ending with a nul character. Return NULL
				2026	and raise a MemoryError exception on memory allocation failure, otherwise
				2027	return a new allocated buffer (use PyMem_Free() to free the buffer). */
				2028
Victor Stinner	4640860	2010-09-03 16:18:00 +0000	[diff] [blame]	2029	PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
Victor Stinner	71133ff	2010-09-01 23:43:53 +0000	[diff] [blame]	2030	PyObject *unicode
				2031	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	2032	#endif /* Py_LIMITED_API */
Victor Stinner	71133ff	2010-09-01 23:43:53 +0000	[diff] [blame]	2033
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	2034	#if defined(Py_DEBUG) && !defined(Py_LIMITED_API)
				2035	/* FIXME: use PyObject* type for op */
				2036	PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
				2037	void *op,
				2038	int check_content);
				2039	#endif
				2040
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2041	#ifdef __cplusplus
				2042	}
				2043	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2044	#endif /* !Py_UNICODEOBJECT_H */