Blame - Include/unicodeobject.h - platform/external/python/cpython3

blob: 3109cf466748ad1e7ccc217766ab6e09992c3189 [file] [log] [blame]

Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1	#ifndef Py_UNICODEOBJECT_H
				2	#define Py_UNICODEOBJECT_H
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	3
Christian Heimes	af98da1	2008-01-27 15:18:18 +0000	[diff] [blame]	4	#include <stdarg.h>
				5
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	6	/*
				7
				8	Unicode implementation based on original code by Fredrik Lundh,
				9	modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
Alexander Belopolsky	83283c2	2010-11-16 14:29:01 +0000	[diff] [blame]	10	Unicode Integration Proposal. (See
				11	http://www.egenix.com/files/python/unicode-proposal.txt).
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	12
Guido van Rossum	16b1ad9	2000-08-03 16:24:25 +0000	[diff] [blame]	13	Copyright (c) Corporation for National Research Initiatives.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	14
				15
				16	Original header:
				17	--------------------------------------------------------------------
				18
				19	* Yet another Unicode string type for Python. This type supports the
				20	* 16-bit Basic Multilingual Plane (BMP) only.
				21	*
				22	* Written by Fredrik Lundh, January 1999.
				23	*
				24	* Copyright (c) 1999 by Secret Labs AB.
				25	* Copyright (c) 1999 by Fredrik Lundh.
				26	*
				27	* fredrik@pythonware.com
				28	* http://www.pythonware.com
				29	*
				30	* --------------------------------------------------------------------
				31	* This Unicode String Type is
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	32	*
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	33	* Copyright (c) 1999 by Secret Labs AB
				34	* Copyright (c) 1999 by Fredrik Lundh
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	35	*
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	36	* By obtaining, using, and/or copying this software and/or its
				37	* associated documentation, you agree that you have read, understood,
				38	* and will comply with the following terms and conditions:
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	39	*
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	40	* Permission to use, copy, modify, and distribute this software and its
				41	* associated documentation for any purpose and without fee is hereby
				42	* granted, provided that the above copyright notice appears in all
				43	* copies, and that both that copyright notice and this permission notice
				44	* appear in supporting documentation, and that the name of Secret Labs
				45	* AB or the author not be used in advertising or publicity pertaining to
				46	* distribution of the software without specific, written prior
				47	* permission.
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	48	*
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	49	* SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
				50	* THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
				51	* FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
				52	* ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
				53	* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
				54	* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
				55	* OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
				56	* -------------------------------------------------------------------- */
				57
Marc-André Lemburg	5e6007c	2001-09-19 11:21:03 +0000	[diff] [blame]	58	#include <ctype.h>
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	59
				60	/* === Internal API ======================================================= */
				61
				62	/* --- Internal Unicode Format -------------------------------------------- */
				63
Christian Heimes	0625e89	2008-01-07 21:04:21 +0000	[diff] [blame]	64	/* Python 3.x requires unicode */
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	65	#define Py_USING_UNICODE
Christian Heimes	0625e89	2008-01-07 21:04:21 +0000	[diff] [blame]	66
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	67	#ifndef SIZEOF_WCHAR_T
				68	#error Must define SIZEOF_WCHAR_T
Fredrik Lundh	9b14ab3	2001-06-26 22:59:49 +0000	[diff] [blame]	69	#endif
				70
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	71	#define Py_UNICODE_SIZE SIZEOF_WCHAR_T
				72
				73	/* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE.
				74	Otherwise, Unicode strings are stored as UCS-2 (with limited support
				75	for UTF-16) */
Fredrik Lundh	8f45585	2001-06-27 18:59:43 +0000	[diff] [blame]	76
				77	#if Py_UNICODE_SIZE >= 4
				78	#define Py_UNICODE_WIDE
Martin v. Löwis	0ba70cc	2001-06-26 22:22:37 +0000	[diff] [blame]	79	#endif
Fredrik Lundh	1294ad0	2001-06-26 17:17:07 +0000	[diff] [blame]	80
Amaury Forgeot d'Arc	feb7307	2010-09-12 22:42:57 +0000	[diff] [blame]	81	/* Set these flags if the platform has "wchar.h" and the
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	82	wchar_t type is a 16-bit unsigned type */
				83	/* #define HAVE_WCHAR_H */
				84	/* #define HAVE_USABLE_WCHAR_T */
				85
/* Py_UNICODE was the native Unicode storage format (code unit) used by
   Python and represents a single Unicode element in the Unicode type.
   With PEP 393, Py_UNICODE is deprecated and replaced with a
   typedef to wchar_t. */

#ifndef Py_LIMITED_API
#define PY_UNICODE_TYPE wchar_t
typedef wchar_t Py_UNICODE;
#endif
				95
				96	/* If the compiler provides a wchar_t type we try to support it
Victor Stinner	137c34c	2010-09-29 10:25:54 +0000	[diff] [blame]	97	through the interface functions PyUnicode_FromWideChar(),
				98	PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	99
				100	#ifdef HAVE_USABLE_WCHAR_T
Marc-André Lemburg	1a731c6	2000-08-11 11:43:10 +0000	[diff] [blame]	101	# ifndef HAVE_WCHAR_H
				102	# define HAVE_WCHAR_H
				103	# endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	104	#endif
				105
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	106	#if defined(MS_WINDOWS)
Victor Stinner	99b9538	2011-07-04 14:23:54 +0200	[diff] [blame]	107	# define HAVE_MBCS
				108	#endif
				109
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	110	#ifdef HAVE_WCHAR_H
Guido van Rossum	24bdb04	2000-03-28 20:29:59 +0000	[diff] [blame]	111	/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
				112	# ifdef _HAVE_BSDI
				113	# include <time.h>
				114	# endif
Marc-André Lemburg	5e6007c	2001-09-19 11:21:03 +0000	[diff] [blame]	115	# include <wchar.h>
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	116	#endif
				117
Georg Brandl	c6bc4c6	2011-10-05 16:23:09 +0200	[diff] [blame]	118	/* Py_UCS4 and Py_UCS2 are typedefs for the respective
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	119	unicode representations. */
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	120	#if SIZEOF_INT >= 4
				121	typedef unsigned int Py_UCS4;
Martin v. Löwis	0ba70cc	2001-06-26 22:22:37 +0000	[diff] [blame]	122	#elif SIZEOF_LONG >= 4
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	123	typedef unsigned long Py_UCS4;
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	124	#else
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	125	#error "Could not find a proper typedef for Py_UCS4"
Marc-André Lemburg	b5ac6f6	2001-07-31 14:30:16 +0000	[diff] [blame]	126	#endif
				127
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	128	typedef unsigned short Py_UCS2;
				129	typedef unsigned char Py_UCS1;
				130
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	131	/* --- Internal Unicode Operations ---------------------------------------- */
				132
Benjamin Peterson	960cf0f	2009-01-09 04:11:44 +0000	[diff] [blame]	133	/* Since splitting on whitespace is an important use case, and
				134	whitespace in most situations is solely ASCII whitespace, we
				135	optimize for the common case by using a quick look-up table
				136	_Py_ascii_whitespace (see below) with an inlined check.
Christian Heimes	190d79e	2008-01-30 11:58:22 +0000	[diff] [blame]	137
Benjamin Peterson	960cf0f	2009-01-09 04:11:44 +0000	[diff] [blame]	138	*/
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	139	#ifndef Py_LIMITED_API
/* Nonzero if ch is whitespace.  Values below 128 are looked up in the
   _Py_ascii_whitespace table; everything else is delegated to
   _PyUnicode_IsWhitespace().  ch is evaluated more than once. */
#define Py_UNICODE_ISSPACE(ch) \
    ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	142
				143	#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
				144	#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
				145	#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
				146	#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
				147
				148	#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
				149	#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
				150	#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
				151
				152	#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
				153	#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
				154	#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
Georg Brandl	559e5d7	2008-06-11 18:37:52 +0000	[diff] [blame]	155	#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	156
				157	#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
				158	#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
				159	#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
				160
#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)

/* Nonzero if ch is alphabetic or has any numeric property (decimal,
   digit or numeric).  ch may be evaluated up to four times. */
#define Py_UNICODE_ISALNUM(ch) \
    (Py_UNICODE_ISALPHA(ch) || \
     Py_UNICODE_ISDECIMAL(ch) || \
     Py_UNICODE_ISDIGIT(ch) || \
     Py_UNICODE_ISNUMERIC(ch))
Marc-André Lemburg	a9c103b	2000-07-03 10:52:13 +0000	[diff] [blame]	168
/* Copy `length` Py_UNICODE units from source to target via Py_MEMCPY. */
#define Py_UNICODE_COPY(target, source, length) \
    Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE))

/* Store `value` into the first `length` Py_UNICODE units of target.
   Every argument is evaluated exactly once (target, value, then length),
   so arguments with side effects are safe. */
#define Py_UNICODE_FILL(target, value, length) \
    do {Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value); \
        Py_ssize_t i_, n_ = (length); \
        for (i_ = 0; i_ < n_; i_++) t_[i_] = v_; \
    } while (0)
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	176
/* macros to work with surrogates; ch is fully parenthesized so that
   expression arguments such as `c & 0xFFFF` are classified correctly
   (ch is evaluated twice) */
#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF)
#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF)
/* Join two surrogate characters and return a single Py_UCS4 value. */
#define Py_UNICODE_JOIN_SURROGATES(high, low)  \
    (((((Py_UCS4)(high) & 0x03FF) << 10) |      \
      ((Py_UCS4)(low) & 0x03FF)) + 0x10000)
				185
/* Check if substring matches at given offset.  The offset must be
   valid, and the substring must not be empty.

   The first and last code units are compared before falling back to
   memcmp() over the whole substring, so most mismatches are rejected
   without a library call. */

#define Py_UNICODE_MATCH(string, offset, substring) \
    ((*((string)->wstr + (offset)) == *((substring)->wstr)) && \
     ((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \
     !memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE)))
				193
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	194	#endif /* Py_LIMITED_API */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	195
Barry Warsaw	51ac580	2000-03-20 16:36:48 +0000	[diff] [blame]	196	#ifdef __cplusplus
				197	extern "C" {
				198	#endif
				199
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	200	/* --- Unicode Type ------------------------------------------------------- */
				201
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	202	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	203
				204	/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
				205	structure. state.ascii and state.compact are set, and the data
				206	immediately follow the structure. utf8_length and wstr_length can be found
				207	in the length field; the utf8 pointer is equal to the data pointer. */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	208	typedef struct {
Éric Araujo	80a348c	2011-10-05 01:11:12 +0200	[diff] [blame]	209	/* There are 4 forms of Unicode strings:
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	210
				211	- compact ascii:
				212
				213	* structure = PyASCIIObject
				214	* kind = PyUnicode_1BYTE_KIND
				215	* compact = 1
				216	* ascii = 1
				217	* ready = 1
Victor Stinner	30134f5	2011-10-04 01:32:45 +0200	[diff] [blame]	218	* (length is the length of the utf8 and wstr strings)
				219	* (data starts just after the structure)
				220	* (since ASCII is decoded from UTF-8, the utf8 string are the data)
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	221
				222	- compact:
				223
				224	* structure = PyCompactUnicodeObject
				225	* kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
				226	PyUnicode_4BYTE_KIND
				227	* compact = 1
				228	* ready = 1
Victor Stinner	a3b334d	2011-10-03 13:53:37 +0200	[diff] [blame]	229	* ascii = 0
Victor Stinner	30134f5	2011-10-04 01:32:45 +0200	[diff] [blame]	230	* utf8 is not shared with data
Victor Stinner	a41463c	2011-10-04 01:05:08 +0200	[diff] [blame]	231	* utf8_length = 0 if utf8 is NULL
				232	* wstr is shared with data and wstr_length=length
				233	if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
Victor Stinner	e30c0a1	2011-11-04 20:54:05 +0100	[diff] [blame]	234	or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
Victor Stinner	a41463c	2011-10-04 01:05:08 +0200	[diff] [blame]	235	* wstr_length = 0 if wstr is NULL
Victor Stinner	30134f5	2011-10-04 01:32:45 +0200	[diff] [blame]	236	* (data starts just after the structure)
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	237
Victor Stinner	8cfcbed	2011-10-03 23:19:21 +0200	[diff] [blame]	238	- legacy string, not ready:
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	239
				240	* structure = PyUnicodeObject
Victor Stinner	e30c0a1	2011-11-04 20:54:05 +0100	[diff] [blame]	241	* length = 0 (use wstr_length)
				242	* hash = -1
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	243	* kind = PyUnicode_WCHAR_KIND
				244	* compact = 0
Victor Stinner	30134f5	2011-10-04 01:32:45 +0200	[diff] [blame]	245	* ascii = 0
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	246	* ready = 0
Victor Stinner	e30c0a1	2011-11-04 20:54:05 +0100	[diff] [blame]	247	* interned = SSTATE_NOT_INTERNED
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	248	* wstr is not NULL
				249	* data.any is NULL
				250	* utf8 is NULL
Victor Stinner	a41463c	2011-10-04 01:05:08 +0200	[diff] [blame]	251	* utf8_length = 0
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	252
Victor Stinner	8cfcbed	2011-10-03 23:19:21 +0200	[diff] [blame]	253	- legacy string, ready:
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	254
				255	* structure = PyUnicodeObject structure
				256	* kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
				257	PyUnicode_4BYTE_KIND
				258	* compact = 0
				259	* ready = 1
				260	* data.any is not NULL
Victor Stinner	a41463c	2011-10-04 01:05:08 +0200	[diff] [blame]	261	* utf8 is shared and utf8_length = length with data.any if ascii = 1
				262	* utf8_length = 0 if utf8 is NULL
Victor Stinner	e30c0a1	2011-11-04 20:54:05 +0100	[diff] [blame]	263	* wstr is shared with data.any and wstr_length = length
Victor Stinner	a41463c	2011-10-04 01:05:08 +0200	[diff] [blame]	264	if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
				265	or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_4)=4
				266	* wstr_length = 0 if wstr is NULL
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	267
Victor Stinner	8cfcbed	2011-10-03 23:19:21 +0200	[diff] [blame]	268	Compact strings use only one memory block (structure + characters),
				269	whereas legacy strings use one block for the structure and one block
				270	for characters.
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	271
Victor Stinner	8cfcbed	2011-10-03 23:19:21 +0200	[diff] [blame]	272	Legacy strings are created by PyUnicode_FromUnicode() and
				273	PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
				274	when PyUnicode_READY() is called.
				275
				276	See also _PyUnicode_CheckConsistency().
				277	*/
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	278	PyObject_HEAD
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	279	Py_ssize_t length; /* Number of code points in the string */
Benjamin Peterson	8f67d08	2010-10-17 20:54:53 +0000	[diff] [blame]	280	Py_hash_t hash; /* Hash value; -1 if not set */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	281	struct {
				282	/*
				283	SSTATE_NOT_INTERNED (0)
				284	SSTATE_INTERNED_MORTAL (1)
				285	SSTATE_INTERNED_IMMORTAL (2)
				286
				287	If interned != SSTATE_NOT_INTERNED, the two references from the
				288	dictionary to this object are not counted in ob_refcnt.
				289	*/
				290	unsigned int interned:2;
				291	/* Character size:
				292
Victor Stinner	4d0d54b	2011-10-05 01:31:05 +0200	[diff] [blame]	293	- PyUnicode_WCHAR_KIND (0):
				294
				295	* character type = wchar_t (16 or 32 bits, depending on the
				296	platform)
				297
				298	- PyUnicode_1BYTE_KIND (1):
				299
				300	* character type = Py_UCS1 (8 bits, unsigned)
Victor Stinner	1d4b35f	2011-10-06 01:51:19 +0200	[diff] [blame]	301	* if ascii is set, all characters must be in range
				302	U+0000-U+007F, otherwise at least one character must be in range
				303	U+0080-U+00FF
Victor Stinner	4d0d54b	2011-10-05 01:31:05 +0200	[diff] [blame]	304
				305	- PyUnicode_2BYTE_KIND (2):
				306
				307	* character type = Py_UCS2 (16 bits, unsigned)
Victor Stinner	1d4b35f	2011-10-06 01:51:19 +0200	[diff] [blame]	308	* at least one character must be in range U+0100-U+FFFF
Victor Stinner	4d0d54b	2011-10-05 01:31:05 +0200	[diff] [blame]	309
Martin v. Löwis	c47adb0	2011-10-07 20:55:35 +0200	[diff] [blame]	310	- PyUnicode_4BYTE_KIND (4):
Victor Stinner	4d0d54b	2011-10-05 01:31:05 +0200	[diff] [blame]	311
				312	* character type = Py_UCS4 (32 bits, unsigned)
				313	* at least one character must be in range U+10000-U+10FFFF
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	314	*/
Martin v. Löwis	c47adb0	2011-10-07 20:55:35 +0200	[diff] [blame]	315	unsigned int kind:3;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	316	/* Compact is with respect to the allocation scheme. Compact unicode
				317	objects only require one memory block while non-compact objects use
				318	one block for the PyUnicodeObject struct and another for its data
				319	buffer. */
				320	unsigned int compact:1;
Victor Stinner	1d4b35f	2011-10-06 01:51:19 +0200	[diff] [blame]	321	/* The string only contains characters in range U+0000-U+007F (ASCII)
				322	and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
				323	set, use the PyASCIIObject structure. */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	324	unsigned int ascii:1;
				325	/* The ready flag indicates whether the object layout is initialized
				326	completely. This means that this is either a compact object, or
				327	the data pointer is filled out. The bit is redundant, and helps
				328	to minimize the test in PyUnicode_IS_READY(). */
				329	unsigned int ready:1;
				330	} state;
				331	wchar_t wstr; / wchar_t representation (null-terminated) */
				332	} PyASCIIObject;
				333
				334	/* Non-ASCII strings allocated through PyUnicode_New use the
Georg Brandl	c6bc4c6	2011-10-05 16:23:09 +0200	[diff] [blame]	335	PyCompactUnicodeObject structure. state.compact is set, and the data
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	336	immediately follow the structure. */
				337	typedef struct {
				338	PyASCIIObject _base;
				339	Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the
				340	* terminating \0. */
				341	char utf8; / UTF-8 representation (null-terminated) */
				342	Py_ssize_t wstr_length; /* Number of code points in wstr, possible
				343	* surrogates count as two code points. */
				344	} PyCompactUnicodeObject;
				345
				346	/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
				347	PyUnicodeObject structure. The actual string data is initially in the wstr
Victor Stinner	a3b334d	2011-10-03 13:53:37 +0200	[diff] [blame]	348	block, and copied into the data block using _PyUnicode_Ready. */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	349	typedef struct {
				350	PyCompactUnicodeObject _base;
				351	union {
				352	void *any;
				353	Py_UCS1 *latin1;
				354	Py_UCS2 *ucs2;
				355	Py_UCS4 *ucs4;
				356	} data; /* Canonical, smallest-form Unicode buffer */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	357	} PyUnicodeObject;
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	358	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	359
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	360	PyAPI_DATA(PyTypeObject) PyUnicode_Type;
Christian Heimes	a22e8bd	2007-11-29 22:35:39 +0000	[diff] [blame]	361	PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	362
Thomas Wouters	27d517b	2007-02-25 20:39:11 +0000	[diff] [blame]	363	#define PyUnicode_Check(op) \
Christian Heimes	90aa764	2007-12-19 02:45:37 +0000	[diff] [blame]	364	PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
				365	#define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	366
				367	/* Fast access macros */
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	368	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	369
				370	#define PyUnicode_WSTR_LENGTH(op) \
Victor Stinner	a3b334d	2011-10-03 13:53:37 +0200	[diff] [blame]	371	(PyUnicode_IS_COMPACT_ASCII(op) ? \
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	372	((PyASCIIObject*)op)->length : \
				373	((PyCompactUnicodeObject*)op)->wstr_length)
				374
				375	/* Returns the deprecated Py_UNICODE representation's size in code units
				376	(this includes surrogate pairs as 2 units).
				377	If the Py_UNICODE representation is not available, it will be computed
				378	on request. Use PyUnicode_GET_LENGTH() for the length in code points. */
				379
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	380	#define PyUnicode_GET_SIZE(op) \
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	381	(assert(PyUnicode_Check(op)), \
				382	(((PyASCIIObject *)(op))->wstr) ? \
				383	PyUnicode_WSTR_LENGTH(op) : \
				384	((void)PyUnicode_AsUnicode((PyObject *)(op)), \
				385	PyUnicode_WSTR_LENGTH(op)))
				386
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	387	#define PyUnicode_GET_DATA_SIZE(op) \
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	388	(PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE)
				389
				390	/* Alias for PyUnicode_AsUnicode(). This will create a wchar_t/Py_UNICODE
				391	representation on demand. Using this macro is very inefficient now,
				392	try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
				393	use PyUnicode_WRITE() and PyUnicode_READ(). */
				394
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	395	#define PyUnicode_AS_UNICODE(op) \
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	396	(assert(PyUnicode_Check(op)), \
				397	(((PyASCIIObject )(op))->wstr) ? (((PyASCIIObject )(op))->wstr) : \
				398	PyUnicode_AsUnicode((PyObject *)(op)))
				399
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	400	#define PyUnicode_AS_DATA(op) \
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	401	((const char *)(PyUnicode_AS_UNICODE(op)))
				402
				403
Georg Brandl	c6bc4c6	2011-10-05 16:23:09 +0200	[diff] [blame]	404	/* --- Flexible String Representation Helper Macros (PEP 393) -------------- */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	405
				406	/* Values for PyUnicodeObject.state: */
				407
				408	/* Interning state. */
				409	#define SSTATE_NOT_INTERNED 0
				410	#define SSTATE_INTERNED_MORTAL 1
				411	#define SSTATE_INTERNED_IMMORTAL 2
				412
/* Return true if the string contains only ASCII characters, or 0 if not. The
   string may be compact (PyUnicode_IS_COMPACT_ASCII) or not. No type checks
   or Ready calls are performed. */
#define PyUnicode_IS_ASCII(op)                   \
    (((PyASCIIObject*)(op))->state.ascii)

/* Return true if the string is compact or 0 if not.
   No type checks or Ready calls are performed. */
#define PyUnicode_IS_COMPACT(op) \
    (((PyASCIIObject*)(op))->state.compact)

/* Return true if the string is a compact ASCII string (use PyASCIIObject
   structure), or 0 if not.  No type checks or Ready calls are performed. */
#define PyUnicode_IS_COMPACT_ASCII(op)                 \
    (PyUnicode_IS_ASCII(op) && PyUnicode_IS_COMPACT(op))
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	428
/* Character storage width; the three *BYTE_KIND values equal the size in
   bytes of one stored character. */
enum PyUnicode_Kind {
/* String contains only wstr byte characters.  This is only possible
   when the string was created with a legacy API and _PyUnicode_Ready()
   has not been called yet.  */
    PyUnicode_WCHAR_KIND = 0,
/* Return values of the PyUnicode_KIND() macro: */
    PyUnicode_1BYTE_KIND = 1,
    PyUnicode_2BYTE_KIND = 2,
    PyUnicode_4BYTE_KIND = 4
};
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	439
/* Return pointers to the canonical representation cast to unsigned char,
   Py_UCS2, or Py_UCS4 for direct character access.
   No checks are performed, use PyUnicode_KIND() before to ensure
   these will work correctly. */

#define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op))
#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op))
#define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op))
				448
/* Return one of the PyUnicode_*_KIND values defined above.  Asserts that
   op is a unicode object and is ready. */
#define PyUnicode_KIND(op) \
    (assert(PyUnicode_Check(op)), \
     assert(PyUnicode_IS_READY(op)),            \
     ((PyASCIIObject *)(op))->state.kind)
				454
/* Return a void pointer to the raw unicode buffer. */

/* Compact objects: the characters start right after the structure, which
   is PyASCIIObject when the ascii flag is set and PyCompactUnicodeObject
   otherwise. */
#define _PyUnicode_COMPACT_DATA(op)                     \
    (PyUnicode_IS_ASCII(op) ?                           \
     ((void*)((PyASCIIObject*)(op) + 1)) :              \
     ((void*)((PyCompactUnicodeObject*)(op) + 1)))

/* Non-compact objects: the characters live in the separate data.any
   buffer, which is asserted to be non-NULL. */
#define _PyUnicode_NONCOMPACT_DATA(op)                  \
    (assert(((PyUnicodeObject*)(op))->data.any),        \
     ((((PyUnicodeObject *)(op))->data.any)))

/* Dispatch on the compact flag. */
#define PyUnicode_DATA(op) \
    (assert(PyUnicode_Check(op)), \
     PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) :   \
     _PyUnicode_NONCOMPACT_DATA(op))
				469
/* In the access macros below, "kind" may be evaluated more than once.
   All other macro parameters are evaluated exactly once, so it is safe
   to put side effects into them (such as increasing the index). */

/* Write into the canonical representation, this macro does not do any sanity
   checks and is intended for usage in loops.  The caller should cache the
   kind and data pointers obtained from other macro calls.
   index is the index in the string (starts at 0) and value is the new
   code point value which should be written to that location.
   The value is truncated by the cast to the kind's character type; any
   kind other than 1BYTE/2BYTE is asserted to be PyUnicode_4BYTE_KIND. */
#define PyUnicode_WRITE(kind, data, index, value) \
    do { \
        switch ((kind)) { \
        case PyUnicode_1BYTE_KIND: { \
            ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \
            break; \
        } \
        case PyUnicode_2BYTE_KIND: { \
            ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \
            break; \
        } \
        default: { \
            assert((kind) == PyUnicode_4BYTE_KIND); \
            ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \
        } \
        } \
    } while (0)
				496
/* Read a code point from the string's canonical representation.  No checks
   or ready calls are performed.  Any kind other than 1BYTE/2BYTE is read
   as Py_UCS4; kind may be evaluated twice, data and index once. */
#define PyUnicode_READ(kind, data, index) \
    ((Py_UCS4) \
    ((kind) == PyUnicode_1BYTE_KIND ? \
        ((const Py_UCS1 *)(data))[(index)] : \
        ((kind) == PyUnicode_2BYTE_KIND ? \
            ((const Py_UCS2 *)(data))[(index)] : \
            ((const Py_UCS4 *)(data))[(index)] \
        ) \
    ))
				508
				509	/* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it
				510	calls PyUnicode_KIND() and might call it twice. For single reads, use
				511	PyUnicode_READ_CHAR, for multiple consecutive reads callers should
				512	cache kind and use PyUnicode_READ instead. */
				513	#define PyUnicode_READ_CHAR(unicode, index) \
Victor Stinner	3794376	2011-10-02 20:33:18 +0200	[diff] [blame]	514	(assert(PyUnicode_Check(unicode)), \
				515	assert(PyUnicode_IS_READY(unicode)), \
				516	(Py_UCS4) \
				517	(PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \
				518	((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \
				519	(PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \
				520	((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \
				521	((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \
				522	) \
				523	))
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	524
				525	/* Returns the length of the unicode string. The caller has to make sure that
				526	the string has its canonical representation set before calling
				527	this macro. Call PyUnicode_(FAST_)Ready to ensure that. */
				528	#define PyUnicode_GET_LENGTH(op) \
				529	(assert(PyUnicode_Check(op)), \
				530	assert(PyUnicode_IS_READY(op)), \
				531	((PyASCIIObject *)(op))->length)
				532
				533
				534	/* Fast check to determine whether an object is ready. Equivalent to
				535	PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any) */
				536
				537	#define PyUnicode_IS_READY(op) (((PyASCIIObject*)(op))->state.ready) /* (op) parenthesized so an expression argument is cast as a whole */
				538
Victor Stinner	a3b334d	2011-10-03 13:53:37 +0200	[diff] [blame]	539	/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	540	case. If the canonical representation is not yet set, it will still call
Victor Stinner	a3b334d	2011-10-03 13:53:37 +0200	[diff] [blame]	541	_PyUnicode_Ready().
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	542	Returns 0 on success and -1 on errors. */
				543	#define PyUnicode_READY(op) \
				544	(assert(PyUnicode_Check(op)), \
				545	(PyUnicode_IS_READY(op) ? \
Victor Stinner	d8f6510	2011-09-29 19:43:17 +0200	[diff] [blame]	546	0 : _PyUnicode_Ready((PyObject *)(op))))
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	547
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	548	/* Return a maximum character value which is suitable for creating another
				549	string based on op. This is always an approximation but more efficient
Georg Brandl	c6bc4c6	2011-10-05 16:23:09 +0200	[diff] [blame]	550	than iterating over the string. */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	551	#define PyUnicode_MAX_CHAR_VALUE(op) \
				552	(assert(PyUnicode_IS_READY(op)), \
Victor Stinner	8813104	2011-10-13 01:12:01 +0200	[diff] [blame]	553	(PyUnicode_IS_ASCII(op) ? \
				554	(0x7f) : \
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	555	(PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ? \
Victor Stinner	8813104	2011-10-13 01:12:01 +0200	[diff] [blame]	556	(0xffU) : \
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	557	(PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ? \
Victor Stinner	8813104	2011-10-13 01:12:01 +0200	[diff] [blame]	558	(0xffffU) : \
				559	(0x10ffffU)))))
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	560
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	561	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	562
				563	/* --- Constants ---------------------------------------------------------- */
				564
				565	/* This Unicode character will be used as replacement character during
				566	decoding if the errors argument is set to "replace". Note: the
				567	Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
				568	Unicode 3.0. */
				569
Victor Stinner	5ce1b0d	2011-09-28 20:29:27 +0200	[diff] [blame]	570	#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD)
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	571
				572	/* === Public API ========================================================= */
				573
				574	/* --- Plain Py_UNICODE --------------------------------------------------- */
				575
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	576	/* With PEP 393, this is the recommended way to allocate a new unicode object.
				577	This function will allocate the object and its buffer in a single memory
				578	block. Objects created using this function are not resizable. */
				579	#ifndef Py_LIMITED_API
				580	PyAPI_FUNC(PyObject*) PyUnicode_New(
				581	Py_ssize_t size, /* Number of code points in the new string */
				582	Py_UCS4 maxchar /* maximum code point value in the string */
				583	);
				584	#endif
				585
Victor Stinner	d8f6510	2011-09-29 19:43:17 +0200	[diff] [blame]	586	/* Initializes the canonical string representation from the deprecated
				587	wstr/Py_UNICODE representation. This function is used to convert Unicode
				588	objects which were created using the old API to the new flexible format
				589	introduced with PEP 393.
				590
				591	Don't call this function directly, use the public PyUnicode_READY() macro
				592	instead. */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	593	#ifndef Py_LIMITED_API
				594	PyAPI_FUNC(int) _PyUnicode_Ready(
Victor Stinner	d8f6510	2011-09-29 19:43:17 +0200	[diff] [blame]	595	PyObject *unicode /* Unicode object */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	596	);
				597	#endif
				598
Victor Stinner	034f6cf	2011-09-30 02:26:44 +0200	[diff] [blame]	599	/* Get a copy of a Unicode string. */
				600	PyAPI_FUNC(PyObject*) PyUnicode_Copy(
				601	PyObject *unicode
				602	);
				603
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	604	/* Copy characters from one unicode object into another, this function performs
Victor Stinner	be78eaf	2011-09-28 21:37:03 +0200	[diff] [blame]	605	character conversion when necessary and falls back to memcpy if possible.
				606
Victor Stinner	a0702ab	2011-09-29 14:14:38 +0200	[diff] [blame]	607	Fail if to is too small (smaller than how_many or smaller than
				608	len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
				609	kind(to), or if to has more than 1 reference.
Victor Stinner	be78eaf	2011-09-28 21:37:03 +0200	[diff] [blame]	610
				611	Return the number of written characters, or return -1 and raise an exception
				612	on error.
				613
				614	Pseudo-code:
				615
				616	how_many = min(how_many, len(from) - from_start)
				617	to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
				618	return how_many
Victor Stinner	a0702ab	2011-09-29 14:14:38 +0200	[diff] [blame]	619
				620	Note: The function doesn't write a terminating null character.
Victor Stinner	be78eaf	2011-09-28 21:37:03 +0200	[diff] [blame]	621	*/
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	622	#ifndef Py_LIMITED_API
Victor Stinner	be78eaf	2011-09-28 21:37:03 +0200	[diff] [blame]	623	PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	624	PyObject *to,
				625	Py_ssize_t to_start,
				626	PyObject *from,
				627	Py_ssize_t from_start,
				628	Py_ssize_t how_many
				629	);
				630	#endif
				631
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	632	/* Create a Unicode Object from the Py_UNICODE buffer u of the given
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	633	size.
Marc-André Lemburg	8155e0e	2001-04-23 14:44:21 +0000	[diff] [blame]	634
				635	u may be NULL which causes the contents to be undefined. It is the
				636	user's responsibility to fill in the needed data afterwards. Note
				637	that modifying the Unicode object contents after construction is
				638	only allowed if u was set to NULL.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	639
				640	The buffer is copied into the new object. */
				641
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	642	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	643	PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	644	const Py_UNICODE *u, /* Unicode buffer */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	645	Py_ssize_t size /* size of buffer */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	646	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	647	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	648
Georg Brandl	952867a	2010-06-27 10:17:12 +0000	[diff] [blame]	649	/* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */
Walter Dörwald	d203431	2007-05-18 16:29:38 +0000	[diff] [blame]	650	PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
Victor Stinner	0d71116	2010-12-27 02:39:20 +0000	[diff] [blame]	651	const char *u, /* UTF-8 encoded string */
Victor Stinner	dc2081f	2010-12-27 01:49:29 +0000	[diff] [blame]	652	Py_ssize_t size /* size of buffer */
Walter Dörwald	d203431	2007-05-18 16:29:38 +0000	[diff] [blame]	653	);
				654
Walter Dörwald	acaa5a1	2007-05-05 12:00:46 +0000	[diff] [blame]	655	/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	656	UTF-8 encoded bytes. The size is determined with strlen(). */
Walter Dörwald	acaa5a1	2007-05-05 12:00:46 +0000	[diff] [blame]	657	PyAPI_FUNC(PyObject*) PyUnicode_FromString(
Victor Stinner	dc2081f	2010-12-27 01:49:29 +0000	[diff] [blame]	658	const char *u /* UTF-8 encoded string */
Walter Dörwald	acaa5a1	2007-05-05 12:00:46 +0000	[diff] [blame]	659	);
				660
Victor Stinner	b9275c1	2011-10-05 14:01:42 +0200	[diff] [blame]	661	/* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.
				662	Scan the string to find the maximum character. */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	663	#ifndef Py_LIMITED_API
				664	PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
				665	int kind,
				666	const void *buffer,
				667	Py_ssize_t size);
				668	#endif
				669
				670	PyAPI_FUNC(PyObject*) PyUnicode_Substring(
				671	PyObject *str,
				672	Py_ssize_t start,
				673	Py_ssize_t end);
				674
Georg Brandl	db6c7f5	2011-10-07 11:19:11 +0200	[diff] [blame]	675	/* Copy the string into a UCS4 buffer including the null character if copy_null
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	676	is set. Return NULL and raise an exception on error. Raise a ValueError if
				677	the buffer is smaller than the string. Return buffer on success.
				678
				679	buflen is the length of the buffer in (Py_UCS4) characters. */
				680	PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4(
				681	PyObject *unicode,
				682	Py_UCS4* buffer,
				683	Py_ssize_t buflen,
				684	int copy_null);
				685
				686	/* Copy the string into a UCS4 buffer. A new buffer is allocated using
				687	* PyMem_Malloc; if this fails, NULL is returned with a memory error
				688	exception set. */
				689	PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode);
				690
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	691	/* Return a read-only pointer to the Unicode object's internal
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	692	Py_UNICODE buffer.
				693	If the wchar_t/Py_UNICODE representation is not yet available, this
				694	function will calculate it. */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	695
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	696	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	697	PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	698	PyObject *unicode /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	699	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	700	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	701
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	702	/* Return a read-only pointer to the Unicode object's internal
				703	Py_UNICODE buffer and save the length at size.
				704	If the wchar_t/Py_UNICODE representation is not yet available, this
				705	function will calculate it. */
				706
				707	#ifndef Py_LIMITED_API
				708	PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
				709	PyObject *unicode, /* Unicode object */
				710	Py_ssize_t *size /* location where to save the length */
				711	);
				712	#endif
				713
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	714	/* Get the length of the Unicode object. */
				715
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	716	PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength(
				717	PyObject *unicode
				718	);
				719
Victor Stinner	157f83f	2011-09-28 21:41:31 +0200	[diff] [blame]	720	/* Get the number of Py_UNICODE units in the
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	721	string representation. */
				722
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	723	PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	724	PyObject *unicode /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	725	);
				726
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	727	/* Read a character from the string. */
				728
				729	PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar(
				730	PyObject *unicode,
				731	Py_ssize_t index
				732	);
				733
				734	/* Write a character to the string. The string must have been created through
Victor Stinner	cd9950f	2011-10-02 00:34:53 +0200	[diff] [blame]	735	PyUnicode_New, must not be shared, and must not have been hashed yet.
				736
				737	Return 0 on success, -1 on error. */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	738
				739	PyAPI_FUNC(int) PyUnicode_WriteChar(
				740	PyObject *unicode,
				741	Py_ssize_t index,
				742	Py_UCS4 character
				743	);
				744
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	745	#ifndef Py_LIMITED_API
Martin v. Löwis	ce9b5a5	2001-06-27 06:28:56 +0000	[diff] [blame]	746	/* Get the maximum ordinal for a Unicode character. */
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	747	PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	748	#endif
Martin v. Löwis	ce9b5a5	2001-06-27 06:28:56 +0000	[diff] [blame]	749
Victor Stinner	8cfcbed	2011-10-03 23:19:21 +0200	[diff] [blame]	750	/* Resize an Unicode object allocated by the legacy API (e.g.
				751	PyUnicode_FromUnicode). Unicode objects allocated by the new API (e.g.
				752	PyUnicode_New) cannot be resized by this function.
				753
				754	The length is a number of Py_UNICODE characters (and not the number of code
				755	points).
Guido van Rossum	52c2359	2000-04-10 13:41:41 +0000	[diff] [blame]	756
				757	*unicode is modified to point to the new (resized) object and 0
				758	returned on success.
				759
Victor Stinner	8cfcbed	2011-10-03 23:19:21 +0200	[diff] [blame]	760	If the refcount on the object is 1, the function resizes the string in
				761	place, which is usually faster than allocating a new string (and copy
				762	characters).
Guido van Rossum	52c2359	2000-04-10 13:41:41 +0000	[diff] [blame]	763
				764	Error handling is implemented as follows: an exception is set, -1
Victor Stinner	8cfcbed	2011-10-03 23:19:21 +0200	[diff] [blame]	765	is returned and *unicode left untouched. */
Guido van Rossum	52c2359	2000-04-10 13:41:41 +0000	[diff] [blame]	766
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	767	PyAPI_FUNC(int) PyUnicode_Resize(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	768	PyObject **unicode, /* Pointer to the Unicode object */
				769	Py_ssize_t length /* New length */
Guido van Rossum	52c2359	2000-04-10 13:41:41 +0000	[diff] [blame]	770	);
				771
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	772	/* Coerce obj to an Unicode object and return a reference with
				773	incremented refcount.
				774
				775	Coercion is done in the following way:
				776
Georg Brandl	952867a	2010-06-27 10:17:12 +0000	[diff] [blame]	777	1. bytes, bytearray and other char buffer compatible objects are decoded
Alexander Belopolsky	83283c2	2010-11-16 14:29:01 +0000	[diff] [blame]	778	under the assumptions that they contain data using the UTF-8
				779	encoding. Decoding is done in "strict" mode.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	780
Guido van Rossum	b8c65bc	2001-10-19 02:01:31 +0000	[diff] [blame]	781	2. All other objects (including Unicode objects) raise an
				782	exception.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	783
				784	The API returns NULL in case of an error. The caller is responsible
				785	for decref'ing the returned objects.
				786
				787	*/
				788
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	789	PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	790	register PyObject *obj, /* Object */
Marc-André Lemburg	5a5c81a	2000-07-07 13:46:42 +0000	[diff] [blame]	791	const char *encoding, /* encoding */
				792	const char *errors /* error handling */
				793	);
				794
Guido van Rossum	b8c65bc	2001-10-19 02:01:31 +0000	[diff] [blame]	795	/* Coerce obj to an Unicode object and return a reference with
Marc-André Lemburg	5a5c81a	2000-07-07 13:46:42 +0000	[diff] [blame]	796	incremented refcount.
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	797
Guido van Rossum	b8c65bc	2001-10-19 02:01:31 +0000	[diff] [blame]	798	Unicode objects are passed back as-is (subclasses are converted to
				799	true Unicode objects), all other objects are delegated to
				800	PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
Georg Brandl	952867a	2010-06-27 10:17:12 +0000	[diff] [blame]	801	using UTF-8 encoding as basis for decoding the object.
Marc-André Lemburg	5a5c81a	2000-07-07 13:46:42 +0000	[diff] [blame]	802
				803	The API returns NULL in case of an error. The caller is responsible
				804	for decref'ing the returned objects.
				805
				806	*/
				807
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	808	PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	809	register PyObject *obj /* Object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	810	);
				811
Victor Stinner	1205f27	2010-09-11 00:54:47 +0000	[diff] [blame]	812	PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(
				813	const char *format, /* ASCII-encoded string */
				814	va_list vargs
				815	);
				816	PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
				817	const char *format, /* ASCII-encoded string */
				818	...
				819	);
Walter Dörwald	d203431	2007-05-18 16:29:38 +0000	[diff] [blame]	820
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	821	#ifndef Py_LIMITED_API
Eric Smith	4a7d76d	2008-05-30 18:10:19 +0000	[diff] [blame]	822	/* Format the object based on the format_spec, as defined in PEP 3101
				823	(Advanced String Formatting). */
				824	PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	825	PyObject *format_spec,
				826	Py_ssize_t start,
				827	Py_ssize_t end);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	828	#endif
Eric Smith	4a7d76d	2008-05-30 18:10:19 +0000	[diff] [blame]	829
Walter Dörwald	1680713	2007-05-25 13:52:07 +0000	[diff] [blame]	830	PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
				831	PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
Victor Stinner	dc2081f	2010-12-27 01:49:29 +0000	[diff] [blame]	832	PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
				833	const char *u /* UTF-8 encoded string */
				834	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	835	#ifndef Py_LIMITED_API
Walter Dörwald	1680713	2007-05-25 13:52:07 +0000	[diff] [blame]	836	PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	837	#endif
Walter Dörwald	1680713	2007-05-25 13:52:07 +0000	[diff] [blame]	838
				839	/* Use only if you know it's a string */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	840	#define PyUnicode_CHECK_INTERNED(op) \
				841	(((PyASCIIObject *)(op))->state.interned)
Walter Dörwald	1680713	2007-05-25 13:52:07 +0000	[diff] [blame]	842
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	843	/* --- wchar_t support for platforms which support it --------------------- */
				844
				845	#ifdef HAVE_WCHAR_H
				846
Georg Brandl	952867a	2010-06-27 10:17:12 +0000	[diff] [blame]	847	/* Create a Unicode Object from the wchar_t buffer w of the given
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	848	size.
				849
				850	The buffer is copied into the new object. */
				851
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	852	PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	853	register const wchar_t *w, /* wchar_t buffer */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	854	Py_ssize_t size /* size of buffer */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	855	);
				856
Marc-André Lemburg	a9cadcd	2004-11-22 13:02:31 +0000	[diff] [blame]	857	/* Copies the Unicode Object contents into the wchar_t buffer w. At
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	858	most size wchar_t characters are copied.
				859
Marc-André Lemburg	a9cadcd	2004-11-22 13:02:31 +0000	[diff] [blame]	860	Note that the resulting wchar_t string may or may not be
				861	0-terminated. It is the responsibility of the caller to make sure
				862	that the wchar_t string is 0-terminated in case this is required by
				863	the application.
				864
				865	Returns the number of wchar_t characters copied (excluding a
				866	possibly trailing 0-termination character) or -1 in case of an
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	867	error. */
				868
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	869	PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	870	PyObject *unicode, /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	871	register wchar_t *w, /* wchar_t buffer */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	872	Py_ssize_t size /* size of buffer */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	873	);
				874
Victor Stinner	137c34c	2010-09-29 10:25:54 +0000	[diff] [blame]	875	/* Convert the Unicode object to a wide character string. The output string
				876	always ends with a nul character. If size is not NULL, write the number of
Victor Stinner	d88d983	2011-09-06 02:00:05 +0200	[diff] [blame]	877	wide characters (excluding the null character) into *size.
Victor Stinner	137c34c	2010-09-29 10:25:54 +0000	[diff] [blame]	878
				879	Returns a buffer allocated by PyMem_Alloc() (use PyMem_Free() to free it)
				880	on success. On error, returns NULL, *size is undefined and raises a
				881	MemoryError. */
				882
				883	PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
Victor Stinner	beb4135b	2010-10-07 01:02:42 +0000	[diff] [blame]	884	PyObject *unicode, /* Unicode object */
Victor Stinner	137c34c	2010-09-29 10:25:54 +0000	[diff] [blame]	885	Py_ssize_t *size /* number of characters of the result */
				886	);
				887
Victor Stinner	9f789e7	2011-10-01 03:57:28 +0200	[diff] [blame]	888	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	889	PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);
Victor Stinner	9f789e7	2011-10-01 03:57:28 +0200	[diff] [blame]	890	#endif
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	891
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	892	#endif
				893
Marc-André Lemburg	cc8764c	2002-08-11 12:23:04 +0000	[diff] [blame]	894	/* --- Unicode ordinals --------------------------------------------------- */
				895
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	896	/* Create a Unicode Object from the given Unicode code point ordinal.
				897
Marc-André Lemburg	cc8764c	2002-08-11 12:23:04 +0000	[diff] [blame]	898	The ordinal must be in range(0x10000) on narrow Python builds
				899	(UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
				900	raised in case it is not.
				901
				902	*/
				903
Marc-André Lemburg	9c329de	2002-08-12 08:19:10 +0000	[diff] [blame]	904	PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
Marc-André Lemburg	cc8764c	2002-08-11 12:23:04 +0000	[diff] [blame]	905
Benjamin Peterson	960cf0f	2009-01-09 04:11:44 +0000	[diff] [blame]	906	/* --- Free-list management ----------------------------------------------- */
				907
				908	/* Clear the free list used by the Unicode implementation.
				909
				910	This can be used to release memory used for objects on the free
				911	list back to the Python memory allocator.
				912
				913	*/
				914
				915	PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
				916
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	917	/* === Builtin Codecs =====================================================
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	918
				919	Many of these APIs take two arguments encoding and errors. These
				920	parameters encoding and errors have the same semantics as the ones
Alexander Belopolsky	83283c2	2010-11-16 14:29:01 +0000	[diff] [blame]	921	of the builtin str() API.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	922
Georg Brandl	952867a	2010-06-27 10:17:12 +0000	[diff] [blame]	923	Setting encoding to NULL causes the default encoding (UTF-8) to be used.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	924
				925	Error handling is set by errors which may also be set to NULL
				926	meaning to use the default handling defined for the codec. Default
				927	error handling for all builtin codecs is "strict" (ValueErrors are
				928	raised).
				929
				930	The codecs all use a similar interface. Only deviations from the
				931	generic ones are documented.
				932
				933	*/
				934
Fred Drake	cb093fe	2000-05-09 19:51:53 +0000	[diff] [blame]	935	/* --- Manage the default encoding ---------------------------------------- */
				936
Alexander Belopolsky	83283c2	2010-11-16 14:29:01 +0000	[diff] [blame]	937	/* Returns a pointer to the default encoding (UTF-8) of the
Marc-André Lemburg	9155aa7	2008-04-29 11:14:08 +0000	[diff] [blame]	938	Unicode object unicode and the size of the encoded representation
				939	in bytes stored in *size.
Christian Heimes	5894ba7	2007-11-04 11:43:14 +0000	[diff] [blame]	940
Marc-André Lemburg	9155aa7	2008-04-29 11:14:08 +0000	[diff] [blame]	941	In case of an error, no *size is set.
Guido van Rossum	7d1df6c	2007-08-29 13:53:23 +0000	[diff] [blame]	942
Georg Brandl	c6bc4c6	2011-10-05 16:23:09 +0200	[diff] [blame]	943	This function caches the UTF-8 encoded string in the unicodeobject
				944	and subsequent calls will return the same string. The memory is released
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	945	when the unicodeobject is deallocated.
				946
				947	_PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to
				948	support the previous internal function with the same behaviour.
				949
Marc-André Lemburg	4cc0f24	2008-08-07 18:54:33 +0000	[diff] [blame]	950	*** This API is for interpreter INTERNAL USE ONLY and will likely
Alexander Belopolsky	83283c2	2010-11-16 14:29:01 +0000	[diff] [blame]	951	*** be removed or changed in the future.
Marc-André Lemburg	4cc0f24	2008-08-07 18:54:33 +0000	[diff] [blame]	952
				953	*** If you need to access the Unicode object as UTF-8 bytes string,
				954	*** please use PyUnicode_AsUTF8String() instead.
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	955	*/
				956
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	957	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	958	PyAPI_FUNC(char *) PyUnicode_AsUTF8AndSize(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	959	PyObject *unicode,
Marc-André Lemburg	9155aa7	2008-04-29 11:14:08 +0000	[diff] [blame]	960	Py_ssize_t *size);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	961	#define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	962	#endif
Guido van Rossum	7d1df6c	2007-08-29 13:53:23 +0000	[diff] [blame]	963
Alexander Belopolsky	83283c2	2010-11-16 14:29:01 +0000	[diff] [blame]	964	/* Returns a pointer to the default encoding (UTF-8) of the
Marc-André Lemburg	9155aa7	2008-04-29 11:14:08 +0000	[diff] [blame]	965	Unicode object unicode.
Guido van Rossum	7d1df6c	2007-08-29 13:53:23 +0000	[diff] [blame]	966
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	967	Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
				968	in the unicodeobject.
				969
				970	_PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
				971	support the previous internal function with the same behaviour.
				972
Marc-André Lemburg	9155aa7	2008-04-29 11:14:08 +0000	[diff] [blame]	973	Use of this API is DEPRECATED since no size information can be
Marc-André Lemburg	4cc0f24	2008-08-07 18:54:33 +0000	[diff] [blame]	974	extracted from the returned data.
				975
				976	*** This API is for interpreter INTERNAL USE ONLY and will likely
				977	*** be removed or changed for Python 3.1.
				978
				979	*** If you need to access the Unicode object as UTF-8 bytes string,
				980	*** please use PyUnicode_AsUTF8String() instead.
Guido van Rossum	7d1df6c	2007-08-29 13:53:23 +0000	[diff] [blame]	981
Marc-André Lemburg	9155aa7	2008-04-29 11:14:08 +0000	[diff] [blame]	982	*/
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	983
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	984	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	985	PyAPI_FUNC(char *) PyUnicode_AsUTF8(PyObject *unicode);
				986	#define _PyUnicode_AsString PyUnicode_AsUTF8
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	987	#endif
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	988
Alexander Belopolsky	83283c2	2010-11-16 14:29:01 +0000	[diff] [blame]	989	/* Returns "utf-8". */
Fred Drake	cb093fe	2000-05-09 19:51:53 +0000	[diff] [blame]	990
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	991	PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
Fred Drake	cb093fe	2000-05-09 19:51:53 +0000	[diff] [blame]	992
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	993	/* --- Generic Codecs ----------------------------------------------------- */
				994
				995	/* Create a Unicode object by decoding the encoded string s of the
				996	given size. */
				997
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	998	PyAPI_FUNC(PyObject*) PyUnicode_Decode(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	999	const char *s, /* encoded string */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1000	Py_ssize_t size, /* size of buffer */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1001	const char *encoding, /* encoding */
				1002	const char *errors /* error handling */
				1003	);
				1004
Marc-André Lemburg	b2750b5	2008-06-06 12:18:17 +0000	[diff] [blame]	1005	/* Decode a Unicode object unicode and return the result as Python
				1006	object. */
				1007
				1008	PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1009	PyObject *unicode, /* Unicode object */
				1010	const char *encoding, /* encoding */
				1011	const char *errors /* error handling */
Marc-André Lemburg	b2750b5	2008-06-06 12:18:17 +0000	[diff] [blame]	1012	);
				1013
				1014	/* Decode a Unicode object unicode and return the result as Unicode
				1015	object. */
				1016
				1017	PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1018	PyObject *unicode, /* Unicode object */
				1019	const char *encoding, /* encoding */
				1020	const char *errors /* error handling */
Marc-André Lemburg	b2750b5	2008-06-06 12:18:17 +0000	[diff] [blame]	1021	);
				1022
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1023	/* Encodes a Py_UNICODE buffer of the given size and returns a
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1024	Python string object. */
				1025
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1026	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1027	PyAPI_FUNC(PyObject*) PyUnicode_Encode(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1028	const Py_UNICODE *s, /* Unicode char buffer */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1029	Py_ssize_t size, /* number of Py_UNICODE chars to encode */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1030	const char *encoding, /* encoding */
				1031	const char *errors /* error handling */
				1032	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1033	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1034
Marc-André Lemburg	d2d4598	2004-07-08 17:57:32 +0000	[diff] [blame]	1035	/* Encodes a Unicode object and returns the result as Python
				1036	object. */
				1037
				1038	PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1039	PyObject *unicode, /* Unicode object */
				1040	const char *encoding, /* encoding */
				1041	const char *errors /* error handling */
Marc-André Lemburg	d2d4598	2004-07-08 17:57:32 +0000	[diff] [blame]	1042	);
				1043
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1044	/* Encodes a Unicode object and returns the result as Python string
				1045	object. */
				1046
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1047	PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1048	PyObject *unicode, /* Unicode object */
				1049	const char *encoding, /* encoding */
				1050	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1051	);
				1052
Marc-André Lemburg	b2750b5	2008-06-06 12:18:17 +0000	[diff] [blame]	1053	/* Encodes a Unicode object and returns the result as Unicode
				1054	object. */
				1055
				1056	PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1057	PyObject *unicode, /* Unicode object */
				1058	const char *encoding, /* encoding */
				1059	const char *errors /* error handling */
Marc-André Lemburg	b2750b5	2008-06-06 12:18:17 +0000	[diff] [blame]	1060	);
				1061
				1062	/* Build an encoding map. */
				1063
Thomas Wouters	73e5a5b	2006-06-08 15:35:45 +0000	[diff] [blame]	1064	PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
				1065	PyObject* string /* 256 character map */
				1066	);
				1067
Marc-André Lemburg	c60e6f7	2001-09-20 10:35:46 +0000	[diff] [blame]	1068	/* --- UTF-7 Codecs ------------------------------------------------------- */
				1069
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1070	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1071	const char *string, /* UTF-7 encoded string */
				1072	Py_ssize_t length, /* size of string */
				1073	const char *errors /* error handling */
Marc-André Lemburg	c60e6f7	2001-09-20 10:35:46 +0000	[diff] [blame]	1074	);
				1075
Christian Heimes	5d14c2b	2007-11-20 23:38:09 +0000	[diff] [blame]	1076	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1077	const char *string, /* UTF-7 encoded string */
				1078	Py_ssize_t length, /* size of string */
				1079	const char *errors, /* error handling */
				1080	Py_ssize_t *consumed /* bytes consumed */
Christian Heimes	5d14c2b	2007-11-20 23:38:09 +0000	[diff] [blame]	1081	);
				1082
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1083	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1084	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1085	const Py_UNICODE *data, /* Unicode char buffer */
				1086	Py_ssize_t length, /* number of Py_UNICODE chars to encode */
				1087	int base64SetO, /* Encode RFC2152 Set O characters in base64 */
				1088	int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
				1089	const char *errors /* error handling */
Marc-André Lemburg	c60e6f7	2001-09-20 10:35:46 +0000	[diff] [blame]	1090	);
Martin v. Löwis	1db7c13	2011-11-10 18:24:32 +0100	[diff] [blame]	1091	PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
				1092	PyObject *unicode, /* Unicode object */
				1093	int base64SetO, /* Encode RFC2152 Set O characters in base64 */
				1094	int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
				1095	const char *errors /* error handling */
				1096	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1097	#endif
Marc-André Lemburg	c60e6f7	2001-09-20 10:35:46 +0000	[diff] [blame]	1098
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1099	/* --- UTF-8 Codecs ------------------------------------------------------- */
				1100
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1101	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1102	const char *string, /* UTF-8 encoded string */
				1103	Py_ssize_t length, /* size of string */
				1104	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1105	);
				1106
Walter Dörwald	6965203	2004-09-07 20:24:22 +0000	[diff] [blame]	1107	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1108	const char *string, /* UTF-8 encoded string */
				1109	Py_ssize_t length, /* size of string */
				1110	const char *errors, /* error handling */
				1111	Py_ssize_t *consumed /* bytes consumed */
Walter Dörwald	6965203	2004-09-07 20:24:22 +0000	[diff] [blame]	1112	);
				1113
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1114	PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1115	PyObject *unicode /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1116	);
				1117
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1118	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1119	PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
				1120	PyObject *unicode,
				1121	const char *errors);
				1122
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1123	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1124	const Py_UNICODE *data, /* Unicode char buffer */
				1125	Py_ssize_t length, /* number of Py_UNICODE chars to encode */
				1126	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1127	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1128	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1129
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1130	/* --- UTF-32 Codecs ------------------------------------------------------ */
				1131
				1132	/* Decodes length bytes from a UTF-32 encoded buffer string and returns
				1133	the corresponding Unicode object.
				1134
				1135	errors (if non-NULL) defines the error handling. It defaults
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1136	to "strict".
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1137
				1138	If byteorder is non-NULL, the decoder starts decoding using the
				1139	given byte order:
				1140
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1141	*byteorder == -1: little endian
				1142	*byteorder == 0: native order
				1143	*byteorder == 1: big endian
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1144
				1145	In native mode, the first four bytes of the stream are checked for a
				1146	BOM mark. If found, the BOM mark is analysed, the byte order
				1147	adjusted and the BOM skipped. In the other modes, no BOM mark
				1148	interpretation is done. After completion, *byteorder is set to the
				1149	current byte order at the end of input data.
				1150
				1151	If byteorder is NULL, the codec starts in native order mode.
				1152
				1153	*/
				1154
				1155	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1156	const char *string, /* UTF-32 encoded string */
				1157	Py_ssize_t length, /* size of string */
				1158	const char *errors, /* error handling */
				1159	int *byteorder /* pointer to byteorder to use
				1160	0=native;-1=LE,1=BE; updated on
				1161	exit */
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1162	);
				1163
				1164	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1165	const char *string, /* UTF-32 encoded string */
				1166	Py_ssize_t length, /* size of string */
				1167	const char *errors, /* error handling */
				1168	int *byteorder, /* pointer to byteorder to use
				1169	0=native;-1=LE,1=BE; updated on
				1170	exit */
				1171	Py_ssize_t *consumed /* bytes consumed */
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1172	);
				1173
				1174	/* Returns a Python string using the UTF-32 encoding in native byte
				1175	order. The string always starts with a BOM mark. */
				1176
				1177	PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1178	PyObject *unicode /* Unicode object */
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1179	);
				1180
				1181	/* Returns a Python string object holding the UTF-32 encoded value of
				1182	the Unicode data.
				1183
				1184	If byteorder is not 0, output is written according to the following
				1185	byte order:
				1186
				1187	byteorder == -1: little endian
				1188	byteorder == 0: native byte order (writes a BOM mark)
				1189	byteorder == 1: big endian
				1190
				1191	If byteorder is 0, the output string will always start with the
				1192	Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
				1193	prepended.
				1194
				1195	*/
				1196
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1197	#ifndef Py_LIMITED_API
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1198	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1199	const Py_UNICODE *data, /* Unicode char buffer */
				1200	Py_ssize_t length, /* number of Py_UNICODE chars to encode */
				1201	const char *errors, /* error handling */
				1202	int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1203	);
Martin v. Löwis	1db7c13	2011-11-10 18:24:32 +0100	[diff] [blame]	1204	PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
				1205	PyObject *object, /* Unicode object */
				1206	const char *errors, /* error handling */
				1207	int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
				1208	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1209	#endif
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1210
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1211	/* --- UTF-16 Codecs ------------------------------------------------------ */
				1212
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	1213	/* Decodes length bytes from a UTF-16 encoded buffer string and returns
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1214	the corresponding Unicode object.
				1215
				1216	errors (if non-NULL) defines the error handling. It defaults
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1217	to "strict".
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1218
				1219	If byteorder is non-NULL, the decoder starts decoding using the
				1220	given byte order:
				1221
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1222	*byteorder == -1: little endian
				1223	*byteorder == 0: native order
				1224	*byteorder == 1: big endian
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1225
Marc-André Lemburg	489b56e	2001-05-21 20:30:15 +0000	[diff] [blame]	1226	In native mode, the first two bytes of the stream are checked for a
				1227	BOM mark. If found, the BOM mark is analysed, the byte order
				1228	adjusted and the BOM skipped. In the other modes, no BOM mark
				1229	interpretation is done. After completion, *byteorder is set to the
				1230	current byte order at the end of input data.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1231
				1232	If byteorder is NULL, the codec starts in native order mode.
				1233
				1234	*/
				1235
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1236	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1237	const char *string, /* UTF-16 encoded string */
				1238	Py_ssize_t length, /* size of string */
				1239	const char *errors, /* error handling */
				1240	int *byteorder /* pointer to byteorder to use
				1241	0=native;-1=LE,1=BE; updated on
				1242	exit */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1243	);
				1244
Walter Dörwald	6965203	2004-09-07 20:24:22 +0000	[diff] [blame]	1245	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1246	const char *string, /* UTF-16 encoded string */
				1247	Py_ssize_t length, /* size of string */
				1248	const char *errors, /* error handling */
				1249	int *byteorder, /* pointer to byteorder to use
				1250	0=native;-1=LE,1=BE; updated on
				1251	exit */
				1252	Py_ssize_t *consumed /* bytes consumed */
Walter Dörwald	6965203	2004-09-07 20:24:22 +0000	[diff] [blame]	1253	);
				1254
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1255	/* Returns a Python string using the UTF-16 encoding in native byte
				1256	order. The string always starts with a BOM mark. */
				1257
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1258	PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1259	PyObject *unicode /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1260	);
				1261
				1262	/* Returns a Python string object holding the UTF-16 encoded value of
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	1263	the Unicode data.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1264
				1265	If byteorder is not 0, output is written according to the following
				1266	byte order:
				1267
				1268	byteorder == -1: little endian
				1269	byteorder == 0: native byte order (writes a BOM mark)
				1270	byteorder == 1: big endian
				1271
				1272	If byteorder is 0, the output string will always start with the
				1273	Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
				1274	prepended.
				1275
				1276	Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
				1277	UCS-2. This trick makes it possible to add full UTF-16 capabilities
Thomas Wouters	7e47402	2000-07-16 12:04:32 +0000	[diff] [blame]	1278	at a later point without compromising the APIs.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1279
				1280	*/
				1281
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1282	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1283	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1284	const Py_UNICODE *data, /* Unicode char buffer */
				1285	Py_ssize_t length, /* number of Py_UNICODE chars to encode */
				1286	const char *errors, /* error handling */
				1287	int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1288	);
Martin v. Löwis	1db7c13	2011-11-10 18:24:32 +0100	[diff] [blame]	1289	PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
				1290	PyObject* unicode, /* Unicode object */
				1291	const char *errors, /* error handling */
				1292	int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
				1293	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1294	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1295
				1296	/* --- Unicode-Escape Codecs ---------------------------------------------- */
				1297
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1298	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1299	const char *string, /* Unicode-Escape encoded string */
				1300	Py_ssize_t length, /* size of string */
				1301	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1302	);
				1303
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1304	PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1305	PyObject *unicode /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1306	);
				1307
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1308	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1309	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1310	const Py_UNICODE *data, /* Unicode char buffer */
				1311	Py_ssize_t length /* Number of Py_UNICODE chars to encode */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1312	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1313	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1314
				1315	/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
				1316
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1317	PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1318	const char *string, /* Raw-Unicode-Escape encoded string */
				1319	Py_ssize_t length, /* size of string */
				1320	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1321	);
				1322
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1323	PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1324	PyObject *unicode /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1325	);
				1326
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1327	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1328	PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1329	const Py_UNICODE *data, /* Unicode char buffer */
				1330	Py_ssize_t length /* Number of Py_UNICODE chars to encode */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1331	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1332	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1333
Walter Dörwald	a47d1c0	2005-08-30 10:23:14 +0000	[diff] [blame]	1334	/* --- Unicode Internal Codec ---------------------------------------------
				1335
				1336	Only for internal use in _codecsmodule.c */
				1337
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1338	#ifndef Py_LIMITED_API
Walter Dörwald	a47d1c0	2005-08-30 10:23:14 +0000	[diff] [blame]	1339	PyObject *_PyUnicode_DecodeUnicodeInternal(
				1340	const char *string,
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1341	Py_ssize_t length,
Walter Dörwald	a47d1c0	2005-08-30 10:23:14 +0000	[diff] [blame]	1342	const char *errors
				1343	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1344	#endif
Walter Dörwald	a47d1c0	2005-08-30 10:23:14 +0000	[diff] [blame]	1345
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1346	/* --- Latin-1 Codecs -----------------------------------------------------
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1347
				1348	Note: Latin-1 corresponds to the first 256 Unicode ordinals.
				1349
				1350	*/
				1351
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1352	PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1353	const char *string, /* Latin-1 encoded string */
				1354	Py_ssize_t length, /* size of string */
				1355	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1356	);
				1357
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1358	PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1359	PyObject *unicode /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1360	);
				1361
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1362	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1363	PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
				1364	PyObject* unicode,
				1365	const char* errors);
				1366
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1367	PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1368	const Py_UNICODE *data, /* Unicode char buffer */
				1369	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
				1370	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1371	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1372	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1373
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1374	/* --- ASCII Codecs -------------------------------------------------------
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1375
				1376	Only 7-bit ASCII data is accepted. All other codes generate errors.
				1377
				1378	*/
				1379
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1380	PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1381	const char *string, /* ASCII encoded string */
				1382	Py_ssize_t length, /* size of string */
				1383	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1384	);
				1385
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1386	PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1387	PyObject *unicode /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1388	);
				1389
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1390	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1391	PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
				1392	PyObject* unicode,
				1393	const char* errors);
				1394
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1395	PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1396	const Py_UNICODE *data, /* Unicode char buffer */
				1397	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
				1398	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1399	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1400	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1401
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1402	/* --- Character Map Codecs -----------------------------------------------
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1403
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1404	This codec uses mappings to encode and decode characters.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1405
				1406	Decoding mappings must map single string characters to single
				1407	Unicode characters, integers (which are then interpreted as Unicode
				1408	ordinals) or None (meaning "undefined mapping" and causing an
				1409	error).
				1410
				1411	Encoding mappings must map single Unicode characters to single
				1412	string characters, integers (which are then interpreted as Latin-1
				1413	ordinals) or None (meaning "undefined mapping" and causing an
				1414	error).
				1415
				1416	If a character lookup fails with a LookupError, the character is
				1417	copied as-is meaning that its ordinal value will be interpreted as
				1418	Unicode or Latin-1 ordinal resp. Because of this mappings only need
				1419	to contain those mappings which map characters to different code
				1420	points.
				1421
				1422	*/
				1423
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1424	PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1425	const char *string, /* Encoded string */
				1426	Py_ssize_t length, /* size of string */
				1427	PyObject *mapping, /* character mapping
				1428	(char ordinal -> unicode ordinal) */
				1429	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1430	);
				1431
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1432	PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1433	PyObject *unicode, /* Unicode object */
				1434	PyObject *mapping /* character mapping
				1435	(unicode ordinal -> char ordinal) */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1436	);
				1437
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1438	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1439	PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1440	const Py_UNICODE *data, /* Unicode char buffer */
				1441	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
				1442	PyObject *mapping, /* character mapping
				1443	(unicode ordinal -> char ordinal) */
				1444	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1445	);
Martin v. Löwis	23e275b	2011-11-02 18:02:51 +0100	[diff] [blame]	1446	PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
				1447	PyObject *unicode, /* Unicode object */
				1448	PyObject *mapping, /* character mapping
				1449	(unicode ordinal -> char ordinal) */
				1450	const char *errors /* error handling */
				1451	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1452	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1453
				1454	/* Translate a Py_UNICODE buffer of the given length by applying a
				1455	character mapping table to it and return the resulting Unicode
				1456	object.
				1457
				1458	The mapping table must map Unicode ordinal integers to Unicode
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1459	ordinal integers or None (causing deletion of the character).
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1460
				1461	Mapping tables may be dictionaries or sequences. Unmapped character
				1462	ordinals (ones which cause a LookupError) are left untouched and
				1463	are copied as-is.
				1464
				1465	*/
				1466
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1467	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1468	PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1469	const Py_UNICODE *data, /* Unicode char buffer */
				1470	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
				1471	PyObject *table, /* Translate table */
				1472	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1473	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1474	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1475
Victor Stinner	99b9538	2011-07-04 14:23:54 +0200	[diff] [blame]	1476	#ifdef HAVE_MBCS
Guido van Rossum	24bdb04	2000-03-28 20:29:59 +0000	[diff] [blame]	1477
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1478	/* --- MBCS codecs for Windows -------------------------------------------- */
Guido van Rossum	24bdb04	2000-03-28 20:29:59 +0000	[diff] [blame]	1479
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1480	PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1481	const char *string, /* MBCS encoded string */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1482	Py_ssize_t length, /* size of string */
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1483	const char *errors /* error handling */
				1484	);
				1485
Thomas Wouters	0e3f591	2006-08-11 14:57:12 +0000	[diff] [blame]	1486	PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
				1487	const char *string, /* MBCS encoded string */
				1488	Py_ssize_t length, /* size of string */
				1489	const char *errors, /* error handling */
				1490	Py_ssize_t *consumed /* bytes consumed */
				1491	);
				1492
Victor Stinner	3a50e70	2011-10-18 21:21:00 +0200	[diff] [blame]	1493	PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful(
				1494	int code_page, /* code page number */
				1495	const char *string, /* encoded string */
				1496	Py_ssize_t length, /* size of string */
				1497	const char *errors, /* error handling */
				1498	Py_ssize_t *consumed /* bytes consumed */
				1499	);
				1500
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1501	PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1502	PyObject *unicode /* Unicode object */
				1503	);
				1504
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1505	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1506	PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1507	const Py_UNICODE *data, /* Unicode char buffer */
Victor Stinner	3a50e70	2011-10-18 21:21:00 +0200	[diff] [blame]	1508	Py_ssize_t length, /* number of Py_UNICODE chars to encode */
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1509	const char *errors /* error handling */
				1510	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1511	#endif
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1512
Victor Stinner	3a50e70	2011-10-18 21:21:00 +0200	[diff] [blame]	1513	PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage(
				1514	int code_page, /* code page number */
				1515	PyObject *unicode, /* Unicode object */
				1516	const char *errors /* error handling */
				1517	);
				1518
Victor Stinner	99b9538	2011-07-04 14:23:54 +0200	[diff] [blame]	1519	#endif /* HAVE_MBCS */
Guido van Rossum	24bdb04	2000-03-28 20:29:59 +0000	[diff] [blame]	1520
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	1521	/* --- Decimal Encoder ---------------------------------------------------- */
				1522
				1523	/* Takes a Unicode string holding a decimal value and writes it into
				1524	an output buffer using standard ASCII digit codes.
				1525
				1526	The output buffer has to provide at least length+1 bytes of storage
				1527	area. The output string is 0-terminated.
				1528
				1529	The encoder converts whitespace to ' ', decimal characters to their
				1530	corresponding ASCII digit and all other Latin-1 characters except
				1531	\0 as-is. Characters outside this range (Unicode ordinals 1-255)
				1532	are treated as errors. This includes embedded NULL bytes.
				1533
				1534	Error handling is defined by the errors argument:
				1535
				1536	NULL or "strict": raise a ValueError
				1537	"ignore": ignore the wrong characters (these are not copied to the
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1538	output buffer)
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	1539	"replace": replaces illegal characters with '?'
				1540
				1541	Returns 0 on success, -1 on failure.
				1542
				1543	*/
				1544
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1545	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1546	PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1547	Py_UNICODE *s, /* Unicode buffer */
				1548	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
				1549	char *output, /* Output buffer; must have size >= length */
				1550	const char *errors /* error handling */
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	1551	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1552	#endif
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	1553
Alexander Belopolsky	942af5a	2010-12-04 03:38:46 +0000	[diff] [blame]	1554	/* Transforms code points that have decimal digit property to the
				1555	corresponding ASCII digit code points.
				1556
				1557	Returns a new Unicode string on success, NULL on failure.
				1558	*/
				1559
Georg Brandl	b550308	2010-12-05 11:40:48 +0000	[diff] [blame]	1560	#ifndef Py_LIMITED_API
Alexander Belopolsky	942af5a	2010-12-04 03:38:46 +0000	[diff] [blame]	1561	PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
				1562	Py_UNICODE *s, /* Unicode buffer */
				1563	Py_ssize_t length /* Number of Py_UNICODE chars to transform */
				1564	);
Georg Brandl	b550308	2010-12-05 11:40:48 +0000	[diff] [blame]	1565	#endif
Alexander Belopolsky	942af5a	2010-12-04 03:38:46 +0000	[diff] [blame]	1566
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1567	/* Similar to PyUnicode_TransformDecimalToASCII(), but takes a PyUnicodeObject
				1568	as argument instead of a raw buffer and length. This function additionally
				1569	transforms spaces to ASCII because this is what the callers in longobject,
				1570	floatobject, and complexobject did anyways. */
				1571
				1572	#ifndef Py_LIMITED_API
				1573	PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
				1574	PyObject *unicode /* Unicode object */
				1575	);
				1576	#endif
				1577
Martin v. Löwis	011e842	2009-05-05 04:43:17 +0000	[diff] [blame]	1578	/* --- File system encoding ---------------------------------------------- */
				1579
Victor Stinner	47fcb5b	2010-08-13 23:59:58 +0000	[diff] [blame]	1580	/* ParseTuple converter: encode str objects to bytes using
				1581	PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */
Martin v. Löwis	011e842	2009-05-05 04:43:17 +0000	[diff] [blame]	1582
				1583	PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
				1584
Victor Stinner	47fcb5b	2010-08-13 23:59:58 +0000	[diff] [blame]	1585	/* ParseTuple converter: decode bytes objects to unicode using
				1586	PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */
				1587
				1588	PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
				1589
Victor Stinner	77c3862	2010-05-14 15:58:55 +0000	[diff] [blame]	1590	/* Decode a null-terminated string using Py_FileSystemDefaultEncoding
				1591	and the "surrogateescape" error handler.
Martin v. Löwis	011e842	2009-05-05 04:43:17 +0000	[diff] [blame]	1592
Victor Stinner	f3170cc	2010-10-15 12:04:23 +0000	[diff] [blame]	1593	If Py_FileSystemDefaultEncoding is not set, fall back to the locale
				1594	encoding.
Martin v. Löwis	011e842	2009-05-05 04:43:17 +0000	[diff] [blame]	1595
Benjamin Peterson	ccbd694	2010-05-15 17:43:18 +0000	[diff] [blame]	1596	Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known.
Martin v. Löwis	011e842	2009-05-05 04:43:17 +0000	[diff] [blame]	1597	*/
				1598
				1599	PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
				1600	const char *s /* encoded string */
				1601	);
				1602
Victor Stinner	77c3862	2010-05-14 15:58:55 +0000	[diff] [blame]	1603	/* Decode a string using Py_FileSystemDefaultEncoding
				1604	and the "surrogateescape" error handler.
				1605
Victor Stinner	f3170cc	2010-10-15 12:04:23 +0000	[diff] [blame]	1606	If Py_FileSystemDefaultEncoding is not set, fall back to the locale
				1607	encoding.
Victor Stinner	77c3862	2010-05-14 15:58:55 +0000	[diff] [blame]	1608	*/
				1609
Martin v. Löwis	011e842	2009-05-05 04:43:17 +0000	[diff] [blame]	1610	PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
				1611	const char *s, /* encoded string */
				1612	Py_ssize_t size /* size */
				1613	);
				1614
Victor Stinner	ae6265f	2010-05-15 16:27:27 +0000	[diff] [blame]	1615	/* Encode a Unicode object to Py_FileSystemDefaultEncoding with the
Benjamin Peterson	ccbd694	2010-05-15 17:43:18 +0000	[diff] [blame]	1616	"surrogateescape" error handler, and return bytes.
Victor Stinner	ae6265f	2010-05-15 16:27:27 +0000	[diff] [blame]	1617
Victor Stinner	f3170cc	2010-10-15 12:04:23 +0000	[diff] [blame]	1618	If Py_FileSystemDefaultEncoding is not set, fall back to the locale
				1619	encoding.
Victor Stinner	ae6265f	2010-05-15 16:27:27 +0000	[diff] [blame]	1620	*/
				1621
				1622	PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
				1623	PyObject *unicode
				1624	);
				1625
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1626	/* --- Methods & Slots ----------------------------------------------------
				1627
				1628	These are capable of handling Unicode objects and strings on input
				1629	(we refer to them as strings in the descriptions) and return
Georg Brandl	c6bc4c6	2011-10-05 16:23:09 +0200	[diff] [blame]	1630	Unicode objects or integers as appropriate. */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1631
				1632	/* Concat two strings giving a new Unicode string. */
				1633
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1634	PyAPI_FUNC(PyObject*) PyUnicode_Concat(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1635	PyObject *left, /* Left string */
				1636	PyObject *right /* Right string */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1637	);
				1638
Walter Dörwald	1ab8330	2007-05-18 17:15:44 +0000	[diff] [blame]	1639	/* Concat two strings and put the result in *pleft
				1640	(sets *pleft to NULL on error) */
				1641
				1642	PyAPI_FUNC(void) PyUnicode_Append(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1643	PyObject **pleft, /* Pointer to left string */
				1644	PyObject *right /* Right string */
Walter Dörwald	1ab8330	2007-05-18 17:15:44 +0000	[diff] [blame]	1645	);
				1646
				1647	/* Concat two strings, put the result in *pleft and drop the right object
				1648	(sets *pleft to NULL on error) */
				1649
				1650	PyAPI_FUNC(void) PyUnicode_AppendAndDel(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1651	PyObject **pleft, /* Pointer to left string */
				1652	PyObject *right /* Right string */
Walter Dörwald	1ab8330	2007-05-18 17:15:44 +0000	[diff] [blame]	1653	);
				1654
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1655	/* Split a string giving a list of Unicode strings.
				1656
				1657	If sep is NULL, splitting will be done at all whitespace
				1658	substrings. Otherwise, splits occur at the given separator.
				1659
				1660	At most maxsplit splits will be done. If negative, no limit is set.
				1661
				1662	Separators are not included in the resulting list.
				1663
				1664	*/
				1665
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1666	PyAPI_FUNC(PyObject*) PyUnicode_Split(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1667	PyObject *s, /* String to split */
				1668	PyObject *sep, /* String separator */
				1669	Py_ssize_t maxsplit /* Maxsplit count */
				1670	);
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1671
				1672	/* Ditto, but split at line breaks.
				1673
				1674	CRLF is considered to be one line break. Line breaks are not
				1675	included in the resulting list. */
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1676
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1677	PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1678	PyObject *s, /* String to split */
				1679	int keepends /* If true, line end markers are included */
				1680	);
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1681
Thomas Wouters	477c8d5	2006-05-27 19:21:47 +0000	[diff] [blame]	1682	/* Partition a string using a given separator. */
				1683
				1684	PyAPI_FUNC(PyObject*) PyUnicode_Partition(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1685	PyObject *s, /* String to partition */
				1686	PyObject *sep /* String separator */
				1687	);
Thomas Wouters	477c8d5	2006-05-27 19:21:47 +0000	[diff] [blame]	1688
				1689	/* Partition a string using a given separator, searching from the end of the
				1690	string. */
				1691
				1692	PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1693	PyObject *s, /* String to partition */
				1694	PyObject *sep /* String separator */
				1695	);
Thomas Wouters	477c8d5	2006-05-27 19:21:47 +0000	[diff] [blame]	1696
Hye-Shik Chang	3ae811b	2003-12-15 18:49:53 +0000	[diff] [blame]	1697	/* Split a string giving a list of Unicode strings.
				1698
				1699	If sep is NULL, splitting will be done at all whitespace
				1700	substrings. Otherwise, splits occur at the given separator.
				1701
				1702	At most maxsplit splits will be done. But unlike PyUnicode_Split
				1703	PyUnicode_RSplit splits from the end of the string. If negative,
				1704	no limit is set.
				1705
				1706	Separators are not included in the resulting list.
				1707
				1708	*/
				1709
				1710	PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1711	PyObject *s, /* String to split */
				1712	PyObject *sep, /* String separator */
				1713	Py_ssize_t maxsplit /* Maxsplit count */
				1714	);
Hye-Shik Chang	3ae811b	2003-12-15 18:49:53 +0000	[diff] [blame]	1715
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1716	/* Translate a string by applying a character mapping table to it and
				1717	return the resulting Unicode object.
				1718
				1719	The mapping table must map Unicode ordinal integers to Unicode
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1720	ordinal integers or None (causing deletion of the character).
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1721
				1722	Mapping tables may be dictionaries or sequences. Unmapped character
				1723	ordinals (ones which cause a LookupError) are left untouched and
				1724	are copied as-is.
				1725
				1726	*/
				1727
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1728	PyAPI_FUNC(PyObject *) PyUnicode_Translate(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1729	PyObject *str, /* String */
				1730	PyObject *table, /* Translate table */
				1731	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1732	);
				1733
				1734	/* Join a sequence of strings using the given separator and return
				1735	the resulting Unicode string. */
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1736
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1737	PyAPI_FUNC(PyObject*) PyUnicode_Join(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1738	PyObject *separator, /* Separator string */
				1739	PyObject *seq /* Sequence object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1740	);
				1741
				1742	/* Return 1 if substr matches str[start:end] at the given tail end, 0
				1743	otherwise. */
				1744
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1745	PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1746	PyObject *str, /* String */
				1747	PyObject *substr, /* Prefix or Suffix string */
				1748	Py_ssize_t start, /* Start index */
				1749	Py_ssize_t end, /* Stop index */
				1750	int direction /* Tail end: -1 prefix, +1 suffix */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1751	);
				1752
				1753	/* Return the first position of substr in str[start:end] using the
Marc-André Lemburg	4da6fd6	2002-05-29 11:33:13 +0000	[diff] [blame]	1754	given search direction or -1 if not found. -2 is returned in case
				1755	an error occurred and an exception is set. */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1756
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1757	PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1758	PyObject *str, /* String */
				1759	PyObject *substr, /* Substring to find */
				1760	Py_ssize_t start, /* Start index */
				1761	Py_ssize_t end, /* Stop index */
				1762	int direction /* Find direction: +1 forward, -1 backward */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1763	);
				1764
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1765	/* Like PyUnicode_Find, but search for single character only. */
				1766	PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar(
				1767	PyObject *str,
				1768	Py_UCS4 ch,
				1769	Py_ssize_t start,
				1770	Py_ssize_t end,
				1771	int direction
				1772	);
				1773
Barry Warsaw	51ac580	2000-03-20 16:36:48 +0000	[diff] [blame]	1774	/* Count the number of occurrences of substr in str[start:end]. */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1775
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1776	PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1777	PyObject *str, /* String */
				1778	PyObject *substr, /* Substring to count */
				1779	Py_ssize_t start, /* Start index */
				1780	Py_ssize_t end /* Stop index */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1781	);
				1782
Barry Warsaw	51ac580	2000-03-20 16:36:48 +0000	[diff] [blame]	1783	/* Replace at most maxcount occurrences of substr in str with replstr
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1784	and return the resulting Unicode object. */
				1785
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1786	PyAPI_FUNC(PyObject *) PyUnicode_Replace(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1787	PyObject *str, /* String */
				1788	PyObject *substr, /* Substring to find */
				1789	PyObject *replstr, /* Substring to replace */
				1790	Py_ssize_t maxcount /* Max. number of replacements to apply;
				1791	-1 = all */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1792	);
				1793
				1794	/* Compare two strings and return -1, 0, 1 for less than, equal,
				1795	greater than resp. */
				1796
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1797	PyAPI_FUNC(int) PyUnicode_Compare(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1798	PyObject *left, /* Left string */
				1799	PyObject *right /* Right string */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1800	);
				1801
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	1802	PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
				1803	PyObject *left,
Victor Stinner	dc2081f	2010-12-27 01:49:29 +0000	[diff] [blame]	1804	const char *right /* ASCII-encoded string */
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	1805	);
				1806
Thomas Wouters	00ee7ba	2006-08-21 19:07:27 +0000	[diff] [blame]	1807	/* Rich compare two strings and return one of the following:
				1808
				1809	- NULL in case an exception was raised
Georg Brandl	c6bc4c6	2011-10-05 16:23:09 +0200	[diff] [blame]	1810	- Py_True or Py_False for successful comparisons
Thomas Wouters	00ee7ba	2006-08-21 19:07:27 +0000	[diff] [blame]	1811	- Py_NotImplemented in case the type combination is unknown
				1812
				1813	Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
				1814	case the conversion of the arguments to Unicode fails with a
				1815	UnicodeDecodeError.
				1816
				1817	Possible values for op:
				1818
				1819	Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
				1820
				1821	*/
				1822
				1823	PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1824	PyObject *left, /* Left string */
				1825	PyObject *right, /* Right string */
				1826	int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
Thomas Wouters	00ee7ba	2006-08-21 19:07:27 +0000	[diff] [blame]	1827	);
				1828
Thomas Wouters	7e47402	2000-07-16 12:04:32 +0000	[diff] [blame]	1829	/* Apply an argument tuple or dictionary to a format string and return
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1830	the resulting Unicode string. */
				1831
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1832	PyAPI_FUNC(PyObject *) PyUnicode_Format(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1833	PyObject *format, /* Format string */
				1834	PyObject *args /* Argument tuple or dictionary */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1835	);
				1836
Guido van Rossum	d0d366b	2000-03-13 23:22:24 +0000	[diff] [blame]	1837	/* Checks whether element is contained in container and return 1/0
				1838	accordingly.
				1839
				1840	element has to coerce to a one-element Unicode string. -1 is
				1841	returned in case of an error. */
				1842
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1843	PyAPI_FUNC(int) PyUnicode_Contains(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1844	PyObject *container, /* Container string */
				1845	PyObject *element /* Element string */
Guido van Rossum	d0d366b	2000-03-13 23:22:24 +0000	[diff] [blame]	1846	);
				1847
Martin v. Löwis	4738340	2007-08-15 07:32:56 +0000	[diff] [blame]	1848	/* Checks whether argument is a valid identifier. */
				1849
				1850	PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
				1851
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1852	#ifndef Py_LIMITED_API
Walter Dörwald	de02bcb	2002-04-22 17:42:37 +0000	[diff] [blame]	1853	/* Externally visible for str.strip(unicode) */
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1854	PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
Victor Stinner	9db1a8b	2011-10-23 20:04:37 +0200	[diff] [blame]	1855	PyObject *self,
Walter Dörwald	de02bcb	2002-04-22 17:42:37 +0000	[diff] [blame]	1856	int striptype,
				1857	PyObject *sepobj
				1858	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1859	#endif
Walter Dörwald	de02bcb	2002-04-22 17:42:37 +0000	[diff] [blame]	1860
Eric Smith	5807c41	2008-05-11 21:00:57 +0000	[diff] [blame]	1861	/* Using the current locale, insert the thousands grouping
				1862	into the string pointed to by buffer. For the argument descriptions,
				1863	see Objects/stringlib/localeutil.h */
				1864
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1865	#ifndef Py_LIMITED_API
Eric Smith	0923d1d	2009-04-16 20:16:10 +0000	[diff] [blame]	1866	PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGroupingLocale(Py_UNICODE *buffer,
				1867	Py_ssize_t n_buffer,
				1868	Py_UNICODE *digits,
				1869	Py_ssize_t n_digits,
				1870	Py_ssize_t min_width);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1871	#endif
Eric Smith	5807c41	2008-05-11 21:00:57 +0000	[diff] [blame]	1872
Eric Smith	a3b1ac8	2009-04-03 14:45:06 +0000	[diff] [blame]	1873	/* Using explicit passed-in values, insert the thousands grouping
				1874	into the string pointed to by buffer. For the argument descriptions,
				1875	see Objects/stringlib/localeutil.h */
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1876	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1877	PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
Victor Stinner	c3cec78	2011-10-05 21:24:08 +0200	[diff] [blame]	1878	PyObject *unicode,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1879	int kind,
				1880	void *buffer,
				1881	Py_ssize_t n_buffer,
				1882	void *digits,
				1883	Py_ssize_t n_digits,
				1884	Py_ssize_t min_width,
				1885	const char *grouping,
				1886	const char *thousands_sep);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1887	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1888	/* === Characters Type APIs =============================================== */
				1889
Benjamin Peterson	960cf0f	2009-01-09 04:11:44 +0000	[diff] [blame]	1890	/* Helper array used by Py_UNICODE_ISSPACE(). */
				1891
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1892	#ifndef Py_LIMITED_API
Benjamin Peterson	960cf0f	2009-01-09 04:11:44 +0000	[diff] [blame]	1893	PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
				1894
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1895	/* These should not be used directly. Use the Py_UNICODE_IS* and
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1896	Py_UNICODE_TO* macros instead.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1897
				1898	These APIs are implemented in Objects/unicodectype.c.
				1899
				1900	*/
				1901
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1902	PyAPI_FUNC(int) _PyUnicode_IsLowercase(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1903	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1904	);
				1905
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1906	PyAPI_FUNC(int) _PyUnicode_IsUppercase(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1907	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1908	);
				1909
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1910	PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1911	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1912	);
				1913
Martin v. Löwis	13c3e38	2007-08-14 22:37:03 +0000	[diff] [blame]	1914	PyAPI_FUNC(int) _PyUnicode_IsXidStart(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1915	Py_UCS4 ch /* Unicode character */
Martin v. Löwis	13c3e38	2007-08-14 22:37:03 +0000	[diff] [blame]	1916	);
				1917
				1918	PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1919	Py_UCS4 ch /* Unicode character */
Martin v. Löwis	13c3e38	2007-08-14 22:37:03 +0000	[diff] [blame]	1920	);
				1921
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1922	PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1923	const Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1924	);
				1925
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1926	PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1927	const Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1928	);
				1929
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1930	PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
				1931	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1932	);
				1933
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1934	PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
				1935	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1936	);
				1937
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1938	PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
				1939	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1940	);
				1941
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1942	PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1943	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1944	);
				1945
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1946	PyAPI_FUNC(int) _PyUnicode_ToDigit(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1947	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1948	);
				1949
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1950	PyAPI_FUNC(double) _PyUnicode_ToNumeric(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1951	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1952	);
				1953
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1954	PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1955	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1956	);
				1957
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1958	PyAPI_FUNC(int) _PyUnicode_IsDigit(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1959	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1960	);
				1961
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1962	PyAPI_FUNC(int) _PyUnicode_IsNumeric(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1963	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1964	);
				1965
Georg Brandl	559e5d7	2008-06-11 18:37:52 +0000	[diff] [blame]	1966	PyAPI_FUNC(int) _PyUnicode_IsPrintable(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1967	Py_UCS4 ch /* Unicode character */
Georg Brandl	559e5d7	2008-06-11 18:37:52 +0000	[diff] [blame]	1968	);
				1969
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1970	PyAPI_FUNC(int) _PyUnicode_IsAlpha(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	1971	Py_UCS4 ch /* Unicode character */
Marc-André Lemburg	f03e741	2000-07-05 09:45:59 +0000	[diff] [blame]	1972	);
				1973
Victor Stinner	ef8d95c	2010-08-16 22:03:11 +0000	[diff] [blame]	1974	PyAPI_FUNC(size_t) Py_UNICODE_strlen(
				1975	const Py_UNICODE *u
				1976	);
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	1977
				1978	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
Victor Stinner	ef8d95c	2010-08-16 22:03:11 +0000	[diff] [blame]	1979	Py_UNICODE *s1,
				1980	const Py_UNICODE *s2);
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	1981
Victor Stinner	c4eb765	2010-09-01 23:43:50 +0000	[diff] [blame]	1982	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat(
				1983	Py_UNICODE *s1, const Py_UNICODE *s2);
				1984
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	1985	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
Victor Stinner	ef8d95c	2010-08-16 22:03:11 +0000	[diff] [blame]	1986	Py_UNICODE *s1,
				1987	const Py_UNICODE *s2,
				1988	size_t n);
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	1989
				1990	PyAPI_FUNC(int) Py_UNICODE_strcmp(
Victor Stinner	ef8d95c	2010-08-16 22:03:11 +0000	[diff] [blame]	1991	const Py_UNICODE *s1,
				1992	const Py_UNICODE *s2
				1993	);
				1994
				1995	PyAPI_FUNC(int) Py_UNICODE_strncmp(
				1996	const Py_UNICODE *s1,
				1997	const Py_UNICODE *s2,
				1998	size_t n
				1999	);
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	2000
				2001	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
Victor Stinner	ef8d95c	2010-08-16 22:03:11 +0000	[diff] [blame]	2002	const Py_UNICODE *s,
				2003	Py_UNICODE c
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	2004	);
				2005
Victor Stinner	331ea92	2010-08-10 16:37:20 +0000	[diff] [blame]	2006	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
Victor Stinner	ef8d95c	2010-08-16 22:03:11 +0000	[diff] [blame]	2007	const Py_UNICODE *s,
				2008	Py_UNICODE c
Victor Stinner	331ea92	2010-08-10 16:37:20 +0000	[diff] [blame]	2009	);
				2010
Victor Stinner	71133ff	2010-09-01 23:43:53 +0000	[diff] [blame]	2011	/* Create a copy of a unicode string ending with a nul character. Return NULL
				2012	and raise a MemoryError exception on memory allocation failure, otherwise
				2013	return a new allocated buffer (use PyMem_Free() to free the buffer). */
				2014
Victor Stinner	4640860	2010-09-03 16:18:00 +0000	[diff] [blame]	2015	PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
Victor Stinner	71133ff	2010-09-01 23:43:53 +0000	[diff] [blame]	2016	PyObject *unicode
				2017	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	2018	#endif /* Py_LIMITED_API */
Victor Stinner	71133ff	2010-09-01 23:43:53 +0000	[diff] [blame]	2019
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	2020	#if defined(Py_DEBUG) && !defined(Py_LIMITED_API)
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	2021	PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
Victor Stinner	7931d9a	2011-11-04 00:22:48 +0100	[diff] [blame]	2022	PyObject *op,
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	2023	int check_content);
				2024	#endif
				2025
Martin v. Löwis	afe55bb	2011-10-09 10:38:36 +0200	[diff] [blame]	2026	/******************* String Literals **************************************/
				2027	/* This structure helps managing static strings. The basic usage goes like this:
				2028	Instead of doing
				2029
				2030	r = PyObject_CallMethod(o, "foo", "args", ...);
				2031
				2032	do
				2033
Martin v. Löwis	bd928fe	2011-10-14 10:20:37 +0200	[diff] [blame]	2034	_Py_IDENTIFIER(foo);
Martin v. Löwis	afe55bb	2011-10-09 10:38:36 +0200	[diff] [blame]	2035	...
				2036	r = _PyObject_CallMethodId(o, &PyId_foo, "args", ...);
				2037
				2038	PyId_foo is a static variable, either on block level or file level. On first
				2039	usage, the string "foo" is interned, and the structures are linked. On interpreter
				2040	shutdown, all strings are released (through _PyUnicode_ClearStaticStrings).
				2041
				2042	Alternatively, _Py_static_string allows choosing the variable name.
Martin v. Löwis	d10759f	2011-11-07 13:00:05 +0100	[diff] [blame]	2043	_PyUnicode_FromId returns a borrowed reference to the interned string.
Martin v. Löwis	afe55bb	2011-10-09 10:38:36 +0200	[diff] [blame]	2044	_PyObject_{Get,Set,Has}AttrId are __getattr__ versions using _Py_Identifier*.
				2045	*/
				2046	typedef struct _Py_Identifier {
				2047	struct _Py_Identifier *next;
				2048	const char* string;
				2049	PyObject *object;
				2050	} _Py_Identifier;
				2051
Martin v. Löwis	87da872	2011-10-09 11:54:42 +0200	[diff] [blame]	2052	#define _Py_static_string(varname, value) static _Py_Identifier varname = { 0, value, 0 }
Martin v. Löwis	bd928fe	2011-10-14 10:20:37 +0200	[diff] [blame]	2053	#define _Py_IDENTIFIER(varname) _Py_static_string(PyId_##varname, #varname)
Martin v. Löwis	afe55bb	2011-10-09 10:38:36 +0200	[diff] [blame]	2054
				2055	/* Return an interned Unicode object for an Identifier; may fail if there is no memory.*/
				2056	PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*);
				2057	/* Clear all static strings. */
				2058	PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void);
				2059
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2060	#ifdef __cplusplus
				2061	}
				2062	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2063	#endif /* !Py_UNICODEOBJECT_H */