Blame - Include/unicodeobject.h - platform/external/python/cpython3

blob: bc6ecd4e81e3f2f2bf976c33814ee23e4974d527 [file] [log] [blame]

Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1	#ifndef Py_UNICODEOBJECT_H
				2	#define Py_UNICODEOBJECT_H
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	3
Christian Heimes	af98da1	2008-01-27 15:18:18 +0000	[diff] [blame]	4	#include <stdarg.h>
				5
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	6	/*
				7
				8	Unicode implementation based on original code by Fredrik Lundh,
				9	modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
Alexander Belopolsky	83283c2	2010-11-16 14:29:01 +0000	[diff] [blame]	10	Unicode Integration Proposal. (See
				11	http://www.egenix.com/files/python/unicode-proposal.txt).
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	12
Guido van Rossum	16b1ad9	2000-08-03 16:24:25 +0000	[diff] [blame]	13	Copyright (c) Corporation for National Research Initiatives.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	14
				15
				16	Original header:
				17	--------------------------------------------------------------------
				18
				19	* Yet another Unicode string type for Python. This type supports the
				20	* 16-bit Basic Multilingual Plane (BMP) only.
				21	*
				22	* Written by Fredrik Lundh, January 1999.
				23	*
				24	* Copyright (c) 1999 by Secret Labs AB.
				25	* Copyright (c) 1999 by Fredrik Lundh.
				26	*
				27	* fredrik@pythonware.com
				28	* http://www.pythonware.com
				29	*
				30	* --------------------------------------------------------------------
				31	* This Unicode String Type is
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	32	*
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	33	* Copyright (c) 1999 by Secret Labs AB
				34	* Copyright (c) 1999 by Fredrik Lundh
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	35	*
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	36	* By obtaining, using, and/or copying this software and/or its
				37	* associated documentation, you agree that you have read, understood,
				38	* and will comply with the following terms and conditions:
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	39	*
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	40	* Permission to use, copy, modify, and distribute this software and its
				41	* associated documentation for any purpose and without fee is hereby
				42	* granted, provided that the above copyright notice appears in all
				43	* copies, and that both that copyright notice and this permission notice
				44	* appear in supporting documentation, and that the name of Secret Labs
				45	* AB or the author not be used in advertising or publicity pertaining to
				46	* distribution of the software without specific, written prior
				47	* permission.
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	48	*
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	49	* SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
				50	* THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
				51	* FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
				52	* ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
				53	* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
				54	* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
				55	* OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
				56	* -------------------------------------------------------------------- */
				57
Marc-André Lemburg	5e6007c	2001-09-19 11:21:03 +0000	[diff] [blame]	58	#include <ctype.h>
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	59
				60	/* === Internal API ======================================================= */
				61
				62	/* --- Internal Unicode Format -------------------------------------------- */
				63
Christian Heimes	0625e89	2008-01-07 21:04:21 +0000	[diff] [blame]	64	/* Python 3.x requires unicode */
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	65	#define Py_USING_UNICODE
Christian Heimes	0625e89	2008-01-07 21:04:21 +0000	[diff] [blame]	66
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	67	#ifndef SIZEOF_WCHAR_T
				68	#error Must define SIZEOF_WCHAR_T
Fredrik Lundh	9b14ab3	2001-06-26 22:59:49 +0000	[diff] [blame]	69	#endif
				70
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	71	#define Py_UNICODE_SIZE SIZEOF_WCHAR_T
				72
				73	/* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE.
				74	Otherwise, Unicode strings are stored as UCS-2 (with limited support
				75	for UTF-16) */
Fredrik Lundh	8f45585	2001-06-27 18:59:43 +0000	[diff] [blame]	76
				77	#if Py_UNICODE_SIZE >= 4
				78	#define Py_UNICODE_WIDE
Martin v. Löwis	0ba70cc	2001-06-26 22:22:37 +0000	[diff] [blame]	79	#endif
Fredrik Lundh	1294ad0	2001-06-26 17:17:07 +0000	[diff] [blame]	80
Amaury Forgeot d'Arc	feb7307	2010-09-12 22:42:57 +0000	[diff] [blame]	81	/* Set these flags if the platform has "wchar.h" and the
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	82	wchar_t type is a 16-bit unsigned type */
				83	/* #define HAVE_WCHAR_H */
				84	/* #define HAVE_USABLE_WCHAR_T */
				85
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	86	/* Py_UNICODE was the native Unicode storage format (code unit) used by
				87	Python and represents a single Unicode element in the Unicode type.
Georg Brandl	c6bc4c6	2011-10-05 16:23:09 +0200	[diff] [blame]	88	With PEP 393, Py_UNICODE is deprecated and replaced with a
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	89	typedef to wchar_t. */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	90
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	91	#ifndef Py_LIMITED_API
				92	#define PY_UNICODE_TYPE wchar_t
				93	typedef wchar_t Py_UNICODE;
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	94	#endif
				95
				96	/* If the compiler provides a wchar_t type we try to support it
Victor Stinner	137c34c	2010-09-29 10:25:54 +0000	[diff] [blame]	97	through the interface functions PyUnicode_FromWideChar(),
				98	PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	99
				100	#ifdef HAVE_USABLE_WCHAR_T
Marc-André Lemburg	1a731c6	2000-08-11 11:43:10 +0000	[diff] [blame]	101	# ifndef HAVE_WCHAR_H
				102	# define HAVE_WCHAR_H
				103	# endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	104	#endif
				105
				106	#ifdef HAVE_WCHAR_H
Guido van Rossum	24bdb04	2000-03-28 20:29:59 +0000	[diff] [blame]	107	/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
				108	# ifdef _HAVE_BSDI
				109	# include <time.h>
				110	# endif
Marc-André Lemburg	5e6007c	2001-09-19 11:21:03 +0000	[diff] [blame]	111	# include <wchar.h>
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	112	#endif
				113
Georg Brandl	c6bc4c6	2011-10-05 16:23:09 +0200	[diff] [blame]	114	/* Py_UCS4 and Py_UCS2 are typedefs for the respective
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	115	unicode representations. */
Benjamin Peterson	a13e367	2016-09-08 11:38:28 -0700	[diff] [blame]	116	typedef uint32_t Py_UCS4;
				117	typedef uint16_t Py_UCS2;
				118	typedef uint8_t Py_UCS1;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	119
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	120	/* --- Internal Unicode Operations ---------------------------------------- */
				121
Benjamin Peterson	960cf0f	2009-01-09 04:11:44 +0000	[diff] [blame]	122	/* Since splitting on whitespace is an important use case, and
				123	whitespace in most situations is solely ASCII whitespace, we
				124	optimize for the common case by using a quick look-up table
				125	_Py_ascii_whitespace (see below) with an inlined check.
Christian Heimes	190d79e	2008-01-30 11:58:22 +0000	[diff] [blame]	126
Benjamin Peterson	960cf0f	2009-01-09 04:11:44 +0000	[diff] [blame]	127	*/
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	128	#ifndef Py_LIMITED_API
Christian Heimes	190d79e	2008-01-30 11:58:22 +0000	[diff] [blame]	129	#define Py_UNICODE_ISSPACE(ch) \
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	130	((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	131
				132	#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
				133	#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
				134	#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
				135	#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
				136
				137	#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
				138	#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
				139	#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
				140
				141	#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
				142	#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
				143	#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
Georg Brandl	559e5d7	2008-06-11 18:37:52 +0000	[diff] [blame]	144	#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	145
				146	#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
				147	#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
				148	#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
				149
Marc-André Lemburg	f03e741	2000-07-05 09:45:59 +0000	[diff] [blame]	150	#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	151
Marc-André Lemburg	a9c103b	2000-07-03 10:52:13 +0000	[diff] [blame]	152	#define Py_UNICODE_ISALNUM(ch) \
				153	(Py_UNICODE_ISALPHA(ch) || \
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	154	Py_UNICODE_ISDECIMAL(ch) || \
				155	Py_UNICODE_ISDIGIT(ch) || \
				156	Py_UNICODE_ISNUMERIC(ch))
Marc-André Lemburg	a9c103b	2000-07-03 10:52:13 +0000	[diff] [blame]	157
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	158	#define Py_UNICODE_COPY(target, source, length) \
Christian Heimes	f051e43	2016-09-13 20:22:02 +0200	[diff] [blame^]	159	memcpy((target), (source), (length)*sizeof(Py_UNICODE))
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	160
Benjamin Peterson	960cf0f	2009-01-09 04:11:44 +0000	[diff] [blame]	161	#define Py_UNICODE_FILL(target, value, length) \
				162	do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	163	for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\
Thomas Wouters	477c8d5	2006-05-27 19:21:47 +0000	[diff] [blame]	164	} while (0)
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	165
Ezio Melotti	8c9375b	2011-08-22 20:03:25 +0300	[diff] [blame]	166	/* macros to work with surrogates */
Victor Stinner	76df43d	2012-10-30 01:42:39 +0100	[diff] [blame]	167	#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
				168	#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF)
				169	#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF)
Ezio Melotti	8c9375b	2011-08-22 20:03:25 +0300	[diff] [blame]	170	/* Join two surrogate characters and return a single Py_UCS4 value. */
				171	#define Py_UNICODE_JOIN_SURROGATES(high, low) \
				172	(((((Py_UCS4)(high) & 0x03FF) << 10) | \
				173	((Py_UCS4)(low) & 0x03FF)) + 0x10000)
Victor Stinner	551ac95	2011-11-29 22:58:13 +0100	[diff] [blame]	174	/* high surrogate = top 10 bits added to D800 */
Antoine Pitrou	27f6a3b	2012-06-15 22:15:23 +0200	[diff] [blame]	175	#define Py_UNICODE_HIGH_SURROGATE(ch) (0xD800 - (0x10000 >> 10) + ((ch) >> 10))
Victor Stinner	551ac95	2011-11-29 22:58:13 +0100	[diff] [blame]	176	/* low surrogate = bottom 10 bits added to DC00 */
Antoine Pitrou	27f6a3b	2012-06-15 22:15:23 +0200	[diff] [blame]	177	#define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF))
Ezio Melotti	8c9375b	2011-08-22 20:03:25 +0300	[diff] [blame]	178
Alexander Belopolsky	83283c2	2010-11-16 14:29:01 +0000	[diff] [blame]	179	/* Check if substring matches at given offset. The offset must be
				180	valid, and the substring must not be empty. */
Benjamin Peterson	960cf0f	2009-01-09 04:11:44 +0000	[diff] [blame]	181
Thomas Wouters	477c8d5	2006-05-27 19:21:47 +0000	[diff] [blame]	182	#define Py_UNICODE_MATCH(string, offset, substring) \
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	183	((*((string)->wstr + (offset)) == *((substring)->wstr)) && \
				184	((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \
				185	!memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE)))
				186
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	187	#endif /* Py_LIMITED_API */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	188
Barry Warsaw	51ac580	2000-03-20 16:36:48 +0000	[diff] [blame]	189	#ifdef __cplusplus
				190	extern "C" {
				191	#endif
				192
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	193	/* --- Unicode Type ------------------------------------------------------- */
				194
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	195	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	196
				197	/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
				198	structure. state.ascii and state.compact are set, and the data
				199	immediately follow the structure. utf8_length and wstr_length can be found
				200	in the length field; the utf8 pointer is equal to the data pointer. */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	201	typedef struct {
Éric Araujo	80a348c	2011-10-05 01:11:12 +0200	[diff] [blame]	202	/* There are 4 forms of Unicode strings:
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	203
				204	- compact ascii:
				205
				206	* structure = PyASCIIObject
Victor Stinner	7a9105a	2011-12-12 00:13:42 +0100	[diff] [blame]	207	* test: PyUnicode_IS_COMPACT_ASCII(op)
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	208	* kind = PyUnicode_1BYTE_KIND
				209	* compact = 1
				210	* ascii = 1
				211	* ready = 1
Victor Stinner	30134f5	2011-10-04 01:32:45 +0200	[diff] [blame]	212	* (length is the length of the utf8 and wstr strings)
				213	* (data starts just after the structure)
				214	* (since ASCII is decoded from UTF-8, the utf8 string are the data)
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	215
				216	- compact:
				217
				218	* structure = PyCompactUnicodeObject
Victor Stinner	80bc72d	2011-12-22 03:23:10 +0100	[diff] [blame]	219	* test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	220	* kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
				221	PyUnicode_4BYTE_KIND
				222	* compact = 1
				223	* ready = 1
Victor Stinner	a3b334d	2011-10-03 13:53:37 +0200	[diff] [blame]	224	* ascii = 0
Victor Stinner	30134f5	2011-10-04 01:32:45 +0200	[diff] [blame]	225	* utf8 is not shared with data
Victor Stinner	a41463c	2011-10-04 01:05:08 +0200	[diff] [blame]	226	* utf8_length = 0 if utf8 is NULL
				227	* wstr is shared with data and wstr_length=length
				228	if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
Victor Stinner	e30c0a1	2011-11-04 20:54:05 +0100	[diff] [blame]	229	or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
Victor Stinner	a41463c	2011-10-04 01:05:08 +0200	[diff] [blame]	230	* wstr_length = 0 if wstr is NULL
Victor Stinner	30134f5	2011-10-04 01:32:45 +0200	[diff] [blame]	231	* (data starts just after the structure)
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	232
Victor Stinner	8cfcbed	2011-10-03 23:19:21 +0200	[diff] [blame]	233	- legacy string, not ready:
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	234
				235	* structure = PyUnicodeObject
Victor Stinner	7a9105a	2011-12-12 00:13:42 +0100	[diff] [blame]	236	* test: kind == PyUnicode_WCHAR_KIND
Victor Stinner	e30c0a1	2011-11-04 20:54:05 +0100	[diff] [blame]	237	* length = 0 (use wstr_length)
				238	* hash = -1
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	239	* kind = PyUnicode_WCHAR_KIND
				240	* compact = 0
Victor Stinner	30134f5	2011-10-04 01:32:45 +0200	[diff] [blame]	241	* ascii = 0
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	242	* ready = 0
Victor Stinner	e30c0a1	2011-11-04 20:54:05 +0100	[diff] [blame]	243	* interned = SSTATE_NOT_INTERNED
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	244	* wstr is not NULL
				245	* data.any is NULL
				246	* utf8 is NULL
Victor Stinner	a41463c	2011-10-04 01:05:08 +0200	[diff] [blame]	247	* utf8_length = 0
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	248
Victor Stinner	8cfcbed	2011-10-03 23:19:21 +0200	[diff] [blame]	249	- legacy string, ready:
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	250
				251	* structure = PyUnicodeObject structure
Victor Stinner	7a9105a	2011-12-12 00:13:42 +0100	[diff] [blame]	252	* test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	253	* kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
				254	PyUnicode_4BYTE_KIND
				255	* compact = 0
				256	* ready = 1
				257	* data.any is not NULL
Victor Stinner	a41463c	2011-10-04 01:05:08 +0200	[diff] [blame]	258	* utf8 is shared and utf8_length = length with data.any if ascii = 1
				259	* utf8_length = 0 if utf8 is NULL
Victor Stinner	e30c0a1	2011-11-04 20:54:05 +0100	[diff] [blame]	260	* wstr is shared with data.any and wstr_length = length
Victor Stinner	a41463c	2011-10-04 01:05:08 +0200	[diff] [blame]	261	if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
				262	or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
				263	* wstr_length = 0 if wstr is NULL
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	264
Victor Stinner	8cfcbed	2011-10-03 23:19:21 +0200	[diff] [blame]	265	Compact strings use only one memory block (structure + characters),
				266	whereas legacy strings use one block for the structure and one block
				267	for characters.
Victor Stinner	910337b	2011-10-03 03:20:16 +0200	[diff] [blame]	268
Victor Stinner	8cfcbed	2011-10-03 23:19:21 +0200	[diff] [blame]	269	Legacy strings are created by PyUnicode_FromUnicode() and
				270	PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
				271	when PyUnicode_READY() is called.
				272
				273	See also _PyUnicode_CheckConsistency().
				274	*/
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	275	PyObject_HEAD
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	276	Py_ssize_t length; /* Number of code points in the string */
Benjamin Peterson	8f67d08	2010-10-17 20:54:53 +0000	[diff] [blame]	277	Py_hash_t hash; /* Hash value; -1 if not set */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	278	struct {
				279	/*
				280	SSTATE_NOT_INTERNED (0)
				281	SSTATE_INTERNED_MORTAL (1)
				282	SSTATE_INTERNED_IMMORTAL (2)
				283
				284	If interned != SSTATE_NOT_INTERNED, the two references from the
				285	dictionary to this object are not counted in ob_refcnt.
				286	*/
				287	unsigned int interned:2;
				288	/* Character size:
				289
Victor Stinner	4d0d54b	2011-10-05 01:31:05 +0200	[diff] [blame]	290	- PyUnicode_WCHAR_KIND (0):
				291
				292	* character type = wchar_t (16 or 32 bits, depending on the
				293	platform)
				294
				295	- PyUnicode_1BYTE_KIND (1):
				296
				297	* character type = Py_UCS1 (8 bits, unsigned)
Victor Stinner	77faf69	2011-11-20 18:56:05 +0100	[diff] [blame]	298	* all characters are in the range U+0000-U+00FF (latin1)
				299	* if ascii is set, all characters are in the range U+0000-U+007F
				300	(ASCII), otherwise at least one character is in the range
Victor Stinner	1d4b35f	2011-10-06 01:51:19 +0200	[diff] [blame]	301	U+0080-U+00FF
Victor Stinner	4d0d54b	2011-10-05 01:31:05 +0200	[diff] [blame]	302
				303	- PyUnicode_2BYTE_KIND (2):
				304
				305	* character type = Py_UCS2 (16 bits, unsigned)
Victor Stinner	77faf69	2011-11-20 18:56:05 +0100	[diff] [blame]	306	* all characters are in the range U+0000-U+FFFF (BMP)
				307	* at least one character is in the range U+0100-U+FFFF
Victor Stinner	4d0d54b	2011-10-05 01:31:05 +0200	[diff] [blame]	308
Martin v. Löwis	c47adb0	2011-10-07 20:55:35 +0200	[diff] [blame]	309	- PyUnicode_4BYTE_KIND (4):
Victor Stinner	4d0d54b	2011-10-05 01:31:05 +0200	[diff] [blame]	310
				311	* character type = Py_UCS4 (32 bits, unsigned)
Victor Stinner	77faf69	2011-11-20 18:56:05 +0100	[diff] [blame]	312	* all characters are in the range U+0000-U+10FFFF
				313	* at least one character is in the range U+10000-U+10FFFF
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	314	*/
Martin v. Löwis	c47adb0	2011-10-07 20:55:35 +0200	[diff] [blame]	315	unsigned int kind:3;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	316	/* Compact is with respect to the allocation scheme. Compact unicode
				317	objects only require one memory block while non-compact objects use
				318	one block for the PyUnicodeObject struct and another for its data
				319	buffer. */
				320	unsigned int compact:1;
Victor Stinner	77faf69	2011-11-20 18:56:05 +0100	[diff] [blame]	321	/* The string only contains characters in the range U+0000-U+007F (ASCII)
Victor Stinner	1d4b35f	2011-10-06 01:51:19 +0200	[diff] [blame]	322	and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
				323	set, use the PyASCIIObject structure. */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	324	unsigned int ascii:1;
				325	/* The ready flag indicates whether the object layout is initialized
				326	completely. This means that this is either a compact object, or
				327	the data pointer is filled out. The bit is redundant, and helps
				328	to minimize the test in PyUnicode_IS_READY(). */
				329	unsigned int ready:1;
Antoine Pitrou	8c6f8dc	2014-03-23 22:55:03 +0100	[diff] [blame]	330	/* Padding to ensure that PyUnicode_DATA() is always aligned to
				331	4 bytes (see issue #19537 on m68k). */
				332	unsigned int :24;
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	333	} state;
				334	wchar_t *wstr; /* wchar_t representation (null-terminated) */
				335	} PyASCIIObject;
				336
				337	/* Non-ASCII strings allocated through PyUnicode_New use the
Georg Brandl	c6bc4c6	2011-10-05 16:23:09 +0200	[diff] [blame]	338	PyCompactUnicodeObject structure. state.compact is set, and the data
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	339	immediately follow the structure. */
				340	typedef struct {
				341	PyASCIIObject _base;
				342	Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the
				343	* terminating \0. */
				344	char *utf8; /* UTF-8 representation (null-terminated) */
				345	Py_ssize_t wstr_length; /* Number of code points in wstr, possible
				346	* surrogates count as two code points. */
				347	} PyCompactUnicodeObject;
				348
				349	/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
				350	PyUnicodeObject structure. The actual string data is initially in the wstr
Victor Stinner	a3b334d	2011-10-03 13:53:37 +0200	[diff] [blame]	351	block, and copied into the data block using _PyUnicode_Ready. */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	352	typedef struct {
				353	PyCompactUnicodeObject _base;
				354	union {
				355	void *any;
				356	Py_UCS1 *latin1;
				357	Py_UCS2 *ucs2;
				358	Py_UCS4 *ucs4;
				359	} data; /* Canonical, smallest-form Unicode buffer */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	360	} PyUnicodeObject;
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	361	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	362
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	363	PyAPI_DATA(PyTypeObject) PyUnicode_Type;
Christian Heimes	a22e8bd	2007-11-29 22:35:39 +0000	[diff] [blame]	364	PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	365
Thomas Wouters	27d517b	2007-02-25 20:39:11 +0000	[diff] [blame]	366	#define PyUnicode_Check(op) \
Christian Heimes	90aa764	2007-12-19 02:45:37 +0000	[diff] [blame]	367	PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
				368	#define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	369
				370	/* Fast access macros */
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	371	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	372
				373	#define PyUnicode_WSTR_LENGTH(op) \
Victor Stinner	a3b334d	2011-10-03 13:53:37 +0200	[diff] [blame]	374	(PyUnicode_IS_COMPACT_ASCII(op) ? \
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	375	((PyASCIIObject*)op)->length : \
				376	((PyCompactUnicodeObject*)op)->wstr_length)
				377
				378	/* Returns the deprecated Py_UNICODE representation's size in code units
				379	(this includes surrogate pairs as 2 units).
				380	If the Py_UNICODE representation is not available, it will be computed
				381	on request. Use PyUnicode_GET_LENGTH() for the length in code points. */
				382
Victor Stinner	f3ae620	2011-11-21 02:24:49 +0100	[diff] [blame]	383	#define PyUnicode_GET_SIZE(op) \
				384	(assert(PyUnicode_Check(op)), \
				385	(((PyASCIIObject *)(op))->wstr) ? \
				386	PyUnicode_WSTR_LENGTH(op) : \
				387	((void)PyUnicode_AsUnicode((PyObject *)(op)), \
				388	assert(((PyASCIIObject *)(op))->wstr), \
				389	PyUnicode_WSTR_LENGTH(op)))
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	390
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	391	#define PyUnicode_GET_DATA_SIZE(op) \
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	392	(PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE)
				393
				394	/* Alias for PyUnicode_AsUnicode(). This will create a wchar_t/Py_UNICODE
				395	representation on demand. Using this macro is very inefficient now,
				396	try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
				397	use PyUnicode_WRITE() and PyUnicode_READ(). */
				398
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	399	#define PyUnicode_AS_UNICODE(op) \
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	400	(assert(PyUnicode_Check(op)), \
				401	(((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \
				402	PyUnicode_AsUnicode((PyObject *)(op)))
				403
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	404	#define PyUnicode_AS_DATA(op) \
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	405	((const char *)(PyUnicode_AS_UNICODE(op)))
				406
				407
Georg Brandl	c6bc4c6	2011-10-05 16:23:09 +0200	[diff] [blame]	408	/* --- Flexible String Representation Helper Macros (PEP 393) -------------- */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	409
Victor Stinner	6f9568b	2011-11-17 00:12:44 +0100	[diff] [blame]	410	/* Values for PyASCIIObject.state: */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	411
				412	/* Interning state. */
				413	#define SSTATE_NOT_INTERNED 0
				414	#define SSTATE_INTERNED_MORTAL 1
				415	#define SSTATE_INTERNED_IMMORTAL 2
				416
Victor Stinner	a3b334d	2011-10-03 13:53:37 +0200	[diff] [blame]	417	/* Return true if the string contains only ASCII characters, or 0 if not. The
Victor Stinner	24c74be	2011-12-12 01:24:20 +0100	[diff] [blame]	418	string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be
				419	ready. */
				420	#define PyUnicode_IS_ASCII(op) \
				421	(assert(PyUnicode_Check(op)), \
				422	assert(PyUnicode_IS_READY(op)), \
				423	((PyASCIIObject*)op)->state.ascii)
Victor Stinner	a3b334d	2011-10-03 13:53:37 +0200	[diff] [blame]	424
				425	/* Return true if the string is compact or 0 if not.
				426	No type checks or Ready calls are performed. */
				427	#define PyUnicode_IS_COMPACT(op) \
				428	(((PyASCIIObject*)(op))->state.compact)
				429
				430	/* Return true if the string is a compact ASCII string (use PyASCIIObject
				431	structure), or 0 if not. No type checks or Ready calls are performed. */
				432	#define PyUnicode_IS_COMPACT_ASCII(op) \
Victor Stinner	24c74be	2011-12-12 01:24:20 +0100	[diff] [blame]	433	(((PyASCIIObject*)op)->state.ascii && PyUnicode_IS_COMPACT(op))
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	434
Victor Stinner	52e2cc8	2011-12-19 22:14:45 +0100	[diff] [blame]	435	enum PyUnicode_Kind {
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	436	/* String contains only wstr byte characters. This is only possible
Victor Stinner	a3b334d	2011-10-03 13:53:37 +0200	[diff] [blame]	437	when the string was created with a legacy API and _PyUnicode_Ready()
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	438	has not been called yet. */
Victor Stinner	52e2cc8	2011-12-19 22:14:45 +0100	[diff] [blame]	439	PyUnicode_WCHAR_KIND = 0,
				440	/* Return values of the PyUnicode_KIND() macro: */
Martin v. Löwis	c47adb0	2011-10-07 20:55:35 +0200	[diff] [blame]	441	PyUnicode_1BYTE_KIND = 1,
				442	PyUnicode_2BYTE_KIND = 2,
				443	PyUnicode_4BYTE_KIND = 4
				444	};
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	445
Georg Brandl	4975a9b	2011-10-05 16:12:21 +0200	[diff] [blame]	446	/* Return pointers to the canonical representation cast to unsigned char,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	447	Py_UCS2, or Py_UCS4 for direct character access.
Martin v. Löwis	c47adb0	2011-10-07 20:55:35 +0200	[diff] [blame]	448	No checks are performed, use PyUnicode_KIND() before to ensure
				449	these will work correctly. */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	450
				451	#define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op))
				452	#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op))
				453	#define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op))
				454
Victor Stinner	157f83f	2011-09-28 21:41:31 +0200	[diff] [blame]	455	/* Return one of the PyUnicode_*_KIND values defined above. */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	456	#define PyUnicode_KIND(op) \
				457	(assert(PyUnicode_Check(op)), \
				458	assert(PyUnicode_IS_READY(op)), \
				459	((PyASCIIObject *)(op))->state.kind)
				460
Victor Stinner	157f83f	2011-09-28 21:41:31 +0200	[diff] [blame]	461	/* Return a void pointer to the raw unicode buffer. */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	462	#define _PyUnicode_COMPACT_DATA(op) \
Victor Stinner	55c7e00	2011-10-18 23:32:53 +0200	[diff] [blame]	463	(PyUnicode_IS_ASCII(op) ? \
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	464	((void*)((PyASCIIObject*)(op) + 1)) : \
				465	((void*)((PyCompactUnicodeObject*)(op) + 1)))
				466
				467	#define _PyUnicode_NONCOMPACT_DATA(op) \
				468	(assert(((PyUnicodeObject*)(op))->data.any), \
				469	((((PyUnicodeObject *)(op))->data.any)))
				470
				471	#define PyUnicode_DATA(op) \
				472	(assert(PyUnicode_Check(op)), \
				473	PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) : \
				474	_PyUnicode_NONCOMPACT_DATA(op))
				475
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	476	/* In the access macros below, "kind" may be evaluated more than once.
				477	All other macro parameters are evaluated exactly once, so it is safe
				478	to put side effects into them (such as increasing the index). */
				479
				480	/* Write into the canonical representation, this macro does not do any sanity
				481	checks and is intended for usage in loops. The caller should cache the
Georg Brandl	07de325	2011-10-05 16:47:38 +0200	[diff] [blame]	482	kind and data pointers obtained from other macro calls.
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	483	index is the index in the string (starts at 0) and value is the new
Georg Brandl	c6bc4c6	2011-10-05 16:23:09 +0200	[diff] [blame]	484	code point value which should be written to that location. */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	485	#define PyUnicode_WRITE(kind, data, index, value) \
				486	do { \
				487	switch ((kind)) { \
				488	case PyUnicode_1BYTE_KIND: { \
				489	((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \
				490	break; \
				491	} \
				492	case PyUnicode_2BYTE_KIND: { \
				493	((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \
				494	break; \
				495	} \
				496	default: { \
				497	assert((kind) == PyUnicode_4BYTE_KIND); \
				498	((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \
				499	} \
				500	} \
				501	} while (0)
				502
Georg Brandl	07de325	2011-10-05 16:47:38 +0200	[diff] [blame]	503	/* Read a code point from the string's canonical representation. No checks
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	504	or ready calls are performed. */
				505	#define PyUnicode_READ(kind, data, index) \
				506	((Py_UCS4) \
				507	((kind) == PyUnicode_1BYTE_KIND ? \
Victor Stinner	7a48ff7	2011-10-02 00:55:25 +0200	[diff] [blame]	508	((const Py_UCS1 *)(data))[(index)] : \
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	509	((kind) == PyUnicode_2BYTE_KIND ? \
				510	((const Py_UCS2 *)(data))[(index)] : \
				511	((const Py_UCS4 *)(data))[(index)] \
				512	) \
				513	))
				514
				515	/* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it
				516	calls PyUnicode_KIND() and might call it twice. For single reads, use
				517	PyUnicode_READ_CHAR, for multiple consecutive reads callers should
				518	cache kind and use PyUnicode_READ instead. */
				519	#define PyUnicode_READ_CHAR(unicode, index) \
Victor Stinner	3794376	2011-10-02 20:33:18 +0200	[diff] [blame]	520	(assert(PyUnicode_Check(unicode)), \
				521	assert(PyUnicode_IS_READY(unicode)), \
				522	(Py_UCS4) \
				523	(PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \
				524	((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \
				525	(PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \
				526	((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \
				527	((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \
				528	) \
				529	))
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	530
				531	/* Returns the length of the unicode string. The caller has to make sure that
				532	the string has its canonical representation set before calling
				533	this macro. Call PyUnicode_(FAST_)Ready to ensure that. */
				534	#define PyUnicode_GET_LENGTH(op) \
				535	(assert(PyUnicode_Check(op)), \
				536	assert(PyUnicode_IS_READY(op)), \
				537	((PyASCIIObject *)(op))->length)
				538
				539
				540	/* Fast check to determine whether an object is ready. Equivalent to
				541	PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any) */
				542
				543	#define PyUnicode_IS_READY(op) (((PyASCIIObject*)op)->state.ready)
				544
Victor Stinner	a3b334d	2011-10-03 13:53:37 +0200	[diff] [blame]	545	/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	546	case. If the canonical representation is not yet set, it will still call
Victor Stinner	a3b334d	2011-10-03 13:53:37 +0200	[diff] [blame]	547	_PyUnicode_Ready().
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	548	Returns 0 on success and -1 on errors. */
				549	#define PyUnicode_READY(op) \
				550	(assert(PyUnicode_Check(op)), \
				551	(PyUnicode_IS_READY(op) ? \
Victor Stinner	d8f6510	2011-09-29 19:43:17 +0200	[diff] [blame]	552	0 : _PyUnicode_Ready((PyObject *)(op))))
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	553
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	554	/* Return a maximum character value which is suitable for creating another
				555	string based on op. This is always an approximation but more efficient
Georg Brandl	c6bc4c6	2011-10-05 16:23:09 +0200	[diff] [blame]	556	than iterating over the string. */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	557	#define PyUnicode_MAX_CHAR_VALUE(op) \
				558	(assert(PyUnicode_IS_READY(op)), \
Victor Stinner	8813104	2011-10-13 01:12:01 +0200	[diff] [blame]	559	(PyUnicode_IS_ASCII(op) ? \
				560	(0x7f) : \
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	561	(PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ? \
Victor Stinner	8813104	2011-10-13 01:12:01 +0200	[diff] [blame]	562	(0xffU) : \
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	563	(PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ? \
Victor Stinner	8813104	2011-10-13 01:12:01 +0200	[diff] [blame]	564	(0xffffU) : \
				565	(0x10ffffU)))))
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	566
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	567	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	568
				569	/* --- Constants ---------------------------------------------------------- */
				570
				571	/* This Unicode character will be used as replacement character during
				572	decoding if the errors argument is set to "replace". Note: the
				573	Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
				574	Unicode 3.0. */
				575
Victor Stinner	5ce1b0d	2011-09-28 20:29:27 +0200	[diff] [blame]	576	#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD)
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	577
				578	/* === Public API ========================================================= */
				579
				580	/* --- Plain Py_UNICODE --------------------------------------------------- */
				581
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	582	/* With PEP 393, this is the recommended way to allocate a new unicode object.
				583	This function will allocate the object and its buffer in a single memory
				584	block. Objects created using this function are not resizable. */
				585	#ifndef Py_LIMITED_API
				586	PyAPI_FUNC(PyObject*) PyUnicode_New(
				587	Py_ssize_t size, /* Number of code points in the new string */
				588	Py_UCS4 maxchar /* maximum code point value in the string */
				589	);
				590	#endif
				591
Benjamin Peterson	82f34ad	2015-01-13 09:17:24 -0500	[diff] [blame]	592	/* Initializes the canonical string representation from the deprecated
Victor Stinner	d8f6510	2011-09-29 19:43:17 +0200	[diff] [blame]	593	wstr/Py_UNICODE representation. This function is used to convert Unicode
				594	objects which were created using the old API to the new flexible format
				595	introduced with PEP 393.
				596
				597	Don't call this function directly, use the public PyUnicode_READY() macro
				598	instead. */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	599	#ifndef Py_LIMITED_API
				600	PyAPI_FUNC(int) _PyUnicode_Ready(
Victor Stinner	d8f6510	2011-09-29 19:43:17 +0200	[diff] [blame]	601	PyObject *unicode /* Unicode object */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	602	);
				603	#endif
				604
Victor Stinner	034f6cf	2011-09-30 02:26:44 +0200	[diff] [blame]	605	/* Get a copy of a Unicode string. */
Victor Stinner	bf6e560	2011-12-12 01:53:47 +0100	[diff] [blame]	606	#ifndef Py_LIMITED_API
				607	PyAPI_FUNC(PyObject*) _PyUnicode_Copy(
Victor Stinner	034f6cf	2011-09-30 02:26:44 +0200	[diff] [blame]	608	PyObject *unicode
				609	);
Victor Stinner	bf6e560	2011-12-12 01:53:47 +0100	[diff] [blame]	610	#endif
Victor Stinner	034f6cf	2011-09-30 02:26:44 +0200	[diff] [blame]	611
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	612	/* Copy characters from one unicode object into another. This function performs
Victor Stinner	3fe5531	2012-01-04 00:33:50 +0100	[diff] [blame]	613	character conversion when necessary and falls back to memcpy() if possible.
Victor Stinner	be78eaf	2011-09-28 21:37:03 +0200	[diff] [blame]	614
Victor Stinner	3fe5531	2012-01-04 00:33:50 +0100	[diff] [blame]	615	Fail if to is too small (smaller than how_many or smaller than
Victor Stinner	a0702ab	2011-09-29 14:14:38 +0200	[diff] [blame]	616	len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
Victor Stinner	3fe5531	2012-01-04 00:33:50 +0100	[diff] [blame]	617	kind(to), or if to has more than 1 reference.
Victor Stinner	be78eaf	2011-09-28 21:37:03 +0200	[diff] [blame]	618
				619	Return the number of characters written, or return -1 and raise an exception
				620	on error.
				621
				622	Pseudo-code:
				623
				624	how_many = min(how_many, len(from) - from_start)
				625	to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
				626	return how_many
Victor Stinner	a0702ab	2011-09-29 14:14:38 +0200	[diff] [blame]	627
				628	Note: The function doesn't write a terminating null character.
Victor Stinner	be78eaf	2011-09-28 21:37:03 +0200	[diff] [blame]	629	*/
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	630	#ifndef Py_LIMITED_API
Victor Stinner	be78eaf	2011-09-28 21:37:03 +0200	[diff] [blame]	631	PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	632	PyObject *to,
				633	Py_ssize_t to_start,
				634	PyObject *from,
				635	Py_ssize_t from_start,
				636	Py_ssize_t how_many
				637	);
Victor Stinner	d3f0882	2012-05-29 12:57:52 +0200	[diff] [blame]	638
				639	/* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so
				640	may crash if parameters are invalid (e.g. if the output string
				641	is too short). */
				642	PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters(
				643	PyObject *to,
				644	Py_ssize_t to_start,
				645	PyObject *from,
				646	Py_ssize_t from_start,
				647	Py_ssize_t how_many
				648	);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	649	#endif
				650
Victor Stinner	d3f0882	2012-05-29 12:57:52 +0200	[diff] [blame]	651	#ifndef Py_LIMITED_API
Victor Stinner	3fe5531	2012-01-04 00:33:50 +0100	[diff] [blame]	652	/* Fill a string with a character: write fill_char into
				653	unicode[start:start+length].
				654
				655	Fail if fill_char is bigger than the string maximum character, or if the
				656	string has more than 1 reference.
				657
				658	Return the number of characters written, or return -1 and raise an exception
				659	on error. */
Victor Stinner	3fe5531	2012-01-04 00:33:50 +0100	[diff] [blame]	660	PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill(
				661	PyObject *unicode,
				662	Py_ssize_t start,
				663	Py_ssize_t length,
				664	Py_UCS4 fill_char
				665	);
Victor Stinner	d3f0882	2012-05-29 12:57:52 +0200	[diff] [blame]	666
				667	/* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash
				668	if parameters are invalid (e.g. if length is longer than the string). */
				669	PyAPI_FUNC(void) _PyUnicode_FastFill(
				670	PyObject *unicode,
				671	Py_ssize_t start,
				672	Py_ssize_t length,
				673	Py_UCS4 fill_char
				674	);
Victor Stinner	3fe5531	2012-01-04 00:33:50 +0100	[diff] [blame]	675	#endif
				676
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	677	/* Create a Unicode Object from the Py_UNICODE buffer u of the given
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	678	size.
Marc-André Lemburg	8155e0e	2001-04-23 14:44:21 +0000	[diff] [blame]	679
				680	u may be NULL which causes the contents to be undefined. It is the
				681	user's responsibility to fill in the needed data afterwards. Note
				682	that modifying the Unicode object contents after construction is
				683	only allowed if u was set to NULL.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	684
				685	The buffer is copied into the new object. */
				686
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	687	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	688	PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	689	const Py_UNICODE *u, /* Unicode buffer */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	690	Py_ssize_t size /* size of buffer */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	691	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	692	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	693
Georg Brandl	952867a	2010-06-27 10:17:12 +0000	[diff] [blame]	694	/* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */
Walter Dörwald	d203431	2007-05-18 16:29:38 +0000	[diff] [blame]	695	PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
Victor Stinner	0d71116	2010-12-27 02:39:20 +0000	[diff] [blame]	696	const char *u, /* UTF-8 encoded string */
Victor Stinner	dc2081f	2010-12-27 01:49:29 +0000	[diff] [blame]	697	Py_ssize_t size /* size of buffer */
Walter Dörwald	d203431	2007-05-18 16:29:38 +0000	[diff] [blame]	698	);
				699
Walter Dörwald	acaa5a1	2007-05-05 12:00:46 +0000	[diff] [blame]	700	/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	701	UTF-8 encoded bytes. The size is determined with strlen(). */
Walter Dörwald	acaa5a1	2007-05-05 12:00:46 +0000	[diff] [blame]	702	PyAPI_FUNC(PyObject*) PyUnicode_FromString(
Victor Stinner	dc2081f	2010-12-27 01:49:29 +0000	[diff] [blame]	703	const char *u /* UTF-8 encoded string */
Walter Dörwald	acaa5a1	2007-05-05 12:00:46 +0000	[diff] [blame]	704	);
				705
Victor Stinner	d3f0882	2012-05-29 12:57:52 +0200	[diff] [blame]	706	#ifndef Py_LIMITED_API
Victor Stinner	b9275c1	2011-10-05 14:01:42 +0200	[diff] [blame]	707	/* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.
				708	Scan the string to find the maximum character. */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	709	PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
				710	int kind,
				711	const void *buffer,
				712	Py_ssize_t size);
Victor Stinner	d3f0882	2012-05-29 12:57:52 +0200	[diff] [blame]	713
				714	/* Create a new string from a buffer of ASCII characters.
				715	WARNING: Don't check if the string contains any non-ASCII character. */
				716	PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
				717	const char *buffer,
				718	Py_ssize_t size);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	719	#endif
				720
				721	PyAPI_FUNC(PyObject*) PyUnicode_Substring(
				722	PyObject *str,
				723	Py_ssize_t start,
				724	Py_ssize_t end);
				725
Victor Stinner	ece58de	2012-04-23 23:36:38 +0200	[diff] [blame]	726	#ifndef Py_LIMITED_API
				727	/* Compute the maximum character of the substring unicode[start:end].
				728	Return 127 for an empty string. */
				729	PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
				730	PyObject *unicode,
				731	Py_ssize_t start,
				732	Py_ssize_t end);
				733	#endif
				734
Georg Brandl	db6c7f5	2011-10-07 11:19:11 +0200	[diff] [blame]	735	/* Copy the string into a UCS4 buffer including the null character if copy_null
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	736	is set. Return NULL and raise an exception on error. Raise a ValueError if
				737	the buffer is smaller than the string. Return buffer on success.
				738
				739	buflen is the length of the buffer in (Py_UCS4) characters. */
				740	PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4(
				741	PyObject *unicode,
				742	Py_UCS4* buffer,
				743	Py_ssize_t buflen,
				744	int copy_null);
				745
				746	/* Copy the string into a UCS4 buffer. A new buffer is allocated using
				747	* PyMem_Malloc; if this fails, NULL is returned with a memory error
				748	exception set. */
				749	PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode);
				750
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	751	/* Return a read-only pointer to the Unicode object's internal
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	752	Py_UNICODE buffer.
				753	If the wchar_t/Py_UNICODE representation is not yet available, this
				754	function will calculate it. */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	755
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	756	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	757	PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	758	PyObject *unicode /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	759	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	760	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	761
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	762	/* Return a read-only pointer to the Unicode object's internal
				763	Py_UNICODE buffer and save the length at size.
				764	If the wchar_t/Py_UNICODE representation is not yet available, this
				765	function will calculate it. */
				766
				767	#ifndef Py_LIMITED_API
				768	PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
				769	PyObject *unicode, /* Unicode object */
				770	Py_ssize_t *size /* location where to save the length */
				771	);
				772	#endif
				773
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	774	/* Get the length of the Unicode object. */
				775
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	776	PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength(
				777	PyObject *unicode
				778	);
				779
Victor Stinner	157f83f	2011-09-28 21:41:31 +0200	[diff] [blame]	780	/* Get the number of Py_UNICODE units in the
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	781	string representation. */
				782
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	783	PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	784	PyObject *unicode /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	785	);
				786
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	787	/* Read a character from the string. */
				788
				789	PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar(
				790	PyObject *unicode,
				791	Py_ssize_t index
				792	);
				793
				794	/* Write a character to the string. The string must have been created through
Victor Stinner	cd9950f	2011-10-02 00:34:53 +0200	[diff] [blame]	795	PyUnicode_New, must not be shared, and must not have been hashed yet.
				796
				797	Return 0 on success, -1 on error. */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	798
				799	PyAPI_FUNC(int) PyUnicode_WriteChar(
				800	PyObject *unicode,
				801	Py_ssize_t index,
				802	Py_UCS4 character
				803	);
				804
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	805	#ifndef Py_LIMITED_API
Martin v. Löwis	ce9b5a5	2001-06-27 06:28:56 +0000	[diff] [blame]	806	/* Get the maximum ordinal for a Unicode character. */
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	807	PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	808	#endif
Martin v. Löwis	ce9b5a5	2001-06-27 06:28:56 +0000	[diff] [blame]	809
Martin Panter	6245cb3	2016-04-15 02:14:19 +0000	[diff] [blame]	810	/* Resize a Unicode object. The length is the number of characters, except
Victor Stinner	b0a82a6	2011-12-12 13:08:33 +0100	[diff] [blame]	811	if the kind of the string is PyUnicode_WCHAR_KIND: in this case, the length
				812	is the number of Py_UNICODE characters.
Guido van Rossum	52c2359	2000-04-10 13:41:41 +0000	[diff] [blame]	813
				814	*unicode is modified to point to the new (resized) object and 0
				815	returned on success.
				816
Victor Stinner	b0a82a6	2011-12-12 13:08:33 +0100	[diff] [blame]	817	Try to resize the string in place (which is usually faster than allocating
				818	a new string and copy characters), or create a new string.
Guido van Rossum	52c2359	2000-04-10 13:41:41 +0000	[diff] [blame]	819
				820	Error handling is implemented as follows: an exception is set, -1
Victor Stinner	16e6a80	2011-12-12 13:24:15 +0100	[diff] [blame]	821	is returned and *unicode left untouched.
				822
				823	WARNING: The function doesn't check string content, the result may not be a
				824	string in canonical representation. */
Guido van Rossum	52c2359	2000-04-10 13:41:41 +0000	[diff] [blame]	825
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	826	PyAPI_FUNC(int) PyUnicode_Resize(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	827	PyObject **unicode, /* Pointer to the Unicode object */
				828	Py_ssize_t length /* New length */
Guido van Rossum	52c2359	2000-04-10 13:41:41 +0000	[diff] [blame]	829	);
				830
Serhiy Storchaka	6a7b3a7	2016-04-17 08:32:47 +0300	[diff] [blame]	831	/* Decode obj to a Unicode object.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	832
Martin Panter	20d3255	2016-04-15 00:56:21 +0000	[diff] [blame]	833	bytes, bytearray and other bytes-like objects are decoded according to the
				834	given encoding and error handler. The encoding and error handler can be
				835	NULL to have the interface use UTF-8 and "strict".
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	836
Martin Panter	20d3255	2016-04-15 00:56:21 +0000	[diff] [blame]	837	All other objects (including Unicode objects) raise an exception.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	838
				839	The API returns NULL in case of an error. The caller is responsible
				840	for decref'ing the returned objects.
				841
				842	*/
				843
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	844	PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
Antoine Pitrou	9ed5f27	2013-08-13 20:18:52 +0200	[diff] [blame]	845	PyObject *obj, /* Object */
Marc-André Lemburg	5a5c81a	2000-07-07 13:46:42 +0000	[diff] [blame]	846	const char *encoding, /* encoding */
				847	const char *errors /* error handling */
				848	);
				849
Martin Panter	20d3255	2016-04-15 00:56:21 +0000	[diff] [blame]	850	/* Copy an instance of a Unicode subtype to a new true Unicode object if
				851	necessary. If obj is already a true Unicode object (not a subtype), return
				852	the reference with incremented refcount.
Marc-André Lemburg	5a5c81a	2000-07-07 13:46:42 +0000	[diff] [blame]	853
				854	The API returns NULL in case of an error. The caller is responsible
				855	for decref'ing the returned objects.
				856
				857	*/
				858
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	859	PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
Antoine Pitrou	9ed5f27	2013-08-13 20:18:52 +0200	[diff] [blame]	860	PyObject *obj /* Object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	861	);
				862
Victor Stinner	1205f27	2010-09-11 00:54:47 +0000	[diff] [blame]	863	PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(
				864	const char *format, /* ASCII-encoded string */
				865	va_list vargs
				866	);
				867	PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
				868	const char *format, /* ASCII-encoded string */
				869	...
				870	);
Walter Dörwald	d203431	2007-05-18 16:29:38 +0000	[diff] [blame]	871
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	872	#ifndef Py_LIMITED_API
Victor Stinner	d3f0882	2012-05-29 12:57:52 +0200	[diff] [blame]	873	typedef struct {
				874	PyObject *buffer;
				875	void *data;
				876	enum PyUnicode_Kind kind;
				877	Py_UCS4 maxchar;
				878	Py_ssize_t size;
				879	Py_ssize_t pos;
Victor Stinner	8f674cc	2013-04-17 23:02:17 +0200	[diff] [blame]	880
				881	/* minimum number of allocated characters (default: 0) */
Victor Stinner	d3f0882	2012-05-29 12:57:52 +0200	[diff] [blame]	882	Py_ssize_t min_length;
Victor Stinner	8f674cc	2013-04-17 23:02:17 +0200	[diff] [blame]	883
				884	/* minimum character (default: 127, ASCII) */
				885	Py_UCS4 min_char;
				886
Victor Stinner	fdfbf78	2015-10-09 00:33:49 +0200	[diff] [blame]	887	/* If non-zero, overallocate the buffer (default: 0). */
Victor Stinner	d7b7c74	2012-06-04 22:52:12 +0200	[diff] [blame]	888	unsigned char overallocate;
Victor Stinner	8f674cc	2013-04-17 23:02:17 +0200	[diff] [blame]	889
Victor Stinner	d7b7c74	2012-06-04 22:52:12 +0200	[diff] [blame]	890	/* If readonly is 1, buffer is a shared string (cannot be modified)
				891	and size is set to 0. */
				892	unsigned char readonly;
Victor Stinner	d3f0882	2012-05-29 12:57:52 +0200	[diff] [blame]	893	} _PyUnicodeWriter ;
				894
				895	/* Initialize a Unicode writer.
Victor Stinner	8f674cc	2013-04-17 23:02:17 +0200	[diff] [blame]	896	*
				897	* By default, the minimum buffer size is 0 character and overallocation is
				898	* disabled. Set min_length, min_char and overallocate attributes to control
				899	* the allocation of the buffer. */
Victor Stinner	d3f0882	2012-05-29 12:57:52 +0200	[diff] [blame]	900	PyAPI_FUNC(void)
Victor Stinner	8f674cc	2013-04-17 23:02:17 +0200	[diff] [blame]	901	_PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
Victor Stinner	d3f0882	2012-05-29 12:57:52 +0200	[diff] [blame]	902
				903	/* Prepare the buffer to write 'length' characters
				904	with the specified maximum character.
				905
				906	Return 0 on success, raise an exception and return -1 on error. */
				907	#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
				908	(((MAXCHAR) <= (WRITER)->maxchar \
				909	&& (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
				910	? 0 \
				911	: (((LENGTH) == 0) \
				912	? 0 \
				913	: _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
				914
				915	/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
				916	instead. */
				917	PyAPI_FUNC(int)
				918	_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
				919	Py_ssize_t length, Py_UCS4 maxchar);
				920
Victor Stinner	ca9381e	2015-09-22 00:58:32 +0200	[diff] [blame]	921	/* Prepare the buffer to have at least the kind KIND.
				922	For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
				923	support characters in range U+0000-U+FFFF.
				924
				925	Return 0 on success, raise an exception and return -1 on error. */
				926	#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \
				927	(assert((KIND) != PyUnicode_WCHAR_KIND), \
				928	(KIND) <= (WRITER)->kind \
				929	? 0 \
				930	: _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
				931
				932	/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
				933	macro instead. */
				934	PyAPI_FUNC(int)
				935	_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
				936	enum PyUnicode_Kind kind);
				937
Victor Stinner	a0dd021	2013-04-11 22:09:04 +0200	[diff] [blame]	938	/* Append a Unicode character.
				939	Return 0 on success, raise an exception and return -1 on error. */
				940	PyAPI_FUNC(int)
				941	_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
				942	Py_UCS4 ch
				943	);
				944
Victor Stinner	e215d96	2012-10-06 23:03:36 +0200	[diff] [blame]	945	/* Append a Unicode string.
				946	Return 0 on success, raise an exception and return -1 on error. */
Victor Stinner	d3f0882	2012-05-29 12:57:52 +0200	[diff] [blame]	947	PyAPI_FUNC(int)
Victor Stinner	e215d96	2012-10-06 23:03:36 +0200	[diff] [blame]	948	_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
				949	PyObject *str /* Unicode string */
				950	);
Victor Stinner	d3f0882	2012-05-29 12:57:52 +0200	[diff] [blame]	951
Victor Stinner	cfc4c13	2013-04-03 01:48:39 +0200	[diff] [blame]	952	/* Append a substring of a Unicode string.
				953	Return 0 on success, raise an exception and return -1 on error. */
				954	PyAPI_FUNC(int)
				955	_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
				956	PyObject *str, /* Unicode string */
				957	Py_ssize_t start,
				958	Py_ssize_t end
				959	);
				960
Serhiy Storchaka	d65c949	2015-11-02 14:10:23 +0200	[diff] [blame]	961	/* Append an ASCII-encoded byte string.
Victor Stinner	4a58707	2013-11-19 12:54:53 +0100	[diff] [blame]	962	Return 0 on success, raise an exception and return -1 on error. */
				963	PyAPI_FUNC(int)
				964	_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
				965	const char *str, /* ASCII-encoded byte string */
				966	Py_ssize_t len /* number of bytes, or -1 if unknown */
				967	);
				968
Victor Stinner	e215d96	2012-10-06 23:03:36 +0200	[diff] [blame]	969	/* Append a latin1-encoded byte string.
				970	Return 0 on success, raise an exception and return -1 on error. */
				971	PyAPI_FUNC(int)
Victor Stinner	4a58707	2013-11-19 12:54:53 +0100	[diff] [blame]	972	_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
				973	const char *str, /* latin1-encoded byte string */
				974	Py_ssize_t len /* length in bytes */
Victor Stinner	e215d96	2012-10-06 23:03:36 +0200	[diff] [blame]	975	);
				976
Martin Panter	6245cb3	2016-04-15 02:14:19 +0000	[diff] [blame]	977	/* Get the value of the writer as a Unicode string. Clear the
Victor Stinner	e215d96	2012-10-06 23:03:36 +0200	[diff] [blame]	978	buffer of the writer. Raise an exception and return NULL
				979	on error. */
Victor Stinner	d3f0882	2012-05-29 12:57:52 +0200	[diff] [blame]	980	PyAPI_FUNC(PyObject *)
				981	_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
				982
Victor Stinner	e215d96	2012-10-06 23:03:36 +0200	[diff] [blame]	983	/* Deallocate memory of a writer (clear its internal buffer). */
Victor Stinner	d3f0882	2012-05-29 12:57:52 +0200	[diff] [blame]	984	PyAPI_FUNC(void)
				985	_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
				986	#endif
				987
				988	#ifndef Py_LIMITED_API
Eric Smith	4a7d76d	2008-05-30 18:10:19 +0000	[diff] [blame]	989	/* Format the object based on the format_spec, as defined in PEP 3101
				990	(Advanced String Formatting). */
Victor Stinner	d3f0882	2012-05-29 12:57:52 +0200	[diff] [blame]	991	PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
				992	_PyUnicodeWriter *writer,
				993	PyObject *obj,
				994	PyObject *format_spec,
				995	Py_ssize_t start,
				996	Py_ssize_t end);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	997	#endif
Eric Smith	4a7d76d	2008-05-30 18:10:19 +0000	[diff] [blame]	998
Walter Dörwald	1680713	2007-05-25 13:52:07 +0000	[diff] [blame]	999	PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
				1000	PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
Victor Stinner	dc2081f	2010-12-27 01:49:29 +0000	[diff] [blame]	1001	PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
				1002	const char *u /* UTF-8 encoded string */
				1003	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1004	#ifndef Py_LIMITED_API
Walter Dörwald	1680713	2007-05-25 13:52:07 +0000	[diff] [blame]	1005	PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1006	#endif
Walter Dörwald	1680713	2007-05-25 13:52:07 +0000	[diff] [blame]	1007
				1008	/* Use only if you know it's a string */
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1009	#define PyUnicode_CHECK_INTERNED(op) \
				1010	(((PyASCIIObject *)(op))->state.interned)
Walter Dörwald	1680713	2007-05-25 13:52:07 +0000	[diff] [blame]	1011
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1012	/* --- wchar_t support for platforms which support it --------------------- */
				1013
				1014	#ifdef HAVE_WCHAR_H
				1015
Georg Brandl	952867a	2010-06-27 10:17:12 +0000	[diff] [blame]	1016	/* Create a Unicode Object from the wchar_t buffer w of the given
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1017	size.
				1018
				1019	The buffer is copied into the new object. */
				1020
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1021	PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
Antoine Pitrou	9ed5f27	2013-08-13 20:18:52 +0200	[diff] [blame]	1022	const wchar_t *w, /* wchar_t buffer */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1023	Py_ssize_t size /* size of buffer */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1024	);
				1025
Marc-André Lemburg	a9cadcd	2004-11-22 13:02:31 +0000	[diff] [blame]	1026	/* Copies the Unicode Object contents into the wchar_t buffer w. At
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1027	most size wchar_t characters are copied.
				1028
Marc-André Lemburg	a9cadcd	2004-11-22 13:02:31 +0000	[diff] [blame]	1029	Note that the resulting wchar_t string may or may not be
				1030	0-terminated. It is the responsibility of the caller to make sure
				1031	that the wchar_t string is 0-terminated in case this is required by
				1032	the application.
				1033
				1034	Returns the number of wchar_t characters copied (excluding a
				1035	possibly trailing 0-termination character) or -1 in case of an
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1036	error. */
				1037
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1038	PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1039	PyObject *unicode, /* Unicode object */
Antoine Pitrou	9ed5f27	2013-08-13 20:18:52 +0200	[diff] [blame]	1040	wchar_t *w, /* wchar_t buffer */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1041	Py_ssize_t size /* size of buffer */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1042	);
				1043
Victor Stinner	137c34c	2010-09-29 10:25:54 +0000	[diff] [blame]	1044	/* Convert the Unicode object to a wide character string. The output string
				1045	always ends with a nul character. If size is not NULL, write the number of
Victor Stinner	d88d983	2011-09-06 02:00:05 +0200	[diff] [blame]	1046	wide characters (excluding the null character) into *size.
Victor Stinner	137c34c	2010-09-29 10:25:54 +0000	[diff] [blame]	1047
Victor Stinner	22fabe2	2015-02-11 18:17:56 +0100	[diff] [blame]	1048	Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it)
Victor Stinner	137c34c	2010-09-29 10:25:54 +0000	[diff] [blame]	1049	on success. On error, returns NULL, *size is undefined and raises a
				1050	MemoryError. */
				1051
				1052	PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
Victor Stinner	beb4135b	2010-10-07 01:02:42 +0000	[diff] [blame]	1053	PyObject *unicode, /* Unicode object */
Victor Stinner	137c34c	2010-09-29 10:25:54 +0000	[diff] [blame]	1054	Py_ssize_t *size /* number of characters of the result */
				1055	);
				1056
Victor Stinner	9f789e7	2011-10-01 03:57:28 +0200	[diff] [blame]	1057	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1058	PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);
Victor Stinner	9f789e7	2011-10-01 03:57:28 +0200	[diff] [blame]	1059	#endif
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1060
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1061	#endif
				1062
Marc-André Lemburg	cc8764c	2002-08-11 12:23:04 +0000	[diff] [blame]	1063	/* --- Unicode ordinals --------------------------------------------------- */
				1064
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1065	/* Create a Unicode Object from the given Unicode code point ordinal.
				1066
Ezio Melotti	e7f9037	2012-10-05 03:33:31 +0300	[diff] [blame]	1067	The ordinal must be in range(0x110000). A ValueError is
Marc-André Lemburg	cc8764c	2002-08-11 12:23:04 +0000	[diff] [blame]	1068	raised in case it is not.
				1069
				1070	*/
				1071
Marc-André Lemburg	9c329de	2002-08-12 08:19:10 +0000	[diff] [blame]	1072	PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
Marc-André Lemburg	cc8764c	2002-08-11 12:23:04 +0000	[diff] [blame]	1073
Benjamin Peterson	960cf0f	2009-01-09 04:11:44 +0000	[diff] [blame]	1074	/* --- Free-list management ----------------------------------------------- */
				1075
				1076	/* Clear the free list used by the Unicode implementation.
				1077
				1078	This can be used to release memory used for objects on the free
				1079	list back to the Python memory allocator.
				1080
				1081	*/
				1082
				1083	PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
				1084
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1085	/* === Builtin Codecs =====================================================
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1086
				1087	Many of these APIs take two arguments encoding and errors. These
				1088	parameters encoding and errors have the same semantics as the ones
Alexander Belopolsky	83283c2	2010-11-16 14:29:01 +0000	[diff] [blame]	1089	of the builtin str() API.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1090
Georg Brandl	952867a	2010-06-27 10:17:12 +0000	[diff] [blame]	1091	Setting encoding to NULL causes the default encoding (UTF-8) to be used.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1092
				1093	Error handling is set by errors which may also be set to NULL
				1094	meaning to use the default handling defined for the codec. Default
				1095	error handling for all builtin codecs is "strict" (ValueErrors are
				1096	raised).
				1097
				1098	The codecs all use a similar interface. Only deviations from the
				1099	generic ones are documented.
				1100
				1101	*/
				1102
Fred Drake	cb093fe	2000-05-09 19:51:53 +0000	[diff] [blame]	1103	/* --- Manage the default encoding ---------------------------------------- */
				1104
Alexander Belopolsky	83283c2	2010-11-16 14:29:01 +0000	[diff] [blame]	1105	/* Returns a pointer to the default encoding (UTF-8) of the
Marc-André Lemburg	9155aa7	2008-04-29 11:14:08 +0000	[diff] [blame]	1106	Unicode object unicode and the size of the encoded representation
				1107	in bytes stored in *size.
Christian Heimes	5894ba7	2007-11-04 11:43:14 +0000	[diff] [blame]	1108
Marc-André Lemburg	9155aa7	2008-04-29 11:14:08 +0000	[diff] [blame]	1109	In case of an error, no *size is set.
Guido van Rossum	7d1df6c	2007-08-29 13:53:23 +0000	[diff] [blame]	1110
Georg Brandl	c6bc4c6	2011-10-05 16:23:09 +0200	[diff] [blame]	1111	This function caches the UTF-8 encoded string in the unicodeobject
				1112	and subsequent calls will return the same string. The memory is released
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1113	when the unicodeobject is deallocated.
				1114
				1115	_PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to
				1116	support the previous internal function with the same behaviour.
				1117
Marc-André Lemburg	4cc0f24	2008-08-07 18:54:33 +0000	[diff] [blame]	1118	*** This API is for interpreter INTERNAL USE ONLY and will likely
Alexander Belopolsky	83283c2	2010-11-16 14:29:01 +0000	[diff] [blame]	1119	*** be removed or changed in the future.
Marc-André Lemburg	4cc0f24	2008-08-07 18:54:33 +0000	[diff] [blame]	1120
				1121	*** If you need to access the Unicode object as UTF-8 bytes string,
				1122	*** please use PyUnicode_AsUTF8String() instead.
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	1123	*/
				1124
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1125	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1126	PyAPI_FUNC(char *) PyUnicode_AsUTF8AndSize(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1127	PyObject *unicode,
Marc-André Lemburg	9155aa7	2008-04-29 11:14:08 +0000	[diff] [blame]	1128	Py_ssize_t *size);
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1129	#define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1130	#endif
Guido van Rossum	7d1df6c	2007-08-29 13:53:23 +0000	[diff] [blame]	1131
Alexander Belopolsky	83283c2	2010-11-16 14:29:01 +0000	[diff] [blame]	1132	/* Returns a pointer to the default encoding (UTF-8) of the
Marc-André Lemburg	9155aa7	2008-04-29 11:14:08 +0000	[diff] [blame]	1133	Unicode object unicode.
Guido van Rossum	7d1df6c	2007-08-29 13:53:23 +0000	[diff] [blame]	1134
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1135	Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
				1136	in the unicodeobject.
				1137
				1138	_PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
				1139	support the previous internal function with the same behaviour.
				1140
Marc-André Lemburg	9155aa7	2008-04-29 11:14:08 +0000	[diff] [blame]	1141	Use of this API is DEPRECATED since no size information can be
Marc-André Lemburg	4cc0f24	2008-08-07 18:54:33 +0000	[diff] [blame]	1142	extracted from the returned data.
				1143
				1144	*** This API is for interpreter INTERNAL USE ONLY and will likely
				1145	*** be removed or changed for Python 3.1.
				1146
				1147	*** If you need to access the Unicode object as UTF-8 bytes string,
				1148	*** please use PyUnicode_AsUTF8String() instead.
Guido van Rossum	7d1df6c	2007-08-29 13:53:23 +0000	[diff] [blame]	1149
Marc-André Lemburg	9155aa7	2008-04-29 11:14:08 +0000	[diff] [blame]	1150	*/
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	1151
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1152	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1153	PyAPI_FUNC(char *) PyUnicode_AsUTF8(PyObject *unicode);
				1154	#define _PyUnicode_AsString PyUnicode_AsUTF8
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1155	#endif
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	1156
Alexander Belopolsky	83283c2	2010-11-16 14:29:01 +0000	[diff] [blame]	1157	/* Returns "utf-8". */
Fred Drake	cb093fe	2000-05-09 19:51:53 +0000	[diff] [blame]	1158
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1159	PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
Fred Drake	cb093fe	2000-05-09 19:51:53 +0000	[diff] [blame]	1160
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1161	/* --- Generic Codecs ----------------------------------------------------- */
				1162
				1163	/* Create a Unicode object by decoding the encoded string s of the
				1164	given size. */
				1165
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1166	PyAPI_FUNC(PyObject*) PyUnicode_Decode(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1167	const char *s, /* encoded string */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1168	Py_ssize_t size, /* size of buffer */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1169	const char *encoding, /* encoding */
				1170	const char *errors /* error handling */
				1171	);
				1172
Marc-André Lemburg	b2750b5	2008-06-06 12:18:17 +0000	[diff] [blame]	1173	/* Decode a Unicode object unicode and return the result as Python
				1174	object. */
				1175
				1176	PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1177	PyObject *unicode, /* Unicode object */
				1178	const char *encoding, /* encoding */
				1179	const char *errors /* error handling */
Marc-André Lemburg	b2750b5	2008-06-06 12:18:17 +0000	[diff] [blame]	1180	);
				1181
				1182	/* Decode a Unicode object unicode and return the result as Unicode
				1183	object. */
				1184
				1185	PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1186	PyObject *unicode, /* Unicode object */
				1187	const char *encoding, /* encoding */
				1188	const char *errors /* error handling */
Marc-André Lemburg	b2750b5	2008-06-06 12:18:17 +0000	[diff] [blame]	1189	);
				1190
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1191	/* Encodes a Py_UNICODE buffer of the given size and returns a
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1192	Python string object. */
				1193
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1194	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1195	PyAPI_FUNC(PyObject*) PyUnicode_Encode(
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1196	const Py_UNICODE *s, /* Unicode char buffer */
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1197	Py_ssize_t size, /* number of Py_UNICODE chars to encode */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1198	const char *encoding, /* encoding */
				1199	const char *errors /* error handling */
				1200	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1201	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1202
Marc-André Lemburg	d2d4598	2004-07-08 17:57:32 +0000	[diff] [blame]	1203	/* Encodes a Unicode object and returns the result as Python
				1204	object. */
				1205
				1206	PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1207	PyObject *unicode, /* Unicode object */
				1208	const char *encoding, /* encoding */
				1209	const char *errors /* error handling */
Marc-André Lemburg	d2d4598	2004-07-08 17:57:32 +0000	[diff] [blame]	1210	);
				1211
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1212	/* Encodes a Unicode object and returns the result as Python string
				1213	object. */
				1214
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1215	PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1216	PyObject *unicode, /* Unicode object */
				1217	const char *encoding, /* encoding */
				1218	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1219	);
				1220
Marc-André Lemburg	b2750b5	2008-06-06 12:18:17 +0000	[diff] [blame]	1221	/* Encodes a Unicode object and returns the result as Unicode
				1222	object. */
				1223
				1224	PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1225	PyObject *unicode, /* Unicode object */
				1226	const char *encoding, /* encoding */
				1227	const char *errors /* error handling */
Marc-André Lemburg	b2750b5	2008-06-06 12:18:17 +0000	[diff] [blame]	1228	);
				1229
				1230	/* Build an encoding map. */
				1231
Thomas Wouters	73e5a5b	2006-06-08 15:35:45 +0000	[diff] [blame]	1232	PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
				1233	PyObject* string /* 256 character map */
				1234	);
				1235
Marc-André Lemburg	c60e6f7	2001-09-20 10:35:46 +0000	[diff] [blame]	1236	/* --- UTF-7 Codecs ------------------------------------------------------- */
				1237
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1238	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1239	const char *string, /* UTF-7 encoded string */
				1240	Py_ssize_t length, /* size of string */
				1241	const char *errors /* error handling */
Marc-André Lemburg	c60e6f7	2001-09-20 10:35:46 +0000	[diff] [blame]	1242	);
				1243
Christian Heimes	5d14c2b	2007-11-20 23:38:09 +0000	[diff] [blame]	1244	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1245	const char *string, /* UTF-7 encoded string */
				1246	Py_ssize_t length, /* size of string */
				1247	const char *errors, /* error handling */
				1248	Py_ssize_t *consumed /* bytes consumed */
Christian Heimes	5d14c2b	2007-11-20 23:38:09 +0000	[diff] [blame]	1249	);
				1250
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1251	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1252	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1253	const Py_UNICODE *data, /* Unicode char buffer */
				1254	Py_ssize_t length, /* number of Py_UNICODE chars to encode */
				1255	int base64SetO, /* Encode RFC2152 Set O characters in base64 */
				1256	int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
				1257	const char *errors /* error handling */
Marc-André Lemburg	c60e6f7	2001-09-20 10:35:46 +0000	[diff] [blame]	1258	);
Martin v. Löwis	1db7c13	2011-11-10 18:24:32 +0100	[diff] [blame]	1259	PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
				1260	PyObject *unicode, /* Unicode object */
				1261	int base64SetO, /* Encode RFC2152 Set O characters in base64 */
				1262	int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
				1263	const char *errors /* error handling */
				1264	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1265	#endif
Marc-André Lemburg	c60e6f7	2001-09-20 10:35:46 +0000	[diff] [blame]	1266
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1267	/* --- UTF-8 Codecs ------------------------------------------------------- */
				1268
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1269	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1270	const char *string, /* UTF-8 encoded string */
				1271	Py_ssize_t length, /* size of string */
				1272	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1273	);
				1274
Walter Dörwald	6965203	2004-09-07 20:24:22 +0000	[diff] [blame]	1275	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1276	const char *string, /* UTF-8 encoded string */
				1277	Py_ssize_t length, /* size of string */
				1278	const char *errors, /* error handling */
				1279	Py_ssize_t *consumed /* bytes consumed */
Walter Dörwald	6965203	2004-09-07 20:24:22 +0000	[diff] [blame]	1280	);
				1281
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1282	PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1283	PyObject *unicode /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1284	);
				1285
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1286	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1287	PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
				1288	PyObject *unicode,
				1289	const char *errors);
				1290
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1291	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1292	const Py_UNICODE *data, /* Unicode char buffer */
				1293	Py_ssize_t length, /* number of Py_UNICODE chars to encode */
				1294	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1295	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1296	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1297
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1298	/* --- UTF-32 Codecs ------------------------------------------------------ */
				1299
				1300	/* Decodes length bytes from a UTF-32 encoded buffer string and returns
				1301	the corresponding Unicode object.
				1302
				1303	errors (if non-NULL) defines the error handling. It defaults
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1304	to "strict".
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1305
				1306	If byteorder is non-NULL, the decoder starts decoding using the
				1307	given byte order:
				1308
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1309	*byteorder == -1: little endian
				1310	*byteorder == 0: native order
				1311	*byteorder == 1: big endian
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1312
				1313	In native mode, the first four bytes of the stream are checked for a
				1314	BOM mark. If found, the BOM mark is analysed, the byte order
				1315	adjusted and the BOM skipped. In the other modes, no BOM mark
				1316	interpretation is done. After completion, *byteorder is set to the
				1317	current byte order at the end of input data.
				1318
				1319	If byteorder is NULL, the codec starts in native order mode.
				1320
				1321	*/
				1322
				1323	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1324	const char *string, /* UTF-32 encoded string */
				1325	Py_ssize_t length, /* size of string */
				1326	const char *errors, /* error handling */
				1327	int *byteorder /* pointer to byteorder to use
				1328	0=native;-1=LE,1=BE; updated on
				1329	exit */
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1330	);
				1331
				1332	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1333	const char *string, /* UTF-32 encoded string */
				1334	Py_ssize_t length, /* size of string */
				1335	const char *errors, /* error handling */
				1336	int *byteorder, /* pointer to byteorder to use
				1337	0=native;-1=LE,1=BE; updated on
				1338	exit */
				1339	Py_ssize_t *consumed /* bytes consumed */
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1340	);
				1341
				1342	/* Returns a Python string using the UTF-32 encoding in native byte
				1343	order. The string always starts with a BOM mark. */
				1344
				1345	PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1346	PyObject *unicode /* Unicode object */
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1347	);
				1348
				1349	/* Returns a Python string object holding the UTF-32 encoded value of
				1350	the Unicode data.
				1351
				1352	If byteorder is not 0, output is written according to the following
				1353	byte order:
				1354
				1355	byteorder == -1: little endian
				1356	byteorder == 0: native byte order (writes a BOM mark)
				1357	byteorder == 1: big endian
				1358
				1359	If byteorder is 0, the output string will always start with the
				1360	Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
				1361	prepended.
				1362
				1363	*/
				1364
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1365	#ifndef Py_LIMITED_API
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1366	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1367	const Py_UNICODE *data, /* Unicode char buffer */
				1368	Py_ssize_t length, /* number of Py_UNICODE chars to encode */
				1369	const char *errors, /* error handling */
				1370	int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1371	);
Martin v. Löwis	1db7c13	2011-11-10 18:24:32 +0100	[diff] [blame]	1372	PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
				1373	PyObject *object, /* Unicode object */
				1374	const char *errors, /* error handling */
				1375	int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
				1376	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1377	#endif
Walter Dörwald	41980ca	2007-08-16 21:55:45 +0000	[diff] [blame]	1378
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1379	/* --- UTF-16 Codecs ------------------------------------------------------ */
				1380
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	1381	/* Decodes length bytes from a UTF-16 encoded buffer string and returns
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1382	the corresponding Unicode object.
				1383
				1384	errors (if non-NULL) defines the error handling. It defaults
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1385	to "strict".
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1386
				1387	If byteorder is non-NULL, the decoder starts decoding using the
				1388	given byte order:
				1389
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1390	*byteorder == -1: little endian
				1391	*byteorder == 0: native order
				1392	*byteorder == 1: big endian
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1393
Marc-André Lemburg	489b56e	2001-05-21 20:30:15 +0000	[diff] [blame]	1394	In native mode, the first two bytes of the stream are checked for a
				1395	BOM mark. If found, the BOM mark is analysed, the byte order
				1396	adjusted and the BOM skipped. In the other modes, no BOM mark
				1397	interpretation is done. After completion, *byteorder is set to the
				1398	current byte order at the end of input data.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1399
				1400	If byteorder is NULL, the codec starts in native order mode.
				1401
				1402	*/
				1403
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1404	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1405	const char *string, /* UTF-16 encoded string */
				1406	Py_ssize_t length, /* size of string */
				1407	const char *errors, /* error handling */
				1408	int *byteorder /* pointer to byteorder to use
				1409	0=native;-1=LE,1=BE; updated on
				1410	exit */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1411	);
				1412
Walter Dörwald	6965203	2004-09-07 20:24:22 +0000	[diff] [blame]	1413	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1414	const char *string, /* UTF-16 encoded string */
				1415	Py_ssize_t length, /* size of string */
				1416	const char *errors, /* error handling */
				1417	int *byteorder, /* pointer to byteorder to use
				1418	0=native;-1=LE,1=BE; updated on
				1419	exit */
				1420	Py_ssize_t *consumed /* bytes consumed */
Walter Dörwald	6965203	2004-09-07 20:24:22 +0000	[diff] [blame]	1421	);
				1422
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1423	/* Returns a Python string using the UTF-16 encoding in native byte
				1424	order. The string always starts with a BOM mark. */
				1425
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1426	PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1427	PyObject *unicode /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1428	);
				1429
				1430	/* Returns a Python string object holding the UTF-16 encoded value of
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	1431	the Unicode data.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1432
				1433	If byteorder is not 0, output is written according to the following
				1434	byte order:
				1435
				1436	byteorder == -1: little endian
				1437	byteorder == 0: native byte order (writes a BOM mark)
				1438	byteorder == 1: big endian
				1439
				1440	If byteorder is 0, the output string will always start with the
				1441	Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
				1442	prepended.
				1443
				1444	Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
				1445	UCS-2. This trick makes it possible to add full UTF-16 capabilities
Thomas Wouters	7e47402	2000-07-16 12:04:32 +0000	[diff] [blame]	1446	at a later point without compromising the APIs.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1447
				1448	*/
				1449
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1450	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1451	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1452	const Py_UNICODE *data, /* Unicode char buffer */
				1453	Py_ssize_t length, /* number of Py_UNICODE chars to encode */
				1454	const char *errors, /* error handling */
				1455	int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1456	);
Martin v. Löwis	1db7c13	2011-11-10 18:24:32 +0100	[diff] [blame]	1457	PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
				1458	PyObject* unicode, /* Unicode object */
				1459	const char *errors, /* error handling */
				1460	int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
				1461	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1462	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1463
				1464	/* --- Unicode-Escape Codecs ---------------------------------------------- */
				1465
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1466	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1467	const char *string, /* Unicode-Escape encoded string */
				1468	Py_ssize_t length, /* size of string */
				1469	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1470	);
				1471
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1472	PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1473	PyObject *unicode /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1474	);
				1475
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1476	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1477	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1478	const Py_UNICODE *data, /* Unicode char buffer */
				1479	Py_ssize_t length /* Number of Py_UNICODE chars to encode */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1480	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1481	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1482
				1483	/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
				1484
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1485	PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1486	const char *string, /* Raw-Unicode-Escape encoded string */
				1487	Py_ssize_t length, /* size of string */
				1488	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1489	);
				1490
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1491	PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1492	PyObject *unicode /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1493	);
				1494
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1495	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1496	PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1497	const Py_UNICODE *data, /* Unicode char buffer */
				1498	Py_ssize_t length /* Number of Py_UNICODE chars to encode */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1499	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1500	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1501
Walter Dörwald	a47d1c0	2005-08-30 10:23:14 +0000	[diff] [blame]	1502	/* --- Unicode Internal Codec ---------------------------------------------
				1503
				1504	Only for internal use in _codecsmodule.c */
				1505
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1506	#ifndef Py_LIMITED_API
Walter Dörwald	a47d1c0	2005-08-30 10:23:14 +0000	[diff] [blame]	1507	PyObject *_PyUnicode_DecodeUnicodeInternal(
				1508	const char *string,
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1509	Py_ssize_t length,
Walter Dörwald	a47d1c0	2005-08-30 10:23:14 +0000	[diff] [blame]	1510	const char *errors
				1511	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1512	#endif
Walter Dörwald	a47d1c0	2005-08-30 10:23:14 +0000	[diff] [blame]	1513
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1514	/* --- Latin-1 Codecs -----------------------------------------------------
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1515
				1516	Note: Latin-1 corresponds to the first 256 Unicode ordinals.
				1517
				1518	*/
				1519
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1520	PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1521	const char *string, /* Latin-1 encoded string */
				1522	Py_ssize_t length, /* size of string */
				1523	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1524	);
				1525
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1526	PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1527	PyObject *unicode /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1528	);
				1529
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1530	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1531	PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
				1532	PyObject* unicode,
				1533	const char* errors);
				1534
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1535	PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1536	const Py_UNICODE *data, /* Unicode char buffer */
				1537	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
				1538	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1539	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1540	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1541
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1542	/* --- ASCII Codecs -------------------------------------------------------
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1543
				1544	Only 7-bit ASCII data is accepted. All other codes generate errors.
				1545
				1546	*/
				1547
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1548	PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1549	const char *string, /* ASCII encoded string */
				1550	Py_ssize_t length, /* size of string */
				1551	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1552	);
				1553
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1554	PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1555	PyObject *unicode /* Unicode object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1556	);
				1557
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1558	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1559	PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
				1560	PyObject* unicode,
				1561	const char* errors);
				1562
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1563	PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1564	const Py_UNICODE *data, /* Unicode char buffer */
				1565	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
				1566	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1567	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1568	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1569
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1570	/* --- Character Map Codecs -----------------------------------------------
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1571
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1572	This codec uses mappings to encode and decode characters.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1573
				1574	Decoding mappings must map single string characters to single
				1575	Unicode characters, integers (which are then interpreted as Unicode
				1576	ordinals) or None (meaning "undefined mapping" and causing an
				1577	error).
				1578
				1579	Encoding mappings must map single Unicode characters to single
				1580	string characters, integers (which are then interpreted as Latin-1
				1581	ordinals) or None (meaning "undefined mapping" and causing an
				1582	error).
				1583
				1584	If a character lookup fails with a LookupError, the character is
				1585	copied as-is meaning that its ordinal value will be interpreted as
				1586	Unicode or Latin-1 ordinal resp. Because of this mappings only need
				1587	to contain those mappings which map characters to different code
				1588	points.
				1589
				1590	*/
				1591
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1592	PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1593	const char *string, /* Encoded string */
				1594	Py_ssize_t length, /* size of string */
				1595	PyObject *mapping, /* character mapping
				1596	(char ordinal -> unicode ordinal) */
				1597	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1598	);
				1599
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1600	PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1601	PyObject *unicode, /* Unicode object */
				1602	PyObject *mapping /* character mapping
				1603	(unicode ordinal -> char ordinal) */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1604	);
				1605
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1606	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1607	PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1608	const Py_UNICODE *data, /* Unicode char buffer */
				1609	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
				1610	PyObject *mapping, /* character mapping
				1611	(unicode ordinal -> char ordinal) */
				1612	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1613	);
Martin v. Löwis	23e275b	2011-11-02 18:02:51 +0100	[diff] [blame]	1614	PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
				1615	PyObject *unicode, /* Unicode object */
				1616	PyObject *mapping, /* character mapping
				1617	(unicode ordinal -> char ordinal) */
				1618	const char *errors /* error handling */
				1619	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1620	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1621
				1622	/* Translate a Py_UNICODE buffer of the given length by applying a
				1623	character mapping table to it and return the resulting Unicode
				1624	object.
				1625
				1626	The mapping table must map Unicode ordinal integers to Unicode
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1627	ordinal integers or None (causing deletion of the character).
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1628
				1629	Mapping tables may be dictionaries or sequences. Unmapped character
				1630	ordinals (ones which cause a LookupError) are left untouched and
				1631	are copied as-is.
				1632
				1633	*/
				1634
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1635	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1636	PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1637	const Py_UNICODE *data, /* Unicode char buffer */
				1638	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
				1639	PyObject *table, /* Translate table */
				1640	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1641	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1642	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1643
Steve Dower	cc16be8	2016-09-08 10:35:16 -0700	[diff] [blame]	1644	#ifdef MS_WINDOWS
Guido van Rossum	24bdb04	2000-03-28 20:29:59 +0000	[diff] [blame]	1645
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1646	/* --- MBCS codecs for Windows -------------------------------------------- */
Guido van Rossum	24bdb04	2000-03-28 20:29:59 +0000	[diff] [blame]	1647
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1648	PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1649	const char *string, /* MBCS encoded string */
Steve Dower	f5aba58	2016-09-06 19:42:27 -0700	[diff] [blame]	1650	Py_ssize_t length, /* size of string */
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1651	const char *errors /* error handling */
				1652	);
				1653
Thomas Wouters	0e3f591	2006-08-11 14:57:12 +0000	[diff] [blame]	1654	PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
				1655	const char *string, /* MBCS encoded string */
				1656	Py_ssize_t length, /* size of string */
				1657	const char *errors, /* error handling */
				1658	Py_ssize_t *consumed /* bytes consumed */
				1659	);
				1660
Victor Stinner	3a50e70	2011-10-18 21:21:00 +0200	[diff] [blame]	1661	PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful(
				1662	int code_page, /* code page number */
				1663	const char *string, /* encoded string */
				1664	Py_ssize_t length, /* size of string */
				1665	const char *errors, /* error handling */
				1666	Py_ssize_t *consumed /* bytes consumed */
				1667	);
				1668
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1669	PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1670	PyObject *unicode /* Unicode object */
				1671	);
				1672
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1673	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1674	PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1675	const Py_UNICODE *data, /* Unicode char buffer */
Victor Stinner	3a50e70	2011-10-18 21:21:00 +0200	[diff] [blame]	1676	Py_ssize_t length, /* number of Py_UNICODE chars to encode */
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1677	const char *errors /* error handling */
				1678	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1679	#endif
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	1680
Victor Stinner	3a50e70	2011-10-18 21:21:00 +0200	[diff] [blame]	1681	PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage(
				1682	int code_page, /* code page number */
				1683	PyObject *unicode, /* Unicode object */
				1684	const char *errors /* error handling */
				1685	);
				1686
Steve Dower	cc16be8	2016-09-08 10:35:16 -0700	[diff] [blame]	1687	#endif /* MS_WINDOWS */
Guido van Rossum	24bdb04	2000-03-28 20:29:59 +0000	[diff] [blame]	1688
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	1689	/* --- Decimal Encoder ---------------------------------------------------- */
				1690
				1691	/* Takes a Unicode string holding a decimal value and writes it into
				1692	an output buffer using standard ASCII digit codes.
				1693
				1694	The output buffer has to provide at least length+1 bytes of storage
				1695	area. The output string is 0-terminated.
				1696
				1697	The encoder converts whitespace to ' ', decimal characters to their
				1698	corresponding ASCII digit and all other Latin-1 characters except
				1699	\0 as-is. Characters outside this range (Unicode ordinals 1-256)
				1700	are treated as errors. This includes embedded NULL bytes.
				1701
				1702	Error handling is defined by the errors argument:
				1703
				1704	NULL or "strict": raise a ValueError
				1705	"ignore": ignore the wrong characters (these are not copied to the
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1706	output buffer)
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	1707	"replace": replaces illegal characters with '?'
				1708
				1709	Returns 0 on success, -1 on failure.
				1710
				1711	*/
				1712
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1713	#ifndef Py_LIMITED_API
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1714	PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1715	Py_UNICODE *s, /* Unicode buffer */
				1716	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
				1717	char *output, /* Output buffer; must have size >= length */
				1718	const char *errors /* error handling */
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	1719	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	1720	#endif
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	1721
Alexander Belopolsky	942af5a	2010-12-04 03:38:46 +0000	[diff] [blame]	1722	/* Transforms code points that have decimal digit property to the
				1723	corresponding ASCII digit code points.
				1724
				1725	Returns a new Unicode string on success, NULL on failure.
				1726	*/
				1727
Georg Brandl	b550308	2010-12-05 11:40:48 +0000	[diff] [blame]	1728	#ifndef Py_LIMITED_API
Alexander Belopolsky	942af5a	2010-12-04 03:38:46 +0000	[diff] [blame]	1729	PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
				1730	Py_UNICODE *s, /* Unicode buffer */
				1731	Py_ssize_t length /* Number of Py_UNICODE chars to transform */
				1732	);
Georg Brandl	b550308	2010-12-05 11:40:48 +0000	[diff] [blame]	1733	#endif
Alexander Belopolsky	942af5a	2010-12-04 03:38:46 +0000	[diff] [blame]	1734
Victor Stinner	6f9568b	2011-11-17 00:12:44 +0100	[diff] [blame]	1735	/* Similar to PyUnicode_TransformDecimalToASCII(), but takes a PyObject
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1736	as argument instead of a raw buffer and length. This function additionally
				1737	transforms spaces to ASCII because this is what the callers in longobject,
				1738	floatobject, and complexobject did anyways. */
				1739
				1740	#ifndef Py_LIMITED_API
				1741	PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
				1742	PyObject *unicode /* Unicode object */
				1743	);
				1744	#endif
				1745
Victor Stinner	af02e1c	2011-12-16 23:56:01 +0100	[diff] [blame]	1746	/* --- Locale encoding --------------------------------------------------- */
				1747
				1748	/* Decode a string from the current locale encoding. The decoder is strict if
				1749	surrogateescape is equal to zero, otherwise it uses the 'surrogateescape'
				1750	error handler (PEP 383) to escape undecodable bytes. If a byte sequence can
				1751	be decoded as a surrogate character and surrogateescape is not equal to
				1752	zero, the byte sequence is escaped using the 'surrogateescape' error handler
				1753	instead of being decoded. str must end with a null character but cannot
Victor Stinner	f2ea71f	2011-12-17 04:13:41 +0100	[diff] [blame]	1754	contain embedded null characters. */
Victor Stinner	af02e1c	2011-12-16 23:56:01 +0100	[diff] [blame]	1755
				1756	PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize(
				1757	const char *str,
				1758	Py_ssize_t len,
Victor Stinner	1b57967	2011-12-17 05:47:23 +0100	[diff] [blame]	1759	const char *errors);
Victor Stinner	af02e1c	2011-12-16 23:56:01 +0100	[diff] [blame]	1760
				1761	/* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string
				1762	length using strlen(). */
				1763
				1764	PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
				1765	const char *str,
Victor Stinner	1b57967	2011-12-17 05:47:23 +0100	[diff] [blame]	1766	const char *errors);
Victor Stinner	af02e1c	2011-12-16 23:56:01 +0100	[diff] [blame]	1767
Victor Stinner	f2ea71f	2011-12-17 04:13:41 +0100	[diff] [blame]	1768	/* Encode a Unicode object to the current locale encoding. The encoder is
				1769	strict if surrogateescape is equal to zero, otherwise the
				1770	"surrogateescape" error handler is used. Return a bytes object. The string
Victor Stinner	d45c7f8	2012-12-04 01:34:47 +0100	[diff] [blame]	1771	cannot contain embedded null characters. */
Victor Stinner	f2ea71f	2011-12-17 04:13:41 +0100	[diff] [blame]	1772
				1773	PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
				1774	PyObject *unicode,
Victor Stinner	1b57967	2011-12-17 05:47:23 +0100	[diff] [blame]	1775	const char *errors
Victor Stinner	f2ea71f	2011-12-17 04:13:41 +0100	[diff] [blame]	1776	);
				1777
Martin v. Löwis	011e842	2009-05-05 04:43:17 +0000	[diff] [blame]	1778	/* --- File system encoding ---------------------------------------------- */
				1779
Victor Stinner	47fcb5b	2010-08-13 23:59:58 +0000	[diff] [blame]	1780	/* ParseTuple converter: encode str objects to bytes using
				1781	PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */
Martin v. Löwis	011e842	2009-05-05 04:43:17 +0000	[diff] [blame]	1782
				1783	PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
				1784
Victor Stinner	47fcb5b	2010-08-13 23:59:58 +0000	[diff] [blame]	1785	/* ParseTuple converter: decode bytes objects to unicode using
				1786	PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */
				1787
				1788	PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
				1789
Victor Stinner	77c3862	2010-05-14 15:58:55 +0000	[diff] [blame]	1790	/* Decode a null-terminated string using Py_FileSystemDefaultEncoding
				1791	and the "surrogateescape" error handler.
Martin v. Löwis	011e842	2009-05-05 04:43:17 +0000	[diff] [blame]	1792
Victor Stinner	f3170cc	2010-10-15 12:04:23 +0000	[diff] [blame]	1793	If Py_FileSystemDefaultEncoding is not set, fall back to the locale
				1794	encoding.
Martin v. Löwis	011e842	2009-05-05 04:43:17 +0000	[diff] [blame]	1795
Benjamin Peterson	ccbd694	2010-05-15 17:43:18 +0000	[diff] [blame]	1796	Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known.
Martin v. Löwis	011e842	2009-05-05 04:43:17 +0000	[diff] [blame]	1797	*/
				1798
				1799	PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
				1800	const char *s /* encoded string */
				1801	);
				1802
Victor Stinner	77c3862	2010-05-14 15:58:55 +0000	[diff] [blame]	1803	/* Decode a string using Py_FileSystemDefaultEncoding
				1804	and the "surrogateescape" error handler.
				1805
Victor Stinner	f3170cc	2010-10-15 12:04:23 +0000	[diff] [blame]	1806	If Py_FileSystemDefaultEncoding is not set, fall back to the locale
				1807	encoding.
Victor Stinner	77c3862	2010-05-14 15:58:55 +0000	[diff] [blame]	1808	*/
				1809
Martin v. Löwis	011e842	2009-05-05 04:43:17 +0000	[diff] [blame]	1810	PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
				1811	const char *s, /* encoded string */
				1812	Py_ssize_t size /* size */
				1813	);
				1814
Victor Stinner	ae6265f	2010-05-15 16:27:27 +0000	[diff] [blame]	1815	/* Encode a Unicode object to Py_FileSystemDefaultEncoding with the
Benjamin Peterson	ccbd694	2010-05-15 17:43:18 +0000	[diff] [blame]	1816	"surrogateescape" error handler, and return bytes.
Victor Stinner	ae6265f	2010-05-15 16:27:27 +0000	[diff] [blame]	1817
Victor Stinner	f3170cc	2010-10-15 12:04:23 +0000	[diff] [blame]	1818	If Py_FileSystemDefaultEncoding is not set, fall back to the locale
				1819	encoding.
Victor Stinner	ae6265f	2010-05-15 16:27:27 +0000	[diff] [blame]	1820	*/
				1821
				1822	PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
				1823	PyObject *unicode
				1824	);
				1825
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1826	/* --- Methods & Slots ----------------------------------------------------
				1827
				1828	These are capable of handling Unicode objects and strings on input
				1829	(we refer to them as strings in the descriptions) and return
Georg Brandl	c6bc4c6	2011-10-05 16:23:09 +0200	[diff] [blame]	1830	Unicode objects or integers as appropriate. */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1831
				1832	/* Concat two strings giving a new Unicode string. */
				1833
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1834	PyAPI_FUNC(PyObject*) PyUnicode_Concat(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1835	PyObject *left, /* Left string */
				1836	PyObject *right /* Right string */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1837	);
				1838
Walter Dörwald	1ab8330	2007-05-18 17:15:44 +0000	[diff] [blame]	1839	/* Concat two strings and put the result in *pleft
				1840	(sets *pleft to NULL on error) */
				1841
				1842	PyAPI_FUNC(void) PyUnicode_Append(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1843	PyObject **pleft, /* Pointer to left string */
				1844	PyObject *right /* Right string */
Walter Dörwald	1ab8330	2007-05-18 17:15:44 +0000	[diff] [blame]	1845	);
				1846
				1847	/* Concat two strings, put the result in *pleft and drop the right object
				1848	(sets *pleft to NULL on error) */
				1849
				1850	PyAPI_FUNC(void) PyUnicode_AppendAndDel(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1851	PyObject **pleft, /* Pointer to left string */
				1852	PyObject *right /* Right string */
Walter Dörwald	1ab8330	2007-05-18 17:15:44 +0000	[diff] [blame]	1853	);
				1854
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1855	/* Split a string giving a list of Unicode strings.
				1856
				1857	If sep is NULL, splitting will be done at all whitespace
				1858	substrings. Otherwise, splits occur at the given separator.
				1859
				1860	At most maxsplit splits will be done. If negative, no limit is set.
				1861
				1862	Separators are not included in the resulting list.
				1863
				1864	*/
				1865
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1866	PyAPI_FUNC(PyObject*) PyUnicode_Split(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1867	PyObject *s, /* String to split */
				1868	PyObject *sep, /* String separator */
				1869	Py_ssize_t maxsplit /* Maxsplit count */
				1870	);
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1871
				1872	/* Ditto, but split at line breaks.
				1873
				1874	CRLF is considered to be one line break. Line breaks are not
				1875	included in the resulting list. */
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1876
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1877	PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1878	PyObject *s, /* String to split */
				1879	int keepends /* If true, line end markers are included */
				1880	);
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1881
Thomas Wouters	477c8d5	2006-05-27 19:21:47 +0000	[diff] [blame]	1882	/* Partition a string using a given separator. */
				1883
				1884	PyAPI_FUNC(PyObject*) PyUnicode_Partition(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1885	PyObject *s, /* String to partition */
				1886	PyObject *sep /* String separator */
				1887	);
Thomas Wouters	477c8d5	2006-05-27 19:21:47 +0000	[diff] [blame]	1888
				1889	/* Partition a string using a given separator, searching from the end of the
				1890	string. */
				1891
				1892	PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1893	PyObject *s, /* String to partition */
				1894	PyObject *sep /* String separator */
				1895	);
Thomas Wouters	477c8d5	2006-05-27 19:21:47 +0000	[diff] [blame]	1896
Hye-Shik Chang	3ae811b	2003-12-15 18:49:53 +0000	[diff] [blame]	1897	/* Split a string giving a list of Unicode strings.
				1898
				1899	If sep is NULL, splitting will be done at all whitespace
				1900	substrings. Otherwise, splits occur at the given separator.
				1901
				1902	At most maxsplit splits will be done. But unlike PyUnicode_Split
				1903	PyUnicode_RSplit splits from the end of the string. If negative,
				1904	no limit is set.
				1905
				1906	Separators are not included in the resulting list.
				1907
				1908	*/
				1909
				1910	PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1911	PyObject *s, /* String to split */
				1912	PyObject *sep, /* String separator */
				1913	Py_ssize_t maxsplit /* Maxsplit count */
				1914	);
Hye-Shik Chang	3ae811b	2003-12-15 18:49:53 +0000	[diff] [blame]	1915
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1916	/* Translate a string by applying a character mapping table to it and
				1917	return the resulting Unicode object.
				1918
				1919	The mapping table must map Unicode ordinal integers to Unicode
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1920	ordinal integers or None (causing deletion of the character).
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1921
				1922	Mapping tables may be dictionaries or sequences. Unmapped character
				1923	ordinals (ones which cause a LookupError) are left untouched and
				1924	are copied as-is.
				1925
				1926	*/
				1927
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1928	PyAPI_FUNC(PyObject *) PyUnicode_Translate(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1929	PyObject *str, /* String */
				1930	PyObject *table, /* Translate table */
				1931	const char *errors /* error handling */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1932	);
				1933
				1934	/* Join a sequence of strings using the given separator and return
				1935	the resulting Unicode string. */
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1936
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1937	PyAPI_FUNC(PyObject*) PyUnicode_Join(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1938	PyObject *separator, /* Separator string */
				1939	PyObject *seq /* Sequence object */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1940	);
				1941
Serhiy Storchaka	ea525a2	2016-09-06 22:07:53 +0300	[diff] [blame]	1942	#ifndef Py_LIMITED_API
				1943	PyAPI_FUNC(PyObject *) _PyUnicode_JoinArray(
				1944	PyObject *separator,
				1945	PyObject **items,
				1946	Py_ssize_t seqlen
				1947	);
				1948	#endif /* Py_LIMITED_API */
				1949
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1950	/* Return 1 if substr matches str[start:end] at the given tail end, 0
				1951	otherwise. */
				1952
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1953	PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1954	PyObject *str, /* String */
				1955	PyObject *substr, /* Prefix or Suffix string */
				1956	Py_ssize_t start, /* Start index */
				1957	Py_ssize_t end, /* Stop index */
				1958	int direction /* Tail end: -1 prefix, +1 suffix */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1959	);
				1960
				1961	/* Return the first position of substr in str[start:end] using the
Marc-André Lemburg	4da6fd6	2002-05-29 11:33:13 +0000	[diff] [blame]	1962	given search direction or -1 if not found. -2 is returned in case
				1963	an error occurred and an exception is set. */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1964
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1965	PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1966	PyObject *str, /* String */
				1967	PyObject *substr, /* Substring to find */
				1968	Py_ssize_t start, /* Start index */
				1969	Py_ssize_t end, /* Stop index */
				1970	int direction /* Find direction: +1 forward, -1 backward */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1971	);
				1972
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	1973	/* Like PyUnicode_Find, but search for single character only. */
				1974	PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar(
				1975	PyObject *str,
				1976	Py_UCS4 ch,
				1977	Py_ssize_t start,
				1978	Py_ssize_t end,
				1979	int direction
				1980	);
				1981
Barry Warsaw	51ac580	2000-03-20 16:36:48 +0000	[diff] [blame]	1982	/* Count the number of occurrences of substr in str[start:end]. */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1983
Martin v. Löwis	18e1655	2006-02-15 17:27:45 +0000	[diff] [blame]	1984	PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1985	PyObject *str, /* String */
				1986	PyObject *substr, /* Substring to count */
				1987	Py_ssize_t start, /* Start index */
				1988	Py_ssize_t end /* Stop index */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1989	);
				1990
Barry Warsaw	51ac580	2000-03-20 16:36:48 +0000	[diff] [blame]	1991	/* Replace at most maxcount occurrences of substr in str with replstr
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1992	and return the resulting Unicode object. */
				1993
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	1994	PyAPI_FUNC(PyObject *) PyUnicode_Replace(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	1995	PyObject *str, /* String */
				1996	PyObject *substr, /* Substring to find */
				1997	PyObject *replstr, /* Substring to replace */
				1998	Py_ssize_t maxcount /* Max. number of replacements to apply;
				1999	-1 = all */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2000	);
				2001
				2002	/* Compare two strings and return -1, 0, 1 for less than, equal,
Victor Stinner	90db9c4	2012-10-04 21:53:50 +0200	[diff] [blame]	2003	greater than resp.
				2004	Raise an exception and return -1 on error. */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2005
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	2006	PyAPI_FUNC(int) PyUnicode_Compare(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	2007	PyObject *left, /* Left string */
				2008	PyObject *right /* Right string */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2009	);
				2010
Martin v. Löwis	1c0689c	2014-01-03 21:36:49 +0100	[diff] [blame]	2011	#ifndef Py_LIMITED_API
Victor Stinner	ad14ccd	2013-11-07 00:46:04 +0100	[diff] [blame]	2012	PyAPI_FUNC(int) _PyUnicode_CompareWithId(
				2013	PyObject *left, /* Left string */
				2014	_Py_Identifier *right /* Right identifier */
				2015	);
Martin v. Löwis	1c0689c	2014-01-03 21:36:49 +0100	[diff] [blame]	2016	#endif
Victor Stinner	ad14ccd	2013-11-07 00:46:04 +0100	[diff] [blame]	2017
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	2018	PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
				2019	PyObject *left,
Victor Stinner	dc2081f	2010-12-27 01:49:29 +0000	[diff] [blame]	2020	const char *right /* ASCII-encoded string */
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	2021	);
				2022
Thomas Wouters	00ee7ba	2006-08-21 19:07:27 +0000	[diff] [blame]	2023	/* Rich compare two strings and return one of the following:
				2024
				2025	- NULL in case an exception was raised
Martin Panter	69332c1	2016-08-04 13:07:31 +0000	[diff] [blame]	2026	- Py_True or Py_False for successful comparisons
Thomas Wouters	00ee7ba	2006-08-21 19:07:27 +0000	[diff] [blame]	2027	- Py_NotImplemented in case the type combination is unknown
				2028
				2029	Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
				2030	case the conversion of the arguments to Unicode fails with a
				2031	UnicodeDecodeError.
				2032
				2033	Possible values for op:
				2034
				2035	Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
				2036
				2037	*/
				2038
				2039	PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	2040	PyObject *left, /* Left string */
				2041	PyObject *right, /* Right string */
				2042	int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
Thomas Wouters	00ee7ba	2006-08-21 19:07:27 +0000	[diff] [blame]	2043	);
				2044
Serhiy Storchaka	d65c949	2015-11-02 14:10:23 +0200	[diff] [blame]	2045	/* Apply an argument tuple or dictionary to a format string and return
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2046	the resulting Unicode string. */
				2047
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	2048	PyAPI_FUNC(PyObject *) PyUnicode_Format(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	2049	PyObject *format, /* Format string */
				2050	PyObject *args /* Argument tuple or dictionary */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2051	);
				2052
Guido van Rossum	d0d366b	2000-03-13 23:22:24 +0000	[diff] [blame]	2053	/* Checks whether element is contained in container and returns 1/0
				2054	accordingly.
				2055
Martin Panter	cc71a79	2016-04-05 06:19:42 +0000	[diff] [blame]	2056	element has to coerce to a one element Unicode string. -1 is
Guido van Rossum	d0d366b	2000-03-13 23:22:24 +0000	[diff] [blame]	2057	returned in case of an error. */
				2058
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	2059	PyAPI_FUNC(int) PyUnicode_Contains(
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	2060	PyObject *container, /* Container string */
				2061	PyObject *element /* Element string */
Guido van Rossum	d0d366b	2000-03-13 23:22:24 +0000	[diff] [blame]	2062	);
				2063
Martin v. Löwis	4738340	2007-08-15 07:32:56 +0000	[diff] [blame]	2064	/* Checks whether argument is a valid identifier. */
				2065
				2066	PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
				2067
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	2068	#ifndef Py_LIMITED_API
Walter Dörwald	de02bcb	2002-04-22 17:42:37 +0000	[diff] [blame]	2069	/* Externally visible for str.strip(unicode) */
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	2070	PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
Victor Stinner	9db1a8b	2011-10-23 20:04:37 +0200	[diff] [blame]	2071	PyObject *self,
Walter Dörwald	de02bcb	2002-04-22 17:42:37 +0000	[diff] [blame]	2072	int striptype,
				2073	PyObject *sepobj
				2074	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	2075	#endif
Walter Dörwald	de02bcb	2002-04-22 17:42:37 +0000	[diff] [blame]	2076
Eric Smith	a3b1ac8	2009-04-03 14:45:06 +0000	[diff] [blame]	2077	/* Using explicit passed-in values, insert the thousands grouping
				2078	into the string pointed to by buffer. For the argument descriptions,
				2079	see Objects/stringlib/localeutil.h */
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	2080	#ifndef Py_LIMITED_API
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	2081	PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
Victor Stinner	c3cec78	2011-10-05 21:24:08 +0200	[diff] [blame]	2082	PyObject *unicode,
Victor Stinner	41a863c	2012-02-24 00:37:51 +0100	[diff] [blame]	2083	Py_ssize_t index,
Martin v. Löwis	d63a3b8	2011-09-28 07:41:54 +0200	[diff] [blame]	2084	Py_ssize_t n_buffer,
				2085	void *digits,
				2086	Py_ssize_t n_digits,
				2087	Py_ssize_t min_width,
				2088	const char *grouping,
Victor Stinner	41a863c	2012-02-24 00:37:51 +0100	[diff] [blame]	2089	PyObject *thousands_sep,
				2090	Py_UCS4 *maxchar);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	2091	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2092	/* === Characters Type APIs =============================================== */
				2093
Benjamin Peterson	960cf0f	2009-01-09 04:11:44 +0000	[diff] [blame]	2094	/* Helper array used by Py_UNICODE_ISSPACE(). */
				2095
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	2096	#ifndef Py_LIMITED_API
Benjamin Peterson	960cf0f	2009-01-09 04:11:44 +0000	[diff] [blame]	2097	PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
				2098
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2099	/* These should not be used directly. Use the Py_UNICODE_IS* and
Antoine Pitrou	f95a1b3	2010-05-09 15:52:27 +0000	[diff] [blame]	2100	Py_UNICODE_TO* macros instead.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2101
				2102	These APIs are implemented in Objects/unicodectype.c.
				2103
				2104	*/
				2105
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	2106	PyAPI_FUNC(int) _PyUnicode_IsLowercase(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	2107	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2108	);
				2109
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	2110	PyAPI_FUNC(int) _PyUnicode_IsUppercase(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	2111	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2112	);
				2113
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	2114	PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	2115	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2116	);
				2117
Martin v. Löwis	13c3e38	2007-08-14 22:37:03 +0000	[diff] [blame]	2118	PyAPI_FUNC(int) _PyUnicode_IsXidStart(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	2119	Py_UCS4 ch /* Unicode character */
Martin v. Löwis	13c3e38	2007-08-14 22:37:03 +0000	[diff] [blame]	2120	);
				2121
				2122	PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	2123	Py_UCS4 ch /* Unicode character */
Martin v. Löwis	13c3e38	2007-08-14 22:37:03 +0000	[diff] [blame]	2124	);
				2125
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	2126	PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	2127	const Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2128	);
				2129
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	2130	PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	2131	const Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2132	);
				2133
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	2134	PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
				2135	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2136	);
				2137
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	2138	PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
				2139	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2140	);
				2141
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	2142	PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
				2143	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2144	);
				2145
Benjamin Peterson	b2bf01d	2012-01-11 18:17:06 -0500	[diff] [blame]	2146	PyAPI_FUNC(int) _PyUnicode_ToLowerFull(
				2147	Py_UCS4 ch, /* Unicode character */
				2148	Py_UCS4 *res
				2149	);
				2150
				2151	PyAPI_FUNC(int) _PyUnicode_ToTitleFull(
				2152	Py_UCS4 ch, /* Unicode character */
				2153	Py_UCS4 *res
				2154	);
				2155
				2156	PyAPI_FUNC(int) _PyUnicode_ToUpperFull(
				2157	Py_UCS4 ch, /* Unicode character */
				2158	Py_UCS4 *res
				2159	);
				2160
Benjamin Peterson	d5890c8	2012-01-14 13:23:30 -0500	[diff] [blame]	2161	PyAPI_FUNC(int) _PyUnicode_ToFoldedFull(
				2162	Py_UCS4 ch, /* Unicode character */
				2163	Py_UCS4 *res
				2164	);
				2165
Benjamin Peterson	b2bf01d	2012-01-11 18:17:06 -0500	[diff] [blame]	2166	PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable(
Amaury Forgeot d'Arc	77b1ecf	2012-01-13 22:12:37 +0100	[diff] [blame]	2167	Py_UCS4 ch /* Unicode character */
Benjamin Peterson	b2bf01d	2012-01-11 18:17:06 -0500	[diff] [blame]	2168	);
				2169
				2170	PyAPI_FUNC(int) _PyUnicode_IsCased(
Amaury Forgeot d'Arc	77b1ecf	2012-01-13 22:12:37 +0100	[diff] [blame]	2171	Py_UCS4 ch /* Unicode character */
Benjamin Peterson	b2bf01d	2012-01-11 18:17:06 -0500	[diff] [blame]	2172	);
				2173
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	2174	PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	2175	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2176	);
				2177
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	2178	PyAPI_FUNC(int) _PyUnicode_ToDigit(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	2179	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2180	);
				2181
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	2182	PyAPI_FUNC(double) _PyUnicode_ToNumeric(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	2183	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2184	);
				2185
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	2186	PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	2187	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2188	);
				2189
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	2190	PyAPI_FUNC(int) _PyUnicode_IsDigit(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	2191	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2192	);
				2193
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	2194	PyAPI_FUNC(int) _PyUnicode_IsNumeric(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	2195	Py_UCS4 ch /* Unicode character */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2196	);
				2197
Georg Brandl	559e5d7	2008-06-11 18:37:52 +0000	[diff] [blame]	2198	PyAPI_FUNC(int) _PyUnicode_IsPrintable(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	2199	Py_UCS4 ch /* Unicode character */
Georg Brandl	559e5d7	2008-06-11 18:37:52 +0000	[diff] [blame]	2200	);
				2201
Mark Hammond	91a681d	2002-08-12 07:21:58 +0000	[diff] [blame]	2202	PyAPI_FUNC(int) _PyUnicode_IsAlpha(
Amaury Forgeot d'Arc	324ac65	2010-08-18 20:44:58 +0000	[diff] [blame]	2203	Py_UCS4 ch /* Unicode character */
Marc-André Lemburg	f03e741	2000-07-05 09:45:59 +0000	[diff] [blame]	2204	);
				2205
Victor Stinner	ef8d95c	2010-08-16 22:03:11 +0000	[diff] [blame]	2206	PyAPI_FUNC(size_t) Py_UNICODE_strlen(
				2207	const Py_UNICODE *u
				2208	);
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	2209
				2210	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
Victor Stinner	ef8d95c	2010-08-16 22:03:11 +0000	[diff] [blame]	2211	Py_UNICODE *s1,
				2212	const Py_UNICODE *s2);
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	2213
Victor Stinner	c4eb765	2010-09-01 23:43:50 +0000	[diff] [blame]	2214	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat(
				2215	Py_UNICODE *s1, const Py_UNICODE *s2);
				2216
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	2217	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
Victor Stinner	ef8d95c	2010-08-16 22:03:11 +0000	[diff] [blame]	2218	Py_UNICODE *s1,
				2219	const Py_UNICODE *s2,
				2220	size_t n);
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	2221
				2222	PyAPI_FUNC(int) Py_UNICODE_strcmp(
Victor Stinner	ef8d95c	2010-08-16 22:03:11 +0000	[diff] [blame]	2223	const Py_UNICODE *s1,
				2224	const Py_UNICODE *s2
				2225	);
				2226
				2227	PyAPI_FUNC(int) Py_UNICODE_strncmp(
				2228	const Py_UNICODE *s1,
				2229	const Py_UNICODE *s2,
				2230	size_t n
				2231	);
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	2232
				2233	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
Victor Stinner	ef8d95c	2010-08-16 22:03:11 +0000	[diff] [blame]	2234	const Py_UNICODE *s,
				2235	Py_UNICODE c
Martin v. Löwis	5b22213	2007-06-10 09:51:05 +0000	[diff] [blame]	2236	);
				2237
Victor Stinner	331ea92	2010-08-10 16:37:20 +0000	[diff] [blame]	2238	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
Victor Stinner	ef8d95c	2010-08-16 22:03:11 +0000	[diff] [blame]	2239	const Py_UNICODE *s,
				2240	Py_UNICODE c
Victor Stinner	331ea92	2010-08-10 16:37:20 +0000	[diff] [blame]	2241	);
				2242
Ethan Furman	b95b561	2015-01-23 20:05:18 -0800	[diff] [blame]	2243	PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int);
				2244
Victor Stinner	71133ff	2010-09-01 23:43:53 +0000	[diff] [blame]	2245	/* Create a copy of a unicode string ending with a nul character. Return NULL
				2246	and raise a MemoryError exception on memory allocation failure, otherwise
				2247	return a new allocated buffer (use PyMem_Free() to free the buffer). */
				2248
Victor Stinner	4640860	2010-09-03 16:18:00 +0000	[diff] [blame]	2249	PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
Victor Stinner	71133ff	2010-09-01 23:43:53 +0000	[diff] [blame]	2250	PyObject *unicode
				2251	);
Martin v. Löwis	4d0d471	2010-12-03 20:14:31 +0000	[diff] [blame]	2252	#endif /* Py_LIMITED_API */
Victor Stinner	71133ff	2010-09-01 23:43:53 +0000	[diff] [blame]	2253
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	2254	#if defined(Py_DEBUG) && !defined(Py_LIMITED_API)
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	2255	PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
Victor Stinner	7931d9a	2011-11-04 00:22:48 +0100	[diff] [blame]	2256	PyObject *op,
Victor Stinner	fb9ea8c	2011-10-06 01:45:57 +0200	[diff] [blame]	2257	int check_content);
				2258	#endif
				2259
Serhiy Storchaka	9fab79b	2016-09-11 11:03:14 +0300	[diff] [blame]	2260	#ifndef Py_LIMITED_API
Martin v. Löwis	afe55bb	2011-10-09 10:38:36 +0200	[diff] [blame]	2261	/* Return an interned Unicode object for an Identifier; may fail if there is no memory.*/
				2262	PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*);
				2263	/* Clear all static strings. */
				2264	PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void);
				2265
Raymond Hettinger	ac2ef65	2015-07-04 16:04:44 -0700	[diff] [blame]	2266	/* Fast equality check when the inputs are known to be exact unicode types
				2267	and where the hash values are equal (i.e. a very probable match) */
				2268	PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *);
Serhiy Storchaka	9fab79b	2016-09-11 11:03:14 +0300	[diff] [blame]	2269	#endif /* !Py_LIMITED_API */
Raymond Hettinger	ac2ef65	2015-07-04 16:04:44 -0700	[diff] [blame]	2270
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2271	#ifdef __cplusplus
				2272	}
				2273	#endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	2274	#endif /* !Py_UNICODEOBJECT_H */