Blame - Include/cpython/unicodeobject.h - platform/external/python/cpython3

blob: 806c3aa7cedbcf4875d098eec4a5696b2216d538 [file] [log] [blame]

Victor Stinner	75e4699	2018-11-26 17:29:38 +0100	[diff] [blame]	1	#ifndef Py_CPYTHON_UNICODEOBJECT_H
				2	# error "this header file must not be included directly"
				3	#endif
				4
				5	#ifdef __cplusplus
				6	extern "C" {
				7	#endif
				8
				9	/* Py_UNICODE was the native Unicode storage format (code unit) used by
				10	Python and represents a single Unicode element in the Unicode type.
				11	With PEP 393, Py_UNICODE is deprecated and replaced with a
				12	typedef to wchar_t. */
				13	#define PY_UNICODE_TYPE wchar_t
				14	typedef wchar_t Py_UNICODE /* Py_DEPRECATED(3.3) */;
				15
				16	/* --- Internal Unicode Operations ---------------------------------------- */
				17
				18	/* Since splitting on whitespace is an important use case, and
				19	whitespace in most situations is solely ASCII whitespace, we
				20	optimize for the common case by using a quick look-up table
				21	_Py_ascii_whitespace (see below) with an inlined check.
				22
				23	*/
				24	#define Py_UNICODE_ISSPACE(ch) \
				25	((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
				26
				27	#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
				28	#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
				29	#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
				30	#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
				31
				32	#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
				33	#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
				34	#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
				35
				36	#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
				37	#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
				38	#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
				39	#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)
				40
				41	#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
				42	#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
				43	#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
				44
				45	#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
				46
				47	#define Py_UNICODE_ISALNUM(ch) \
				48	(Py_UNICODE_ISALPHA(ch) \|\| \
				49	Py_UNICODE_ISDECIMAL(ch) \|\| \
				50	Py_UNICODE_ISDIGIT(ch) \|\| \
				51	Py_UNICODE_ISNUMERIC(ch))
				52
				53	#define Py_UNICODE_COPY(target, source, length) \
				54	memcpy((target), (source), (length)*sizeof(Py_UNICODE))
				55
				56	#define Py_UNICODE_FILL(target, value, length) \
				57	do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
				58	for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\
				59	} while (0)
				60
				61	/* macros to work with surrogates */
				62	#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
				63	#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF)
				64	#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF)
				65	/* Join two surrogate characters and return a single Py_UCS4 value. */
				66	#define Py_UNICODE_JOIN_SURROGATES(high, low) \
				67	(((((Py_UCS4)(high) & 0x03FF) << 10) \| \
				68	((Py_UCS4)(low) & 0x03FF)) + 0x10000)
				69	/* high surrogate = top 10 bits added to D800 */
				70	#define Py_UNICODE_HIGH_SURROGATE(ch) (0xD800 - (0x10000 >> 10) + ((ch) >> 10))
				71	/* low surrogate = bottom 10 bits added to DC00 */
				72	#define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF))
				73
				74	/* Check if substring matches at given offset. The offset must be
				75	valid, and the substring must not be empty. */
				76
				77	#define Py_UNICODE_MATCH(string, offset, substring) \
				78	((((string)->wstr + (offset)) == ((substring)->wstr)) && \
				79	((((string)->wstr + (offset) + (substring)->wstr_length-1) == ((substring)->wstr + (substring)->wstr_length-1))) && \
				80	!memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE)))
				81
				82	/* --- Unicode Type ------------------------------------------------------- */
				83
				84	/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
				85	structure. state.ascii and state.compact are set, and the data
				86	immediately follow the structure. utf8_length and wstr_length can be found
				87	in the length field; the utf8 pointer is equal to the data pointer. */
				88	typedef struct {
				89	/* There are 4 forms of Unicode strings:
				90
				91	- compact ascii:
				92
				93	* structure = PyASCIIObject
				94	* test: PyUnicode_IS_COMPACT_ASCII(op)
				95	* kind = PyUnicode_1BYTE_KIND
				96	* compact = 1
				97	* ascii = 1
				98	* ready = 1
				99	* (length is the length of the utf8 and wstr strings)
				100	* (data starts just after the structure)
				101	* (since ASCII is decoded from UTF-8, the utf8 string are the data)
				102
				103	- compact:
				104
				105	* structure = PyCompactUnicodeObject
				106	* test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
				107	* kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
				108	PyUnicode_4BYTE_KIND
				109	* compact = 1
				110	* ready = 1
				111	* ascii = 0
				112	* utf8 is not shared with data
				113	* utf8_length = 0 if utf8 is NULL
				114	* wstr is shared with data and wstr_length=length
				115	if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
				116	or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
				117	* wstr_length = 0 if wstr is NULL
				118	* (data starts just after the structure)
				119
				120	- legacy string, not ready:
				121
				122	* structure = PyUnicodeObject
				123	* test: kind == PyUnicode_WCHAR_KIND
				124	* length = 0 (use wstr_length)
				125	* hash = -1
				126	* kind = PyUnicode_WCHAR_KIND
				127	* compact = 0
				128	* ascii = 0
				129	* ready = 0
				130	* interned = SSTATE_NOT_INTERNED
				131	* wstr is not NULL
				132	* data.any is NULL
				133	* utf8 is NULL
				134	* utf8_length = 0
				135
				136	- legacy string, ready:
				137
				138	* structure = PyUnicodeObject structure
				139	* test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
				140	* kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
				141	PyUnicode_4BYTE_KIND
				142	* compact = 0
				143	* ready = 1
				144	* data.any is not NULL
				145	* utf8 is shared and utf8_length = length with data.any if ascii = 1
				146	* utf8_length = 0 if utf8 is NULL
				147	* wstr is shared with data.any and wstr_length = length
				148	if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
				149	or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_4)=4
				150	* wstr_length = 0 if wstr is NULL
				151
				152	Compact strings use only one memory block (structure + characters),
				153	whereas legacy strings use one block for the structure and one block
				154	for characters.
				155
				156	Legacy strings are created by PyUnicode_FromUnicode() and
				157	PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
				158	when PyUnicode_READY() is called.
				159
				160	See also _PyUnicode_CheckConsistency().
				161	*/
				162	PyObject_HEAD
				163	Py_ssize_t length; /* Number of code points in the string */
				164	Py_hash_t hash; /* Hash value; -1 if not set */
				165	struct {
				166	/*
				167	SSTATE_NOT_INTERNED (0)
				168	SSTATE_INTERNED_MORTAL (1)
				169	SSTATE_INTERNED_IMMORTAL (2)
				170
				171	If interned != SSTATE_NOT_INTERNED, the two references from the
				172	dictionary to this object are not counted in ob_refcnt.
				173	*/
				174	unsigned int interned:2;
				175	/* Character size:
				176
				177	- PyUnicode_WCHAR_KIND (0):
				178
				179	* character type = wchar_t (16 or 32 bits, depending on the
				180	platform)
				181
				182	- PyUnicode_1BYTE_KIND (1):
				183
				184	* character type = Py_UCS1 (8 bits, unsigned)
				185	* all characters are in the range U+0000-U+00FF (latin1)
				186	* if ascii is set, all characters are in the range U+0000-U+007F
				187	(ASCII), otherwise at least one character is in the range
				188	U+0080-U+00FF
				189
				190	- PyUnicode_2BYTE_KIND (2):
				191
				192	* character type = Py_UCS2 (16 bits, unsigned)
				193	* all characters are in the range U+0000-U+FFFF (BMP)
				194	* at least one character is in the range U+0100-U+FFFF
				195
				196	- PyUnicode_4BYTE_KIND (4):
				197
				198	* character type = Py_UCS4 (32 bits, unsigned)
				199	* all characters are in the range U+0000-U+10FFFF
				200	* at least one character is in the range U+10000-U+10FFFF
				201	*/
				202	unsigned int kind:3;
				203	/* Compact is with respect to the allocation scheme. Compact unicode
				204	objects only require one memory block while non-compact objects use
				205	one block for the PyUnicodeObject struct and another for its data
				206	buffer. */
				207	unsigned int compact:1;
				208	/* The string only contains characters in the range U+0000-U+007F (ASCII)
				209	and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
				210	set, use the PyASCIIObject structure. */
				211	unsigned int ascii:1;
				212	/* The ready flag indicates whether the object layout is initialized
				213	completely. This means that this is either a compact object, or
				214	the data pointer is filled out. The bit is redundant, and helps
				215	to minimize the test in PyUnicode_IS_READY(). */
				216	unsigned int ready:1;
				217	/* Padding to ensure that PyUnicode_DATA() is always aligned to
				218	4 bytes (see issue #19537 on m68k). */
				219	unsigned int :24;
				220	} state;
				221	wchar_t wstr; / wchar_t representation (null-terminated) */
				222	} PyASCIIObject;
				223
				224	/* Non-ASCII strings allocated through PyUnicode_New use the
				225	PyCompactUnicodeObject structure. state.compact is set, and the data
				226	immediately follow the structure. */
				227	typedef struct {
				228	PyASCIIObject _base;
				229	Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the
				230	* terminating \0. */
				231	char utf8; / UTF-8 representation (null-terminated) */
				232	Py_ssize_t wstr_length; /* Number of code points in wstr, possible
				233	* surrogates count as two code points. */
				234	} PyCompactUnicodeObject;
				235
				236	/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
				237	PyUnicodeObject structure. The actual string data is initially in the wstr
				238	block, and copied into the data block using _PyUnicode_Ready. */
				239	typedef struct {
				240	PyCompactUnicodeObject _base;
				241	union {
				242	void *any;
				243	Py_UCS1 *latin1;
				244	Py_UCS2 *ucs2;
				245	Py_UCS4 *ucs4;
				246	} data; /* Canonical, smallest-form Unicode buffer */
				247	} PyUnicodeObject;
				248
				249	/* Fast access macros */
				250	#define PyUnicode_WSTR_LENGTH(op) \
				251	(PyUnicode_IS_COMPACT_ASCII(op) ? \
				252	((PyASCIIObject*)op)->length : \
				253	((PyCompactUnicodeObject*)op)->wstr_length)
				254
				255	/* Returns the deprecated Py_UNICODE representation's size in code units
				256	(this includes surrogate pairs as 2 units).
				257	If the Py_UNICODE representation is not available, it will be computed
				258	on request. Use PyUnicode_GET_LENGTH() for the length in code points. */
				259
				260	#define PyUnicode_GET_SIZE(op) \
				261	(assert(PyUnicode_Check(op)), \
				262	(((PyASCIIObject *)(op))->wstr) ? \
				263	PyUnicode_WSTR_LENGTH(op) : \
				264	((void)PyUnicode_AsUnicode(_PyObject_CAST(op)),\
				265	assert(((PyASCIIObject *)(op))->wstr), \
				266	PyUnicode_WSTR_LENGTH(op)))
				267	/* Py_DEPRECATED(3.3) */
				268
				269	#define PyUnicode_GET_DATA_SIZE(op) \
				270	(PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE)
				271	/* Py_DEPRECATED(3.3) */
				272
				273	/* Alias for PyUnicode_AsUnicode(). This will create a wchar_t/Py_UNICODE
				274	representation on demand. Using this macro is very inefficient now,
				275	try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
				276	use PyUnicode_WRITE() and PyUnicode_READ(). */
				277
				278	#define PyUnicode_AS_UNICODE(op) \
				279	(assert(PyUnicode_Check(op)), \
				280	(((PyASCIIObject )(op))->wstr) ? (((PyASCIIObject )(op))->wstr) : \
				281	PyUnicode_AsUnicode(_PyObject_CAST(op)))
				282	/* Py_DEPRECATED(3.3) */
				283
				284	#define PyUnicode_AS_DATA(op) \
				285	((const char *)(PyUnicode_AS_UNICODE(op)))
				286	/* Py_DEPRECATED(3.3) */
				287
				288
				289	/* --- Flexible String Representation Helper Macros (PEP 393) -------------- */
				290
				291	/* Values for PyASCIIObject.state: */
				292
				293	/* Interning state. */
				294	#define SSTATE_NOT_INTERNED 0
				295	#define SSTATE_INTERNED_MORTAL 1
				296	#define SSTATE_INTERNED_IMMORTAL 2
				297
				298	/* Return true if the string contains only ASCII characters, or 0 if not. The
				299	string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be
				300	ready. */
				301	#define PyUnicode_IS_ASCII(op) \
				302	(assert(PyUnicode_Check(op)), \
				303	assert(PyUnicode_IS_READY(op)), \
				304	((PyASCIIObject*)op)->state.ascii)
				305
				306	/* Return true if the string is compact or 0 if not.
				307	No type checks or Ready calls are performed. */
				308	#define PyUnicode_IS_COMPACT(op) \
				309	(((PyASCIIObject*)(op))->state.compact)
				310
				311	/* Return true if the string is a compact ASCII string (use PyASCIIObject
				312	structure), or 0 if not. No type checks or Ready calls are performed. */
				313	#define PyUnicode_IS_COMPACT_ASCII(op) \
				314	(((PyASCIIObject*)op)->state.ascii && PyUnicode_IS_COMPACT(op))
				315
				316	enum PyUnicode_Kind {
				317	/* String contains only wstr byte characters. This is only possible
				318	when the string was created with a legacy API and _PyUnicode_Ready()
				319	has not been called yet. */
				320	PyUnicode_WCHAR_KIND = 0,
				321	/* Return values of the PyUnicode_KIND() macro: */
				322	PyUnicode_1BYTE_KIND = 1,
				323	PyUnicode_2BYTE_KIND = 2,
				324	PyUnicode_4BYTE_KIND = 4
				325	};
				326
				327	/* Return pointers to the canonical representation cast to unsigned char,
				328	Py_UCS2, or Py_UCS4 for direct character access.
				329	No checks are performed, use PyUnicode_KIND() before to ensure
				330	these will work correctly. */
				331
				332	#define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op))
				333	#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op))
				334	#define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op))
				335
				336	/* Return one of the PyUnicode__KIND values defined above. /
				337	#define PyUnicode_KIND(op) \
				338	(assert(PyUnicode_Check(op)), \
				339	assert(PyUnicode_IS_READY(op)), \
				340	((PyASCIIObject *)(op))->state.kind)
				341
				342	/* Return a void pointer to the raw unicode buffer. */
				343	#define _PyUnicode_COMPACT_DATA(op) \
				344	(PyUnicode_IS_ASCII(op) ? \
				345	((void)((PyASCIIObject)(op) + 1)) : \
				346	((void)((PyCompactUnicodeObject)(op) + 1)))
				347
				348	#define _PyUnicode_NONCOMPACT_DATA(op) \
				349	(assert(((PyUnicodeObject*)(op))->data.any), \
				350	((((PyUnicodeObject *)(op))->data.any)))
				351
				352	#define PyUnicode_DATA(op) \
				353	(assert(PyUnicode_Check(op)), \
				354	PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) : \
				355	_PyUnicode_NONCOMPACT_DATA(op))
				356
				357	/* In the access macros below, "kind" may be evaluated more than once.
				358	All other macro parameters are evaluated exactly once, so it is safe
				359	to put side effects into them (such as increasing the index). */
				360
				361	/* Write into the canonical representation, this macro does not do any sanity
				362	checks and is intended for usage in loops. The caller should cache the
				363	kind and data pointers obtained from other macro calls.
				364	index is the index in the string (starts at 0) and value is the new
				365	code point value which should be written to that location. */
				366	#define PyUnicode_WRITE(kind, data, index, value) \
				367	do { \
				368	switch ((kind)) { \
				369	case PyUnicode_1BYTE_KIND: { \
				370	((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \
				371	break; \
				372	} \
				373	case PyUnicode_2BYTE_KIND: { \
				374	((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \
				375	break; \
				376	} \
				377	default: { \
				378	assert((kind) == PyUnicode_4BYTE_KIND); \
				379	((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \
				380	} \
				381	} \
				382	} while (0)
				383
				384	/* Read a code point from the string's canonical representation. No checks
				385	or ready calls are performed. */
				386	#define PyUnicode_READ(kind, data, index) \
				387	((Py_UCS4) \
				388	((kind) == PyUnicode_1BYTE_KIND ? \
				389	((const Py_UCS1 *)(data))[(index)] : \
				390	((kind) == PyUnicode_2BYTE_KIND ? \
				391	((const Py_UCS2 *)(data))[(index)] : \
				392	((const Py_UCS4 *)(data))[(index)] \
				393	) \
				394	))
				395
				396	/* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it
				397	calls PyUnicode_KIND() and might call it twice. For single reads, use
				398	PyUnicode_READ_CHAR, for multiple consecutive reads callers should
				399	cache kind and use PyUnicode_READ instead. */
				400	#define PyUnicode_READ_CHAR(unicode, index) \
				401	(assert(PyUnicode_Check(unicode)), \
				402	assert(PyUnicode_IS_READY(unicode)), \
				403	(Py_UCS4) \
				404	(PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \
				405	((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \
				406	(PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \
				407	((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \
				408	((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \
				409	) \
				410	))
				411
				412	/* Returns the length of the unicode string. The caller has to make sure that
				413	the string has it's canonical representation set before calling
				414	this macro. Call PyUnicode_(FAST_)Ready to ensure that. */
				415	#define PyUnicode_GET_LENGTH(op) \
				416	(assert(PyUnicode_Check(op)), \
				417	assert(PyUnicode_IS_READY(op)), \
				418	((PyASCIIObject *)(op))->length)
				419
				420
				421	/* Fast check to determine whether an object is ready. Equivalent to
				422	PyUnicode_IS_COMPACT(op) \|\| ((PyUnicodeObject)(op))->data.any) /
				423
				424	#define PyUnicode_IS_READY(op) (((PyASCIIObject*)op)->state.ready)
				425
				426	/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
				427	case. If the canonical representation is not yet set, it will still call
				428	_PyUnicode_Ready().
				429	Returns 0 on success and -1 on errors. */
				430	#define PyUnicode_READY(op) \
				431	(assert(PyUnicode_Check(op)), \
				432	(PyUnicode_IS_READY(op) ? \
				433	0 : _PyUnicode_Ready(_PyObject_CAST(op))))
				434
				435	/* Return a maximum character value which is suitable for creating another
				436	string based on op. This is always an approximation but more efficient
				437	than iterating over the string. */
				438	#define PyUnicode_MAX_CHAR_VALUE(op) \
				439	(assert(PyUnicode_IS_READY(op)), \
				440	(PyUnicode_IS_ASCII(op) ? \
				441	(0x7f) : \
				442	(PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ? \
				443	(0xffU) : \
				444	(PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ? \
				445	(0xffffU) : \
				446	(0x10ffffU)))))
				447
				448	/* === Public API ========================================================= */
				449
				450	/* --- Plain Py_UNICODE --------------------------------------------------- */
				451
				452	/* With PEP 393, this is the recommended way to allocate a new unicode object.
				453	This function will allocate the object and its buffer in a single memory
				454	block. Objects created using this function are not resizable. */
				455	PyAPI_FUNC(PyObject*) PyUnicode_New(
				456	Py_ssize_t size, /* Number of code points in the new string */
				457	Py_UCS4 maxchar /* maximum code point value in the string */
				458	);
				459
				460	/* Initializes the canonical string representation from the deprecated
				461	wstr/Py_UNICODE representation. This function is used to convert Unicode
				462	objects which were created using the old API to the new flexible format
				463	introduced with PEP 393.
				464
				465	Don't call this function directly, use the public PyUnicode_READY() macro
				466	instead. */
				467	PyAPI_FUNC(int) _PyUnicode_Ready(
				468	PyObject unicode / Unicode object */
				469	);
				470
				471	/* Get a copy of a Unicode string. */
				472	PyAPI_FUNC(PyObject*) _PyUnicode_Copy(
				473	PyObject *unicode
				474	);
				475
				476	/* Copy character from one unicode object into another, this function performs
				477	character conversion when necessary and falls back to memcpy() if possible.
				478
				479	Fail if to is too small (smaller than how_many or smaller than
				480	len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
				481	kind(to), or if to has more than 1 reference.
				482
				483	Return the number of written character, or return -1 and raise an exception
				484	on error.
				485
				486	Pseudo-code:
				487
				488	how_many = min(how_many, len(from) - from_start)
				489	to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
				490	return how_many
				491
				492	Note: The function doesn't write a terminating null character.
				493	*/
				494	PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
				495	PyObject *to,
				496	Py_ssize_t to_start,
				497	PyObject *from,
				498	Py_ssize_t from_start,
				499	Py_ssize_t how_many
				500	);
				501
				502	/* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so
				503	may crash if parameters are invalid (e.g. if the output string
				504	is too short). */
				505	PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters(
				506	PyObject *to,
				507	Py_ssize_t to_start,
				508	PyObject *from,
				509	Py_ssize_t from_start,
				510	Py_ssize_t how_many
				511	);
				512
				513	/* Fill a string with a character: write fill_char into
				514	unicode[start:start+length].
				515
				516	Fail if fill_char is bigger than the string maximum character, or if the
				517	string has more than 1 reference.
				518
				519	Return the number of written character, or return -1 and raise an exception
				520	on error. */
				521	PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill(
				522	PyObject *unicode,
				523	Py_ssize_t start,
				524	Py_ssize_t length,
				525	Py_UCS4 fill_char
				526	);
				527
				528	/* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash
				529	if parameters are invalid (e.g. if length is longer than the string). */
				530	PyAPI_FUNC(void) _PyUnicode_FastFill(
				531	PyObject *unicode,
				532	Py_ssize_t start,
				533	Py_ssize_t length,
				534	Py_UCS4 fill_char
				535	);
				536
				537	/* Create a Unicode Object from the Py_UNICODE buffer u of the given
				538	size.
				539
				540	u may be NULL which causes the contents to be undefined. It is the
				541	user's responsibility to fill in the needed data afterwards. Note
				542	that modifying the Unicode object contents after construction is
				543	only allowed if u was set to NULL.
				544
				545	The buffer is copied into the new object. */
				546	PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
				547	const Py_UNICODE u, / Unicode buffer */
				548	Py_ssize_t size /* size of buffer */
				549	) /* Py_DEPRECATED(3.3) */;
				550
				551	/* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.
				552	Scan the string to find the maximum character. */
				553	PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
				554	int kind,
				555	const void *buffer,
				556	Py_ssize_t size);
				557
				558	/* Create a new string from a buffer of ASCII characters.
				559	WARNING: Don't check if the string contains any non-ASCII character. */
				560	PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
				561	const char *buffer,
				562	Py_ssize_t size);
				563
				564	/* Compute the maximum character of the substring unicode[start:end].
				565	Return 127 for an empty string. */
				566	PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
				567	PyObject *unicode,
				568	Py_ssize_t start,
				569	Py_ssize_t end);
				570
				571	/* Return a read-only pointer to the Unicode object's internal
				572	Py_UNICODE buffer.
				573	If the wchar_t/Py_UNICODE representation is not yet available, this
				574	function will calculate it. */
				575	PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
				576	PyObject unicode / Unicode object */
				577	) /* Py_DEPRECATED(3.3) */;
				578
				579	/* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string
				580	contains null characters. */
				581	PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
				582	PyObject unicode / Unicode object */
				583	);
				584
				585	/* Return a read-only pointer to the Unicode object's internal
				586	Py_UNICODE buffer and save the length at size.
				587	If the wchar_t/Py_UNICODE representation is not yet available, this
				588	function will calculate it. */
				589
				590	PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
				591	PyObject unicode, / Unicode object */
				592	Py_ssize_t size / location where to save the length */
				593	) /* Py_DEPRECATED(3.3) */;
				594
				595	/* Get the maximum ordinal for a Unicode character. */
				596	PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void) Py_DEPRECATED(3.3);
				597
				598
				599	/* --- _PyUnicodeWriter API ----------------------------------------------- */
				600
				601	typedef struct {
				602	PyObject *buffer;
				603	void *data;
				604	enum PyUnicode_Kind kind;
				605	Py_UCS4 maxchar;
				606	Py_ssize_t size;
				607	Py_ssize_t pos;
				608
				609	/* minimum number of allocated characters (default: 0) */
				610	Py_ssize_t min_length;
				611
				612	/* minimum character (default: 127, ASCII) */
				613	Py_UCS4 min_char;
				614
				615	/* If non-zero, overallocate the buffer (default: 0). */
				616	unsigned char overallocate;
				617
				618	/* If readonly is 1, buffer is a shared string (cannot be modified)
				619	and size is set to 0. */
				620	unsigned char readonly;
				621	} _PyUnicodeWriter ;
				622
				623	/* Initialize a Unicode writer.
				624	*
				625	* By default, the minimum buffer size is 0 character and overallocation is
				626	* disabled. Set min_length, min_char and overallocate attributes to control
				627	* the allocation of the buffer. */
				628	PyAPI_FUNC(void)
				629	_PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
				630
				631	/* Prepare the buffer to write 'length' characters
				632	with the specified maximum character.
				633
				634	Return 0 on success, raise an exception and return -1 on error. */
				635	#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
				636	(((MAXCHAR) <= (WRITER)->maxchar \
				637	&& (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
				638	? 0 \
				639	: (((LENGTH) == 0) \
				640	? 0 \
				641	: _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
				642
				643	/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
				644	instead. */
				645	PyAPI_FUNC(int)
				646	_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
				647	Py_ssize_t length, Py_UCS4 maxchar);
				648
				649	/* Prepare the buffer to have at least the kind KIND.
				650	For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
				651	support characters in range U+000-U+FFFF.
				652
				653	Return 0 on success, raise an exception and return -1 on error. */
				654	#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \
				655	(assert((KIND) != PyUnicode_WCHAR_KIND), \
				656	(KIND) <= (WRITER)->kind \
				657	? 0 \
				658	: _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
				659
				660	/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
				661	macro instead. */
				662	PyAPI_FUNC(int)
				663	_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
				664	enum PyUnicode_Kind kind);
				665
				666	/* Append a Unicode character.
				667	Return 0 on success, raise an exception and return -1 on error. */
				668	PyAPI_FUNC(int)
				669	_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
				670	Py_UCS4 ch
				671	);
				672
				673	/* Append a Unicode string.
				674	Return 0 on success, raise an exception and return -1 on error. */
				675	PyAPI_FUNC(int)
				676	_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
				677	PyObject str / Unicode string */
				678	);
				679
				680	/* Append a substring of a Unicode string.
				681	Return 0 on success, raise an exception and return -1 on error. */
				682	PyAPI_FUNC(int)
				683	_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
				684	PyObject str, / Unicode string */
				685	Py_ssize_t start,
				686	Py_ssize_t end
				687	);
				688
				689	/* Append an ASCII-encoded byte string.
				690	Return 0 on success, raise an exception and return -1 on error. */
				691	PyAPI_FUNC(int)
				692	_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
				693	const char str, / ASCII-encoded byte string */
				694	Py_ssize_t len /* number of bytes, or -1 if unknown */
				695	);
				696
				697	/* Append a latin1-encoded byte string.
				698	Return 0 on success, raise an exception and return -1 on error. */
				699	PyAPI_FUNC(int)
				700	_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
				701	const char str, / latin1-encoded byte string */
				702	Py_ssize_t len /* length in bytes */
				703	);
				704
				705	/* Get the value of the writer as a Unicode string. Clear the
				706	buffer of the writer. Raise an exception and return NULL
				707	on error. */
				708	PyAPI_FUNC(PyObject *)
				709	_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
				710
				711	/* Deallocate memory of a writer (clear its internal buffer). */
				712	PyAPI_FUNC(void)
				713	_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
				714
				715
				716	/* Format the object based on the format_spec, as defined in PEP 3101
				717	(Advanced String Formatting). */
				718	PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
				719	_PyUnicodeWriter *writer,
				720	PyObject *obj,
				721	PyObject *format_spec,
				722	Py_ssize_t start,
				723	Py_ssize_t end);
				724
/* --- wchar_t support for platforms which support it --------------------- */

#ifdef HAVE_WCHAR_H
PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);
#endif
				730
				731	/* --- Manage the default encoding ---------------------------------------- */
				732
				733	/* Returns a pointer to the default encoding (UTF-8) of the
				734	Unicode object unicode and the size of the encoded representation
				735	in bytes stored in *size.
				736
				737	In case of an error, no *size is set.
				738
				739	This function caches the UTF-8 encoded string in the unicodeobject
				740	and subsequent calls will return the same string. The memory is released
				741	when the unicodeobject is deallocated.
				742
				743	_PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to
				744	support the previous internal function with the same behaviour.
				745
				746	*** This API is for interpreter INTERNAL USE ONLY and will likely
				747	*** be removed or changed in the future.
				748
				749	*** If you need to access the Unicode object as UTF-8 bytes string,
				750	*** please use PyUnicode_AsUTF8String() instead.
				751	*/
				752
				753	PyAPI_FUNC(const char *) PyUnicode_AsUTF8AndSize(
				754	PyObject *unicode,
				755	Py_ssize_t *size);
				756
				757	#define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize
				758
				759	/* Returns a pointer to the default encoding (UTF-8) of the
				760	Unicode object unicode.
				761
				762	Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
				763	in the unicodeobject.
				764
				765	_PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
				766	support the previous internal function with the same behaviour.
				767
				768	Use of this API is DEPRECATED since no size information can be
				769	extracted from the returned data.
				770
				771	*** This API is for interpreter INTERNAL USE ONLY and will likely
				772	*** be removed or changed for Python 3.1.
				773
				774	*** If you need to access the Unicode object as UTF-8 bytes string,
				775	*** please use PyUnicode_AsUTF8String() instead.
				776
				777	*/
				778
				779	PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode);
				780
				781	#define _PyUnicode_AsString PyUnicode_AsUTF8
				782
				783	/* --- Generic Codecs ----------------------------------------------------- */
				784
				785	/* Encodes a Py_UNICODE buffer of the given size and returns a
				786	Python string object. */
				787	PyAPI_FUNC(PyObject*) PyUnicode_Encode(
				788	const Py_UNICODE *s, /* Unicode char buffer */
				789	Py_ssize_t size, /* number of Py_UNICODE chars to encode */
				790	const char *encoding, /* encoding */
				791	const char *errors /* error handling */
				792	) Py_DEPRECATED(3.3);
				793
				794	/* --- UTF-7 Codecs ------------------------------------------------------- */
				795
				796	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
				797	const Py_UNICODE *data, /* Unicode char buffer */
				798	Py_ssize_t length, /* number of Py_UNICODE chars to encode */
				799	int base64SetO, /* Encode RFC2152 Set O characters in base64 */
				800	int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
				801	const char *errors /* error handling */
				802	) Py_DEPRECATED(3.3);
				803
				804	PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
				805	PyObject *unicode, /* Unicode object */
				806	int base64SetO, /* Encode RFC2152 Set O characters in base64 */
				807	int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
				808	const char *errors /* error handling */
				809	);
				810
				811	/* --- UTF-8 Codecs ------------------------------------------------------- */
				812
				813	PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
				814	PyObject *unicode,
				815	const char *errors);
				816
				817	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
				818	const Py_UNICODE *data, /* Unicode char buffer */
				819	Py_ssize_t length, /* number of Py_UNICODE chars to encode */
				820	const char *errors /* error handling */
				821	) Py_DEPRECATED(3.3);
				822
				823	/* --- UTF-32 Codecs ------------------------------------------------------ */
				824
				825	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
				826	const Py_UNICODE *data, /* Unicode char buffer */
				827	Py_ssize_t length, /* number of Py_UNICODE chars to encode */
				828	const char *errors, /* error handling */
				829	int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
				830	) Py_DEPRECATED(3.3);
				831
				832	PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
				833	PyObject *object, /* Unicode object */
				834	const char *errors, /* error handling */
				835	int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
				836	);
				837
				838	/* --- UTF-16 Codecs ------------------------------------------------------ */
				839
				840	/* Returns a Python string object holding the UTF-16 encoded value of
				841	the Unicode data.
				842
				843	If byteorder is not 0, output is written according to the following
				844	byte order:
				845
				846	byteorder == -1: little endian
				847	byteorder == 0: native byte order (writes a BOM mark)
				848	byteorder == 1: big endian
				849
				850	If byteorder is 0, the output string will always start with the
				851	Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
				852	prepended.
				853
				854	Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
				855	UCS-2. This trick makes it possible to add full UTF-16 capabilities
				856	at a later point without compromising the APIs.
				857
				858	*/
				859	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
				860	const Py_UNICODE *data, /* Unicode char buffer */
				861	Py_ssize_t length, /* number of Py_UNICODE chars to encode */
				862	const char *errors, /* error handling */
				863	int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
				864	) Py_DEPRECATED(3.3);
				865
				866	PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
				867	PyObject* unicode, /* Unicode object */
				868	const char *errors, /* error handling */
				869	int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
				870	);
				871
				872	/* --- Unicode-Escape Codecs ---------------------------------------------- */
				873
				874	/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
				875	chars. */
				876	PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscape(
				877	const char *string, /* Unicode-Escape encoded string */
				878	Py_ssize_t length, /* size of string */
				879	const char *errors, /* error handling */
				880	const char **first_invalid_escape /* on return, points to first
				881	invalid escaped char in
				882	string. */
				883	);
				884
				885	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
				886	const Py_UNICODE *data, /* Unicode char buffer */
				887	Py_ssize_t length /* Number of Py_UNICODE chars to encode */
				888	) Py_DEPRECATED(3.3);
				889
				890	/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
				891
				892	PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
				893	const Py_UNICODE *data, /* Unicode char buffer */
				894	Py_ssize_t length /* Number of Py_UNICODE chars to encode */
				895	) Py_DEPRECATED(3.3);
				896
Victor Stinner	75e4699	2018-11-26 17:29:38 +0100	[diff] [blame]	897	/* --- Latin-1 Codecs ----------------------------------------------------- */
				898
				899	PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
				900	PyObject* unicode,
				901	const char* errors);
				902
				903	PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
				904	const Py_UNICODE *data, /* Unicode char buffer */
				905	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
				906	const char *errors /* error handling */
				907	) Py_DEPRECATED(3.3);
				908
				909	/* --- ASCII Codecs ------------------------------------------------------- */
				910
				911	PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
				912	PyObject* unicode,
				913	const char* errors);
				914
				915	PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
				916	const Py_UNICODE *data, /* Unicode char buffer */
				917	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
				918	const char *errors /* error handling */
				919	) Py_DEPRECATED(3.3);
				920
				921	/* --- Character Map Codecs ----------------------------------------------- */
				922
				923	PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
				924	const Py_UNICODE *data, /* Unicode char buffer */
				925	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
				926	PyObject *mapping, /* encoding mapping */
				927	const char *errors /* error handling */
				928	) Py_DEPRECATED(3.3);
				929
				930	PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
				931	PyObject *unicode, /* Unicode object */
				932	PyObject *mapping, /* encoding mapping */
				933	const char *errors /* error handling */
				934	);
				935
				936	/* Translate a Py_UNICODE buffer of the given length by applying a
				937	character mapping table to it and return the resulting Unicode
				938	object.
				939
				940	The mapping table must map Unicode ordinal integers to Unicode strings,
				941	Unicode ordinal integers or None (causing deletion of the character).
				942
				943	Mapping tables may be dictionaries or sequences. Unmapped character
				944	ordinals (ones which cause a LookupError) are left untouched and
				945	are copied as-is.
				946
				947	*/
				948	PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
				949	const Py_UNICODE *data, /* Unicode char buffer */
				950	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
				951	PyObject *table, /* Translate table */
				952	const char *errors /* error handling */
				953	) Py_DEPRECATED(3.3);
				954
				955	/* --- MBCS codecs for Windows -------------------------------------------- */
				956
				957	#ifdef MS_WINDOWS
				958	PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
				959	const Py_UNICODE *data, /* Unicode char buffer */
				960	Py_ssize_t length, /* number of Py_UNICODE chars to encode */
				961	const char *errors /* error handling */
				962	) Py_DEPRECATED(3.3);
				963	#endif
				964
				965	/* --- Decimal Encoder ---------------------------------------------------- */
				966
				967	/* Takes a Unicode string holding a decimal value and writes it into
				968	an output buffer using standard ASCII digit codes.
				969
				970	The output buffer has to provide at least length+1 bytes of storage
				971	area. The output string is 0-terminated.
				972
				973	The encoder converts whitespace to ' ', decimal characters to their
				974	corresponding ASCII digit and all other Latin-1 characters except
				975	\0 as-is. Characters outside this range (Unicode ordinals 1-256)
				976	are treated as errors. This includes embedded NULL bytes.
				977
				978	Error handling is defined by the errors argument:
				979
				980	NULL or "strict": raise a ValueError
				981	"ignore": ignore the wrong characters (these are not copied to the
				982	output buffer)
				983	"replace": replaces illegal characters with '?'
				984
				985	Returns 0 on success, -1 on failure.
				986
				987	*/
				988
				989	PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
				990	Py_UNICODE *s, /* Unicode buffer */
				991	Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
				992	char *output, /* Output buffer; must have size >= length */
				993	const char *errors /* error handling */
				994	) /* Py_DEPRECATED(3.3) */;
				995
				996	/* Transforms code points that have decimal digit property to the
				997	corresponding ASCII digit code points.
				998
				999	Returns a new Unicode string on success, NULL on failure.
				1000	*/
				1001
				1002	PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
				1003	Py_UNICODE *s, /* Unicode buffer */
				1004	Py_ssize_t length /* Number of Py_UNICODE chars to transform */
				1005	) /* Py_DEPRECATED(3.3) */;
				1006
				1007	/* Converts a Unicode object holding a decimal value to an ASCII string
				1008	for use in int, float and complex parsers.
				1009	Transforms code points that have decimal digit property to the
				1010	corresponding ASCII digit code points. Transforms spaces to ASCII.
				1011	Transforms code points starting from the first non-ASCII code point that
				1012	is neither a decimal digit nor a space to the end into '?'. */
				1013
				1014	PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
				1015	PyObject *unicode /* Unicode object */
				1016	);
				1017
				1018	/* --- Methods & Slots ---------------------------------------------------- */
				1019
				1020	PyAPI_FUNC(PyObject *) _PyUnicode_JoinArray(
				1021	PyObject *separator,
				1022	PyObject *const *items,
				1023	Py_ssize_t seqlen
				1024	);
				1025
				1026	/* Test whether a unicode is equal to an ASCII identifier. Return 1 if true,
				1027	0 otherwise. The right argument must be an ASCII identifier.
				1028	Any error that occurs inside will be cleared before returning. */
				1029	PyAPI_FUNC(int) _PyUnicode_EqualToASCIIId(
				1030	PyObject *left, /* Left string */
				1031	_Py_Identifier *right /* Right identifier */
				1032	);
				1033
				1034	/* Test whether a unicode is equal to an ASCII string. Return 1 if true,
				1035	0 otherwise. The right argument must be an ASCII-encoded string.
				1036	Any error that occurs inside will be cleared before returning. */
				1037	PyAPI_FUNC(int) _PyUnicode_EqualToASCIIString(
				1038	PyObject *left,
				1039	const char *right /* ASCII-encoded string */
				1040	);
				1041
				1042	/* Externally visible for str.strip(unicode) */
				1043	PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
				1044	PyObject *self,
				1045	int striptype,
				1046	PyObject *sepobj
				1047	);
				1048
				1049	/* Using explicit passed-in values, insert the thousands grouping
				1050	into the string pointed to by buffer. For the argument descriptions,
				1051	see Objects/stringlib/localeutil.h */
				1052	PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
				1053	_PyUnicodeWriter *writer,
				1054	Py_ssize_t n_buffer,
				1055	PyObject *digits,
				1056	Py_ssize_t d_pos,
				1057	Py_ssize_t n_digits,
				1058	Py_ssize_t min_width,
				1059	const char *grouping,
				1060	PyObject *thousands_sep,
				1061	Py_UCS4 *maxchar);
				1062
				1063	/* === Characters Type APIs =============================================== */
				1064
				1065	/* Helper array used by Py_UNICODE_ISSPACE(). */
				1066
				1067	PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
				1068
				1069	/* These should not be used directly. Use the Py_UNICODE_IS* and
				1070	Py_UNICODE_TO* macros instead.
				1071
				1072	These APIs are implemented in Objects/unicodectype.c.
				1073
				1074	*/
				1075
				1076	PyAPI_FUNC(int) _PyUnicode_IsLowercase(
				1077	Py_UCS4 ch /* Unicode character */
				1078	);
				1079
				1080	PyAPI_FUNC(int) _PyUnicode_IsUppercase(
				1081	Py_UCS4 ch /* Unicode character */
				1082	);
				1083
				1084	PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
				1085	Py_UCS4 ch /* Unicode character */
				1086	);
				1087
				1088	PyAPI_FUNC(int) _PyUnicode_IsXidStart(
				1089	Py_UCS4 ch /* Unicode character */
				1090	);
				1091
				1092	PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
				1093	Py_UCS4 ch /* Unicode character */
				1094	);
				1095
				1096	PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
				1097	const Py_UCS4 ch /* Unicode character */
				1098	);
				1099
				1100	PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
				1101	const Py_UCS4 ch /* Unicode character */
				1102	);
				1103
				1104	PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
				1105	Py_UCS4 ch /* Unicode character */
				1106	) /* Py_DEPRECATED(3.3) */;
				1107
				1108	PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
				1109	Py_UCS4 ch /* Unicode character */
				1110	) /* Py_DEPRECATED(3.3) */;
				1111
				1112	PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
				1113	Py_UCS4 ch /* Unicode character */
				1114	) Py_DEPRECATED(3.3);
				1115
				1116	PyAPI_FUNC(int) _PyUnicode_ToLowerFull(
				1117	Py_UCS4 ch, /* Unicode character */
				1118	Py_UCS4 *res
				1119	);
				1120
				1121	PyAPI_FUNC(int) _PyUnicode_ToTitleFull(
				1122	Py_UCS4 ch, /* Unicode character */
				1123	Py_UCS4 *res
				1124	);
				1125
				1126	PyAPI_FUNC(int) _PyUnicode_ToUpperFull(
				1127	Py_UCS4 ch, /* Unicode character */
				1128	Py_UCS4 *res
				1129	);
				1130
				1131	PyAPI_FUNC(int) _PyUnicode_ToFoldedFull(
				1132	Py_UCS4 ch, /* Unicode character */
				1133	Py_UCS4 *res
				1134	);
				1135
				1136	PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable(
				1137	Py_UCS4 ch /* Unicode character */
				1138	);
				1139
				1140	PyAPI_FUNC(int) _PyUnicode_IsCased(
				1141	Py_UCS4 ch /* Unicode character */
				1142	);
				1143
				1144	PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
				1145	Py_UCS4 ch /* Unicode character */
				1146	);
				1147
				1148	PyAPI_FUNC(int) _PyUnicode_ToDigit(
				1149	Py_UCS4 ch /* Unicode character */
				1150	);
				1151
				1152	PyAPI_FUNC(double) _PyUnicode_ToNumeric(
				1153	Py_UCS4 ch /* Unicode character */
				1154	);
				1155
				1156	PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
				1157	Py_UCS4 ch /* Unicode character */
				1158	);
				1159
				1160	PyAPI_FUNC(int) _PyUnicode_IsDigit(
				1161	Py_UCS4 ch /* Unicode character */
				1162	);
				1163
				1164	PyAPI_FUNC(int) _PyUnicode_IsNumeric(
				1165	Py_UCS4 ch /* Unicode character */
				1166	);
				1167
				1168	PyAPI_FUNC(int) _PyUnicode_IsPrintable(
				1169	Py_UCS4 ch /* Unicode character */
				1170	);
				1171
				1172	PyAPI_FUNC(int) _PyUnicode_IsAlpha(
				1173	Py_UCS4 ch /* Unicode character */
				1174	);
				1175
				1176	PyAPI_FUNC(size_t) Py_UNICODE_strlen(
				1177	const Py_UNICODE *u
				1178	) Py_DEPRECATED(3.3);
				1179
				1180	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
				1181	Py_UNICODE *s1,
				1182	const Py_UNICODE *s2) Py_DEPRECATED(3.3);
				1183
				1184	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat(
				1185	Py_UNICODE *s1, const Py_UNICODE *s2) Py_DEPRECATED(3.3);
				1186
				1187	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
				1188	Py_UNICODE *s1,
				1189	const Py_UNICODE *s2,
				1190	size_t n) Py_DEPRECATED(3.3);
				1191
				1192	PyAPI_FUNC(int) Py_UNICODE_strcmp(
				1193	const Py_UNICODE *s1,
				1194	const Py_UNICODE *s2
				1195	) Py_DEPRECATED(3.3);
				1196
				1197	PyAPI_FUNC(int) Py_UNICODE_strncmp(
				1198	const Py_UNICODE *s1,
				1199	const Py_UNICODE *s2,
				1200	size_t n
				1201	) Py_DEPRECATED(3.3);
				1202
				1203	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
				1204	const Py_UNICODE *s,
				1205	Py_UNICODE c
				1206	) Py_DEPRECATED(3.3);
				1207
				1208	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
				1209	const Py_UNICODE *s,
				1210	Py_UNICODE c
				1211	) Py_DEPRECATED(3.3);
				1212
				1213	PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int);
				1214
				1215	/* Create a copy of a unicode string ending with a nul character. Return NULL
				1216	and raise a MemoryError exception on memory allocation failure, otherwise
				1217	return a new allocated buffer (use PyMem_Free() to free the buffer). */
				1218
				1219	PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
				1220	PyObject *unicode
				1221	) Py_DEPRECATED(3.3);
				1222
				1223	/* Return an interned Unicode object for an Identifier; may fail if there is no memory.*/
				1224	PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*);
				1225	/* Clear all static strings. */
				1226	PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void);
				1227
				1228	/* Fast equality check when the inputs are known to be exact unicode types
				1229	and where the hash values are equal (i.e. a very probable match) */
				1230	PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *);
				1231
				1232	#ifdef __cplusplus
				1233	}
				1234	#endif