Blame - Include/unicodeobject.h - platform/external/python/cpython3

blob: bed3b7b8a1a176f73c3f293019baeda6bf1ffa81 [file] [log] [blame]

Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1	#ifndef Py_UNICODEOBJECT_H
				2	#define Py_UNICODEOBJECT_H
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	3
				4	/*
				5
				6	Unicode implementation based on original code by Fredrik Lundh,
				7	modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
				8	Unicode Integration Proposal (see file Misc/unicode.txt).
				9
				10	(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
				11
				12
				13	Original header:
				14	--------------------------------------------------------------------
				15
				16	* Yet another Unicode string type for Python. This type supports the
				17	* 16-bit Basic Multilingual Plane (BMP) only.
				18	*
				19	* Written by Fredrik Lundh, January 1999.
				20	*
				21	* Copyright (c) 1999 by Secret Labs AB.
				22	* Copyright (c) 1999 by Fredrik Lundh.
				23	*
				24	* fredrik@pythonware.com
				25	* http://www.pythonware.com
				26	*
				27	* --------------------------------------------------------------------
				28	* This Unicode String Type is
				29	*
				30	* Copyright (c) 1999 by Secret Labs AB
				31	* Copyright (c) 1999 by Fredrik Lundh
				32	*
				33	* By obtaining, using, and/or copying this software and/or its
				34	* associated documentation, you agree that you have read, understood,
				35	* and will comply with the following terms and conditions:
				36	*
				37	* Permission to use, copy, modify, and distribute this software and its
				38	* associated documentation for any purpose and without fee is hereby
				39	* granted, provided that the above copyright notice appears in all
				40	* copies, and that both that copyright notice and this permission notice
				41	* appear in supporting documentation, and that the name of Secret Labs
				42	* AB or the author not be used in advertising or publicity pertaining to
				43	* distribution of the software without specific, written prior
				44	* permission.
				45	*
				46	* SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
				47	* THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
				48	* FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
				49	* ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
				50	* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
				51	* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
				52	* OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
				53	* -------------------------------------------------------------------- */
				54
				55	#include "ctype.h"
				56
				57	/* === Internal API ======================================================= */
				58
				59	/* --- Internal Unicode Format -------------------------------------------- */
				60
				61	/* Set these flags if the platform has "wchar.h", "wctype.h" and the
				62	wchar_t type is a 16-bit unsigned type */
				63	/* #define HAVE_WCHAR_H */
				64	/* #define HAVE_USABLE_WCHAR_T */
				65
				66	/* Defaults for various platforms */
				67	#ifndef HAVE_USABLE_WCHAR_T
				68
				69	/* Windows has a usable wchar_t type */
				70	# if defined(MS_WIN32)
				71	# define HAVE_USABLE_WCHAR_T
				72	# endif
				73
				74	#endif
				75
				76	/* If the compiler provides a wchar_t type we try to support it
				77	through the interface functions PyUnicode_FromWideChar() and
				78	PyUnicode_AsWideChar(). */
				79
				80	#ifdef HAVE_USABLE_WCHAR_T
				81	# define HAVE_WCHAR_H
				82	#endif
				83
				84	#ifdef HAVE_WCHAR_H
Guido van Rossum	24bdb04	2000-03-28 20:29:59 +0000	[diff] [blame]	85	/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
				86	# ifdef _HAVE_BSDI
				87	# include <time.h>
				88	# endif
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	89	# include "wchar.h"
				90	#endif
				91
				92	#ifdef HAVE_USABLE_WCHAR_T
				93
				94	/* If the compiler defines wchar_t as a 16-bit unsigned type we can
				95	use the compiler type directly. Works fine with all modern Windows
				96	platforms. */
				97
				98	typedef wchar_t Py_UNICODE;
				99
				100	#else
				101
				102	/* Use if you have a standard ANSI compiler, without wchar_t support.
				103	If a short is not 16 bits on your platform, you have to fix the
				104	typedef below, or the module initialization code will complain. */
				105
				106	typedef unsigned short Py_UNICODE;
				107
				108	#endif
				109
				110	/* --- Internal Unicode Operations ---------------------------------------- */
				111
				112	/* If you want Python to use the compiler's wctype.h functions instead
Barry Warsaw	51ac580	2000-03-20 16:36:48 +0000	[diff] [blame]	113	of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS or
				114	configure Python using --with-ctype-functions. This reduces the
				115	interpreter's code size. */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	116
				117	#if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)
				118
				119	#include "wctype.h"
				120
				121	#define Py_UNICODE_ISSPACE(ch) iswspace(ch)
				122
				123	#define Py_UNICODE_ISLOWER(ch) iswlower(ch)
				124	#define Py_UNICODE_ISUPPER(ch) iswupper(ch)
				125	#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
				126	#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
				127
				128	#define Py_UNICODE_TOLOWER(ch) towlower(ch)
				129	#define Py_UNICODE_TOUPPER(ch) towupper(ch)
				130	#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
				131
				132	#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
				133	#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
				134	#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
				135
				136	#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
				137	#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
				138	#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
				139
				140	#else
				141
				142	#define Py_UNICODE_ISSPACE(ch) _PyUnicode_IsWhitespace(ch)
				143
				144	#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
				145	#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
				146	#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
				147	#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
				148
				149	#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
				150	#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
				151	#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
				152
				153	#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
				154	#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
				155	#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
				156
				157	#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
				158	#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
				159	#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
				160
				161	#endif
				162
				163	#define Py_UNICODE_COPY(target, source, length)\
				164	(memcpy((target), (source), (length)*sizeof(Py_UNICODE)))
				165
				166	#define Py_UNICODE_FILL(target, value, length) do\
				167	{int i; for (i = 0; i < (length); i++) (target)[i] = (value);}\
				168	while (0)
				169
				170	#define Py_UNICODE_MATCH(string, offset, substring)\
				171	(!memcmp((string)->str + (offset), (substring)->str,\
				172	(substring)->length*sizeof(Py_UNICODE)))
				173
Barry Warsaw	51ac580	2000-03-20 16:36:48 +0000	[diff] [blame]	174	#ifdef __cplusplus
				175	extern "C" {
				176	#endif
				177
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	178	/* --- Unicode Type ------------------------------------------------------- */
				179
				180	typedef struct {
				181	PyObject_HEAD
				182	int length; /* Length of raw Unicode data in buffer */
				183	Py_UNICODE *str; /* Raw Unicode buffer */
				184	long hash; /* Hash value; -1 if not set */
				185	PyObject *utf8str; /* UTF-8 encoded version as Python string,
				186	or NULL */
				187	} PyUnicodeObject;
				188
				189	extern DL_IMPORT(PyTypeObject) PyUnicode_Type;
				190
				191	#define PyUnicode_Check(op) (((op)->ob_type == &PyUnicode_Type))
				192
				193	/* Fast access macros */
				194	#define PyUnicode_GET_SIZE(op) \
				195	(((PyUnicodeObject *)(op))->length)
				196	#define PyUnicode_GET_DATA_SIZE(op) \
				197	(((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE))
				198	#define PyUnicode_AS_UNICODE(op) \
				199	(((PyUnicodeObject *)(op))->str)
				200	#define PyUnicode_AS_DATA(op) \
				201	((const char *)((PyUnicodeObject *)(op))->str)
				202
				203	/* --- Constants ---------------------------------------------------------- */
				204
				205	/* This Unicode character will be used as replacement character during
				206	decoding if the errors argument is set to "replace". Note: the
				207	Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
				208	Unicode 3.0. */
				209
				210	#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)
				211
				212	/* === Public API ========================================================= */
				213
				214	/* --- Plain Py_UNICODE --------------------------------------------------- */
				215
				216	/* Create a Unicode Object from the Py_UNICODE buffer u of the given
				217	size. u may be NULL which causes the contents to be undefined. It
				218	is the user's responsibility to fill in the needed data.
				219
				220	The buffer is copied into the new object. */
				221
				222	extern DL_IMPORT(PyObject*) PyUnicode_FromUnicode(
				223	const Py_UNICODE *u, /* Unicode buffer */
				224	int size /* size of buffer */
				225	);
				226
				227	/* Return a read-only pointer to the Unicode object's internal
				228	Py_UNICODE buffer. */
				229
				230	extern DL_IMPORT(Py_UNICODE *) PyUnicode_AsUnicode(
				231	PyObject *unicode /* Unicode object */
				232	);
				233
				234	/* Get the length of the Unicode object. */
				235
				236	extern DL_IMPORT(int) PyUnicode_GetSize(
				237	PyObject *unicode /* Unicode object */
				238	);
				239
Guido van Rossum	52c2359	2000-04-10 13:41:41 +0000	[diff] [blame]	240	/* Resize an already allocated Unicode object to the new size length.
				241
				242	*unicode is modified to point to the new (resized) object and 0
				243	returned on success.
				244
				245	This API may only be called by the function which also called the
				246	Unicode constructor. The refcount on the object must be 1. Otherwise,
				247	an error is returned.
				248
				249	Error handling is implemented as follows: an exception is set, -1
				250	is returned and *unicode left untouched.
				251
				252	*/
				253
				254	extern DL_IMPORT(int) PyUnicode_Resize(
				255	PyObject **unicode, /* Pointer to the Unicode object */
				256	int length /* New length */
				257	);
				258
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	259	/* Coerce obj to an Unicode object and return a reference with
				260	incremented refcount.
				261
				262	Coercion is done in the following way:
				263
				264	1. Unicode objects are passed back as-is with incremented
				265	refcount.
				266
				267	2. String and other char buffer compatible objects are decoded
Fred Drake	cb093fe	2000-05-09 19:51:53 +0000	[diff] [blame]	268	under the assumptions that they contain data using the current
				269	default encoding. Decoding is done in "strict" mode.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	270
				271	3. All other objects raise an exception.
				272
				273	The API returns NULL in case of an error. The caller is responsible
				274	for decref'ing the returned objects.
				275
				276	*/
				277
				278	extern DL_IMPORT(PyObject*) PyUnicode_FromObject(
				279	register PyObject *obj /* Object */
				280	);
				281
				282	/* --- wchar_t support for platforms which support it --------------------- */
				283
				284	#ifdef HAVE_WCHAR_H
				285
				286	/* Create a Unicode Object from the wchar_t buffer w of the given
				287	size.
				288
				289	The buffer is copied into the new object. */
				290
				291	extern DL_IMPORT(PyObject*) PyUnicode_FromWideChar(
				292	register const wchar_t *w, /* wchar_t buffer */
				293	int size /* size of buffer */
				294	);
				295
				296	/* Copies the Unicode Object contents into the wchar_t buffer w. At
				297	most size wchar_t characters are copied.
				298
				299	Returns the number of wchar_t characters copied or -1 in case of an
				300	error. */
				301
				302	extern DL_IMPORT(int) PyUnicode_AsWideChar(
				303	PyUnicodeObject *unicode, /* Unicode object */
				304	register wchar_t *w, /* wchar_t buffer */
				305	int size /* size of buffer */
				306	);
				307
				308	#endif
				309
				310	/* === Builtin Codecs =====================================================
				311
				312	Many of these APIs take two arguments encoding and errors. These
				313	parameters encoding and errors have the same semantics as the ones
				314	of the builtin unicode() API.
				315
Fred Drake	cb093fe	2000-05-09 19:51:53 +0000	[diff] [blame]	316	Setting encoding to NULL causes the default encoding to be used.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	317
				318	Error handling is set by errors which may also be set to NULL
				319	meaning to use the default handling defined for the codec. Default
				320	error handling for all builtin codecs is "strict" (ValueErrors are
				321	raised).
				322
				323	The codecs all use a similar interface. Only deviation from the
				324	generic ones are documented.
				325
				326	*/
				327
Fred Drake	cb093fe	2000-05-09 19:51:53 +0000	[diff] [blame]	328	/* --- Manage the default encoding ---------------------------------------- */
				329
				330	/* Returns the currently active default encoding.
				331
				332	The default encoding is currently implemented as run-time settable
				333	process global. This may change in future versions of the
				334	interpreter to become a parameter which is managed on a per-thread
				335	basis.
				336
				337	*/
				338
				339	extern DL_IMPORT(const char*) PyUnicode_GetDefaultEncoding();
				340
				341	/* Sets the currently active default encoding.
				342
				343	Returns 0 on success, -1 in case of an error.
				344
				345	*/
				346
				347	extern DL_IMPORT(int) PyUnicode_SetDefaultEncoding(
				348	const char *encoding /* Encoding name in standard form */
				349	);
				350
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	351	/* --- Generic Codecs ----------------------------------------------------- */
				352
				353	/* Create a Unicode object by decoding the encoded string s of the
				354	given size. */
				355
				356	extern DL_IMPORT(PyObject*) PyUnicode_Decode(
				357	const char *s, /* encoded string */
				358	int size, /* size of buffer */
				359	const char *encoding, /* encoding */
				360	const char *errors /* error handling */
				361	);
				362
				363	/* Encodes a Py_UNICODE buffer of the given size and returns a
				364	Python string object. */
				365
				366	extern DL_IMPORT(PyObject*) PyUnicode_Encode(
				367	const Py_UNICODE *s, /* Unicode char buffer */
				368	int size, /* number of Py_UNICODE chars to encode */
				369	const char *encoding, /* encoding */
				370	const char *errors /* error handling */
				371	);
				372
				373	/* Encodes a Unicode object and returns the result as Python string
				374	object. */
				375
				376	extern DL_IMPORT(PyObject*) PyUnicode_AsEncodedString(
				377	PyObject *unicode, /* Unicode object */
				378	const char *encoding, /* encoding */
				379	const char *errors /* error handling */
				380	);
				381
				382	/* --- UTF-8 Codecs ------------------------------------------------------- */
				383
				384	extern DL_IMPORT(PyObject*) PyUnicode_DecodeUTF8(
				385	const char *string, /* UTF-8 encoded string */
				386	int length, /* size of string */
				387	const char *errors /* error handling */
				388	);
				389
				390	extern DL_IMPORT(PyObject*) PyUnicode_AsUTF8String(
				391	PyObject *unicode /* Unicode object */
				392	);
				393
				394	extern DL_IMPORT(PyObject*) PyUnicode_EncodeUTF8(
				395	const Py_UNICODE *data, /* Unicode char buffer */
				396	int length, /* number of Py_UNICODE chars to encode */
				397	const char *errors /* error handling */
				398	);
				399
				400	/* --- UTF-16 Codecs ------------------------------------------------------ */
				401
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	402	/* Decodes length bytes from a UTF-16 encoded buffer string and returns
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	403	the corresponding Unicode object.
				404
				405	errors (if non-NULL) defines the error handling. It defaults
				406	to "strict".
				407
				408	If byteorder is non-NULL, the decoder starts decoding using the
				409	given byte order:
				410
				411	*byteorder == -1: little endian
				412	*byteorder == 0: native order
				413	*byteorder == 1: big endian
				414
				415	and then switches according to all BOM marks it finds in the input
				416	data. BOM marks are not copied into the resulting Unicode string.
				417	After completion, *byteorder is set to the current byte order at
				418	the end of input data.
				419
				420	If byteorder is NULL, the codec starts in native order mode.
				421
				422	*/
				423
				424	extern DL_IMPORT(PyObject*) PyUnicode_DecodeUTF16(
				425	const char *string, /* UTF-16 encoded string */
				426	int length, /* size of string */
				427	const char *errors, /* error handling */
				428	int *byteorder /* pointer to byteorder to use
				429	0=native;-1=LE,1=BE; updated on
				430	exit */
				431	);
				432
				433	/* Returns a Python string using the UTF-16 encoding in native byte
				434	order. The string always starts with a BOM mark. */
				435
				436	extern DL_IMPORT(PyObject*) PyUnicode_AsUTF16String(
				437	PyObject *unicode /* Unicode object */
				438	);
				439
				440	/* Returns a Python string object holding the UTF-16 encoded value of
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	441	the Unicode data.
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	442
				443	If byteorder is not 0, output is written according to the following
				444	byte order:
				445
				446	byteorder == -1: little endian
				447	byteorder == 0: native byte order (writes a BOM mark)
				448	byteorder == 1: big endian
				449
				450	If byteorder is 0, the output string will always start with the
				451	Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
				452	prepended.
				453
				454	Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
				455	UCS-2. This trick makes it possible to add full UTF-16 capabilities
				456	at a later point without compromising the APIs.
				457
				458	*/
				459
				460	extern DL_IMPORT(PyObject*) PyUnicode_EncodeUTF16(
				461	const Py_UNICODE *data, /* Unicode char buffer */
				462	int length, /* number of Py_UNICODE chars to encode */
				463	const char *errors, /* error handling */
				464	int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
				465	);
				466
				467	/* --- Unicode-Escape Codecs ---------------------------------------------- */
				468
				469	extern DL_IMPORT(PyObject*) PyUnicode_DecodeUnicodeEscape(
				470	const char *string, /* Unicode-Escape encoded string */
				471	int length, /* size of string */
				472	const char *errors /* error handling */
				473	);
				474
				475	extern DL_IMPORT(PyObject*) PyUnicode_AsUnicodeEscapeString(
				476	PyObject *unicode /* Unicode object */
				477	);
				478
				479	extern DL_IMPORT(PyObject*) PyUnicode_EncodeUnicodeEscape(
				480	const Py_UNICODE *data, /* Unicode char buffer */
				481	int length /* Number of Py_UNICODE chars to encode */
				482	);
				483
				484	/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
				485
				486	extern DL_IMPORT(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
				487	const char *string, /* Raw-Unicode-Escape encoded string */
				488	int length, /* size of string */
				489	const char *errors /* error handling */
				490	);
				491
				492	extern DL_IMPORT(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
				493	PyObject *unicode /* Unicode object */
				494	);
				495
				496	extern DL_IMPORT(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
				497	const Py_UNICODE *data, /* Unicode char buffer */
				498	int length /* Number of Py_UNICODE chars to encode */
				499	);
				500
				501	/* --- Latin-1 Codecs -----------------------------------------------------
				502
				503	Note: Latin-1 corresponds to the first 256 Unicode ordinals.
				504
				505	*/
				506
				507	extern DL_IMPORT(PyObject*) PyUnicode_DecodeLatin1(
				508	const char *string, /* Latin-1 encoded string */
				509	int length, /* size of string */
				510	const char *errors /* error handling */
				511	);
				512
				513	extern DL_IMPORT(PyObject*) PyUnicode_AsLatin1String(
				514	PyObject *unicode /* Unicode object */
				515	);
				516
				517	extern DL_IMPORT(PyObject*) PyUnicode_EncodeLatin1(
				518	const Py_UNICODE *data, /* Unicode char buffer */
				519	int length, /* Number of Py_UNICODE chars to encode */
				520	const char *errors /* error handling */
				521	);
				522
				523	/* --- ASCII Codecs -------------------------------------------------------
				524
				525	Only 7-bit ASCII data is accepted. All other codes generate errors.
				526
				527	*/
				528
				529	extern DL_IMPORT(PyObject*) PyUnicode_DecodeASCII(
				530	const char *string, /* ASCII encoded string */
				531	int length, /* size of string */
				532	const char *errors /* error handling */
				533	);
				534
				535	extern DL_IMPORT(PyObject*) PyUnicode_AsASCIIString(
				536	PyObject *unicode /* Unicode object */
				537	);
				538
				539	extern DL_IMPORT(PyObject*) PyUnicode_EncodeASCII(
				540	const Py_UNICODE *data, /* Unicode char buffer */
				541	int length, /* Number of Py_UNICODE chars to encode */
				542	const char *errors /* error handling */
				543	);
				544
				545	/* --- Character Map Codecs -----------------------------------------------
				546
				547	This codec uses mappings to encode and decode characters.
				548
				549	Decoding mappings must map single string characters to single
				550	Unicode characters, integers (which are then interpreted as Unicode
				551	ordinals) or None (meaning "undefined mapping" and causing an
				552	error).
				553
				554	Encoding mappings must map single Unicode characters to single
				555	string characters, integers (which are then interpreted as Latin-1
				556	ordinals) or None (meaning "undefined mapping" and causing an
				557	error).
				558
				559	If a character lookup fails with a LookupError, the character is
				560	copied as-is meaning that its ordinal value will be interpreted as
				561	Unicode or Latin-1 ordinal resp. Because of this mappings only need
				562	to contain those mappings which map characters to different code
				563	points.
				564
				565	*/
				566
				567	extern DL_IMPORT(PyObject*) PyUnicode_DecodeCharmap(
				568	const char *string, /* Encoded string */
				569	int length, /* size of string */
				570	PyObject *mapping, /* character mapping
				571	(char ordinal -> unicode ordinal) */
				572	const char *errors /* error handling */
				573	);
				574
				575	extern DL_IMPORT(PyObject*) PyUnicode_AsCharmapString(
				576	PyObject *unicode, /* Unicode object */
				577	PyObject *mapping /* character mapping
				578	(unicode ordinal -> char ordinal) */
				579	);
				580
				581	extern DL_IMPORT(PyObject*) PyUnicode_EncodeCharmap(
				582	const Py_UNICODE *data, /* Unicode char buffer */
				583	int length, /* Number of Py_UNICODE chars to encode */
				584	PyObject *mapping, /* character mapping
				585	(unicode ordinal -> char ordinal) */
				586	const char *errors /* error handling */
				587	);
				588
				589	/* Translate a Py_UNICODE buffer of the given length by applying a
				590	character mapping table to it and return the resulting Unicode
				591	object.
				592
				593	The mapping table must map Unicode ordinal integers to Unicode
				594	ordinal integers or None (causing deletion of the character).
				595
				596	Mapping tables may be dictionaries or sequences. Unmapped character
				597	ordinals (ones which cause a LookupError) are left untouched and
				598	are copied as-is.
				599
				600	*/
				601
				602	extern DL_IMPORT(PyObject *) PyUnicode_TranslateCharmap(
				603	const Py_UNICODE *data, /* Unicode char buffer */
				604	int length, /* Number of Py_UNICODE chars to encode */
				605	PyObject *table, /* Translate table */
				606	const char *errors /* error handling */
				607	);
				608
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	609	#ifdef MS_WIN32
Guido van Rossum	24bdb04	2000-03-28 20:29:59 +0000	[diff] [blame]	610
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	611	/* --- MBCS codecs for Windows -------------------------------------------- */
Guido van Rossum	24bdb04	2000-03-28 20:29:59 +0000	[diff] [blame]	612
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	613	extern DL_IMPORT(PyObject*) PyUnicode_DecodeMBCS(
				614	const char *string, /* MBCS encoded string */
				615	int length, /* size of string */
				616	const char *errors /* error handling */
				617	);
				618
				619	extern DL_IMPORT(PyObject*) PyUnicode_AsMBCSString(
				620	PyObject *unicode /* Unicode object */
				621	);
				622
				623	extern DL_IMPORT(PyObject*) PyUnicode_EncodeMBCS(
				624	const Py_UNICODE *data, /* Unicode char buffer */
				625	int length, /* Number of Py_UNICODE chars to encode */
				626	const char *errors /* error handling */
				627	);
				628
Guido van Rossum	efec115	2000-03-28 02:01:15 +0000	[diff] [blame]	629	#endif /* MS_WIN32 */
Guido van Rossum	24bdb04	2000-03-28 20:29:59 +0000	[diff] [blame]	630
Guido van Rossum	9e896b3	2000-04-05 20:11:21 +0000	[diff] [blame]	631	/* --- Decimal Encoder ---------------------------------------------------- */
				632
				633	/* Takes a Unicode string holding a decimal value and writes it into
				634	an output buffer using standard ASCII digit codes.
				635
				636	The output buffer has to provide at least length+1 bytes of storage
				637	area. The output string is 0-terminated.
				638
				639	The encoder converts whitespace to ' ', decimal characters to their
				640	corresponding ASCII digit and all other Latin-1 characters except
				641	\0 as-is. Characters outside this range (Unicode ordinals 1-256)
				642	are treated as errors. This includes embedded NULL bytes.
				643
				644	Error handling is defined by the errors argument:
				645
				646	NULL or "strict": raise a ValueError
				647	"ignore": ignore the wrong characters (these are not copied to the
				648	output buffer)
				649	"replace": replaces illegal characters with '?'
				650
				651	Returns 0 on success, -1 on failure.
				652
				653	*/
				654
				655	extern DL_IMPORT(int) PyUnicode_EncodeDecimal(
				656	Py_UNICODE *s, /* Unicode buffer */
				657	int length, /* Number of Py_UNICODE chars to encode */
				658	char *output, /* Output buffer; must have size >= length */
				659	const char *errors /* error handling */
				660	);
				661
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	662	/* --- Methods & Slots ----------------------------------------------------
				663
				664	These are capable of handling Unicode objects and strings on input
				665	(we refer to them as strings in the descriptions) and return
				666	Unicode objects or integers as appropriate. */
				667
				668	/* Concat two strings giving a new Unicode string. */
				669
				670	extern DL_IMPORT(PyObject*) PyUnicode_Concat(
				671	PyObject *left, /* Left string */
				672	PyObject *right /* Right string */
				673	);
				674
				675	/* Split a string giving a list of Unicode strings.
				676
				677	If sep is NULL, splitting will be done at all whitespace
				678	substrings. Otherwise, splits occur at the given separator.
				679
				680	At most maxsplit splits will be done. If negative, no limit is set.
				681
				682	Separators are not included in the resulting list.
				683
				684	*/
				685
				686	extern DL_IMPORT(PyObject*) PyUnicode_Split(
				687	PyObject *s, /* String to split */
				688	PyObject *sep, /* String separator */
				689	int maxsplit /* Maxsplit count */
				690	);
				691
				692	/* Ditto, but split at line breaks.
				693
				694	CRLF is considered to be one line break. Line breaks are not
				695	included in the resulting list. */
				696
				697	extern DL_IMPORT(PyObject*) PyUnicode_Splitlines(
				698	PyObject *s, /* String to split */
Guido van Rossum	004d64f	2000-04-11 15:39:46 +0000	[diff] [blame]	699	int keepends /* If true, line end markers are included */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	700	);
				701
				702	/* Translate a string by applying a character mapping table to it and
				703	return the resulting Unicode object.
				704
				705	The mapping table must map Unicode ordinal integers to Unicode
				706	ordinal integers or None (causing deletion of the character).
				707
				708	Mapping tables may be dictionaries or sequences. Unmapped character
				709	ordinals (ones which cause a LookupError) are left untouched and
				710	are copied as-is.
				711
				712	*/
				713
				714	extern DL_IMPORT(PyObject *) PyUnicode_Translate(
				715	PyObject *str, /* String */
				716	PyObject *table, /* Translate table */
				717	const char *errors /* error handling */
				718	);
				719
				720	/* Join a sequence of strings using the given separator and return
				721	the resulting Unicode string. */
				722
				723	extern DL_IMPORT(PyObject*) PyUnicode_Join(
				724	PyObject *separator, /* Separator string */
				725	PyObject *seq /* Sequence object */
				726	);
				727
				728	/* Return 1 if substr matches str[start:end] at the given tail end, 0
				729	otherwise. */
				730
				731	extern DL_IMPORT(int) PyUnicode_Tailmatch(
				732	PyObject *str, /* String */
				733	PyObject *substr, /* Prefix or Suffix string */
				734	int start, /* Start index */
				735	int end, /* Stop index */
				736	int direction /* Tail end: -1 prefix, +1 suffix */
				737	);
				738
				739	/* Return the first position of substr in str[start:end] using the
				740	given search direction or -1 if not found. */
				741
				742	extern DL_IMPORT(int) PyUnicode_Find(
				743	PyObject *str, /* String */
				744	PyObject *substr, /* Substring to find */
				745	int start, /* Start index */
				746	int end, /* Stop index */
				747	int direction /* Find direction: +1 forward, -1 backward */
				748	);
				749
Barry Warsaw	51ac580	2000-03-20 16:36:48 +0000	[diff] [blame]	750	/* Count the number of occurrences of substr in str[start:end]. */
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	751
				752	extern DL_IMPORT(int) PyUnicode_Count(
				753	PyObject *str, /* String */
				754	PyObject *substr, /* Substring to count */
				755	int start, /* Start index */
				756	int end /* Stop index */
				757	);
				758
Barry Warsaw	51ac580	2000-03-20 16:36:48 +0000	[diff] [blame]	759	/* Replace at most maxcount occurrences of substr in str with replstr
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	760	and return the resulting Unicode object. */
				761
				762	extern DL_IMPORT(PyObject *) PyUnicode_Replace(
				763	PyObject *str, /* String */
				764	PyObject *substr, /* Substring to find */
				765	PyObject *replstr, /* Substring to replace */
				766	int maxcount /* Max. number of replacements to apply;
				767	-1 = all */
				768	);
				769
				770	/* Compare two strings and return -1, 0, 1 for less than, equal,
				771	greater than resp. */
				772
				773	extern DL_IMPORT(int) PyUnicode_Compare(
				774	PyObject *left, /* Left string */
				775	PyObject *right /* Right string */
				776	);
				777
				778	/* Apply an argument tuple or dictionary to a format string and return
				779	the resulting Unicode string. */
				780
				781	extern DL_IMPORT(PyObject *) PyUnicode_Format(
				782	PyObject *format, /* Format string */
				783	PyObject *args /* Argument tuple or dictionary */
				784	);
				785
Guido van Rossum	d0d366b	2000-03-13 23:22:24 +0000	[diff] [blame]	786	/* Checks whether element is contained in container and return 1/0
				787	accordingly.
				788
				789	element has to coerce to an one element Unicode string. -1 is
				790	returned in case of an error. */
				791
				792	extern DL_IMPORT(int) PyUnicode_Contains(
				793	PyObject *container, /* Container string */
				794	PyObject *element /* Element string */
				795	);
				796
Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	797	/* === Characters Type APIs =============================================== */
				798
				799	/* These should not be used directly. Use the Py_UNICODE_IS* and
				800	Py_UNICODE_TO* macros instead.
				801
				802	These APIs are implemented in Objects/unicodectype.c.
				803
				804	*/
				805
				806	extern DL_IMPORT(int) _PyUnicode_IsLowercase(
				807	register const Py_UNICODE ch /* Unicode character */
				808	);
				809
				810	extern DL_IMPORT(int) _PyUnicode_IsUppercase(
				811	register const Py_UNICODE ch /* Unicode character */
				812	);
				813
				814	extern DL_IMPORT(int) _PyUnicode_IsTitlecase(
				815	register const Py_UNICODE ch /* Unicode character */
				816	);
				817
				818	extern DL_IMPORT(int) _PyUnicode_IsWhitespace(
				819	register const Py_UNICODE ch /* Unicode character */
				820	);
				821
				822	extern DL_IMPORT(int) _PyUnicode_IsLinebreak(
				823	register const Py_UNICODE ch /* Unicode character */
				824	);
				825
				826	extern DL_IMPORT(Py_UNICODE) _PyUnicode_ToLowercase(
				827	register const Py_UNICODE ch /* Unicode character */
				828	);
				829
				830	extern DL_IMPORT(Py_UNICODE) _PyUnicode_ToUppercase(
				831	register const Py_UNICODE ch /* Unicode character */
				832	);
				833
				834	extern DL_IMPORT(Py_UNICODE) _PyUnicode_ToTitlecase(
				835	register const Py_UNICODE ch /* Unicode character */
				836	);
				837
				838	extern DL_IMPORT(int) _PyUnicode_ToDecimalDigit(
				839	register const Py_UNICODE ch /* Unicode character */
				840	);
				841
				842	extern DL_IMPORT(int) _PyUnicode_ToDigit(
				843	register const Py_UNICODE ch /* Unicode character */
				844	);
				845
				846	extern DL_IMPORT(double) _PyUnicode_ToNumeric(
				847	register const Py_UNICODE ch /* Unicode character */
				848	);
				849
				850	extern DL_IMPORT(int) _PyUnicode_IsDecimalDigit(
				851	register const Py_UNICODE ch /* Unicode character */
				852	);
				853
				854	extern DL_IMPORT(int) _PyUnicode_IsDigit(
				855	register const Py_UNICODE ch /* Unicode character */
				856	);
				857
				858	extern DL_IMPORT(int) _PyUnicode_IsNumeric(
				859	register const Py_UNICODE ch /* Unicode character */
				860	);
				861
				862	#ifdef __cplusplus
				863	}
				864	#endif
				865	#endif /* !Py_UNICODEOBJECT_H */