Blame - Include/unicodeobject.h - platform/external/python/cpython2

blob: 358c18af1ca548f34bc7e622fe32e246b0ad693f [file] [log] [blame]

Guido van Rossum	d822518	2000-03-10 22:33:05 +0000	[diff] [blame]	1	#ifndef Py_UNICODEOBJECT_H
				2	#define Py_UNICODEOBJECT_H
				3	#ifdef __cplusplus
				4	extern "C" {
				5	#endif
				6
				7	/*
				8
				9	Unicode implementation based on original code by Fredrik Lundh,
				10	modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
				11	Unicode Integration Proposal (see file Misc/unicode.txt).
				12
				13	(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
				14
				15
				16	Original header:
				17	--------------------------------------------------------------------
				18
				19	* Yet another Unicode string type for Python. This type supports the
				20	* 16-bit Basic Multilingual Plane (BMP) only.
				21	*
				22	* Written by Fredrik Lundh, January 1999.
				23	*
				24	* Copyright (c) 1999 by Secret Labs AB.
				25	* Copyright (c) 1999 by Fredrik Lundh.
				26	*
				27	* fredrik@pythonware.com
				28	* http://www.pythonware.com
				29	*
				30	* --------------------------------------------------------------------
				31	* This Unicode String Type is
				32	*
				33	* Copyright (c) 1999 by Secret Labs AB
				34	* Copyright (c) 1999 by Fredrik Lundh
				35	*
				36	* By obtaining, using, and/or copying this software and/or its
				37	* associated documentation, you agree that you have read, understood,
				38	* and will comply with the following terms and conditions:
				39	*
				40	* Permission to use, copy, modify, and distribute this software and its
				41	* associated documentation for any purpose and without fee is hereby
				42	* granted, provided that the above copyright notice appears in all
				43	* copies, and that both that copyright notice and this permission notice
				44	* appear in supporting documentation, and that the name of Secret Labs
				45	* AB or the author not be used in advertising or publicity pertaining to
				46	* distribution of the software without specific, written prior
				47	* permission.
				48	*
				49	* SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
				50	* THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
				51	* FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
				52	* ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
				53	* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
				54	* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
				55	* OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
				56	* -------------------------------------------------------------------- */
				57
				58	#include "ctype.h"
				59
				60	/* === Internal API ======================================================= */
				61
				62	/* --- Internal Unicode Format -------------------------------------------- */
				63
				64	/* Set these flags if the platform has "wchar.h", "wctype.h" and the
				65	wchar_t type is a 16-bit unsigned type */
				66	/* #define HAVE_WCHAR_H */
				67	/* #define HAVE_USABLE_WCHAR_T */
				68
				69	/* Defaults for various platforms */
				70	#ifndef HAVE_USABLE_WCHAR_T
				71	/* Only choose a platform default when the flag was not set beforehand. */
				72	/* Windows has a usable wchar_t type */
				73	# if defined(MS_WIN32)
				74	# define HAVE_USABLE_WCHAR_T
				75	# endif
				76
				77	#endif
				78
				79	/* If the compiler provides a wchar_t type we try to support it
				80	through the interface functions PyUnicode_FromWideChar() and
				81	PyUnicode_AsWideChar(). */
				82	/* A usable wchar_t also switches on HAVE_WCHAR_H. */
				83	#ifdef HAVE_USABLE_WCHAR_T
				84	# define HAVE_WCHAR_H
				85	#endif
				86	/* Include the wide character header whenever HAVE_WCHAR_H is defined. */
				87	#ifdef HAVE_WCHAR_H
				88	# include "wchar.h"
				89	#endif
				90
				91	#ifdef HAVE_USABLE_WCHAR_T
				92
				93	/* If the compiler defines wchar_t as a 16-bit unsigned type we can
				94	use the compiler type directly. Works fine with all modern Windows
				95	platforms. */
				96
				97	typedef wchar_t Py_UNICODE;
				98
				99	#else /* !HAVE_USABLE_WCHAR_T */
				100
				101	/* Use if you have a standard ANSI compiler, without wchar_t support.
				102	If a short is not 16 bits on your platform, you have to fix the
				103	typedef below, or the module initialization code will complain. */
				104
				105	typedef unsigned short Py_UNICODE;
				106
				107	#endif /* HAVE_USABLE_WCHAR_T */
				108
				109	/* --- Internal Unicode Operations ---------------------------------------- */
				110
				111	/* If you want Python to use the compiler's wctype.h functions instead
				112	of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS.
				113	This reduces the interpreter's code size. */
				114
				115	#if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)
				116	/* Variant 1: space, lower/upper tests and lower/upper mappings go to wctype.h. */
				117	#include "wctype.h"
				118
				119	#define Py_UNICODE_ISSPACE(ch) iswspace(ch)
				120	/* Titlecase and linebreak tests stay on Python's own functions. */
				121	#define Py_UNICODE_ISLOWER(ch) iswlower(ch)
				122	#define Py_UNICODE_ISUPPER(ch) iswupper(ch)
				123	#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
				124	#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
				125	/* Titlecase conversion stays on Python's own function. */
				126	#define Py_UNICODE_TOLOWER(ch) towlower(ch)
				127	#define Py_UNICODE_TOUPPER(ch) towupper(ch)
				128	#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
				129	/* Digit and numeric classification/conversion are identical in both variants. */
				130	#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
				131	#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
				132	#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
				133
				134	#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
				135	#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
				136	#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
				137
				138	#else
				139	/* Variant 2: every macro maps to a _PyUnicode_* function declared further down in this header. */
				140	#define Py_UNICODE_ISSPACE(ch) _PyUnicode_IsWhitespace(ch)
				141
				142	#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
				143	#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
				144	#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
				145	#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
				146
				147	#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
				148	#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
				149	#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
				150
				151	#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
				152	#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
				153	#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
				154
				155	#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
				156	#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
				157	#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
				158
				159	#endif /* HAVE_USABLE_WCHAR_T && WANT_WCTYPE_FUNCTIONS */
				160	/* Copy length Py_UNICODE units (not bytes) from source to target via memcpy. */
				161	#define Py_UNICODE_COPY(target, source, length)\
				162	(memcpy((target), (source), sizeof(Py_UNICODE)*(length)))
				163	/* Store value into the first length Py_UNICODE slots of target. target and value are evaluated exactly once; the trailing-underscore locals avoid capturing caller variables such as i. */
				164	#define Py_UNICODE_FILL(target, value, length) do\
				165	{int i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value); for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;}\
				166	while (0)
				167	/* 1 when substring's buffer is bytewise equal to string's buffer starting at offset, else 0; no bounds check is done here. */
				168	#define Py_UNICODE_MATCH(string, offset, substring)\
				169	(memcmp((string)->str + (offset), (substring)->str,\
				170	(substring)->length*sizeof(Py_UNICODE)) == 0)
				171
				172	/* --- Unicode Type ------------------------------------------------------- */
				173
				174	typedef struct {
				175	PyObject_HEAD
				176	int length; /* Length of raw Unicode data in buffer */
				177	Py_UNICODE *str; /* Raw Unicode buffer */
				178	long hash; /* Hash value; -1 if not set */
				179	PyObject *utf8str; /* UTF-8 encoded version as Python string,
				180	or NULL */
				181	} PyUnicodeObject;
				182
				183	extern DL_IMPORT(PyTypeObject) PyUnicode_Type;
				184	/* Exact type test: true only when op's ob_type is PyUnicode_Type itself. */
				185	#define PyUnicode_Check(op) (((op)->ob_type == &PyUnicode_Type))
				186
				187	/* Fast access macros: op is cast to PyUnicodeObject * without any type check. GET_SIZE is the length field, GET_DATA_SIZE that length times sizeof(Py_UNICODE); AS_UNICODE / AS_DATA give the str buffer as Py_UNICODE * / const char *. */
				188	#define PyUnicode_GET_SIZE(op) \
				189	(((PyUnicodeObject *)(op))->length)
				190	#define PyUnicode_GET_DATA_SIZE(op) \
				191	(((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE))
				192	#define PyUnicode_AS_UNICODE(op) \
				193	(((PyUnicodeObject *)(op))->str)
				194	#define PyUnicode_AS_DATA(op) \
				195	((const char *)((PyUnicodeObject *)(op))->str)
				196
				197	/* --- Constants ---------------------------------------------------------- */
				198
				199	/* This Unicode character will be used as replacement character during
				200	decoding if the errors argument is set to "replace". Note: the
				201	Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
				202	Unicode 3.0. */
				203
				204	#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)
				205
				206	/* === Public API ========================================================= */
				207
				208	/* --- Plain Py_UNICODE --------------------------------------------------- */
				209
				210	/* Create a Unicode Object from the Py_UNICODE buffer u of the given
				211	size. u may be NULL which causes the contents to be undefined. It
				212	is the user's responsibility to fill in the needed data.
				213
				214	The buffer is copied into the new object. */
				215
				216	extern DL_IMPORT(PyObject*) PyUnicode_FromUnicode(
				217	const Py_UNICODE *u, /* Unicode buffer */
				218	int size /* size of buffer */
				219	);
				220
				221	/* Return a read-only pointer to the Unicode object's internal
				222	Py_UNICODE buffer. */
				223
				224	extern DL_IMPORT(Py_UNICODE *) PyUnicode_AsUnicode(
				225	PyObject *unicode /* Unicode object */
				226	);
				227
				228	/* Get the length of the Unicode object. */
				229
				230	extern DL_IMPORT(int) PyUnicode_GetSize(
				231	PyObject *unicode /* Unicode object */
				232	);
				233
				234	/* Coerce obj to a Unicode object and return a reference with
				235	incremented refcount.
				236
				237	Coercion is done in the following way:
				238
				239	1. Unicode objects are passed back as-is with incremented
				240	refcount.
				241
				242	2. String and other char buffer compatible objects are decoded
				243	under the assumptions that they contain UTF-8 data. Decoding
				244	is done in "strict" mode.
				245
				246	3. All other objects raise an exception.
				247
				248	The API returns NULL in case of an error. The caller is responsible
				249	for decref'ing the returned objects.
				250
				251	*/
				252
				253	extern DL_IMPORT(PyObject*) PyUnicode_FromObject(
				254	register PyObject *obj /* Object */
				255	);
				256
				257	/* --- wchar_t support for platforms which support it --------------------- */
				258
				259	#ifdef HAVE_WCHAR_H
				260
				261	/* Create a Unicode Object from the wchar_t buffer w of the given
				262	size.
				263
				264	The buffer is copied into the new object. */
				265
				266	extern DL_IMPORT(PyObject*) PyUnicode_FromWideChar(
				267	register const wchar_t *w, /* wchar_t buffer */
				268	int size /* size of buffer */
				269	);
				270
				271	/* Copies the Unicode Object contents into the wchar_t buffer w. At
				272	most size wchar_t characters are copied.
				273
				274	Returns the number of wchar_t characters copied or -1 in case of an
				275	error. */
				276
				277	extern DL_IMPORT(int) PyUnicode_AsWideChar(
				278	PyUnicodeObject *unicode, /* Unicode object */
				279	register wchar_t *w, /* wchar_t buffer */
				280	int size /* size of buffer */
				281	);
				282
				283	#endif
				284
				285	/* === Builtin Codecs =====================================================
				286
				287	Many of these APIs take two arguments encoding and errors. These
				288	parameters encoding and errors have the same semantics as the ones
				289	of the builtin unicode() API.
				290
				291	Setting encoding to NULL causes the default encoding to be used
				292	which is UTF-8.
				293
				294	Error handling is set by errors which may also be set to NULL
				295	meaning to use the default handling defined for the codec. Default
				296	error handling for all builtin codecs is "strict" (ValueErrors are
				297	raised).
				298
				299	The codecs all use a similar interface. Only deviations from the
				300	generic ones are documented.
				301
				302	*/
				303
				304	/* --- Generic Codecs ----------------------------------------------------- */
				305
				306	/* Create a Unicode object by decoding the encoded string s of the
				307	given size. */
				308
				309	extern DL_IMPORT(PyObject*) PyUnicode_Decode(
				310	const char *s, /* encoded string */
				311	int size, /* size of buffer */
				312	const char *encoding, /* encoding */
				313	const char *errors /* error handling */
				314	);
				315
				316	/* Encodes a Py_UNICODE buffer of the given size and returns a
				317	Python string object. */
				318
				319	extern DL_IMPORT(PyObject*) PyUnicode_Encode(
				320	const Py_UNICODE *s, /* Unicode char buffer */
				321	int size, /* number of Py_UNICODE chars to encode */
				322	const char *encoding, /* encoding */
				323	const char *errors /* error handling */
				324	);
				325
				326	/* Encodes a Unicode object and returns the result as Python string
				327	object. */
				328
				329	extern DL_IMPORT(PyObject*) PyUnicode_AsEncodedString(
				330	PyObject *unicode, /* Unicode object */
				331	const char *encoding, /* encoding */
				332	const char *errors /* error handling */
				333	);
				334
				335	/* --- UTF-8 Codecs ------------------------------------------------------- */
				336
				337	extern DL_IMPORT(PyObject*) PyUnicode_DecodeUTF8(
				338	const char *string, /* UTF-8 encoded string */
				339	int length, /* size of string */
				340	const char *errors /* error handling */
				341	);
				342
				343	extern DL_IMPORT(PyObject*) PyUnicode_AsUTF8String(
				344	PyObject *unicode /* Unicode object */
				345	);
				346
				347	extern DL_IMPORT(PyObject*) PyUnicode_EncodeUTF8(
				348	const Py_UNICODE *data, /* Unicode char buffer */
				349	int length, /* number of Py_UNICODE chars to encode */
				350	const char *errors /* error handling */
				351	);
				352
				353	/* --- UTF-16 Codecs ------------------------------------------------------ */
				354
				355	/* Decodes length bytes from a UTF-16 encoded buffer string and return
				356	the corresponding Unicode object.
				357
				358	errors (if non-NULL) defines the error handling. It defaults
				359	to "strict".
				360
				361	If byteorder is non-NULL, the decoder starts decoding using the
				362	given byte order:
				363
				364	*byteorder == -1: little endian
				365	*byteorder == 0: native order
				366	*byteorder == 1: big endian
				367
				368	and then switches according to all BOM marks it finds in the input
				369	data. BOM marks are not copied into the resulting Unicode string.
				370	After completion, *byteorder is set to the current byte order at
				371	the end of input data.
				372
				373	If byteorder is NULL, the codec starts in native order mode.
				374
				375	*/
				376
				377	extern DL_IMPORT(PyObject*) PyUnicode_DecodeUTF16(
				378	const char *string, /* UTF-16 encoded string */
				379	int length, /* size of string */
				380	const char *errors, /* error handling */
				381	int *byteorder /* pointer to byteorder to use
				382	0=native;-1=LE,1=BE; updated on
				383	exit */
				384	);
				385
				386	/* Returns a Python string using the UTF-16 encoding in native byte
				387	order. The string always starts with a BOM mark. */
				388
				389	extern DL_IMPORT(PyObject*) PyUnicode_AsUTF16String(
				390	PyObject *unicode /* Unicode object */
				391	);
				392
				393	/* Returns a Python string object holding the UTF-16 encoded value of
				394	the Unicode data in s.
				395
				396	If byteorder is not 0, output is written according to the following
				397	byte order:
				398
				399	byteorder == -1: little endian
				400	byteorder == 0: native byte order (writes a BOM mark)
				401	byteorder == 1: big endian
				402
				403	If byteorder is 0, the output string will always start with the
				404	Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
				405	prepended.
				406
				407	Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
				408	UCS-2. This trick makes it possible to add full UTF-16 capabilities
				409	at a later point without compromising the APIs.
				410
				411	*/
				412
				413	extern DL_IMPORT(PyObject*) PyUnicode_EncodeUTF16(
				414	const Py_UNICODE *data, /* Unicode char buffer */
				415	int length, /* number of Py_UNICODE chars to encode */
				416	const char *errors, /* error handling */
				417	int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
				418	);
				419
				420	/* --- Unicode-Escape Codecs ---------------------------------------------- */
				421
				422	extern DL_IMPORT(PyObject*) PyUnicode_DecodeUnicodeEscape(
				423	const char *string, /* Unicode-Escape encoded string */
				424	int length, /* size of string */
				425	const char *errors /* error handling */
				426	);
				427
				428	extern DL_IMPORT(PyObject*) PyUnicode_AsUnicodeEscapeString(
				429	PyObject *unicode /* Unicode object */
				430	);
				431
				432	extern DL_IMPORT(PyObject*) PyUnicode_EncodeUnicodeEscape(
				433	const Py_UNICODE *data, /* Unicode char buffer */
				434	int length /* Number of Py_UNICODE chars to encode */
				435	);
				436
				437	/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
				438
				439	extern DL_IMPORT(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
				440	const char *string, /* Raw-Unicode-Escape encoded string */
				441	int length, /* size of string */
				442	const char *errors /* error handling */
				443	);
				444
				445	extern DL_IMPORT(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
				446	PyObject *unicode /* Unicode object */
				447	);
				448
				449	extern DL_IMPORT(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
				450	const Py_UNICODE *data, /* Unicode char buffer */
				451	int length /* Number of Py_UNICODE chars to encode */
				452	);
				453
				454	/* --- Latin-1 Codecs -----------------------------------------------------
				455
				456	Note: Latin-1 corresponds to the first 256 Unicode ordinals.
				457
				458	*/
				459
				460	extern DL_IMPORT(PyObject*) PyUnicode_DecodeLatin1(
				461	const char *string, /* Latin-1 encoded string */
				462	int length, /* size of string */
				463	const char *errors /* error handling */
				464	);
				465
				466	extern DL_IMPORT(PyObject*) PyUnicode_AsLatin1String(
				467	PyObject *unicode /* Unicode object */
				468	);
				469
				470	extern DL_IMPORT(PyObject*) PyUnicode_EncodeLatin1(
				471	const Py_UNICODE *data, /* Unicode char buffer */
				472	int length, /* Number of Py_UNICODE chars to encode */
				473	const char *errors /* error handling */
				474	);
				475
				476	/* --- ASCII Codecs -------------------------------------------------------
				477
				478	Only 7-bit ASCII data is accepted. All other codes generate errors.
				479
				480	*/
				481
				482	extern DL_IMPORT(PyObject*) PyUnicode_DecodeASCII(
				483	const char *string, /* ASCII encoded string */
				484	int length, /* size of string */
				485	const char *errors /* error handling */
				486	);
				487
				488	extern DL_IMPORT(PyObject*) PyUnicode_AsASCIIString(
				489	PyObject *unicode /* Unicode object */
				490	);
				491
				492	extern DL_IMPORT(PyObject*) PyUnicode_EncodeASCII(
				493	const Py_UNICODE *data, /* Unicode char buffer */
				494	int length, /* Number of Py_UNICODE chars to encode */
				495	const char *errors /* error handling */
				496	);
				497
				498	/* --- Character Map Codecs -----------------------------------------------
				499
				500	This codec uses mappings to encode and decode characters.
				501
				502	Decoding mappings must map single string characters to single
				503	Unicode characters, integers (which are then interpreted as Unicode
				504	ordinals) or None (meaning "undefined mapping" and causing an
				505	error).
				506
				507	Encoding mappings must map single Unicode characters to single
				508	string characters, integers (which are then interpreted as Latin-1
				509	ordinals) or None (meaning "undefined mapping" and causing an
				510	error).
				511
				512	If a character lookup fails with a LookupError, the character is
				513	copied as-is meaning that its ordinal value will be interpreted as
				514	Unicode or Latin-1 ordinal resp. Because of this mappings only need
				515	to contain those mappings which map characters to different code
				516	points.
				517
				518	*/
				519
				520	extern DL_IMPORT(PyObject*) PyUnicode_DecodeCharmap(
				521	const char *string, /* Encoded string */
				522	int length, /* size of string */
				523	PyObject *mapping, /* character mapping
				524	(char ordinal -> unicode ordinal) */
				525	const char *errors /* error handling */
				526	);
				527
				528	extern DL_IMPORT(PyObject*) PyUnicode_AsCharmapString(
				529	PyObject *unicode, /* Unicode object */
				530	PyObject *mapping /* character mapping
				531	(unicode ordinal -> char ordinal) */
				532	);
				533
				534	extern DL_IMPORT(PyObject*) PyUnicode_EncodeCharmap(
				535	const Py_UNICODE *data, /* Unicode char buffer */
				536	int length, /* Number of Py_UNICODE chars to encode */
				537	PyObject *mapping, /* character mapping
				538	(unicode ordinal -> char ordinal) */
				539	const char *errors /* error handling */
				540	);
				541
				542	/* Translate a Py_UNICODE buffer of the given length by applying a
				543	character mapping table to it and return the resulting Unicode
				544	object.
				545
				546	The mapping table must map Unicode ordinal integers to Unicode
				547	ordinal integers or None (causing deletion of the character).
				548
				549	Mapping tables may be dictionaries or sequences. Unmapped character
				550	ordinals (ones which cause a LookupError) are left untouched and
				551	are copied as-is.
				552
				553	*/
				554
				555	extern DL_IMPORT(PyObject *) PyUnicode_TranslateCharmap(
				556	const Py_UNICODE *data, /* Unicode char buffer */
				557	int length, /* Number of Py_UNICODE chars to encode */
				558	PyObject *table, /* Translate table */
				559	const char *errors /* error handling */
				560	);
				561
				562	/* --- Methods & Slots ----------------------------------------------------
				563
				564	These are capable of handling Unicode objects and strings on input
				565	(we refer to them as strings in the descriptions) and return
				566	Unicode objects or integers as appropriate. */
				567
				568	/* Concat two strings giving a new Unicode string. */
				569
				570	extern DL_IMPORT(PyObject*) PyUnicode_Concat(
				571	PyObject *left, /* Left string */
				572	PyObject *right /* Right string */
				573	);
				574
				575	/* Split a string giving a list of Unicode strings.
				576
				577	If sep is NULL, splitting will be done at all whitespace
				578	substrings. Otherwise, splits occur at the given separator.
				579
				580	At most maxsplit splits will be done. If negative, no limit is set.
				581
				582	Separators are not included in the resulting list.
				583
				584	*/
				585
				586	extern DL_IMPORT(PyObject*) PyUnicode_Split(
				587	PyObject *s, /* String to split */
				588	PyObject *sep, /* String separator */
				589	int maxsplit /* Maxsplit count */
				590	);
				591
				592	/* Ditto, but split at line breaks.
				593
				594	CRLF is considered to be one line break. Line breaks are not
				595	included in the resulting list. */
				596
				597	extern DL_IMPORT(PyObject*) PyUnicode_Splitlines(
				598	PyObject *s, /* String to split */
				599	int maxsplit /* Maxsplit count */
				600	);
				601
				602	/* Translate a string by applying a character mapping table to it and
				603	return the resulting Unicode object.
				604
				605	The mapping table must map Unicode ordinal integers to Unicode
				606	ordinal integers or None (causing deletion of the character).
				607
				608	Mapping tables may be dictionaries or sequences. Unmapped character
				609	ordinals (ones which cause a LookupError) are left untouched and
				610	are copied as-is.
				611
				612	*/
				613
				614	extern DL_IMPORT(PyObject *) PyUnicode_Translate(
				615	PyObject *str, /* String */
				616	PyObject *table, /* Translate table */
				617	const char *errors /* error handling */
				618	);
				619
				620	/* Join a sequence of strings using the given separator and return
				621	the resulting Unicode string. */
				622
				623	extern DL_IMPORT(PyObject*) PyUnicode_Join(
				624	PyObject *separator, /* Separator string */
				625	PyObject *seq /* Sequence object */
				626	);
				627
				628	/* Return 1 if substr matches str[start:end] at the given tail end, 0
				629	otherwise. */
				630
				631	extern DL_IMPORT(int) PyUnicode_Tailmatch(
				632	PyObject *str, /* String */
				633	PyObject *substr, /* Prefix or Suffix string */
				634	int start, /* Start index */
				635	int end, /* Stop index */
				636	int direction /* Tail end: -1 prefix, +1 suffix */
				637	);
				638
				639	/* Return the first position of substr in str[start:end] using the
				640	given search direction or -1 if not found. */
				641
				642	extern DL_IMPORT(int) PyUnicode_Find(
				643	PyObject *str, /* String */
				644	PyObject *substr, /* Substring to find */
				645	int start, /* Start index */
				646	int end, /* Stop index */
				647	int direction /* Find direction: +1 forward, -1 backward */
				648	);
				649
				650	/* Count the number of occurrences of substr in str[start:end]. */
				651
				652	extern DL_IMPORT(int) PyUnicode_Count(
				653	PyObject *str, /* String */
				654	PyObject *substr, /* Substring to count */
				655	int start, /* Start index */
				656	int end /* Stop index */
				657	);
				658
				659	/* Replace at most maxcount occurrences of substr in str with replstr
				660	and return the resulting Unicode object. */
				661
				662	extern DL_IMPORT(PyObject *) PyUnicode_Replace(
				663	PyObject *str, /* String */
				664	PyObject *substr, /* Substring to find */
				665	PyObject *replstr, /* Substring to replace */
				666	int maxcount /* Max. number of replacements to apply;
				667	-1 = all */
				668	);
				669
				670	/* Compare two strings and return -1, 0, 1 for less than, equal,
				671	greater than resp. */
				672
				673	extern DL_IMPORT(int) PyUnicode_Compare(
				674	PyObject *left, /* Left string */
				675	PyObject *right /* Right string */
				676	);
				677
				678	/* Apply an argument tuple or dictionary to a format string and return
				679	the resulting Unicode string. */
				680
				681	extern DL_IMPORT(PyObject *) PyUnicode_Format(
				682	PyObject *format, /* Format string */
				683	PyObject *args /* Argument tuple or dictionary */
				684	);
				685
				686	/* === Characters Type APIs =============================================== */
				687
				688	/* These should not be used directly. Use the Py_UNICODE_IS* and
				689	Py_UNICODE_TO* macros instead.
				690
				691	These APIs are implemented in Objects/unicodectype.c.
				692
				693	*/
				694	/* Py_UNICODE_ISLOWER expands to this unless the wctype.h variants are selected. */
				695	extern DL_IMPORT(int) _PyUnicode_IsLowercase(
				696	register const Py_UNICODE ch /* Unicode character */
				697	);
				698	/* Py_UNICODE_ISUPPER expands to this unless the wctype.h variants are selected. */
				699	extern DL_IMPORT(int) _PyUnicode_IsUppercase(
				700	register const Py_UNICODE ch /* Unicode character */
				701	);
				702	/* Py_UNICODE_ISTITLE expands to this in both macro variants. */
				703	extern DL_IMPORT(int) _PyUnicode_IsTitlecase(
				704	register const Py_UNICODE ch /* Unicode character */
				705	);
				706	/* Py_UNICODE_ISSPACE expands to this unless the wctype.h variants are selected. */
				707	extern DL_IMPORT(int) _PyUnicode_IsWhitespace(
				708	register const Py_UNICODE ch /* Unicode character */
				709	);
				710	/* Py_UNICODE_ISLINEBREAK expands to this in both macro variants. */
				711	extern DL_IMPORT(int) _PyUnicode_IsLinebreak(
				712	register const Py_UNICODE ch /* Unicode character */
				713	);
				714	/* Py_UNICODE_TOLOWER expands to this unless the wctype.h variants are selected. */
				715	extern DL_IMPORT(Py_UNICODE) _PyUnicode_ToLowercase(
				716	register const Py_UNICODE ch /* Unicode character */
				717	);
				718	/* Py_UNICODE_TOUPPER expands to this unless the wctype.h variants are selected. */
				719	extern DL_IMPORT(Py_UNICODE) _PyUnicode_ToUppercase(
				720	register const Py_UNICODE ch /* Unicode character */
				721	);
				722	/* Py_UNICODE_TOTITLE expands to this in both macro variants. */
				723	extern DL_IMPORT(Py_UNICODE) _PyUnicode_ToTitlecase(
				724	register const Py_UNICODE ch /* Unicode character */
				725	);
				726	/* Py_UNICODE_TODECIMAL expands to this in both macro variants. */
				727	extern DL_IMPORT(int) _PyUnicode_ToDecimalDigit(
				728	register const Py_UNICODE ch /* Unicode character */
				729	);
				730	/* Py_UNICODE_TODIGIT expands to this in both macro variants. */
				731	extern DL_IMPORT(int) _PyUnicode_ToDigit(
				732	register const Py_UNICODE ch /* Unicode character */
				733	);
				734	/* Py_UNICODE_TONUMERIC expands to this in both macro variants; note the double return type. */
				735	extern DL_IMPORT(double) _PyUnicode_ToNumeric(
				736	register const Py_UNICODE ch /* Unicode character */
				737	);
				738	/* Py_UNICODE_ISDECIMAL expands to this in both macro variants. */
				739	extern DL_IMPORT(int) _PyUnicode_IsDecimalDigit(
				740	register const Py_UNICODE ch /* Unicode character */
				741	);
				742	/* Py_UNICODE_ISDIGIT expands to this in both macro variants. */
				743	extern DL_IMPORT(int) _PyUnicode_IsDigit(
				744	register const Py_UNICODE ch /* Unicode character */
				745	);
				746	/* Py_UNICODE_ISNUMERIC expands to this in both macro variants. */
				747	extern DL_IMPORT(int) _PyUnicode_IsNumeric(
				748	register const Py_UNICODE ch /* Unicode character */
				749	);
				750
				751	#ifdef __cplusplus
				752	}
				753	#endif
				754	#endif /* !Py_UNICODEOBJECT_H */