blob: 025c8b765466ec1a8508e90073c2d1be4241f012 [file] [log] [blame]
Guido van Rossumd8225182000-03-10 22:33:05 +00001#ifndef Py_UNICODEOBJECT_H
2#define Py_UNICODEOBJECT_H
Guido van Rossumd8225182000-03-10 22:33:05 +00003
4/*
5
6Unicode implementation based on original code by Fredrik Lundh,
7modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
8Unicode Integration Proposal (see file Misc/unicode.txt).
9
Guido van Rossum16b1ad92000-08-03 16:24:25 +000010Copyright (c) Corporation for National Research Initiatives.
Guido van Rossumd8225182000-03-10 22:33:05 +000011
12
13 Original header:
14 --------------------------------------------------------------------
15
16 * Yet another Unicode string type for Python. This type supports the
17 * 16-bit Basic Multilingual Plane (BMP) only.
18 *
19 * Written by Fredrik Lundh, January 1999.
20 *
21 * Copyright (c) 1999 by Secret Labs AB.
22 * Copyright (c) 1999 by Fredrik Lundh.
23 *
24 * fredrik@pythonware.com
25 * http://www.pythonware.com
26 *
27 * --------------------------------------------------------------------
28 * This Unicode String Type is
29 *
30 * Copyright (c) 1999 by Secret Labs AB
31 * Copyright (c) 1999 by Fredrik Lundh
32 *
33 * By obtaining, using, and/or copying this software and/or its
34 * associated documentation, you agree that you have read, understood,
35 * and will comply with the following terms and conditions:
36 *
37 * Permission to use, copy, modify, and distribute this software and its
38 * associated documentation for any purpose and without fee is hereby
39 * granted, provided that the above copyright notice appears in all
40 * copies, and that both that copyright notice and this permission notice
41 * appear in supporting documentation, and that the name of Secret Labs
42 * AB or the author not be used in advertising or publicity pertaining to
43 * distribution of the software without specific, written prior
44 * permission.
45 *
46 * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
47 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
48 * FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
49 * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
50 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
51 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
52 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
53 * -------------------------------------------------------------------- */
54
55#include "ctype.h"
56
57/* === Internal API ======================================================= */
58
59/* --- Internal Unicode Format -------------------------------------------- */
60
Martin v. Löwis339d0f72001-08-17 18:39:25 +000061#ifndef Py_USING_UNICODE
62
63#define PyUnicode_Check(op) 0
64
65#else
66
/* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is
   properly set, but the default rules below don't set it.  I'll
   sort this out some other day -- fredrik@pythonware.com */
70
71#ifndef Py_UNICODE_SIZE
72#error Must define Py_UNICODE_SIZE
73#endif
74
Fredrik Lundh8f455852001-06-27 18:59:43 +000075/* Setting Py_UNICODE_WIDE enables UCS-4 storage. Otherwise, Unicode
76 strings are stored as UCS-2 (with limited support for UTF-16) */
77
78#if Py_UNICODE_SIZE >= 4
79#define Py_UNICODE_WIDE
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +000080#endif
Fredrik Lundh1294ad02001-06-26 17:17:07 +000081
Guido van Rossumd8225182000-03-10 22:33:05 +000082/* Set these flags if the platform has "wchar.h", "wctype.h" and the
83 wchar_t type is a 16-bit unsigned type */
84/* #define HAVE_WCHAR_H */
85/* #define HAVE_USABLE_WCHAR_T */
86
87/* Defaults for various platforms */
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +000088#ifndef PY_UNICODE_TYPE
Guido van Rossumd8225182000-03-10 22:33:05 +000089
Fredrik Lundh1294ad02001-06-26 17:17:07 +000090/* Windows has a usable wchar_t type (unless we're using UCS-4) */
Fredrik Lundh8f455852001-06-27 18:59:43 +000091# if defined(MS_WIN32) && Py_UNICODE_SIZE == 2
Guido van Rossumd8225182000-03-10 22:33:05 +000092# define HAVE_USABLE_WCHAR_T
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +000093# define PY_UNICODE_TYPE wchar_t
94# endif
95
Fredrik Lundh8f455852001-06-27 18:59:43 +000096# if defined(Py_UNICODE_WIDE)
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +000097# define PY_UNICODE_TYPE Py_UCS4
Guido van Rossumd8225182000-03-10 22:33:05 +000098# endif
99
100#endif
101
102/* If the compiler provides a wchar_t type we try to support it
103 through the interface functions PyUnicode_FromWideChar() and
104 PyUnicode_AsWideChar(). */
105
106#ifdef HAVE_USABLE_WCHAR_T
Marc-André Lemburg1a731c62000-08-11 11:43:10 +0000107# ifndef HAVE_WCHAR_H
108# define HAVE_WCHAR_H
109# endif
Guido van Rossumd8225182000-03-10 22:33:05 +0000110#endif
111
112#ifdef HAVE_WCHAR_H
Guido van Rossum24bdb042000-03-28 20:29:59 +0000113/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
114# ifdef _HAVE_BSDI
115# include <time.h>
116# endif
Guido van Rossumd8225182000-03-10 22:33:05 +0000117# include "wchar.h"
118#endif
119
/*
 * Use this typedef when you need to represent a UTF-16 surrogate pair
 * as a single unsigned integer.
 *
 * Py_UCS4 is unsigned int when SIZEOF_INT >= 4, otherwise unsigned long
 * when SIZEOF_LONG >= 4.  If neither test succeeds, no typedef is
 * emitted at all.
 * NOTE(review): SIZEOF_INT / SIZEOF_LONG are presumably byte counts
 * supplied by the build configuration -- confirm.
 */
#if SIZEOF_INT >= 4
typedef unsigned int Py_UCS4;
#elif SIZEOF_LONG >= 4
typedef unsigned long Py_UCS4;
#endif
129
/* Element type of the raw Unicode buffers used throughout this header.
   PY_UNICODE_TYPE has to be defined as a macro by the time this line is
   reached, otherwise the typedef cannot compile. */
typedef PY_UNICODE_TYPE Py_UNICODE;
Marc-André Lemburg43279102000-07-07 09:01:41 +0000131
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000132/* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */
133
/* Unicode API names are mangled to ensure that UCS-2 and UCS-4 builds
   produce different external names.  Combining a Python interpreter
   with extensions that were compiled under a different Unicode width
   assumption therefore causes import errors. */
138
139#ifndef Py_UNICODE_WIDE
140
141# define PyUnicode_AsASCIIString PyUnicodeUCS2_AsASCIIString
142# define PyUnicode_AsCharmapString PyUnicodeUCS2_AsCharmapString
143# define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString
144# define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String
145# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString
146# define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String
147# define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String
148# define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
149# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString
150# define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar
151# define PyUnicode_Compare PyUnicodeUCS2_Compare
152# define PyUnicode_Concat PyUnicodeUCS2_Concat
153# define PyUnicode_Contains PyUnicodeUCS2_Contains
154# define PyUnicode_Count PyUnicodeUCS2_Count
155# define PyUnicode_Decode PyUnicodeUCS2_Decode
156# define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
157# define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
158# define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
159# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
160# define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
161# define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
162# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape
163# define PyUnicode_Encode PyUnicodeUCS2_Encode
164# define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII
165# define PyUnicode_EncodeCharmap PyUnicodeUCS2_EncodeCharmap
166# define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal
167# define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1
168# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape
169# define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16
170# define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8
171# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape
172# define PyUnicode_Find PyUnicodeUCS2_Find
173# define PyUnicode_Format PyUnicodeUCS2_Format
174# define PyUnicode_FromEncodedObject PyUnicodeUCS2_FromEncodedObject
175# define PyUnicode_FromObject PyUnicodeUCS2_FromObject
176# define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
177# define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
178# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
179# define PyUnicode_GetMax PyUnicodeUCS2_GetMax
180# define PyUnicode_GetSize PyUnicodeUCS2_GetSize
181# define PyUnicode_Join PyUnicodeUCS2_Join
182# define PyUnicode_Replace PyUnicodeUCS2_Replace
183# define PyUnicode_Resize PyUnicodeUCS2_Resize
184# define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding
185# define PyUnicode_Split PyUnicodeUCS2_Split
186# define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines
187# define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch
188# define PyUnicode_Translate PyUnicodeUCS2_Translate
189# define PyUnicode_TranslateCharmap PyUnicodeUCS2_TranslateCharmap
190# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString
191# define _PyUnicode_Fini _PyUnicodeUCS2_Fini
192# define _PyUnicode_Init _PyUnicodeUCS2_Init
193# define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha
194# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit
195# define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit
196# define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak
197# define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase
198# define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric
199# define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase
200# define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase
201# define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace
202# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit
203# define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit
204# define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase
205# define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric
206# define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase
207# define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase
208
209#else
210
211# define PyUnicode_AsASCIIString PyUnicodeUCS4_AsASCIIString
212# define PyUnicode_AsCharmapString PyUnicodeUCS4_AsCharmapString
213# define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString
214# define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String
215# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString
216# define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String
217# define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String
218# define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
219# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString
220# define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar
221# define PyUnicode_Compare PyUnicodeUCS4_Compare
222# define PyUnicode_Concat PyUnicodeUCS4_Concat
223# define PyUnicode_Contains PyUnicodeUCS4_Contains
224# define PyUnicode_Count PyUnicodeUCS4_Count
225# define PyUnicode_Decode PyUnicodeUCS4_Decode
226# define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
227# define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
228# define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
229# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
230# define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
231# define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
232# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape
233# define PyUnicode_Encode PyUnicodeUCS4_Encode
234# define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII
235# define PyUnicode_EncodeCharmap PyUnicodeUCS4_EncodeCharmap
236# define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal
237# define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1
238# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape
239# define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16
240# define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8
241# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape
242# define PyUnicode_Find PyUnicodeUCS4_Find
243# define PyUnicode_Format PyUnicodeUCS4_Format
244# define PyUnicode_FromEncodedObject PyUnicodeUCS4_FromEncodedObject
245# define PyUnicode_FromObject PyUnicodeUCS4_FromObject
246# define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
247# define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
248# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
249# define PyUnicode_GetMax PyUnicodeUCS4_GetMax
250# define PyUnicode_GetSize PyUnicodeUCS4_GetSize
251# define PyUnicode_Join PyUnicodeUCS4_Join
252# define PyUnicode_Replace PyUnicodeUCS4_Replace
253# define PyUnicode_Resize PyUnicodeUCS4_Resize
254# define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding
255# define PyUnicode_Split PyUnicodeUCS4_Split
256# define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines
257# define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch
258# define PyUnicode_Translate PyUnicodeUCS4_Translate
259# define PyUnicode_TranslateCharmap PyUnicodeUCS4_TranslateCharmap
260# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString
261# define _PyUnicode_Fini _PyUnicodeUCS4_Fini
262# define _PyUnicode_Init _PyUnicodeUCS4_Init
263# define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha
264# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit
265# define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit
266# define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak
267# define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase
268# define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric
269# define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase
270# define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase
271# define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace
272# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit
273# define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit
274# define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase
275# define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric
276# define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase
277# define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase
278
279
280#endif
281
Guido van Rossumd8225182000-03-10 22:33:05 +0000282/* --- Internal Unicode Operations ---------------------------------------- */
283
284/* If you want Python to use the compiler's wctype.h functions instead
Barry Warsaw51ac5802000-03-20 16:36:48 +0000285 of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS or
286 configure Python using --with-ctype-functions. This reduces the
287 interpreter's code size. */
Guido van Rossumd8225182000-03-10 22:33:05 +0000288
289#if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)
290
291#include "wctype.h"
292
293#define Py_UNICODE_ISSPACE(ch) iswspace(ch)
294
295#define Py_UNICODE_ISLOWER(ch) iswlower(ch)
296#define Py_UNICODE_ISUPPER(ch) iswupper(ch)
297#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
298#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
299
300#define Py_UNICODE_TOLOWER(ch) towlower(ch)
301#define Py_UNICODE_TOUPPER(ch) towupper(ch)
302#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
303
304#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
305#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
306#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
307
308#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
309#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
310#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
311
Marc-André Lemburgf03e7412000-07-05 09:45:59 +0000312#define Py_UNICODE_ISALPHA(ch) iswalpha(ch)
313
Guido van Rossumd8225182000-03-10 22:33:05 +0000314#else
315
316#define Py_UNICODE_ISSPACE(ch) _PyUnicode_IsWhitespace(ch)
317
318#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
319#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
320#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
321#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
322
323#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
324#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
325#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
326
327#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
328#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
329#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
330
331#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
332#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
333#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
334
Marc-André Lemburgf03e7412000-07-05 09:45:59 +0000335#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
Guido van Rossumd8225182000-03-10 22:33:05 +0000336
Marc-André Lemburgf03e7412000-07-05 09:45:59 +0000337#endif
Marc-André Lemburga9c103b2000-07-03 10:52:13 +0000338
/* Non-zero if ch is alphabetic, or is classified as a decimal, digit
   or numeric character.  The tests run left to right and stop at the
   first hit; ch may therefore be evaluated up to four times. */
#define Py_UNICODE_ISALNUM(ch)                                      \
    ((Py_UNICODE_ISALPHA(ch) || Py_UNICODE_ISDECIMAL(ch)) ||        \
     (Py_UNICODE_ISDIGIT(ch) || Py_UNICODE_ISNUMERIC(ch)))
344
/* Copy length Py_UNICODE elements from source to target using memcpy();
   the expression yields memcpy()'s return value.  As with memcpy(),
   the two regions must not overlap. */
#define Py_UNICODE_COPY(target, source, length) \
    (memcpy((target), (source), sizeof(Py_UNICODE) * (length)))
347
/* Store value into the first length elements of the Py_UNICODE buffer
   target.

   target and value are evaluated exactly once, before the loop starts,
   so an argument expression that mentions a caller variable named `i`
   (or that has side effects) behaves as written at the call site.  The
   helper locals carry a trailing underscore so that they do not capture
   caller names.  length is still re-evaluated on every iteration.

   The do { ... } while (0) wrapper lets the macro be used like a single
   statement, e.g. in an unbraced if/else. */
#define Py_UNICODE_FILL(target, value, length) do\
    {int i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
     for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;}\
    while (0)
351
/* Non-zero if substring occurs in string at position offset.

   The first elements are compared directly as a cheap pre-check; only
   when they agree does memcmp() compare all (substring)->length
   elements.  Both objects are accessed through their ->str and ->length
   members and no bounds checking is done here.  The arguments are
   evaluated more than once. */
#define Py_UNICODE_MATCH(string, offset, substring) \
    (((string)->str[(offset)] == (substring)->str[0]) && \
     (memcmp(&(string)->str[(offset)], (substring)->str, \
             (substring)->length * sizeof(Py_UNICODE)) == 0))
356
Barry Warsaw51ac5802000-03-20 16:36:48 +0000357#ifdef __cplusplus
358extern "C" {
359#endif
360
Guido van Rossumd8225182000-03-10 22:33:05 +0000361/* --- Unicode Type ------------------------------------------------------- */
362
/* Layout of a Unicode object.  The struct starts with the PyObject_HEAD
   macro (defined outside this header) and is followed by the Unicode
   specific members below. */
typedef struct {
    PyObject_HEAD
    int length;                 /* Length of raw Unicode data in buffer;
                                   counted in Py_UNICODE elements
                                   (PyUnicode_GET_DATA_SIZE() multiplies
                                   it by sizeof(Py_UNICODE)) */
    Py_UNICODE *str;            /* Raw Unicode buffer */
    long hash;                  /* Hash value; -1 if not set */
    PyObject *defenc;           /* (Default) Encoded version as Python
                                   string, or NULL; this is used for
                                   implementing the buffer protocol */
} PyUnicodeObject;
372
/* Type object for Unicode objects; PyUnicode_Check() compares an
   object's ob_type against the address of this object. */
extern DL_IMPORT(PyTypeObject) PyUnicode_Type;
374
/* Non-zero when op's ob_type pointer is exactly &PyUnicode_Type.  This
   is a pointer identity test; op is dereferenced without a NULL
   check. */
#define PyUnicode_Check(op) ((op)->ob_type == &PyUnicode_Type)
376
377/* Fast access macros */
/* All four macros cast op to PyUnicodeObject * without any type check. */

/* Value of the length member. */
#define PyUnicode_GET_SIZE(op) (((PyUnicodeObject *)(op))->length)

/* length scaled by the size of one Py_UNICODE element. */
#define PyUnicode_GET_DATA_SIZE(op) \
    (PyUnicode_GET_SIZE(op) * sizeof(Py_UNICODE))

/* The str member, i.e. the raw Py_UNICODE buffer pointer. */
#define PyUnicode_AS_UNICODE(op) (((PyUnicodeObject *)(op))->str)

/* The same buffer pointer, viewed as const char *. */
#define PyUnicode_AS_DATA(op) ((const char *)PyUnicode_AS_UNICODE(op))
386
387/* --- Constants ---------------------------------------------------------- */
388
389/* This Unicode character will be used as replacement character during
390 decoding if the errors argument is set to "replace". Note: the
391 Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
392 Unicode 3.0. */
393
394#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)
395
396/* === Public API ========================================================= */
397
398/* --- Plain Py_UNICODE --------------------------------------------------- */
399
400/* Create a Unicode Object from the Py_UNICODE buffer u of the given
Marc-André Lemburg8155e0e2001-04-23 14:44:21 +0000401 size.
402
403 u may be NULL which causes the contents to be undefined. It is the
404 user's responsibility to fill in the needed data afterwards. Note
405 that modifying the Unicode object contents after construction is
406 only allowed if u was set to NULL.
Guido van Rossumd8225182000-03-10 22:33:05 +0000407
408 The buffer is copied into the new object. */
409
410extern DL_IMPORT(PyObject*) PyUnicode_FromUnicode(
411 const Py_UNICODE *u, /* Unicode buffer */
412 int size /* size of buffer */
413 );
414
415/* Return a read-only pointer to the Unicode object's internal
416 Py_UNICODE buffer. */
417
418extern DL_IMPORT(Py_UNICODE *) PyUnicode_AsUnicode(
419 PyObject *unicode /* Unicode object */
420 );
421
422/* Get the length of the Unicode object. */
423
424extern DL_IMPORT(int) PyUnicode_GetSize(
425 PyObject *unicode /* Unicode object */
426 );
427
Martin v. Löwisce9b5a52001-06-27 06:28:56 +0000428/* Get the maximum ordinal for a Unicode character. */
429extern DL_IMPORT(Py_UNICODE) PyUnicode_GetMax(void);
430
Guido van Rossum52c23592000-04-10 13:41:41 +0000431/* Resize an already allocated Unicode object to the new size length.
432
433 *unicode is modified to point to the new (resized) object and 0
434 returned on success.
435
436 This API may only be called by the function which also called the
437 Unicode constructor. The refcount on the object must be 1. Otherwise,
438 an error is returned.
439
440 Error handling is implemented as follows: an exception is set, -1
441 is returned and *unicode left untouched.
442
443*/
444
445extern DL_IMPORT(int) PyUnicode_Resize(
446 PyObject **unicode, /* Pointer to the Unicode object */
447 int length /* New length */
448 );
449
/* Coerce obj to a Unicode object and return a reference with
   *incremented* refcount.

   Coercion is done in the following way:

   1. Unicode objects are passed back as-is with incremented
      refcount.

   2. String and other char buffer compatible objects are decoded
      using the encoding and errors arguments.  Passing
      encoding == NULL and errors == "strict" is what
      PyUnicode_FromObject() does: the data is then assumed to use
      the current default encoding and decoding is done in "strict"
      mode.

   3. All other objects raise an exception.

   The API returns NULL in case of an error. The caller is responsible
   for decref'ing the returned objects.

*/

extern DL_IMPORT(PyObject*) PyUnicode_FromEncodedObject(
    register PyObject *obj,     /* Object */
    const char *encoding,       /* encoding */
    const char *errors          /* error handling */
    );
474
/* Shortcut for PyUnicode_FromEncodedObject(obj, NULL, "strict");
   which results in using the default encoding as basis for
   decoding the object.

   Coerces obj to a Unicode object and returns a reference with
   *incremented* refcount.

   The API returns NULL in case of an error. The caller is responsible
   for decref'ing the returned objects.

*/

extern DL_IMPORT(PyObject*) PyUnicode_FromObject(
    register PyObject *obj      /* Object */
    );
490
491/* --- wchar_t support for platforms which support it --------------------- */
492
493#ifdef HAVE_WCHAR_H
494
/* Create a Unicode Object from the wchar_t buffer w of the given
   size.

   The buffer is copied into the new object. */

extern DL_IMPORT(PyObject*) PyUnicode_FromWideChar(
    register const wchar_t *w,  /* wchar_t buffer */
    int size                    /* size of buffer */
    );
504
/* Copies the Unicode Object contents into the wchar_t buffer w. At
   most size wchar_t characters are copied.

   Returns the number of wchar_t characters copied or -1 in case of an
   error. */

extern DL_IMPORT(int) PyUnicode_AsWideChar(
    PyUnicodeObject *unicode,   /* Unicode object */
    register wchar_t *w,        /* wchar_t buffer */
    int size                    /* size of buffer */
    );
516
517#endif
518
519/* === Builtin Codecs =====================================================
520
521 Many of these APIs take two arguments encoding and errors. These
522 parameters encoding and errors have the same semantics as the ones
523 of the builtin unicode() API.
524
Fred Drakecb093fe2000-05-09 19:51:53 +0000525 Setting encoding to NULL causes the default encoding to be used.
Guido van Rossumd8225182000-03-10 22:33:05 +0000526
527 Error handling is set by errors which may also be set to NULL
528 meaning to use the default handling defined for the codec. Default
529 error handling for all builtin codecs is "strict" (ValueErrors are
530 raised).
531
    The codecs all use a similar interface. Only deviations from the
    generic ones are documented.
534
535*/
536
Fred Drakecb093fe2000-05-09 19:51:53 +0000537/* --- Manage the default encoding ---------------------------------------- */
538
/* Return a Python string holding the default encoded value of the
   Unicode object.

   The resulting string is cached in the Unicode object for subsequent
   usage by this function. The cached version is needed to implement
   the character buffer interface and will live (at least) as long as
   the Unicode object itself.

   The refcount of the string is *not* incremented.

   The prototype leaves both parameters unnamed.
   NOTE(review): the first is presumably the Unicode object and the
   second an error handling string like the errors argument of the
   codec APIs -- confirm against the implementation.

   *** Exported for internal use by the interpreter only !!! ***

*/

extern DL_IMPORT(PyObject *) _PyUnicode_AsDefaultEncodedString(
    PyObject *, const char *);
555
Fred Drakecb093fe2000-05-09 19:51:53 +0000556/* Returns the currently active default encoding.
557
558 The default encoding is currently implemented as run-time settable
559 process global. This may change in future versions of the
560 interpreter to become a parameter which is managed on a per-thread
561 basis.
562
563 */
564
Thomas Wouters5f375912000-07-22 23:30:03 +0000565extern DL_IMPORT(const char*) PyUnicode_GetDefaultEncoding(void);
Fred Drakecb093fe2000-05-09 19:51:53 +0000566
567/* Sets the currently active default encoding.
568
569 Returns 0 on success, -1 in case of an error.
570
571 */
572
573extern DL_IMPORT(int) PyUnicode_SetDefaultEncoding(
574 const char *encoding /* Encoding name in standard form */
575 );
576
Guido van Rossumd8225182000-03-10 22:33:05 +0000577/* --- Generic Codecs ----------------------------------------------------- */
578
579/* Create a Unicode object by decoding the encoded string s of the
580 given size. */
581
582extern DL_IMPORT(PyObject*) PyUnicode_Decode(
583 const char *s, /* encoded string */
584 int size, /* size of buffer */
585 const char *encoding, /* encoding */
586 const char *errors /* error handling */
587 );
588
589/* Encodes a Py_UNICODE buffer of the given size and returns a
590 Python string object. */
591
592extern DL_IMPORT(PyObject*) PyUnicode_Encode(
593 const Py_UNICODE *s, /* Unicode char buffer */
594 int size, /* number of Py_UNICODE chars to encode */
595 const char *encoding, /* encoding */
596 const char *errors /* error handling */
597 );
598
599/* Encodes a Unicode object and returns the result as Python string
600 object. */
601
602extern DL_IMPORT(PyObject*) PyUnicode_AsEncodedString(
603 PyObject *unicode, /* Unicode object */
604 const char *encoding, /* encoding */
605 const char *errors /* error handling */
606 );
607
608/* --- UTF-8 Codecs ------------------------------------------------------- */
609
/* UTF-8 variant of the generic decoder: creates a Unicode object by
   decoding the UTF-8 encoded buffer string of the given length.

   errors selects the error handling; NULL means the codec's default
   handling, which is "strict" for the builtin codecs. */

extern DL_IMPORT(PyObject*) PyUnicode_DecodeUTF8(
    const char *string,         /* UTF-8 encoded string */
    int length,                 /* size of string */
    const char *errors          /* error handling */
    );
615
/* UTF-8 counterpart of PyUnicode_AsEncodedString(): takes a Unicode
   object and returns a PyObject*, presumably the UTF-8 encoded data
   as a Python string object -- TODO confirm.  There is no errors
   argument. */

extern DL_IMPORT(PyObject*) PyUnicode_AsUTF8String(
    PyObject *unicode           /* Unicode object */
    );
619
/* UTF-8 variant of the generic encoder: encodes length Py_UNICODE
   chars from the buffer data and returns a Python string object.

   errors selects the error handling; NULL means the codec's default
   handling, which is "strict" for the builtin codecs. */

extern DL_IMPORT(PyObject*) PyUnicode_EncodeUTF8(
    const Py_UNICODE *data,     /* Unicode char buffer */
    int length,                 /* number of Py_UNICODE chars to encode */
    const char *errors          /* error handling */
    );
625
626/* --- UTF-16 Codecs ------------------------------------------------------ */
627
Guido van Rossum9e896b32000-04-05 20:11:21 +0000628/* Decodes length bytes from a UTF-16 encoded buffer string and returns
Guido van Rossumd8225182000-03-10 22:33:05 +0000629 the corresponding Unicode object.
630
631 errors (if non-NULL) defines the error handling. It defaults
632 to "strict".
633
634 If byteorder is non-NULL, the decoder starts decoding using the
635 given byte order:
636
637 *byteorder == -1: little endian
638 *byteorder == 0: native order
639 *byteorder == 1: big endian
640
Marc-André Lemburg489b56e2001-05-21 20:30:15 +0000641 In native mode, the first two bytes of the stream are checked for a
642 BOM mark. If found, the BOM mark is analysed, the byte order
643 adjusted and the BOM skipped. In the other modes, no BOM mark
644 interpretation is done. After completion, *byteorder is set to the
645 current byte order at the end of input data.
Guido van Rossumd8225182000-03-10 22:33:05 +0000646
647 If byteorder is NULL, the codec starts in native order mode.
648
649*/
650
651extern DL_IMPORT(PyObject*) PyUnicode_DecodeUTF16(
652 const char *string, /* UTF-16 encoded string */
653 int length, /* size of string */
654 const char *errors, /* error handling */
655 int *byteorder /* pointer to byteorder to use
656 0=native;-1=LE,1=BE; updated on
657 exit */
658 );
659
660/* Returns a Python string using the UTF-16 encoding in native byte
661 order. The string always starts with a BOM mark. */
662
663extern DL_IMPORT(PyObject*) PyUnicode_AsUTF16String(
664 PyObject *unicode /* Unicode object */
665 );
666
667/* Returns a Python string object holding the UTF-16 encoded value of
Guido van Rossum9e896b32000-04-05 20:11:21 +0000668 the Unicode data.
Guido van Rossumd8225182000-03-10 22:33:05 +0000669
670 If byteorder is not 0, output is written according to the following
671 byte order:
672
673 byteorder == -1: little endian
674 byteorder == 0: native byte order (writes a BOM mark)
675 byteorder == 1: big endian
676
677 If byteorder is 0, the output string will always start with the
678 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
679 prepended.
680
681 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
682 UCS-2. This trick makes it possible to add full UTF-16 capabilities
Thomas Wouters7e474022000-07-16 12:04:32 +0000683 at a later point without compromising the APIs.
Guido van Rossumd8225182000-03-10 22:33:05 +0000684
685*/
686
687extern DL_IMPORT(PyObject*) PyUnicode_EncodeUTF16(
688 const Py_UNICODE *data, /* Unicode char buffer */
689 int length, /* number of Py_UNICODE chars to encode */
690 const char *errors, /* error handling */
691 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
692 );
693
694/* --- Unicode-Escape Codecs ---------------------------------------------- */
695
/* Unicode-Escape variant of the generic decoder: creates a Unicode
   object by decoding the Unicode-Escape encoded buffer string of the
   given length.

   errors selects the error handling; NULL means the codec's default
   handling, which is "strict" for the builtin codecs. */

extern DL_IMPORT(PyObject*) PyUnicode_DecodeUnicodeEscape(
    const char *string,         /* Unicode-Escape encoded string */
    int length,                 /* size of string */
    const char *errors          /* error handling */
    );
701
/* Unicode-Escape counterpart of PyUnicode_AsEncodedString(): takes a
   Unicode object and returns a PyObject*, presumably the
   Unicode-Escape encoded data as a Python string object -- TODO
   confirm. */

extern DL_IMPORT(PyObject*) PyUnicode_AsUnicodeEscapeString(
    PyObject *unicode           /* Unicode object */
    );
705
/* Unicode-Escape variant of the generic encoder: encodes length
   Py_UNICODE chars from the buffer data and returns a Python string
   object.  Unlike most other encoders this one takes no errors
   argument. */

extern DL_IMPORT(PyObject*) PyUnicode_EncodeUnicodeEscape(
    const Py_UNICODE *data,     /* Unicode char buffer */
    int length                  /* Number of Py_UNICODE chars to encode */
    );
710
711/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
712
/* Raw-Unicode-Escape variant of the generic decoder: creates a Unicode
   object by decoding the Raw-Unicode-Escape encoded buffer string of
   the given length.

   errors selects the error handling; NULL means the codec's default
   handling, which is "strict" for the builtin codecs. */

extern DL_IMPORT(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
    const char *string,         /* Raw-Unicode-Escape encoded string */
    int length,                 /* size of string */
    const char *errors          /* error handling */
    );
718
/* Raw-Unicode-Escape counterpart of PyUnicode_AsEncodedString(): takes
   a Unicode object and returns a PyObject*, presumably the
   Raw-Unicode-Escape encoded data as a Python string object -- TODO
   confirm. */

extern DL_IMPORT(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
    PyObject *unicode           /* Unicode object */
    );
722
/* Raw-Unicode-Escape variant of the generic encoder: encodes length
   Py_UNICODE chars from the buffer data and returns a Python string
   object.  Unlike most other encoders this one takes no errors
   argument. */

extern DL_IMPORT(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
    const Py_UNICODE *data,     /* Unicode char buffer */
    int length                  /* Number of Py_UNICODE chars to encode */
    );
727
728/* --- Latin-1 Codecs -----------------------------------------------------
729
730 Note: Latin-1 corresponds to the first 256 Unicode ordinals.
731
732*/
733
/* Latin-1 variant of the generic decoder: creates a Unicode object by
   decoding the Latin-1 encoded buffer string of the given length.

   errors selects the error handling; NULL means the codec's default
   handling, which is "strict" for the builtin codecs. */

extern DL_IMPORT(PyObject*) PyUnicode_DecodeLatin1(
    const char *string,         /* Latin-1 encoded string */
    int length,                 /* size of string */
    const char *errors          /* error handling */
    );
739
740extern DL_IMPORT(PyObject*) PyUnicode_AsLatin1String(
741 PyObject *unicode /* Unicode object */
742 );
743
744extern DL_IMPORT(PyObject*) PyUnicode_EncodeLatin1(
745 const Py_UNICODE *data, /* Unicode char buffer */
746 int length, /* Number of Py_UNICODE chars to encode */
747 const char *errors /* error handling */
748 );
749
750/* --- ASCII Codecs -------------------------------------------------------
751
752 Only 7-bit ASCII data is accepted. All other codes generate errors.
753
754*/
755
756extern DL_IMPORT(PyObject*) PyUnicode_DecodeASCII(
757 const char *string, /* ASCII encoded string */
758 int length, /* size of string */
759 const char *errors /* error handling */
760 );
761
762extern DL_IMPORT(PyObject*) PyUnicode_AsASCIIString(
763 PyObject *unicode /* Unicode object */
764 );
765
766extern DL_IMPORT(PyObject*) PyUnicode_EncodeASCII(
767 const Py_UNICODE *data, /* Unicode char buffer */
768 int length, /* Number of Py_UNICODE chars to encode */
769 const char *errors /* error handling */
770 );
771
772/* --- Character Map Codecs -----------------------------------------------
773
774 This codec uses mappings to encode and decode characters.
775
776 Decoding mappings must map single string characters to single
777 Unicode characters, integers (which are then interpreted as Unicode
778 ordinals) or None (meaning "undefined mapping" and causing an
779 error).
780
781 Encoding mappings must map single Unicode characters to single
782 string characters, integers (which are then interpreted as Latin-1
783 ordinals) or None (meaning "undefined mapping" and causing an
784 error).
785
786 If a character lookup fails with a LookupError, the character is
787 copied as-is, meaning that its ordinal value will be interpreted as
788 a Unicode or Latin-1 ordinal, respectively. Because of this, mappings
789 only need to contain those entries which map characters to different
790 code points.
791
792*/
793
794extern DL_IMPORT(PyObject*) PyUnicode_DecodeCharmap(
795 const char *string, /* Encoded string */
796 int length, /* size of string */
797 PyObject *mapping, /* character mapping
798 (char ordinal -> unicode ordinal) */
799 const char *errors /* error handling */
800 );
801
802extern DL_IMPORT(PyObject*) PyUnicode_AsCharmapString(
803 PyObject *unicode, /* Unicode object */
804 PyObject *mapping /* character mapping
805 (unicode ordinal -> char ordinal) */
806 );
807
808extern DL_IMPORT(PyObject*) PyUnicode_EncodeCharmap(
809 const Py_UNICODE *data, /* Unicode char buffer */
810 int length, /* Number of Py_UNICODE chars to encode */
811 PyObject *mapping, /* character mapping
812 (unicode ordinal -> char ordinal) */
813 const char *errors /* error handling */
814 );
815
816/* Translate a Py_UNICODE buffer of the given length by applying a
817 character mapping table to it and return the resulting Unicode
818 object.
819
820 The mapping table must map Unicode ordinal integers to Unicode
821 ordinal integers or None (causing deletion of the character).
822
823 Mapping tables may be dictionaries or sequences. Unmapped character
824 ordinals (ones which cause a LookupError) are left untouched and
825 are copied as-is.
826
827*/
828
829extern DL_IMPORT(PyObject *) PyUnicode_TranslateCharmap(
830 const Py_UNICODE *data, /* Unicode char buffer */
831 int length, /* Number of Py_UNICODE chars to translate */
832 PyObject *table, /* Translate table (Unicode ordinal -> Unicode ordinal or None) */
833 const char *errors /* error handling */
834 );
835
Guido van Rossumefec1152000-03-28 02:01:15 +0000836#ifdef MS_WIN32
Guido van Rossum24bdb042000-03-28 20:29:59 +0000837
Guido van Rossumefec1152000-03-28 02:01:15 +0000838/* --- MBCS codecs for Windows -------------------------------------------- */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000839
Guido van Rossumefec1152000-03-28 02:01:15 +0000840extern DL_IMPORT(PyObject*) PyUnicode_DecodeMBCS(
841 const char *string, /* MBCS encoded string */
842 int length, /* size of string */
843 const char *errors /* error handling */
844 );
845
846extern DL_IMPORT(PyObject*) PyUnicode_AsMBCSString(
847 PyObject *unicode /* Unicode object */
848 );
849
850extern DL_IMPORT(PyObject*) PyUnicode_EncodeMBCS(
851 const Py_UNICODE *data, /* Unicode char buffer */
852 int length, /* Number of Py_UNICODE chars to encode */
853 const char *errors /* error handling */
854 );
855
Guido van Rossumefec1152000-03-28 02:01:15 +0000856#endif /* MS_WIN32 */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000857
Guido van Rossum9e896b32000-04-05 20:11:21 +0000858/* --- Decimal Encoder ---------------------------------------------------- */
859
860/* Takes a Unicode string holding a decimal value and writes it into
861 an output buffer using standard ASCII digit codes.
862
863 The output buffer has to provide at least length+1 bytes of storage
864 area. The output string is 0-terminated.
865
866 The encoder converts whitespace to ' ', decimal characters to their
867 corresponding ASCII digit and copies all other Latin-1 characters
868 except \0 as-is. Characters outside this range (Unicode ordinals
869 1-255) are treated as errors. This includes embedded NUL characters.
870
871 Error handling is defined by the errors argument:
872
873 NULL or "strict": raise a ValueError
874 "ignore": ignore the wrong characters (these are not copied to the
875 output buffer)
876 "replace": replaces illegal characters with '?'
877
878 Returns 0 on success, -1 on failure.
879
880*/
881
882extern DL_IMPORT(int) PyUnicode_EncodeDecimal(
883 Py_UNICODE *s, /* Unicode buffer */
884 int length, /* Number of Py_UNICODE chars to encode */
885 char *output, /* Output buffer; must provide at least length+1 bytes (room for the 0-terminator) */
886 const char *errors /* error handling */
887 );
888
Guido van Rossumd8225182000-03-10 22:33:05 +0000889/* --- Methods & Slots ----------------------------------------------------
890
891 These are capable of handling Unicode objects and strings on input
892 (we refer to them as strings in the descriptions) and return
893 Unicode objects or integers as appropriate. */
894
895/* Concat two strings giving a new Unicode string. */
896
897extern DL_IMPORT(PyObject*) PyUnicode_Concat(
898 PyObject *left, /* Left string */
899 PyObject *right /* Right string */
900 );
901
902/* Split a string giving a list of Unicode strings.
903
904 If sep is NULL, splitting will be done at all whitespace
905 substrings. Otherwise, splits occur at the given separator.
906
907 At most maxsplit splits will be done. If negative, no limit is set.
908
909 Separators are not included in the resulting list.
910
911*/
912
913extern DL_IMPORT(PyObject*) PyUnicode_Split(
914 PyObject *s, /* String to split */
915 PyObject *sep, /* String separator */
916 int maxsplit /* Maxsplit count */
917 );
918
919/* Ditto, but split at line breaks.
920
921 CRLF is considered to be one line break. Line breaks are not
922 included in the resulting list unless keepends is true. */
923
924extern DL_IMPORT(PyObject*) PyUnicode_Splitlines(
925 PyObject *s, /* String to split */
Guido van Rossum004d64f2000-04-11 15:39:46 +0000926 int keepends /* If true, line end markers are included */
Guido van Rossumd8225182000-03-10 22:33:05 +0000927 );
928
929/* Translate a string by applying a character mapping table to it and
930 return the resulting Unicode object.
931
932 The mapping table must map Unicode ordinal integers to Unicode
933 ordinal integers or None (causing deletion of the character).
934
935 Mapping tables may be dictionaries or sequences. Unmapped character
936 ordinals (ones which cause a LookupError) are left untouched and
937 are copied as-is.
938
939*/
940
941extern DL_IMPORT(PyObject *) PyUnicode_Translate(
942 PyObject *str, /* String */
943 PyObject *table, /* Translate table */
944 const char *errors /* error handling */
945 );
946
947/* Join a sequence of strings using the given separator and return
948 the resulting Unicode string. */
949
950extern DL_IMPORT(PyObject*) PyUnicode_Join(
951 PyObject *separator, /* Separator string */
952 PyObject *seq /* Sequence object */
953 );
954
955/* Return 1 if substr matches str[start:end] at the given tail end, 0
956 otherwise. */
957
958extern DL_IMPORT(int) PyUnicode_Tailmatch(
959 PyObject *str, /* String */
960 PyObject *substr, /* Prefix or Suffix string */
961 int start, /* Start index */
962 int end, /* Stop index */
963 int direction /* Tail end: -1 prefix, +1 suffix */
964 );
965
966/* Return the first position of substr in str[start:end] using the
967 given search direction or -1 if not found. */
968
969extern DL_IMPORT(int) PyUnicode_Find(
970 PyObject *str, /* String */
971 PyObject *substr, /* Substring to find */
972 int start, /* Start index */
973 int end, /* Stop index */
974 int direction /* Find direction: +1 forward, -1 backward */
975 );
976
Barry Warsaw51ac5802000-03-20 16:36:48 +0000977/* Count the number of occurrences of substr in str[start:end]. */
Guido van Rossumd8225182000-03-10 22:33:05 +0000978
979extern DL_IMPORT(int) PyUnicode_Count(
980 PyObject *str, /* String */
981 PyObject *substr, /* Substring to count */
982 int start, /* Start index */
983 int end /* Stop index */
984 );
985
Barry Warsaw51ac5802000-03-20 16:36:48 +0000986/* Replace at most maxcount occurrences of substr in str with replstr
Guido van Rossumd8225182000-03-10 22:33:05 +0000987 and return the resulting Unicode object. */
988
989extern DL_IMPORT(PyObject *) PyUnicode_Replace(
990 PyObject *str, /* String */
991 PyObject *substr, /* Substring to find */
992 PyObject *replstr, /* Substring to replace */
993 int maxcount /* Max. number of replacements to apply;
994 -1 = all */
995 );
996
997/* Compare two strings and return -1, 0, 1 for less than, equal,
998 greater than resp. */
999
1000extern DL_IMPORT(int) PyUnicode_Compare(
1001 PyObject *left, /* Left string */
1002 PyObject *right /* Right string */
1003 );
1004
Thomas Wouters7e474022000-07-16 12:04:32 +00001005/* Apply an argument tuple or dictionary to a format string and return
Guido van Rossumd8225182000-03-10 22:33:05 +00001006 the resulting Unicode string. */
1007
1008extern DL_IMPORT(PyObject *) PyUnicode_Format(
1009 PyObject *format, /* Format string */
1010 PyObject *args /* Argument tuple or dictionary */
1011 );
1012
Guido van Rossumd0d366b2000-03-13 23:22:24 +00001013/* Checks whether element is contained in container and returns 1/0
1014 accordingly.
1015
1016 element has to coerce to a one-element Unicode string. -1 is
1017 returned in case of an error. */
1018
1019extern DL_IMPORT(int) PyUnicode_Contains(
1020 PyObject *container, /* Container string */
1021 PyObject *element /* Element string */
1022 );
1023
Guido van Rossumd8225182000-03-10 22:33:05 +00001024/* === Characters Type APIs =============================================== */
1025
1026/* These should not be used directly. Use the Py_UNICODE_IS* and
1027 Py_UNICODE_TO* macros instead.
1028
1029 These APIs are implemented in Objects/unicodectype.c.
1030
1031*/
1032
1033extern DL_IMPORT(int) _PyUnicode_IsLowercase(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001034 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001035 );
1036
1037extern DL_IMPORT(int) _PyUnicode_IsUppercase(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001038 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001039 );
1040
1041extern DL_IMPORT(int) _PyUnicode_IsTitlecase(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001042 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001043 );
1044
1045extern DL_IMPORT(int) _PyUnicode_IsWhitespace(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001046 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001047 );
1048
1049extern DL_IMPORT(int) _PyUnicode_IsLinebreak(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001050 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001051 );
1052
1053extern DL_IMPORT(Py_UNICODE) _PyUnicode_ToLowercase(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001054 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001055 );
1056
1057extern DL_IMPORT(Py_UNICODE) _PyUnicode_ToUppercase(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001058 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001059 );
1060
1061extern DL_IMPORT(Py_UNICODE) _PyUnicode_ToTitlecase(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001062 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001063 );
1064
1065extern DL_IMPORT(int) _PyUnicode_ToDecimalDigit(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001066 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001067 );
1068
1069extern DL_IMPORT(int) _PyUnicode_ToDigit(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001070 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001071 );
1072
1073extern DL_IMPORT(double) _PyUnicode_ToNumeric(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001074 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001075 );
1076
1077extern DL_IMPORT(int) _PyUnicode_IsDecimalDigit(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001078 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001079 );
1080
1081extern DL_IMPORT(int) _PyUnicode_IsDigit(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001082 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001083 );
1084
1085extern DL_IMPORT(int) _PyUnicode_IsNumeric(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001086 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001087 );
1088
Marc-André Lemburgf03e7412000-07-05 09:45:59 +00001089extern DL_IMPORT(int) _PyUnicode_IsAlpha(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001090 Py_UNICODE ch /* Unicode character */
Marc-André Lemburgf03e7412000-07-05 09:45:59 +00001091 );
1092
Guido van Rossumd8225182000-03-10 22:33:05 +00001093#ifdef __cplusplus
1094}
1095#endif
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001096#endif /* Py_USING_UNICODE */
Guido van Rossumd8225182000-03-10 22:33:05 +00001097#endif /* !Py_UNICODEOBJECT_H */