blob: a7e50c3ce86a3aa8130ffcd29da2bdce1303e232 [file] [log] [blame]
Guido van Rossumd8225182000-03-10 22:33:05 +00001#ifndef Py_UNICODEOBJECT_H
2#define Py_UNICODEOBJECT_H
Guido van Rossumd8225182000-03-10 22:33:05 +00003
4/*
5
6Unicode implementation based on original code by Fredrik Lundh,
7modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
8Unicode Integration Proposal (see file Misc/unicode.txt).
9
Guido van Rossum16b1ad92000-08-03 16:24:25 +000010Copyright (c) Corporation for National Research Initiatives.
Guido van Rossumd8225182000-03-10 22:33:05 +000011
12
13 Original header:
14 --------------------------------------------------------------------
15
16 * Yet another Unicode string type for Python. This type supports the
17 * 16-bit Basic Multilingual Plane (BMP) only.
18 *
19 * Written by Fredrik Lundh, January 1999.
20 *
21 * Copyright (c) 1999 by Secret Labs AB.
22 * Copyright (c) 1999 by Fredrik Lundh.
23 *
24 * fredrik@pythonware.com
25 * http://www.pythonware.com
26 *
27 * --------------------------------------------------------------------
28 * This Unicode String Type is
29 *
30 * Copyright (c) 1999 by Secret Labs AB
31 * Copyright (c) 1999 by Fredrik Lundh
32 *
33 * By obtaining, using, and/or copying this software and/or its
34 * associated documentation, you agree that you have read, understood,
35 * and will comply with the following terms and conditions:
36 *
37 * Permission to use, copy, modify, and distribute this software and its
38 * associated documentation for any purpose and without fee is hereby
39 * granted, provided that the above copyright notice appears in all
40 * copies, and that both that copyright notice and this permission notice
41 * appear in supporting documentation, and that the name of Secret Labs
42 * AB or the author not be used in advertising or publicity pertaining to
43 * distribution of the software without specific, written prior
44 * permission.
45 *
46 * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
47 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
48 * FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
49 * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
50 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
51 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
52 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
53 * -------------------------------------------------------------------- */
54
55#include "ctype.h"
56
57/* === Internal API ======================================================= */
58
59/* --- Internal Unicode Format -------------------------------------------- */
60
/* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is
   properly set, but the default rules below don't set it. I'll
   sort this out some other day -- fredrik@pythonware.com */
64
65#ifndef Py_UNICODE_SIZE
66#error Must define Py_UNICODE_SIZE
67#endif
68
Fredrik Lundh8f455852001-06-27 18:59:43 +000069/* Setting Py_UNICODE_WIDE enables UCS-4 storage. Otherwise, Unicode
70 strings are stored as UCS-2 (with limited support for UTF-16) */
71
72#if Py_UNICODE_SIZE >= 4
73#define Py_UNICODE_WIDE
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +000074#endif
Fredrik Lundh1294ad02001-06-26 17:17:07 +000075
Guido van Rossumd8225182000-03-10 22:33:05 +000076/* Set these flags if the platform has "wchar.h", "wctype.h" and the
77 wchar_t type is a 16-bit unsigned type */
78/* #define HAVE_WCHAR_H */
79/* #define HAVE_USABLE_WCHAR_T */
80
81/* Defaults for various platforms */
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +000082#ifndef PY_UNICODE_TYPE
Guido van Rossumd8225182000-03-10 22:33:05 +000083
Fredrik Lundh1294ad02001-06-26 17:17:07 +000084/* Windows has a usable wchar_t type (unless we're using UCS-4) */
Fredrik Lundh8f455852001-06-27 18:59:43 +000085# if defined(MS_WIN32) && Py_UNICODE_SIZE == 2
Guido van Rossumd8225182000-03-10 22:33:05 +000086# define HAVE_USABLE_WCHAR_T
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +000087# define PY_UNICODE_TYPE wchar_t
88# endif
89
Fredrik Lundh8f455852001-06-27 18:59:43 +000090# if defined(Py_UNICODE_WIDE)
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +000091# define PY_UNICODE_TYPE Py_UCS4
Guido van Rossumd8225182000-03-10 22:33:05 +000092# endif
93
94#endif
95
96/* If the compiler provides a wchar_t type we try to support it
97 through the interface functions PyUnicode_FromWideChar() and
98 PyUnicode_AsWideChar(). */
99
100#ifdef HAVE_USABLE_WCHAR_T
Marc-André Lemburg1a731c62000-08-11 11:43:10 +0000101# ifndef HAVE_WCHAR_H
102# define HAVE_WCHAR_H
103# endif
Guido van Rossumd8225182000-03-10 22:33:05 +0000104#endif
105
106#ifdef HAVE_WCHAR_H
Guido van Rossum24bdb042000-03-28 20:29:59 +0000107/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
108# ifdef _HAVE_BSDI
109# include <time.h>
110# endif
Guido van Rossumd8225182000-03-10 22:33:05 +0000111# include "wchar.h"
112#endif
113
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +0000114/*
115 * Use this typedef when you need to represent a UTF-16 surrogate pair
116 * as single unsigned integer.
117 */
118#if SIZEOF_INT >= 4
119typedef unsigned int Py_UCS4;
120#elif SIZEOF_LONG >= 4
121typedef unsigned long Py_UCS4;
Guido van Rossumd8225182000-03-10 22:33:05 +0000122#endif
123
Martin v. Löwis0ba70cc2001-06-26 22:22:37 +0000124typedef PY_UNICODE_TYPE Py_UNICODE;
Marc-André Lemburg43279102000-07-07 09:01:41 +0000125
Marc-André Lemburgb5ac6f62001-07-31 14:30:16 +0000126/* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */
127
/* Unicode API names are mangled to ensure that UCS-2 and UCS-4 builds
   produce different external names. Combining a Python interpreter
   with an extension that was compiled under a different Unicode width
   assumption therefore causes an import error. */
132
133#ifndef Py_UNICODE_WIDE
134
135# define PyUnicode_AsASCIIString PyUnicodeUCS2_AsASCIIString
136# define PyUnicode_AsCharmapString PyUnicodeUCS2_AsCharmapString
137# define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString
138# define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String
139# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString
140# define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String
141# define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String
142# define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
143# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString
144# define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar
145# define PyUnicode_Compare PyUnicodeUCS2_Compare
146# define PyUnicode_Concat PyUnicodeUCS2_Concat
147# define PyUnicode_Contains PyUnicodeUCS2_Contains
148# define PyUnicode_Count PyUnicodeUCS2_Count
149# define PyUnicode_Decode PyUnicodeUCS2_Decode
150# define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
151# define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
152# define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
153# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
154# define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
155# define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
156# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape
157# define PyUnicode_Encode PyUnicodeUCS2_Encode
158# define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII
159# define PyUnicode_EncodeCharmap PyUnicodeUCS2_EncodeCharmap
160# define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal
161# define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1
162# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape
163# define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16
164# define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8
165# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape
166# define PyUnicode_Find PyUnicodeUCS2_Find
167# define PyUnicode_Format PyUnicodeUCS2_Format
168# define PyUnicode_FromEncodedObject PyUnicodeUCS2_FromEncodedObject
169# define PyUnicode_FromObject PyUnicodeUCS2_FromObject
170# define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
171# define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
172# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
173# define PyUnicode_GetMax PyUnicodeUCS2_GetMax
174# define PyUnicode_GetSize PyUnicodeUCS2_GetSize
175# define PyUnicode_Join PyUnicodeUCS2_Join
176# define PyUnicode_Replace PyUnicodeUCS2_Replace
177# define PyUnicode_Resize PyUnicodeUCS2_Resize
178# define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding
179# define PyUnicode_Split PyUnicodeUCS2_Split
180# define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines
181# define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch
182# define PyUnicode_Translate PyUnicodeUCS2_Translate
183# define PyUnicode_TranslateCharmap PyUnicodeUCS2_TranslateCharmap
184# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString
185# define _PyUnicode_Fini _PyUnicodeUCS2_Fini
186# define _PyUnicode_Init _PyUnicodeUCS2_Init
187# define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha
188# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit
189# define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit
190# define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak
191# define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase
192# define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric
193# define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase
194# define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase
195# define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace
196# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit
197# define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit
198# define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase
199# define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric
200# define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase
201# define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase
202
203#else
204
205# define PyUnicode_AsASCIIString PyUnicodeUCS4_AsASCIIString
206# define PyUnicode_AsCharmapString PyUnicodeUCS4_AsCharmapString
207# define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString
208# define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String
209# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString
210# define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String
211# define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String
212# define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
213# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString
214# define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar
215# define PyUnicode_Compare PyUnicodeUCS4_Compare
216# define PyUnicode_Concat PyUnicodeUCS4_Concat
217# define PyUnicode_Contains PyUnicodeUCS4_Contains
218# define PyUnicode_Count PyUnicodeUCS4_Count
219# define PyUnicode_Decode PyUnicodeUCS4_Decode
220# define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
221# define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
222# define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
223# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
224# define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
225# define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
226# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape
227# define PyUnicode_Encode PyUnicodeUCS4_Encode
228# define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII
229# define PyUnicode_EncodeCharmap PyUnicodeUCS4_EncodeCharmap
230# define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal
231# define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1
232# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape
233# define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16
234# define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8
235# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape
236# define PyUnicode_Find PyUnicodeUCS4_Find
237# define PyUnicode_Format PyUnicodeUCS4_Format
238# define PyUnicode_FromEncodedObject PyUnicodeUCS4_FromEncodedObject
239# define PyUnicode_FromObject PyUnicodeUCS4_FromObject
240# define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
241# define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
242# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
243# define PyUnicode_GetMax PyUnicodeUCS4_GetMax
244# define PyUnicode_GetSize PyUnicodeUCS4_GetSize
245# define PyUnicode_Join PyUnicodeUCS4_Join
246# define PyUnicode_Replace PyUnicodeUCS4_Replace
247# define PyUnicode_Resize PyUnicodeUCS4_Resize
248# define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding
249# define PyUnicode_Split PyUnicodeUCS4_Split
250# define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines
251# define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch
252# define PyUnicode_Translate PyUnicodeUCS4_Translate
253# define PyUnicode_TranslateCharmap PyUnicodeUCS4_TranslateCharmap
254# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString
255# define _PyUnicode_Fini _PyUnicodeUCS4_Fini
256# define _PyUnicode_Init _PyUnicodeUCS4_Init
257# define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha
258# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit
259# define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit
260# define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak
261# define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase
262# define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric
263# define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase
264# define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase
265# define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace
266# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit
267# define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit
268# define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase
269# define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric
270# define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase
271# define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase
272
273
274#endif
275
Guido van Rossumd8225182000-03-10 22:33:05 +0000276/* --- Internal Unicode Operations ---------------------------------------- */
277
278/* If you want Python to use the compiler's wctype.h functions instead
Barry Warsaw51ac5802000-03-20 16:36:48 +0000279 of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS or
280 configure Python using --with-ctype-functions. This reduces the
281 interpreter's code size. */
Guido van Rossumd8225182000-03-10 22:33:05 +0000282
/* Character classification and case conversion macros.

   When both HAVE_USABLE_WCHAR_T and WANT_WCTYPE_FUNCTIONS are defined,
   the space/lower/upper/alpha tests and the lower/upper conversions
   map to the functions from wctype.h; otherwise they map to the
   _PyUnicode_* implementations.

   All remaining macros (titlecase, linebreak, decimal, digit and
   numeric handling) map to the _PyUnicode_* implementations in both
   configurations, so they are defined once, after the conditional. */

#if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)

#include "wctype.h"

#define Py_UNICODE_ISSPACE(ch) iswspace(ch)

#define Py_UNICODE_ISLOWER(ch) iswlower(ch)
#define Py_UNICODE_ISUPPER(ch) iswupper(ch)

#define Py_UNICODE_TOLOWER(ch) towlower(ch)
#define Py_UNICODE_TOUPPER(ch) towupper(ch)

#define Py_UNICODE_ISALPHA(ch) iswalpha(ch)

#else

#define Py_UNICODE_ISSPACE(ch) _PyUnicode_IsWhitespace(ch)

#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)

#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)

#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)

#endif

/* Identical in both configurations. */

#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)

#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)

#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)

#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
Marc-André Lemburga9c103b2000-07-03 10:52:13 +0000332
/* True if ch is alphabetic, a decimal digit, a digit or numeric, as
   decided by the corresponding Py_UNICODE_IS* macros.

   Note: ch appears four times in the expansion, so it may be evaluated
   up to four times; do not pass an expression with side effects. */
#define Py_UNICODE_ISALNUM(ch) \
       (Py_UNICODE_ISALPHA(ch) || \
        Py_UNICODE_ISDECIMAL(ch) || \
        Py_UNICODE_ISDIGIT(ch) || \
        Py_UNICODE_ISNUMERIC(ch))
338
/* Copy length Py_UNICODE elements from source to target using memcpy().

   length is an element count, not a byte count.  The expression has
   the value returned by memcpy().  Because memcpy() is used, the two
   buffers must not overlap. */
#define Py_UNICODE_COPY(target, source, length)\
    (memcpy((target), (source), (length)*sizeof(Py_UNICODE)))
341
/* Store value into the first length elements of the Py_UNICODE buffer
   target.

   target and value are each evaluated exactly once and cached in
   locals, so arguments with side effects behave as a function call
   would.  The locals carry a trailing underscore so that caller
   expressions that mention a variable named `i` are not captured by
   the macro's own loop index.  length is re-evaluated before every
   iteration. */
#define Py_UNICODE_FILL(target, value, length) \
    do { \
        int i_; \
        Py_UNICODE *t_ = (target); \
        Py_UNICODE v_ = (value); \
        for (i_ = 0; i_ < (length); i_++) \
            t_[i_] = v_; \
    } while (0)
345
/* True if the contents of substring occur in string starting at
   element index offset.

   string and substring are pointers to objects with `str` and `length`
   members.  The first element is compared directly as a cheap
   pre-check; only if it matches is memcmp() run over
   substring->length elements.

   No bounds checking is done here: the macro reads
   string->str[offset] and substring->str[0] unconditionally and then
   up to substring->length elements from string->str + offset, so the
   caller must make sure those reads are valid. */
#define Py_UNICODE_MATCH(string, offset, substring)\
    ((*((string)->str + (offset)) == *((substring)->str)) &&\
     !memcmp((string)->str + (offset), (substring)->str,\
             (substring)->length*sizeof(Py_UNICODE)))
350
Barry Warsaw51ac5802000-03-20 16:36:48 +0000351#ifdef __cplusplus
352extern "C" {
353#endif
354
Guido van Rossumd8225182000-03-10 22:33:05 +0000355/* --- Unicode Type ------------------------------------------------------- */
356
357typedef struct {
358 PyObject_HEAD
359 int length; /* Length of raw Unicode data in buffer */
360 Py_UNICODE *str; /* Raw Unicode buffer */
361 long hash; /* Hash value; -1 if not set */
Marc-André Lemburgbff879c2000-08-03 18:46:08 +0000362 PyObject *defenc; /* (Default) Encoded version as Python
363 string, or NULL; this is used for
364 implementing the buffer protocol */
Guido van Rossumd8225182000-03-10 22:33:05 +0000365} PyUnicodeObject;
366
367extern DL_IMPORT(PyTypeObject) PyUnicode_Type;
368
/* True if op's ob_type is exactly PyUnicode_Type (pointer identity
   comparison).  op is evaluated once and must not be NULL, since it is
   dereferenced unconditionally. */
#define PyUnicode_Check(op) ((op)->ob_type == &PyUnicode_Type)
370
/* Fast access macros.

   Each macro casts op to PyUnicodeObject * and reads the structure
   field directly; no type check is performed. */

/* Number of Py_UNICODE elements (the `length` field). */
#define PyUnicode_GET_SIZE(op) \
        (((PyUnicodeObject *)(op))->length)

/* Size of the raw buffer in bytes: length * sizeof(Py_UNICODE). */
#define PyUnicode_GET_DATA_SIZE(op) \
        (((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE))

/* The raw buffer (the `str` field) as Py_UNICODE *. */
#define PyUnicode_AS_UNICODE(op) \
        (((PyUnicodeObject *)(op))->str)

/* The raw buffer (the `str` field) viewed as const char *. */
#define PyUnicode_AS_DATA(op) \
        ((const char *)((PyUnicodeObject *)(op))->str)
380
381/* --- Constants ---------------------------------------------------------- */
382
383/* This Unicode character will be used as replacement character during
384 decoding if the errors argument is set to "replace". Note: the
385 Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
386 Unicode 3.0. */
387
388#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)
389
390/* === Public API ========================================================= */
391
392/* --- Plain Py_UNICODE --------------------------------------------------- */
393
394/* Create a Unicode Object from the Py_UNICODE buffer u of the given
Marc-André Lemburg8155e0e2001-04-23 14:44:21 +0000395 size.
396
397 u may be NULL which causes the contents to be undefined. It is the
398 user's responsibility to fill in the needed data afterwards. Note
399 that modifying the Unicode object contents after construction is
400 only allowed if u was set to NULL.
Guido van Rossumd8225182000-03-10 22:33:05 +0000401
402 The buffer is copied into the new object. */
403
404extern DL_IMPORT(PyObject*) PyUnicode_FromUnicode(
405 const Py_UNICODE *u, /* Unicode buffer */
406 int size /* size of buffer */
407 );
408
409/* Return a read-only pointer to the Unicode object's internal
410 Py_UNICODE buffer. */
411
412extern DL_IMPORT(Py_UNICODE *) PyUnicode_AsUnicode(
413 PyObject *unicode /* Unicode object */
414 );
415
416/* Get the length of the Unicode object. */
417
418extern DL_IMPORT(int) PyUnicode_GetSize(
419 PyObject *unicode /* Unicode object */
420 );
421
Martin v. Löwisce9b5a52001-06-27 06:28:56 +0000422/* Get the maximum ordinal for a Unicode character. */
423extern DL_IMPORT(Py_UNICODE) PyUnicode_GetMax(void);
424
Guido van Rossum52c23592000-04-10 13:41:41 +0000425/* Resize an already allocated Unicode object to the new size length.
426
427 *unicode is modified to point to the new (resized) object and 0
428 returned on success.
429
430 This API may only be called by the function which also called the
431 Unicode constructor. The refcount on the object must be 1. Otherwise,
432 an error is returned.
433
434 Error handling is implemented as follows: an exception is set, -1
435 is returned and *unicode left untouched.
436
437*/
438
439extern DL_IMPORT(int) PyUnicode_Resize(
440 PyObject **unicode, /* Pointer to the Unicode object */
441 int length /* New length */
442 );
443
Guido van Rossumd8225182000-03-10 22:33:05 +0000444/* Coerce obj to an Unicode object and return a reference with
445 *incremented* refcount.
446
447 Coercion is done in the following way:
448
449 1. Unicode objects are passed back as-is with incremented
450 refcount.
451
452 2. String and other char buffer compatible objects are decoded
Fred Drakecb093fe2000-05-09 19:51:53 +0000453 under the assumptions that they contain data using the current
454 default encoding. Decoding is done in "strict" mode.
Guido van Rossumd8225182000-03-10 22:33:05 +0000455
456 3. All other objects raise an exception.
457
458 The API returns NULL in case of an error. The caller is responsible
459 for decref'ing the returned objects.
460
461*/
462
Marc-André Lemburg5a5c81a2000-07-07 13:46:42 +0000463extern DL_IMPORT(PyObject*) PyUnicode_FromEncodedObject(
464 register PyObject *obj, /* Object */
465 const char *encoding, /* encoding */
466 const char *errors /* error handling */
467 );
468
469/* Shortcut for PyUnicode_FromEncodedObject(obj, NULL, "strict");
470 which results in using the default encoding as basis for
471 decoding the object.
472
473 Coerces obj to an Unicode object and return a reference with
474 *incremented* refcount.
475
476 The API returns NULL in case of an error. The caller is responsible
477 for decref'ing the returned objects.
478
479*/
480
Guido van Rossumd8225182000-03-10 22:33:05 +0000481extern DL_IMPORT(PyObject*) PyUnicode_FromObject(
482 register PyObject *obj /* Object */
483 );
484
485/* --- wchar_t support for platforms which support it --------------------- */
486
487#ifdef HAVE_WCHAR_H
488
489/* Create a Unicode Object from the whcar_t buffer w of the given
490 size.
491
492 The buffer is copied into the new object. */
493
494extern DL_IMPORT(PyObject*) PyUnicode_FromWideChar(
495 register const wchar_t *w, /* wchar_t buffer */
496 int size /* size of buffer */
497 );
498
499/* Copies the Unicode Object contents into the whcar_t buffer w. At
500 most size wchar_t characters are copied.
501
502 Returns the number of wchar_t characters copied or -1 in case of an
503 error. */
504
505extern DL_IMPORT(int) PyUnicode_AsWideChar(
506 PyUnicodeObject *unicode, /* Unicode object */
507 register wchar_t *w, /* wchar_t buffer */
508 int size /* size of buffer */
509 );
510
511#endif
512
513/* === Builtin Codecs =====================================================
514
515 Many of these APIs take two arguments encoding and errors. These
516 parameters encoding and errors have the same semantics as the ones
517 of the builtin unicode() API.
518
Fred Drakecb093fe2000-05-09 19:51:53 +0000519 Setting encoding to NULL causes the default encoding to be used.
Guido van Rossumd8225182000-03-10 22:33:05 +0000520
521 Error handling is set by errors which may also be set to NULL
522 meaning to use the default handling defined for the codec. Default
523 error handling for all builtin codecs is "strict" (ValueErrors are
524 raised).
525
   The codecs all use a similar interface. Only deviations from the
   generic ones are documented.
528
529*/
530
Fred Drakecb093fe2000-05-09 19:51:53 +0000531/* --- Manage the default encoding ---------------------------------------- */
532
Jeremy Hylton3ce45382001-07-30 22:34:24 +0000533/* Return a Python string holding the default encoded value of the
534 Unicode object.
535
536 The resulting string is cached in the Unicode object for subsequent
537 usage by this function. The cached version is needed to implement
538 the character buffer interface and will live (at least) as long as
539 the Unicode object itself.
540
541 The refcount of the string is *not* incremented.
542
543 *** Exported for internal use by the interpreter only !!! ***
544
545*/
546
547extern DL_IMPORT(PyObject *) _PyUnicode_AsDefaultEncodedString(
548 PyObject *, const char *);
549
Fred Drakecb093fe2000-05-09 19:51:53 +0000550/* Returns the currently active default encoding.
551
552 The default encoding is currently implemented as run-time settable
553 process global. This may change in future versions of the
554 interpreter to become a parameter which is managed on a per-thread
555 basis.
556
557 */
558
Thomas Wouters5f375912000-07-22 23:30:03 +0000559extern DL_IMPORT(const char*) PyUnicode_GetDefaultEncoding(void);
Fred Drakecb093fe2000-05-09 19:51:53 +0000560
561/* Sets the currently active default encoding.
562
563 Returns 0 on success, -1 in case of an error.
564
565 */
566
567extern DL_IMPORT(int) PyUnicode_SetDefaultEncoding(
568 const char *encoding /* Encoding name in standard form */
569 );
570
Guido van Rossumd8225182000-03-10 22:33:05 +0000571/* --- Generic Codecs ----------------------------------------------------- */
572
573/* Create a Unicode object by decoding the encoded string s of the
574 given size. */
575
576extern DL_IMPORT(PyObject*) PyUnicode_Decode(
577 const char *s, /* encoded string */
578 int size, /* size of buffer */
579 const char *encoding, /* encoding */
580 const char *errors /* error handling */
581 );
582
583/* Encodes a Py_UNICODE buffer of the given size and returns a
584 Python string object. */
585
586extern DL_IMPORT(PyObject*) PyUnicode_Encode(
587 const Py_UNICODE *s, /* Unicode char buffer */
588 int size, /* number of Py_UNICODE chars to encode */
589 const char *encoding, /* encoding */
590 const char *errors /* error handling */
591 );
592
593/* Encodes a Unicode object and returns the result as Python string
594 object. */
595
596extern DL_IMPORT(PyObject*) PyUnicode_AsEncodedString(
597 PyObject *unicode, /* Unicode object */
598 const char *encoding, /* encoding */
599 const char *errors /* error handling */
600 );
601
602/* --- UTF-8 Codecs ------------------------------------------------------- */
603
604extern DL_IMPORT(PyObject*) PyUnicode_DecodeUTF8(
605 const char *string, /* UTF-8 encoded string */
606 int length, /* size of string */
607 const char *errors /* error handling */
608 );
609
610extern DL_IMPORT(PyObject*) PyUnicode_AsUTF8String(
611 PyObject *unicode /* Unicode object */
612 );
613
614extern DL_IMPORT(PyObject*) PyUnicode_EncodeUTF8(
615 const Py_UNICODE *data, /* Unicode char buffer */
616 int length, /* number of Py_UNICODE chars to encode */
617 const char *errors /* error handling */
618 );
619
620/* --- UTF-16 Codecs ------------------------------------------------------ */
621
Guido van Rossum9e896b32000-04-05 20:11:21 +0000622/* Decodes length bytes from a UTF-16 encoded buffer string and returns
Guido van Rossumd8225182000-03-10 22:33:05 +0000623 the corresponding Unicode object.
624
625 errors (if non-NULL) defines the error handling. It defaults
626 to "strict".
627
628 If byteorder is non-NULL, the decoder starts decoding using the
629 given byte order:
630
631 *byteorder == -1: little endian
632 *byteorder == 0: native order
633 *byteorder == 1: big endian
634
Marc-André Lemburg489b56e2001-05-21 20:30:15 +0000635 In native mode, the first two bytes of the stream are checked for a
636 BOM mark. If found, the BOM mark is analysed, the byte order
637 adjusted and the BOM skipped. In the other modes, no BOM mark
638 interpretation is done. After completion, *byteorder is set to the
639 current byte order at the end of input data.
Guido van Rossumd8225182000-03-10 22:33:05 +0000640
641 If byteorder is NULL, the codec starts in native order mode.
642
643*/
644
645extern DL_IMPORT(PyObject*) PyUnicode_DecodeUTF16(
646 const char *string, /* UTF-16 encoded string */
647 int length, /* size of string */
648 const char *errors, /* error handling */
649 int *byteorder /* pointer to byteorder to use
650 0=native;-1=LE,1=BE; updated on
651 exit */
652 );
653
654/* Returns a Python string using the UTF-16 encoding in native byte
655 order. The string always starts with a BOM mark. */
656
657extern DL_IMPORT(PyObject*) PyUnicode_AsUTF16String(
658 PyObject *unicode /* Unicode object */
659 );
660
661/* Returns a Python string object holding the UTF-16 encoded value of
Guido van Rossum9e896b32000-04-05 20:11:21 +0000662 the Unicode data.
Guido van Rossumd8225182000-03-10 22:33:05 +0000663
664 If byteorder is not 0, output is written according to the following
665 byte order:
666
667 byteorder == -1: little endian
668 byteorder == 0: native byte order (writes a BOM mark)
669 byteorder == 1: big endian
670
671 If byteorder is 0, the output string will always start with the
672 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
673 prepended.
674
675 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
676 UCS-2. This trick makes it possible to add full UTF-16 capabilities
Thomas Wouters7e474022000-07-16 12:04:32 +0000677 at a later point without compromising the APIs.
Guido van Rossumd8225182000-03-10 22:33:05 +0000678
679*/
680
681extern DL_IMPORT(PyObject*) PyUnicode_EncodeUTF16(
682 const Py_UNICODE *data, /* Unicode char buffer */
683 int length, /* number of Py_UNICODE chars to encode */
684 const char *errors, /* error handling */
685 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
686 );
687
688/* --- Unicode-Escape Codecs ---------------------------------------------- */
689
690extern DL_IMPORT(PyObject*) PyUnicode_DecodeUnicodeEscape(
691 const char *string, /* Unicode-Escape encoded string */
692 int length, /* size of string */
693 const char *errors /* error handling */
694 );
695
696extern DL_IMPORT(PyObject*) PyUnicode_AsUnicodeEscapeString(
697 PyObject *unicode /* Unicode object */
698 );
699
700extern DL_IMPORT(PyObject*) PyUnicode_EncodeUnicodeEscape(
701 const Py_UNICODE *data, /* Unicode char buffer */
702 int length /* Number of Py_UNICODE chars to encode */
703 );
704
705/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
706
707extern DL_IMPORT(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
708 const char *string, /* Raw-Unicode-Escape encoded string */
709 int length, /* size of string */
710 const char *errors /* error handling */
711 );
712
713extern DL_IMPORT(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
714 PyObject *unicode /* Unicode object */
715 );
716
717extern DL_IMPORT(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
718 const Py_UNICODE *data, /* Unicode char buffer */
719 int length /* Number of Py_UNICODE chars to encode */
720 );
721
722/* --- Latin-1 Codecs -----------------------------------------------------
723
724 Note: Latin-1 corresponds to the first 256 Unicode ordinals.
725
726*/
727
728extern DL_IMPORT(PyObject*) PyUnicode_DecodeLatin1(
729 const char *string, /* Latin-1 encoded string */
730 int length, /* size of string */
731 const char *errors /* error handling */
732 );
733
734extern DL_IMPORT(PyObject*) PyUnicode_AsLatin1String(
735 PyObject *unicode /* Unicode object */
736 );
737
738extern DL_IMPORT(PyObject*) PyUnicode_EncodeLatin1(
739 const Py_UNICODE *data, /* Unicode char buffer */
740 int length, /* Number of Py_UNICODE chars to encode */
741 const char *errors /* error handling */
742 );
743
744/* --- ASCII Codecs -------------------------------------------------------
745
746 Only 7-bit ASCII data is accepted. All other codes generate errors.
747
748*/
749
750extern DL_IMPORT(PyObject*) PyUnicode_DecodeASCII(
751 const char *string, /* ASCII encoded string */
752 int length, /* size of string */
753 const char *errors /* error handling */
754 );
755
756extern DL_IMPORT(PyObject*) PyUnicode_AsASCIIString(
757 PyObject *unicode /* Unicode object */
758 );
759
760extern DL_IMPORT(PyObject*) PyUnicode_EncodeASCII(
761 const Py_UNICODE *data, /* Unicode char buffer */
762 int length, /* Number of Py_UNICODE chars to encode */
763 const char *errors /* error handling */
764 );
765
766/* --- Character Map Codecs -----------------------------------------------
767
768 This codec uses mappings to encode and decode characters.
769
770 Decoding mappings must map single string characters to single
771 Unicode characters, integers (which are then interpreted as Unicode
772 ordinals) or None (meaning "undefined mapping" and causing an
773 error).
774
775 Encoding mappings must map single Unicode characters to single
776 string characters, integers (which are then interpreted as Latin-1
777 ordinals) or None (meaning "undefined mapping" and causing an
778 error).
779
780 If a character lookup fails with a LookupError, the character is
781 copied as-is, meaning that its ordinal value will be interpreted as a
782 Unicode or Latin-1 ordinal, respectively. Because of this, mappings only
783 need to contain those entries which map characters to different code
784 points.
785
786*/
787
788extern DL_IMPORT(PyObject*) PyUnicode_DecodeCharmap(
789 const char *string, /* Encoded string */
790 int length, /* size of string */
791 PyObject *mapping, /* character mapping
792 (char ordinal -> unicode ordinal) */
793 const char *errors /* error handling */
794 );
795
796extern DL_IMPORT(PyObject*) PyUnicode_AsCharmapString(
797 PyObject *unicode, /* Unicode object */
798 PyObject *mapping /* character mapping
799 (unicode ordinal -> char ordinal) */
800 );
801
802extern DL_IMPORT(PyObject*) PyUnicode_EncodeCharmap(
803 const Py_UNICODE *data, /* Unicode char buffer */
804 int length, /* Number of Py_UNICODE chars to encode */
805 PyObject *mapping, /* character mapping
806 (unicode ordinal -> char ordinal) */
807 const char *errors /* error handling */
808 );
809
810/* Translate a Py_UNICODE buffer of the given length by applying a
811 character mapping table to it and return the resulting Unicode
812 object.
813
814 The mapping table must map Unicode ordinal integers to Unicode
815 ordinal integers or None (causing deletion of the character).
816
817 Mapping tables may be dictionaries or sequences. Unmapped character
818 ordinals (ones which cause a LookupError) are left untouched and
819 are copied as-is.
820
821*/
822
823extern DL_IMPORT(PyObject *) PyUnicode_TranslateCharmap(
824 const Py_UNICODE *data, /* Unicode char buffer */
825 int length, /* Number of Py_UNICODE chars to translate */
826 PyObject *table, /* Translate table (Unicode ordinal -> Unicode ordinal or None) */
827 const char *errors /* error handling */
828 );
829
Guido van Rossumefec1152000-03-28 02:01:15 +0000830#ifdef MS_WIN32
Guido van Rossum24bdb042000-03-28 20:29:59 +0000831
Guido van Rossumefec1152000-03-28 02:01:15 +0000832/* --- MBCS codecs for Windows -------------------------------------------- */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000833
Guido van Rossumefec1152000-03-28 02:01:15 +0000834extern DL_IMPORT(PyObject*) PyUnicode_DecodeMBCS(
835 const char *string, /* MBCS encoded string */
836 int length, /* size of string */
837 const char *errors /* error handling */
838 );
839
840extern DL_IMPORT(PyObject*) PyUnicode_AsMBCSString(
841 PyObject *unicode /* Unicode object */
842 );
843
844extern DL_IMPORT(PyObject*) PyUnicode_EncodeMBCS(
845 const Py_UNICODE *data, /* Unicode char buffer */
846 int length, /* Number of Py_UNICODE chars to encode */
847 const char *errors /* error handling */
848 );
849
Guido van Rossumefec1152000-03-28 02:01:15 +0000850#endif /* MS_WIN32 */
Guido van Rossum24bdb042000-03-28 20:29:59 +0000851
Guido van Rossum9e896b32000-04-05 20:11:21 +0000852/* --- Decimal Encoder ---------------------------------------------------- */
853
854/* Takes a Unicode string holding a decimal value and writes it into
855 an output buffer using standard ASCII digit codes.
856
857 The output buffer has to provide at least length+1 bytes of storage
858 area. The output string is 0-terminated.
859
860 The encoder converts whitespace to ' ', decimal characters to their
861 corresponding ASCII digit and all other Latin-1 characters except
862 \0 as-is. Characters outside this range (Unicode ordinals 1-255)
863 are treated as errors. This includes embedded NUL characters.
864
865 Error handling is defined by the errors argument:
866
867 NULL or "strict": raise a ValueError
868 "ignore": ignore the wrong characters (these are not copied to the
869 output buffer)
870 "replace": replaces illegal characters with '?'
871
872 Returns 0 on success, -1 on failure.
873
874*/
875
876extern DL_IMPORT(int) PyUnicode_EncodeDecimal(
877 Py_UNICODE *s, /* Unicode buffer */
878 int length, /* Number of Py_UNICODE chars to encode */
879 char *output, /* Output buffer; must have size >= length+1
   (the output string is 0-terminated) */
880 const char *errors /* error handling */
881 );
882
Guido van Rossumd8225182000-03-10 22:33:05 +0000883/* --- Methods & Slots ----------------------------------------------------
884
885 These are capable of handling Unicode objects and strings on input
886 (we refer to them as strings in the descriptions) and return
887 Unicode objects or integers as appropriate. */
888
889/* Concat two strings giving a new Unicode string. */
890
891extern DL_IMPORT(PyObject*) PyUnicode_Concat(
892 PyObject *left, /* Left string */
893 PyObject *right /* Right string */
894 );
895
896/* Split a string giving a list of Unicode strings.
897
898 If sep is NULL, splitting will be done at all whitespace
899 substrings. Otherwise, splits occur at the given separator.
900
901 At most maxsplit splits will be done. If negative, no limit is set.
902
903 Separators are not included in the resulting list.
904
905*/
906
907extern DL_IMPORT(PyObject*) PyUnicode_Split(
908 PyObject *s, /* String to split */
909 PyObject *sep, /* String separator */
910 int maxsplit /* Maxsplit count */
911 );
912
913/* Ditto, but split at line breaks.
914
915 CRLF is considered to be one line break. Line breaks are not
916 included in the resulting list unless keepends is true. */
917
918extern DL_IMPORT(PyObject*) PyUnicode_Splitlines(
919 PyObject *s, /* String to split */
Guido van Rossum004d64f2000-04-11 15:39:46 +0000920 int keepends /* If true, line end markers are included */
Guido van Rossumd8225182000-03-10 22:33:05 +0000921 );
922
923/* Translate a string by applying a character mapping table to it and
924 return the resulting Unicode object.
925
926 The mapping table must map Unicode ordinal integers to Unicode
927 ordinal integers or None (causing deletion of the character).
928
929 Mapping tables may be dictionaries or sequences. Unmapped character
930 ordinals (ones which cause a LookupError) are left untouched and
931 are copied as-is.
932
933*/
934
935extern DL_IMPORT(PyObject *) PyUnicode_Translate(
936 PyObject *str, /* String */
937 PyObject *table, /* Translate table */
938 const char *errors /* error handling */
939 );
940
941/* Join a sequence of strings using the given separator and return
942 the resulting Unicode string. */
943
944extern DL_IMPORT(PyObject*) PyUnicode_Join(
945 PyObject *separator, /* Separator string */
946 PyObject *seq /* Sequence object */
947 );
948
949/* Return 1 if substr matches str[start:end] at the given tail end, 0
950 otherwise. */
951
952extern DL_IMPORT(int) PyUnicode_Tailmatch(
953 PyObject *str, /* String */
954 PyObject *substr, /* Prefix or Suffix string */
955 int start, /* Start index */
956 int end, /* Stop index */
957 int direction /* Tail end: -1 prefix, +1 suffix */
958 );
959
960/* Return the first position of substr in str[start:end] using the
961 given search direction or -1 if not found. */
962
963extern DL_IMPORT(int) PyUnicode_Find(
964 PyObject *str, /* String */
965 PyObject *substr, /* Substring to find */
966 int start, /* Start index */
967 int end, /* Stop index */
968 int direction /* Find direction: +1 forward, -1 backward */
969 );
970
Barry Warsaw51ac5802000-03-20 16:36:48 +0000971/* Count the number of occurrences of substr in str[start:end]. */
Guido van Rossumd8225182000-03-10 22:33:05 +0000972
973extern DL_IMPORT(int) PyUnicode_Count(
974 PyObject *str, /* String */
975 PyObject *substr, /* Substring to count */
976 int start, /* Start index */
977 int end /* Stop index */
978 );
979
Barry Warsaw51ac5802000-03-20 16:36:48 +0000980/* Replace at most maxcount occurrences of substr in str with replstr
Guido van Rossumd8225182000-03-10 22:33:05 +0000981 and return the resulting Unicode object. */
982
983extern DL_IMPORT(PyObject *) PyUnicode_Replace(
984 PyObject *str, /* String */
985 PyObject *substr, /* Substring to find */
986 PyObject *replstr, /* Replacement substring */
987 int maxcount /* Max. number of replacements to apply;
988 -1 = all */
989 );
990
991/* Compare two strings and return -1, 0, 1 for less than, equal,
992 greater than resp. */
993
994extern DL_IMPORT(int) PyUnicode_Compare(
995 PyObject *left, /* Left string */
996 PyObject *right /* Right string */
997 );
998
Thomas Wouters7e474022000-07-16 12:04:32 +0000999/* Apply an argument tuple or dictionary to a format string and return
Guido van Rossumd8225182000-03-10 22:33:05 +00001000 the resulting Unicode string. */
1001
1002extern DL_IMPORT(PyObject *) PyUnicode_Format(
1003 PyObject *format, /* Format string */
1004 PyObject *args /* Argument tuple or dictionary */
1005 );
1006
Guido van Rossumd0d366b2000-03-13 23:22:24 +00001007/* Checks whether element is contained in container and returns 1/0
1008 accordingly.
1009
1010 element has to coerce to a one-element Unicode string. -1 is
1011 returned in case of an error. */
1012
1013extern DL_IMPORT(int) PyUnicode_Contains(
1014 PyObject *container, /* Container string */
1015 PyObject *element /* Element string */
1016 );
1017
Guido van Rossumd8225182000-03-10 22:33:05 +00001018/* === Characters Type APIs =============================================== */
1019
1020/* These should not be used directly. Use the Py_UNICODE_IS* and
1021 Py_UNICODE_TO* macros instead.
1022
1023 These APIs are implemented in Objects/unicodectype.c.
1024
1025*/
1026
1027extern DL_IMPORT(int) _PyUnicode_IsLowercase(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001028 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001029 );
1030
1031extern DL_IMPORT(int) _PyUnicode_IsUppercase(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001032 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001033 );
1034
1035extern DL_IMPORT(int) _PyUnicode_IsTitlecase(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001036 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001037 );
1038
1039extern DL_IMPORT(int) _PyUnicode_IsWhitespace(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001040 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001041 );
1042
1043extern DL_IMPORT(int) _PyUnicode_IsLinebreak(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001044 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001045 );
1046
1047extern DL_IMPORT(Py_UNICODE) _PyUnicode_ToLowercase(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001048 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001049 );
1050
1051extern DL_IMPORT(Py_UNICODE) _PyUnicode_ToUppercase(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001052 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001053 );
1054
1055extern DL_IMPORT(Py_UNICODE) _PyUnicode_ToTitlecase(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001056 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001057 );
1058
1059extern DL_IMPORT(int) _PyUnicode_ToDecimalDigit(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001060 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001061 );
1062
1063extern DL_IMPORT(int) _PyUnicode_ToDigit(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001064 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001065 );
1066
1067extern DL_IMPORT(double) _PyUnicode_ToNumeric(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001068 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001069 );
1070
1071extern DL_IMPORT(int) _PyUnicode_IsDecimalDigit(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001072 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001073 );
1074
1075extern DL_IMPORT(int) _PyUnicode_IsDigit(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001076 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001077 );
1078
1079extern DL_IMPORT(int) _PyUnicode_IsNumeric(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001080 Py_UNICODE ch /* Unicode character */
Guido van Rossumd8225182000-03-10 22:33:05 +00001081 );
1082
Marc-André Lemburgf03e7412000-07-05 09:45:59 +00001083extern DL_IMPORT(int) _PyUnicode_IsAlpha(
Fredrik Lundh72b06852001-06-27 22:08:26 +00001084 Py_UNICODE ch /* Unicode character */
Marc-André Lemburgf03e7412000-07-05 09:45:59 +00001085 );
1086
Guido van Rossumd8225182000-03-10 22:33:05 +00001087#ifdef __cplusplus
1088}
1089#endif
1090#endif /* !Py_UNICODEOBJECT_H */