blob: e9e60d8baed4ccbc85ecbb98c4457c83d3b518ee [file] [log] [blame]
Guido van Rossumd8225182000-03-10 22:33:05 +00001#ifndef Py_UNICODEOBJECT_H
2#define Py_UNICODEOBJECT_H
Guido van Rossumd8225182000-03-10 22:33:05 +00003
4/*
5
6Unicode implementation based on original code by Fredrik Lundh,
7modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
8Unicode Integration Proposal (see file Misc/unicode.txt).
9
10(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
11
12
13 Original header:
14 --------------------------------------------------------------------
15
16 * Yet another Unicode string type for Python. This type supports the
17 * 16-bit Basic Multilingual Plane (BMP) only.
18 *
19 * Written by Fredrik Lundh, January 1999.
20 *
21 * Copyright (c) 1999 by Secret Labs AB.
22 * Copyright (c) 1999 by Fredrik Lundh.
23 *
24 * fredrik@pythonware.com
25 * http://www.pythonware.com
26 *
27 * --------------------------------------------------------------------
28 * This Unicode String Type is
29 *
30 * Copyright (c) 1999 by Secret Labs AB
31 * Copyright (c) 1999 by Fredrik Lundh
32 *
33 * By obtaining, using, and/or copying this software and/or its
34 * associated documentation, you agree that you have read, understood,
35 * and will comply with the following terms and conditions:
36 *
37 * Permission to use, copy, modify, and distribute this software and its
38 * associated documentation for any purpose and without fee is hereby
39 * granted, provided that the above copyright notice appears in all
40 * copies, and that both that copyright notice and this permission notice
41 * appear in supporting documentation, and that the name of Secret Labs
42 * AB or the author not be used in advertising or publicity pertaining to
43 * distribution of the software without specific, written prior
44 * permission.
45 *
46 * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
47 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
48 * FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
49 * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
50 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
51 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
52 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
53 * -------------------------------------------------------------------- */
54
55#include "ctype.h"
56
57/* === Internal API ======================================================= */
58
59/* --- Internal Unicode Format -------------------------------------------- */
60
61/* Set these flags if the platform has "wchar.h", "wctype.h" and the
62 wchar_t type is a 16-bit unsigned type */
63/* #define HAVE_WCHAR_H */
64/* #define HAVE_USABLE_WCHAR_T */
65
66/* Defaults for various platforms */
67#ifndef HAVE_USABLE_WCHAR_T
68
69/* Windows has a usable wchar_t type */
70# if defined(MS_WIN32)
71# define HAVE_USABLE_WCHAR_T
72# endif
73
74#endif
75
76/* If the compiler provides a wchar_t type we try to support it
77 through the interface functions PyUnicode_FromWideChar() and
78 PyUnicode_AsWideChar(). */
79
/* A usable wchar_t implies that "wchar.h" is available, so make sure
   HAVE_WCHAR_H is defined too.  The inner guard leaves an already
   existing definition (for instance one supplied by the build
   configuration, possibly with a value) untouched instead of
   redefining the macro. */
#ifdef HAVE_USABLE_WCHAR_T
# ifndef HAVE_WCHAR_H
#  define HAVE_WCHAR_H
# endif
#endif
83
84#ifdef HAVE_WCHAR_H
85# include "wchar.h"
86#endif
87
#ifndef HAVE_USABLE_WCHAR_T

/* No usable wchar_t: fall back to unsigned short, which any standard
   ANSI compiler provides.  If a short is not 16 bits on your platform,
   you have to fix the typedef below, or the module initialization code
   will complain. */

typedef unsigned short Py_UNICODE;

#else

/* The compiler defines wchar_t as a 16-bit unsigned type, so the
   compiler type can be used directly.  Works fine with all modern
   Windows platforms. */

typedef wchar_t Py_UNICODE;

#endif
105
106/* --- Internal Unicode Operations ---------------------------------------- */
107
108/* If you want Python to use the compiler's wctype.h functions instead
Barry Warsaw51ac5802000-03-20 16:36:48 +0000109 of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS or
110 configure Python using --with-ctype-functions. This reduces the
111 interpreter's code size. */
Guido van Rossumd8225182000-03-10 22:33:05 +0000112
/* Macros whose implementation depends on the build configuration:
   with a usable wchar_t and WANT_WCTYPE_FUNCTIONS they forward to the
   compiler's wctype.h routines, otherwise to the _PyUnicode_* helpers
   declared further down in this header. */

#if !defined(HAVE_USABLE_WCHAR_T) || !defined(WANT_WCTYPE_FUNCTIONS)

#define Py_UNICODE_ISSPACE(ch) _PyUnicode_IsWhitespace(ch)

#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)

#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)

#else

#include "wctype.h"

#define Py_UNICODE_ISSPACE(ch) iswspace(ch)

#define Py_UNICODE_ISLOWER(ch) iswlower(ch)
#define Py_UNICODE_ISUPPER(ch) iswupper(ch)

#define Py_UNICODE_TOLOWER(ch) towlower(ch)
#define Py_UNICODE_TOUPPER(ch) towupper(ch)

#endif

/* Macros that always forward to the _PyUnicode_* helpers, whatever
   the configuration above selected. */

#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)

#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)

#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)

#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
158
/* Copy `length` Py_UNICODE elements from `source` to `target` with
   memcpy() and yield memcpy()'s result.  As with memcpy(), the two
   buffers must not overlap. */
#define Py_UNICODE_COPY(target, source, length) \
    (memcpy((target), (source), sizeof(Py_UNICODE) * (length)))
161
/* Store `value` into each of the first `length` elements of the
   Py_UNICODE buffer `target`.

   `target` and `value` are evaluated exactly once, and the loop index
   uses a name that will not capture a caller variable called `i`, so
   arguments with side effects or arguments that mention `i` behave as
   written.  `length` is re-evaluated on every iteration and should be
   free of side effects. */
#define Py_UNICODE_FILL(target, value, length) do\
    {int fill_i_; Py_UNICODE *fill_to_ = (target);\
     Py_UNICODE fill_value_ = (value);\
     for (fill_i_ = 0; fill_i_ < (length); fill_i_++)\
         fill_to_[fill_i_] = fill_value_;}\
    while (0)
165
/* Yield 1 if the `length` elements of `substring` are byte-for-byte
   equal to the elements of `string` starting at index `offset`, and 0
   otherwise.  Both arguments are pointers to objects with `str` and
   `length` members.  No bounds check is performed here: the caller has
   to make sure that offset + substring->length stays within `string`. */
#define Py_UNICODE_MATCH(string, offset, substring) \
    (memcmp((string)->str + (offset), \
            (substring)->str, \
            (substring)->length * sizeof(Py_UNICODE)) == 0)
169
Barry Warsaw51ac5802000-03-20 16:36:48 +0000170#ifdef __cplusplus
171extern "C" {
172#endif
173
Guido van Rossumd8225182000-03-10 22:33:05 +0000174/* --- Unicode Type ------------------------------------------------------- */
175
/* In-memory layout of a Unicode object: the common object header
   followed by a pointer to the character buffer, its length and two
   cached values (hash and UTF-8 encoded form). */
176typedef struct {
177 PyObject_HEAD
178 int length; /* Length of raw Unicode data in buffer, counted in
                   Py_UNICODE elements (PyUnicode_GET_DATA_SIZE
                   multiplies it by sizeof(Py_UNICODE)) */
179 Py_UNICODE *str; /* Raw Unicode buffer */
180 long hash; /* Cached hash value; -1 if not set */
181 PyObject *utf8str; /* UTF-8 encoded version as Python string,
182 or NULL if none is stored */
183} PyUnicodeObject;
184
185extern DL_IMPORT(PyTypeObject) PyUnicode_Type;
186
/* Non-zero when the type pointer of op is exactly PyUnicode_Type.
   op is evaluated once and must be a pointer to an object with an
   ob_type member. */
#define PyUnicode_Check(op) \
    ((op)->ob_type == &PyUnicode_Type)
188
/* Fast access macros.

   These cast op to PyUnicodeObject * without any type check, so op
   must already be known to point to a Unicode object.

   PyUnicode_GET_SIZE       length in Py_UNICODE elements
   PyUnicode_GET_DATA_SIZE  length in bytes
   PyUnicode_AS_UNICODE     the buffer as Py_UNICODE *
   PyUnicode_AS_DATA        the same buffer as const char * */
#define PyUnicode_GET_SIZE(op) (((PyUnicodeObject *)(op))->length)
#define PyUnicode_AS_UNICODE(op) (((PyUnicodeObject *)(op))->str)
#define PyUnicode_GET_DATA_SIZE(op) \
    (PyUnicode_GET_SIZE(op) * sizeof(Py_UNICODE))
#define PyUnicode_AS_DATA(op) \
    ((const char *)PyUnicode_AS_UNICODE(op))
198
199/* --- Constants ---------------------------------------------------------- */
200
201/* This Unicode character will be used as replacement character during
202 decoding if the errors argument is set to "replace". Note: the
203 Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
204 Unicode 3.0. */
205
206#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)
207
208/* === Public API ========================================================= */
209
210/* --- Plain Py_UNICODE --------------------------------------------------- */
211
212/* Create a Unicode Object from the Py_UNICODE buffer u of the given
213 size. u may be NULL which causes the contents to be undefined. It
214 is the user's responsibility to fill in the needed data.
215
216 The buffer is copied into the new object. */
217
218extern DL_IMPORT(PyObject*) PyUnicode_FromUnicode(
219 const Py_UNICODE *u, /* Unicode buffer */
220 int size /* size of buffer */
221 );
222
223/* Return a read-only pointer to the Unicode object's internal
224 Py_UNICODE buffer. */
225
226extern DL_IMPORT(Py_UNICODE *) PyUnicode_AsUnicode(
227 PyObject *unicode /* Unicode object */
228 );
229
230/* Get the length of the Unicode object. */
231
232extern DL_IMPORT(int) PyUnicode_GetSize(
233 PyObject *unicode /* Unicode object */
234 );
235
236/* Coerce obj to a Unicode object and return a reference with
237 *incremented* refcount.
238
239 Coercion is done in the following way:
240
241 1. Unicode objects are passed back as-is with incremented
242 refcount.
243
244 2. String and other char buffer compatible objects are decoded
245 under the assumptions that they contain UTF-8 data. Decoding
246 is done in "strict" mode.
247
248 3. All other objects raise an exception.
249
250 The API returns NULL in case of an error. The caller is responsible
251 for decref'ing the returned objects.
252
253*/
254
255extern DL_IMPORT(PyObject*) PyUnicode_FromObject(
256 register PyObject *obj /* Object */
257 );
258
259/* --- wchar_t support for platforms which support it --------------------- */
260
261#ifdef HAVE_WCHAR_H
262
263/* Create a Unicode Object from the wchar_t buffer w of the given
264 size.
265
266 The buffer is copied into the new object. */
267
268extern DL_IMPORT(PyObject*) PyUnicode_FromWideChar(
269 register const wchar_t *w, /* wchar_t buffer */
270 int size /* size of buffer */
271 );
272
273/* Copies the Unicode Object contents into the wchar_t buffer w. At
274 most size wchar_t characters are copied.
275
276 Returns the number of wchar_t characters copied or -1 in case of an
277 error. */
278
279extern DL_IMPORT(int) PyUnicode_AsWideChar(
280 PyUnicodeObject *unicode, /* Unicode object */
281 register wchar_t *w, /* wchar_t buffer */
282 int size /* size of buffer */
283 );
284
285#endif
286
287/* === Builtin Codecs =====================================================
288
289 Many of these APIs take two arguments encoding and errors. These
290 parameters encoding and errors have the same semantics as the ones
291 of the builtin unicode() API.
292
293 Setting encoding to NULL causes the default encoding to be used
294 which is UTF-8.
295
296 Error handling is set by errors which may also be set to NULL
297 meaning to use the default handling defined for the codec. Default
298 error handling for all builtin codecs is "strict" (ValueErrors are
299 raised).
300
301 The codecs all use a similar interface. Only deviations from the
302 generic ones are documented.
303
304*/
305
306/* --- Generic Codecs ----------------------------------------------------- */
307
308/* Create a Unicode object by decoding the encoded string s of the
309 given size. */
310
311extern DL_IMPORT(PyObject*) PyUnicode_Decode(
312 const char *s, /* encoded string */
313 int size, /* size of buffer */
314 const char *encoding, /* encoding */
315 const char *errors /* error handling */
316 );
317
318/* Encodes a Py_UNICODE buffer of the given size and returns a
319 Python string object. */
320
321extern DL_IMPORT(PyObject*) PyUnicode_Encode(
322 const Py_UNICODE *s, /* Unicode char buffer */
323 int size, /* number of Py_UNICODE chars to encode */
324 const char *encoding, /* encoding */
325 const char *errors /* error handling */
326 );
327
328/* Encodes a Unicode object and returns the result as Python string
329 object. */
330
331extern DL_IMPORT(PyObject*) PyUnicode_AsEncodedString(
332 PyObject *unicode, /* Unicode object */
333 const char *encoding, /* encoding */
334 const char *errors /* error handling */
335 );
336
337/* --- UTF-8 Codecs ------------------------------------------------------- */
338
339extern DL_IMPORT(PyObject*) PyUnicode_DecodeUTF8(
340 const char *string, /* UTF-8 encoded string */
341 int length, /* size of string */
342 const char *errors /* error handling */
343 );
344
345extern DL_IMPORT(PyObject*) PyUnicode_AsUTF8String(
346 PyObject *unicode /* Unicode object */
347 );
348
349extern DL_IMPORT(PyObject*) PyUnicode_EncodeUTF8(
350 const Py_UNICODE *data, /* Unicode char buffer */
351 int length, /* number of Py_UNICODE chars to encode */
352 const char *errors /* error handling */
353 );
354
355/* --- UTF-16 Codecs ------------------------------------------------------ */
356
357/* Decodes length bytes from a UTF-16 encoded buffer string and return
358 the corresponding Unicode object.
359
360 errors (if non-NULL) defines the error handling. It defaults
361 to "strict".
362
363 If byteorder is non-NULL, the decoder starts decoding using the
364 given byte order:
365
366 *byteorder == -1: little endian
367 *byteorder == 0: native order
368 *byteorder == 1: big endian
369
370 and then switches according to all BOM marks it finds in the input
371 data. BOM marks are not copied into the resulting Unicode string.
372 After completion, *byteorder is set to the current byte order at
373 the end of input data.
374
375 If byteorder is NULL, the codec starts in native order mode.
376
377*/
378
379extern DL_IMPORT(PyObject*) PyUnicode_DecodeUTF16(
380 const char *string, /* UTF-16 encoded string */
381 int length, /* size of string */
382 const char *errors, /* error handling */
383 int *byteorder /* pointer to byteorder to use
384 0=native;-1=LE,1=BE; updated on
385 exit */
386 );
387
388/* Returns a Python string using the UTF-16 encoding in native byte
389 order. The string always starts with a BOM mark. */
390
391extern DL_IMPORT(PyObject*) PyUnicode_AsUTF16String(
392 PyObject *unicode /* Unicode object */
393 );
394
395/* Returns a Python string object holding the UTF-16 encoded value of
396 the Unicode data in s.
397
398 If byteorder is not 0, output is written according to the following
399 byte order:
400
401 byteorder == -1: little endian
402 byteorder == 0: native byte order (writes a BOM mark)
403 byteorder == 1: big endian
404
405 If byteorder is 0, the output string will always start with the
406 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
407 prepended.
408
409 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
410 UCS-2. This trick makes it possible to add full UTF-16 capabilities
411 at a later point without compromising the APIs.
412
413*/
414
415extern DL_IMPORT(PyObject*) PyUnicode_EncodeUTF16(
416 const Py_UNICODE *data, /* Unicode char buffer */
417 int length, /* number of Py_UNICODE chars to encode */
418 const char *errors, /* error handling */
419 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
420 );
421
422/* --- Unicode-Escape Codecs ---------------------------------------------- */
423
424extern DL_IMPORT(PyObject*) PyUnicode_DecodeUnicodeEscape(
425 const char *string, /* Unicode-Escape encoded string */
426 int length, /* size of string */
427 const char *errors /* error handling */
428 );
429
430extern DL_IMPORT(PyObject*) PyUnicode_AsUnicodeEscapeString(
431 PyObject *unicode /* Unicode object */
432 );
433
434extern DL_IMPORT(PyObject*) PyUnicode_EncodeUnicodeEscape(
435 const Py_UNICODE *data, /* Unicode char buffer */
436 int length /* Number of Py_UNICODE chars to encode */
437 );
438
439/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
440
441extern DL_IMPORT(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
442 const char *string, /* Raw-Unicode-Escape encoded string */
443 int length, /* size of string */
444 const char *errors /* error handling */
445 );
446
447extern DL_IMPORT(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
448 PyObject *unicode /* Unicode object */
449 );
450
451extern DL_IMPORT(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
452 const Py_UNICODE *data, /* Unicode char buffer */
453 int length /* Number of Py_UNICODE chars to encode */
454 );
455
456/* --- Latin-1 Codecs -----------------------------------------------------
457
458 Note: Latin-1 corresponds to the first 256 Unicode ordinals.
459
460*/
461
462extern DL_IMPORT(PyObject*) PyUnicode_DecodeLatin1(
463 const char *string, /* Latin-1 encoded string */
464 int length, /* size of string */
465 const char *errors /* error handling */
466 );
467
468extern DL_IMPORT(PyObject*) PyUnicode_AsLatin1String(
469 PyObject *unicode /* Unicode object */
470 );
471
472extern DL_IMPORT(PyObject*) PyUnicode_EncodeLatin1(
473 const Py_UNICODE *data, /* Unicode char buffer */
474 int length, /* Number of Py_UNICODE chars to encode */
475 const char *errors /* error handling */
476 );
477
478/* --- ASCII Codecs -------------------------------------------------------
479
480 Only 7-bit ASCII data is accepted. All other codes generate errors.
481
482*/
483
484extern DL_IMPORT(PyObject*) PyUnicode_DecodeASCII(
485 const char *string, /* ASCII encoded string */
486 int length, /* size of string */
487 const char *errors /* error handling */
488 );
489
490extern DL_IMPORT(PyObject*) PyUnicode_AsASCIIString(
491 PyObject *unicode /* Unicode object */
492 );
493
494extern DL_IMPORT(PyObject*) PyUnicode_EncodeASCII(
495 const Py_UNICODE *data, /* Unicode char buffer */
496 int length, /* Number of Py_UNICODE chars to encode */
497 const char *errors /* error handling */
498 );
499
500/* --- Character Map Codecs -----------------------------------------------
501
502 This codec uses mappings to encode and decode characters.
503
504 Decoding mappings must map single string characters to single
505 Unicode characters, integers (which are then interpreted as Unicode
506 ordinals) or None (meaning "undefined mapping" and causing an
507 error).
508
509 Encoding mappings must map single Unicode characters to single
510 string characters, integers (which are then interpreted as Latin-1
511 ordinals) or None (meaning "undefined mapping" and causing an
512 error).
513
514 If a character lookup fails with a LookupError, the character is
515 copied as-is meaning that its ordinal value will be interpreted as
516 Unicode or Latin-1 ordinal resp. Because of this mappings only need
517 to contain those mappings which map characters to different code
518 points.
519
520*/
521
522extern DL_IMPORT(PyObject*) PyUnicode_DecodeCharmap(
523 const char *string, /* Encoded string */
524 int length, /* size of string */
525 PyObject *mapping, /* character mapping
526 (char ordinal -> unicode ordinal) */
527 const char *errors /* error handling */
528 );
529
530extern DL_IMPORT(PyObject*) PyUnicode_AsCharmapString(
531 PyObject *unicode, /* Unicode object */
532 PyObject *mapping /* character mapping
533 (unicode ordinal -> char ordinal) */
534 );
535
536extern DL_IMPORT(PyObject*) PyUnicode_EncodeCharmap(
537 const Py_UNICODE *data, /* Unicode char buffer */
538 int length, /* Number of Py_UNICODE chars to encode */
539 PyObject *mapping, /* character mapping
540 (unicode ordinal -> char ordinal) */
541 const char *errors /* error handling */
542 );
543
544/* Translate a Py_UNICODE buffer of the given length by applying a
545 character mapping table to it and return the resulting Unicode
546 object.
547
548 The mapping table must map Unicode ordinal integers to Unicode
549 ordinal integers or None (causing deletion of the character).
550
551 Mapping tables may be dictionaries or sequences. Unmapped character
552 ordinals (ones which cause a LookupError) are left untouched and
553 are copied as-is.
554
555*/
556
557extern DL_IMPORT(PyObject *) PyUnicode_TranslateCharmap(
558 const Py_UNICODE *data, /* Unicode char buffer */
559 int length, /* Number of Py_UNICODE chars to encode */
560 PyObject *table, /* Translate table */
561 const char *errors /* error handling */
562 );
563
Guido van Rossumefec1152000-03-28 02:01:15 +0000564#ifdef MS_WIN32
565/* --- MBCS codecs for Windows -------------------------------------------- */
566extern DL_IMPORT(PyObject*) PyUnicode_DecodeMBCS(
567 const char *string, /* MBCS encoded string */
568 int length, /* size of string */
569 const char *errors /* error handling */
570 );
571
572extern DL_IMPORT(PyObject*) PyUnicode_AsMBCSString(
573 PyObject *unicode /* Unicode object */
574 );
575
576extern DL_IMPORT(PyObject*) PyUnicode_EncodeMBCS(
577 const Py_UNICODE *data, /* Unicode char buffer */
578 int length, /* Number of Py_UNICODE chars to encode */
579 const char *errors /* error handling */
580 );
581
582
583#endif /* MS_WIN32 */
Guido van Rossumd8225182000-03-10 22:33:05 +0000584/* --- Methods & Slots ----------------------------------------------------
585
586 These are capable of handling Unicode objects and strings on input
587 (we refer to them as strings in the descriptions) and return
588 Unicode objects or integers as appropriate. */
589
590/* Concat two strings giving a new Unicode string. */
591
592extern DL_IMPORT(PyObject*) PyUnicode_Concat(
593 PyObject *left, /* Left string */
594 PyObject *right /* Right string */
595 );
596
597/* Split a string giving a list of Unicode strings.
598
599 If sep is NULL, splitting will be done at all whitespace
600 substrings. Otherwise, splits occur at the given separator.
601
602 At most maxsplit splits will be done. If negative, no limit is set.
603
604 Separators are not included in the resulting list.
605
606*/
607
608extern DL_IMPORT(PyObject*) PyUnicode_Split(
609 PyObject *s, /* String to split */
610 PyObject *sep, /* String separator */
611 int maxsplit /* Maxsplit count */
612 );
613
614/* Ditto, but split at line breaks.
615
616 CRLF is considered to be one line break. Line breaks are not
617 included in the resulting list. */
618
619extern DL_IMPORT(PyObject*) PyUnicode_Splitlines(
620 PyObject *s, /* String to split */
621 int maxsplit /* Maxsplit count */
622 );
623
624/* Translate a string by applying a character mapping table to it and
625 return the resulting Unicode object.
626
627 The mapping table must map Unicode ordinal integers to Unicode
628 ordinal integers or None (causing deletion of the character).
629
630 Mapping tables may be dictionaries or sequences. Unmapped character
631 ordinals (ones which cause a LookupError) are left untouched and
632 are copied as-is.
633
634*/
635
636extern DL_IMPORT(PyObject *) PyUnicode_Translate(
637 PyObject *str, /* String */
638 PyObject *table, /* Translate table */
639 const char *errors /* error handling */
640 );
641
642/* Join a sequence of strings using the given separator and return
643 the resulting Unicode string. */
644
645extern DL_IMPORT(PyObject*) PyUnicode_Join(
646 PyObject *separator, /* Separator string */
647 PyObject *seq /* Sequence object */
648 );
649
650/* Return 1 if substr matches str[start:end] at the given tail end, 0
651 otherwise. */
652
653extern DL_IMPORT(int) PyUnicode_Tailmatch(
654 PyObject *str, /* String */
655 PyObject *substr, /* Prefix or Suffix string */
656 int start, /* Start index */
657 int end, /* Stop index */
658 int direction /* Tail end: -1 prefix, +1 suffix */
659 );
660
661/* Return the first position of substr in str[start:end] using the
662 given search direction or -1 if not found. */
663
664extern DL_IMPORT(int) PyUnicode_Find(
665 PyObject *str, /* String */
666 PyObject *substr, /* Substring to find */
667 int start, /* Start index */
668 int end, /* Stop index */
669 int direction /* Find direction: +1 forward, -1 backward */
670 );
671
Barry Warsaw51ac5802000-03-20 16:36:48 +0000672/* Count the number of occurrences of substr in str[start:end]. */
Guido van Rossumd8225182000-03-10 22:33:05 +0000673
674extern DL_IMPORT(int) PyUnicode_Count(
675 PyObject *str, /* String */
676 PyObject *substr, /* Substring to count */
677 int start, /* Start index */
678 int end /* Stop index */
679 );
680
Barry Warsaw51ac5802000-03-20 16:36:48 +0000681/* Replace at most maxcount occurrences of substr in str with replstr
Guido van Rossumd8225182000-03-10 22:33:05 +0000682 and return the resulting Unicode object. */
683
684extern DL_IMPORT(PyObject *) PyUnicode_Replace(
685 PyObject *str, /* String */
686 PyObject *substr, /* Substring to find */
687 PyObject *replstr, /* Substring to replace */
688 int maxcount /* Max. number of replacements to apply;
689 -1 = all */
690 );
691
692/* Compare two strings and return -1, 0, 1 for less than, equal,
693 greater than resp. */
694
695extern DL_IMPORT(int) PyUnicode_Compare(
696 PyObject *left, /* Left string */
697 PyObject *right /* Right string */
698 );
699
700/* Apply an argument tuple or dictionary to a format string and return
701 the resulting Unicode string. */
702
703extern DL_IMPORT(PyObject *) PyUnicode_Format(
704 PyObject *format, /* Format string */
705 PyObject *args /* Argument tuple or dictionary */
706 );
707
Guido van Rossumd0d366b2000-03-13 23:22:24 +0000708/* Checks whether element is contained in container and return 1/0
709 accordingly.
710
711 element has to coerce to a one-element Unicode string. -1 is
712 returned in case of an error. */
713
714extern DL_IMPORT(int) PyUnicode_Contains(
715 PyObject *container, /* Container string */
716 PyObject *element /* Element string */
717 );
718
Guido van Rossumd8225182000-03-10 22:33:05 +0000719/* === Characters Type APIs =============================================== */
720
721/* These should not be used directly. Use the Py_UNICODE_IS* and
722 Py_UNICODE_TO* macros instead.
723
724 These APIs are implemented in Objects/unicodectype.c.
725
726*/
727
728extern DL_IMPORT(int) _PyUnicode_IsLowercase(
729 register const Py_UNICODE ch /* Unicode character */
730 );
731
732extern DL_IMPORT(int) _PyUnicode_IsUppercase(
733 register const Py_UNICODE ch /* Unicode character */
734 );
735
736extern DL_IMPORT(int) _PyUnicode_IsTitlecase(
737 register const Py_UNICODE ch /* Unicode character */
738 );
739
740extern DL_IMPORT(int) _PyUnicode_IsWhitespace(
741 register const Py_UNICODE ch /* Unicode character */
742 );
743
744extern DL_IMPORT(int) _PyUnicode_IsLinebreak(
745 register const Py_UNICODE ch /* Unicode character */
746 );
747
748extern DL_IMPORT(Py_UNICODE) _PyUnicode_ToLowercase(
749 register const Py_UNICODE ch /* Unicode character */
750 );
751
752extern DL_IMPORT(Py_UNICODE) _PyUnicode_ToUppercase(
753 register const Py_UNICODE ch /* Unicode character */
754 );
755
756extern DL_IMPORT(Py_UNICODE) _PyUnicode_ToTitlecase(
757 register const Py_UNICODE ch /* Unicode character */
758 );
759
760extern DL_IMPORT(int) _PyUnicode_ToDecimalDigit(
761 register const Py_UNICODE ch /* Unicode character */
762 );
763
764extern DL_IMPORT(int) _PyUnicode_ToDigit(
765 register const Py_UNICODE ch /* Unicode character */
766 );
767
768extern DL_IMPORT(double) _PyUnicode_ToNumeric(
769 register const Py_UNICODE ch /* Unicode character */
770 );
771
772extern DL_IMPORT(int) _PyUnicode_IsDecimalDigit(
773 register const Py_UNICODE ch /* Unicode character */
774 );
775
776extern DL_IMPORT(int) _PyUnicode_IsDigit(
777 register const Py_UNICODE ch /* Unicode character */
778 );
779
780extern DL_IMPORT(int) _PyUnicode_IsNumeric(
781 register const Py_UNICODE ch /* Unicode character */
782 );
783
784#ifdef __cplusplus
785}
786#endif
787#endif /* !Py_UNICODEOBJECT_H */