blob: 358c18af1ca548f34bc7e622fe32e246b0ad693f [file] [log] [blame]
Guido van Rossumd8225182000-03-10 22:33:05 +00001#ifndef Py_UNICODEOBJECT_H
2#define Py_UNICODEOBJECT_H
3#ifdef __cplusplus
4extern "C" {
5#endif
6
/*

Unicode implementation based on original code by Fredrik Lundh,
modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
Unicode Integration Proposal (see file Misc/unicode.txt).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.


 Original header:
 --------------------------------------------------------------------

 * Yet another Unicode string type for Python. This type supports the
 * 16-bit Basic Multilingual Plane (BMP) only.
 *
 * Written by Fredrik Lundh, January 1999.
 *
 * Copyright (c) 1999 by Secret Labs AB.
 * Copyright (c) 1999 by Fredrik Lundh.
 *
 * fredrik@pythonware.com
 * http://www.pythonware.com
 *
 * --------------------------------------------------------------------
 * This Unicode String Type is
 *
 * Copyright (c) 1999 by Secret Labs AB
 * Copyright (c) 1999 by Fredrik Lundh
 *
 * By obtaining, using, and/or copying this software and/or its
 * associated documentation, you agree that you have read, understood,
 * and will comply with the following terms and conditions:
 *
 * Permission to use, copy, modify, and distribute this software and its
 * associated documentation for any purpose and without fee is hereby
 * granted, provided that the above copyright notice appears in all
 * copies, and that both that copyright notice and this permission notice
 * appear in supporting documentation, and that the name of Secret Labs
 * AB or the author not be used in advertising or publicity pertaining to
 * distribution of the software without specific, written prior
 * permission.
 *
 * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 * -------------------------------------------------------------------- */

#include "ctype.h"

/* === Internal API ======================================================= */

/* --- Internal Unicode Format -------------------------------------------- */

/* Set these flags if the platform has "wchar.h", "wctype.h" and the
   wchar_t type is a 16-bit unsigned type */
/* #define HAVE_WCHAR_H */
/* #define HAVE_USABLE_WCHAR_T */

/* Defaults for various platforms */
#ifndef HAVE_USABLE_WCHAR_T

/* Windows has a usable wchar_t type */
# if defined(MS_WIN32)
#  define HAVE_USABLE_WCHAR_T
# endif

#endif

/* If the compiler provides a wchar_t type we try to support it
   through the interface functions PyUnicode_FromWideChar() and
   PyUnicode_AsWideChar(). */

/* A usable wchar_t implies that "wchar.h" is available. */
#ifdef HAVE_USABLE_WCHAR_T
# define HAVE_WCHAR_H
#endif

#ifdef HAVE_WCHAR_H
# include "wchar.h"
#endif

#ifdef HAVE_USABLE_WCHAR_T

/* If the compiler defines wchar_t as a 16-bit unsigned type we can
   use the compiler type directly. Works fine with all modern Windows
   platforms. */

typedef wchar_t Py_UNICODE;

#else

/* Use if you have a standard ANSI compiler, without wchar_t support.
   If a short is not 16 bits on your platform, you have to fix the
   typedef below, or the module initialization code will complain. */

typedef unsigned short Py_UNICODE;

#endif

/* --- Internal Unicode Operations ---------------------------------------- */

/* If you want Python to use the compiler's wctype.h functions instead
   of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS.
   This reduces the interpreter's code size. */

#if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)

#include "wctype.h"

/* Space, lower/upper tests and conversions come from wctype.h; the
   remaining operations still map to the _PyUnicode_* functions. */

#define Py_UNICODE_ISSPACE(ch) iswspace(ch)

#define Py_UNICODE_ISLOWER(ch) iswlower(ch)
#define Py_UNICODE_ISUPPER(ch) iswupper(ch)
#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)

#define Py_UNICODE_TOLOWER(ch) towlower(ch)
#define Py_UNICODE_TOUPPER(ch) towupper(ch)
#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)

#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)

#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)

#else

/* Every operation maps to the corresponding _PyUnicode_* function. */

#define Py_UNICODE_ISSPACE(ch) _PyUnicode_IsWhitespace(ch)

#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)

#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)

#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)

#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)

#endif

/* Copy `length` Py_UNICODE elements from `source` to `target` with
   memcpy(); the two buffers must therefore not overlap.  The expression
   yields memcpy()'s return value, i.e. `target`. */
#define Py_UNICODE_COPY(target, source, length)\
    (memcpy((target), (source), (length)*sizeof(Py_UNICODE)))

/* Store `value` into the first `length` elements of the Py_UNICODE
   buffer `target`.

   Each argument is evaluated exactly once, and the macro's own locals
   carry a py_fill_ prefix so that an argument expression mentioning a
   caller variable such as `i` is not captured by the macro.  Expands to
   a single statement; a length of zero or less stores nothing. */
#define Py_UNICODE_FILL(target, value, length) do\
    {Py_UNICODE *py_fill_dst_ = (target);\
     const Py_UNICODE py_fill_val_ = (value);\
     const int py_fill_len_ = (length);\
     int py_fill_i_;\
     for (py_fill_i_ = 0; py_fill_i_ < py_fill_len_; py_fill_i_++)\
         py_fill_dst_[py_fill_i_] = py_fill_val_;}\
    while (0)

/* Non-zero if the data of `substring` equals the data of `string`
   starting at element `offset`.  Both arguments are pointers to objects
   with `str` and `length` members.  No bounds check is done here: the
   caller must make sure offset + substring->length stays inside string.
   `substring` is evaluated twice. */
#define Py_UNICODE_MATCH(string, offset, substring)\
    (!memcmp((string)->str + (offset), (substring)->str,\
             (substring)->length*sizeof(Py_UNICODE)))

172/* --- Unicode Type ------------------------------------------------------- */
173
174typedef struct {
175 PyObject_HEAD
176 int length; /* Length of raw Unicode data in buffer */
177 Py_UNICODE *str; /* Raw Unicode buffer */
178 long hash; /* Hash value; -1 if not set */
179 PyObject *utf8str; /* UTF-8 encoded version as Python string,
180 or NULL */
181} PyUnicodeObject;
182
183extern DL_IMPORT(PyTypeObject) PyUnicode_Type;
184
185#define PyUnicode_Check(op) (((op)->ob_type == &PyUnicode_Type))
186
/* Fast access macros.

   These cast op to PyUnicodeObject * and read the field directly; they
   perform no type or NULL check.

   GET_SIZE      -- length member (number of Py_UNICODE elements)
   GET_DATA_SIZE -- length * sizeof(Py_UNICODE), i.e. size in bytes
   AS_UNICODE    -- the str member (Py_UNICODE *)
   AS_DATA       -- the str member viewed as const char * */
#define PyUnicode_GET_SIZE(op) \
    (((PyUnicodeObject *)(op))->length)
#define PyUnicode_GET_DATA_SIZE(op) \
    (((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE))
#define PyUnicode_AS_UNICODE(op) \
    (((PyUnicodeObject *)(op))->str)
#define PyUnicode_AS_DATA(op) \
    ((const char *)((PyUnicodeObject *)(op))->str)

/* --- Constants ---------------------------------------------------------- */

/* This Unicode character will be used as replacement character during
   decoding if the errors argument is set to "replace". Note: the
   Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
   Unicode 3.0. */

#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)

206/* === Public API ========================================================= */
207
208/* --- Plain Py_UNICODE --------------------------------------------------- */
209
210/* Create a Unicode Object from the Py_UNICODE buffer u of the given
211 size. u may be NULL which causes the contents to be undefined. It
212 is the user's responsibility to fill in the needed data.
213
214 The buffer is copied into the new object. */
215
216extern DL_IMPORT(PyObject*) PyUnicode_FromUnicode(
217 const Py_UNICODE *u, /* Unicode buffer */
218 int size /* size of buffer */
219 );
220
221/* Return a read-only pointer to the Unicode object's internal
222 Py_UNICODE buffer. */
223
224extern DL_IMPORT(Py_UNICODE *) PyUnicode_AsUnicode(
225 PyObject *unicode /* Unicode object */
226 );
227
228/* Get the length of the Unicode object. */
229
230extern DL_IMPORT(int) PyUnicode_GetSize(
231 PyObject *unicode /* Unicode object */
232 );
233
234/* Coerce obj to an Unicode object and return a reference with
235 *incremented* refcount.
236
237 Coercion is done in the following way:
238
239 1. Unicode objects are passed back as-is with incremented
240 refcount.
241
242 2. String and other char buffer compatible objects are decoded
243 under the assumptions that they contain UTF-8 data. Decoding
244 is done in "strict" mode.
245
246 3. All other objects raise an exception.
247
248 The API returns NULL in case of an error. The caller is responsible
249 for decref'ing the returned objects.
250
251*/
252
253extern DL_IMPORT(PyObject*) PyUnicode_FromObject(
254 register PyObject *obj /* Object */
255 );
256
/* --- wchar_t support for platforms which support it --------------------- */

#ifdef HAVE_WCHAR_H

/* Create a Unicode Object from the wchar_t buffer w of the given
   size.

   The buffer is copied into the new object. */

extern DL_IMPORT(PyObject*) PyUnicode_FromWideChar(
    const wchar_t *w,           /* wchar_t buffer */
    int size                    /* size of buffer */
    );

/* Copies the Unicode Object contents into the wchar_t buffer w. At
   most size wchar_t characters are copied.

   Returns the number of wchar_t characters copied or -1 in case of an
   error. */

extern DL_IMPORT(int) PyUnicode_AsWideChar(
    PyUnicodeObject *unicode,   /* Unicode object */
    wchar_t *w,                 /* wchar_t buffer */
    int size                    /* size of buffer */
    );

#endif

/* === Builtin Codecs =====================================================

   Many of these APIs take two arguments, encoding and errors. These
   parameters have the same semantics as the ones of the builtin
   unicode() API.

   Setting encoding to NULL causes the default encoding to be used,
   which is UTF-8.

   Error handling is set by errors, which may also be set to NULL,
   meaning to use the default handling defined for the codec. Default
   error handling for all builtin codecs is "strict" (ValueErrors are
   raised).

   The codecs all use a similar interface. Only deviations from the
   generic ones are documented.

*/

304/* --- Generic Codecs ----------------------------------------------------- */
305
306/* Create a Unicode object by decoding the encoded string s of the
307 given size. */
308
309extern DL_IMPORT(PyObject*) PyUnicode_Decode(
310 const char *s, /* encoded string */
311 int size, /* size of buffer */
312 const char *encoding, /* encoding */
313 const char *errors /* error handling */
314 );
315
316/* Encodes a Py_UNICODE buffer of the given size and returns a
317 Python string object. */
318
319extern DL_IMPORT(PyObject*) PyUnicode_Encode(
320 const Py_UNICODE *s, /* Unicode char buffer */
321 int size, /* number of Py_UNICODE chars to encode */
322 const char *encoding, /* encoding */
323 const char *errors /* error handling */
324 );
325
326/* Encodes a Unicode object and returns the result as Python string
327 object. */
328
329extern DL_IMPORT(PyObject*) PyUnicode_AsEncodedString(
330 PyObject *unicode, /* Unicode object */
331 const char *encoding, /* encoding */
332 const char *errors /* error handling */
333 );
334
335/* --- UTF-8 Codecs ------------------------------------------------------- */
336
337extern DL_IMPORT(PyObject*) PyUnicode_DecodeUTF8(
338 const char *string, /* UTF-8 encoded string */
339 int length, /* size of string */
340 const char *errors /* error handling */
341 );
342
343extern DL_IMPORT(PyObject*) PyUnicode_AsUTF8String(
344 PyObject *unicode /* Unicode object */
345 );
346
347extern DL_IMPORT(PyObject*) PyUnicode_EncodeUTF8(
348 const Py_UNICODE *data, /* Unicode char buffer */
349 int length, /* number of Py_UNICODE chars to encode */
350 const char *errors /* error handling */
351 );
352
353/* --- UTF-16 Codecs ------------------------------------------------------ */
354
355/* Decodes length bytes from a UTF-16 encoded buffer string and return
356 the corresponding Unicode object.
357
358 errors (if non-NULL) defines the error handling. It defaults
359 to "strict".
360
361 If byteorder is non-NULL, the decoder starts decoding using the
362 given byte order:
363
364 *byteorder == -1: little endian
365 *byteorder == 0: native order
366 *byteorder == 1: big endian
367
368 and then switches according to all BOM marks it finds in the input
369 data. BOM marks are not copied into the resulting Unicode string.
370 After completion, *byteorder is set to the current byte order at
371 the end of input data.
372
373 If byteorder is NULL, the codec starts in native order mode.
374
375*/
376
377extern DL_IMPORT(PyObject*) PyUnicode_DecodeUTF16(
378 const char *string, /* UTF-16 encoded string */
379 int length, /* size of string */
380 const char *errors, /* error handling */
381 int *byteorder /* pointer to byteorder to use
382 0=native;-1=LE,1=BE; updated on
383 exit */
384 );
385
386/* Returns a Python string using the UTF-16 encoding in native byte
387 order. The string always starts with a BOM mark. */
388
389extern DL_IMPORT(PyObject*) PyUnicode_AsUTF16String(
390 PyObject *unicode /* Unicode object */
391 );
392
393/* Returns a Python string object holding the UTF-16 encoded value of
394 the Unicode data in s.
395
396 If byteorder is not 0, output is written according to the following
397 byte order:
398
399 byteorder == -1: little endian
400 byteorder == 0: native byte order (writes a BOM mark)
401 byteorder == 1: big endian
402
403 If byteorder is 0, the output string will always start with the
404 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
405 prepended.
406
407 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
408 UCS-2. This trick makes it possible to add full UTF-16 capabilities
409 at a later point without comprimising the APIs.
410
411*/
412
413extern DL_IMPORT(PyObject*) PyUnicode_EncodeUTF16(
414 const Py_UNICODE *data, /* Unicode char buffer */
415 int length, /* number of Py_UNICODE chars to encode */
416 const char *errors, /* error handling */
417 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
418 );
419
420/* --- Unicode-Escape Codecs ---------------------------------------------- */
421
422extern DL_IMPORT(PyObject*) PyUnicode_DecodeUnicodeEscape(
423 const char *string, /* Unicode-Escape encoded string */
424 int length, /* size of string */
425 const char *errors /* error handling */
426 );
427
428extern DL_IMPORT(PyObject*) PyUnicode_AsUnicodeEscapeString(
429 PyObject *unicode /* Unicode object */
430 );
431
432extern DL_IMPORT(PyObject*) PyUnicode_EncodeUnicodeEscape(
433 const Py_UNICODE *data, /* Unicode char buffer */
434 int length /* Number of Py_UNICODE chars to encode */
435 );
436
437/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
438
439extern DL_IMPORT(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
440 const char *string, /* Raw-Unicode-Escape encoded string */
441 int length, /* size of string */
442 const char *errors /* error handling */
443 );
444
445extern DL_IMPORT(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
446 PyObject *unicode /* Unicode object */
447 );
448
449extern DL_IMPORT(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
450 const Py_UNICODE *data, /* Unicode char buffer */
451 int length /* Number of Py_UNICODE chars to encode */
452 );
453
454/* --- Latin-1 Codecs -----------------------------------------------------
455
456 Note: Latin-1 corresponds to the first 256 Unicode ordinals.
457
458*/
459
460extern DL_IMPORT(PyObject*) PyUnicode_DecodeLatin1(
461 const char *string, /* Latin-1 encoded string */
462 int length, /* size of string */
463 const char *errors /* error handling */
464 );
465
466extern DL_IMPORT(PyObject*) PyUnicode_AsLatin1String(
467 PyObject *unicode /* Unicode object */
468 );
469
470extern DL_IMPORT(PyObject*) PyUnicode_EncodeLatin1(
471 const Py_UNICODE *data, /* Unicode char buffer */
472 int length, /* Number of Py_UNICODE chars to encode */
473 const char *errors /* error handling */
474 );
475
476/* --- ASCII Codecs -------------------------------------------------------
477
478 Only 7-bit ASCII data is excepted. All other codes generate errors.
479
480*/
481
482extern DL_IMPORT(PyObject*) PyUnicode_DecodeASCII(
483 const char *string, /* ASCII encoded string */
484 int length, /* size of string */
485 const char *errors /* error handling */
486 );
487
488extern DL_IMPORT(PyObject*) PyUnicode_AsASCIIString(
489 PyObject *unicode /* Unicode object */
490 );
491
492extern DL_IMPORT(PyObject*) PyUnicode_EncodeASCII(
493 const Py_UNICODE *data, /* Unicode char buffer */
494 int length, /* Number of Py_UNICODE chars to encode */
495 const char *errors /* error handling */
496 );
497
498/* --- Character Map Codecs -----------------------------------------------
499
500 This codec uses mappings to encode and decode characters.
501
502 Decoding mappings must map single string characters to single
503 Unicode characters, integers (which are then interpreted as Unicode
504 ordinals) or None (meaning "undefined mapping" and causing an
505 error).
506
507 Encoding mappings must map single Unicode characters to single
508 string characters, integers (which are then interpreted as Latin-1
509 ordinals) or None (meaning "undefined mapping" and causing an
510 error).
511
512 If a character lookup fails with a LookupError, the character is
513 copied as-is meaning that its ordinal value will be interpreted as
514 Unicode or Latin-1 ordinal resp. Because of this mappings only need
515 to contain those mappings which map characters to different code
516 points.
517
518*/
519
520extern DL_IMPORT(PyObject*) PyUnicode_DecodeCharmap(
521 const char *string, /* Encoded string */
522 int length, /* size of string */
523 PyObject *mapping, /* character mapping
524 (char ordinal -> unicode ordinal) */
525 const char *errors /* error handling */
526 );
527
528extern DL_IMPORT(PyObject*) PyUnicode_AsCharmapString(
529 PyObject *unicode, /* Unicode object */
530 PyObject *mapping /* character mapping
531 (unicode ordinal -> char ordinal) */
532 );
533
534extern DL_IMPORT(PyObject*) PyUnicode_EncodeCharmap(
535 const Py_UNICODE *data, /* Unicode char buffer */
536 int length, /* Number of Py_UNICODE chars to encode */
537 PyObject *mapping, /* character mapping
538 (unicode ordinal -> char ordinal) */
539 const char *errors /* error handling */
540 );
541
542/* Translate a Py_UNICODE buffer of the given length by applying a
543 character mapping table to it and return the resulting Unicode
544 object.
545
546 The mapping table must map Unicode ordinal integers to Unicode
547 ordinal integers or None (causing deletion of the character).
548
549 Mapping tables may be dictionaries or sequences. Unmapped character
550 ordinals (ones which cause a LookupError) are left untouched and
551 are copied as-is.
552
553*/
554
555extern DL_IMPORT(PyObject *) PyUnicode_TranslateCharmap(
556 const Py_UNICODE *data, /* Unicode char buffer */
557 int length, /* Number of Py_UNICODE chars to encode */
558 PyObject *table, /* Translate table */
559 const char *errors /* error handling */
560 );
561
562/* --- Methods & Slots ----------------------------------------------------
563
564 These are capable of handling Unicode objects and strings on input
565 (we refer to them as strings in the descriptions) and return
566 Unicode objects or integers as apporpriate. */
567
568/* Concat two strings giving a new Unicode string. */
569
570extern DL_IMPORT(PyObject*) PyUnicode_Concat(
571 PyObject *left, /* Left string */
572 PyObject *right /* Right string */
573 );
574
575/* Split a string giving a list of Unicode strings.
576
577 If sep is NULL, splitting will be done at all whitespace
578 substrings. Otherwise, splits occur at the given separator.
579
580 At most maxsplit splits will be done. If negative, no limit is set.
581
582 Separators are not included in the resulting list.
583
584*/
585
586extern DL_IMPORT(PyObject*) PyUnicode_Split(
587 PyObject *s, /* String to split */
588 PyObject *sep, /* String separator */
589 int maxsplit /* Maxsplit count */
590 );
591
592/* Dito, but split at line breaks.
593
594 CRLF is considered to be one line break. Line breaks are not
595 included in the resulting list. */
596
597extern DL_IMPORT(PyObject*) PyUnicode_Splitlines(
598 PyObject *s, /* String to split */
599 int maxsplit /* Maxsplit count */
600 );
601
602/* Translate a string by applying a character mapping table to it and
603 return the resulting Unicode object.
604
605 The mapping table must map Unicode ordinal integers to Unicode
606 ordinal integers or None (causing deletion of the character).
607
608 Mapping tables may be dictionaries or sequences. Unmapped character
609 ordinals (ones which cause a LookupError) are left untouched and
610 are copied as-is.
611
612*/
613
614extern DL_IMPORT(PyObject *) PyUnicode_Translate(
615 PyObject *str, /* String */
616 PyObject *table, /* Translate table */
617 const char *errors /* error handling */
618 );
619
620/* Join a sequence of strings using the given separator and return
621 the resulting Unicode string. */
622
623extern DL_IMPORT(PyObject*) PyUnicode_Join(
624 PyObject *separator, /* Separator string */
625 PyObject *seq /* Sequence object */
626 );
627
628/* Return 1 if substr matches str[start:end] at the given tail end, 0
629 otherwise. */
630
631extern DL_IMPORT(int) PyUnicode_Tailmatch(
632 PyObject *str, /* String */
633 PyObject *substr, /* Prefix or Suffix string */
634 int start, /* Start index */
635 int end, /* Stop index */
636 int direction /* Tail end: -1 prefix, +1 suffix */
637 );
638
639/* Return the first position of substr in str[start:end] using the
640 given search direction or -1 if not found. */
641
642extern DL_IMPORT(int) PyUnicode_Find(
643 PyObject *str, /* String */
644 PyObject *substr, /* Substring to find */
645 int start, /* Start index */
646 int end, /* Stop index */
647 int direction /* Find direction: +1 forward, -1 backward */
648 );
649
650/* Count the number of occurances of substr in str[start:end]. */
651
652extern DL_IMPORT(int) PyUnicode_Count(
653 PyObject *str, /* String */
654 PyObject *substr, /* Substring to count */
655 int start, /* Start index */
656 int end /* Stop index */
657 );
658
659/* Replace at most maxcount occurances of substr in str with replstr
660 and return the resulting Unicode object. */
661
662extern DL_IMPORT(PyObject *) PyUnicode_Replace(
663 PyObject *str, /* String */
664 PyObject *substr, /* Substring to find */
665 PyObject *replstr, /* Substring to replace */
666 int maxcount /* Max. number of replacements to apply;
667 -1 = all */
668 );
669
670/* Compare two strings and return -1, 0, 1 for less than, equal,
671 greater than resp. */
672
673extern DL_IMPORT(int) PyUnicode_Compare(
674 PyObject *left, /* Left string */
675 PyObject *right /* Right string */
676 );
677
678/* Apply a argument tuple or dictionar to a format string and return
679 the resulting Unicode string. */
680
681extern DL_IMPORT(PyObject *) PyUnicode_Format(
682 PyObject *format, /* Format string */
683 PyObject *args /* Argument tuple or dictionary */
684 );
685
686/* === Characters Type APIs =============================================== */
687
688/* These should not be used directly. Use the Py_UNICODE_IS* and
689 Py_UNICODE_TO* macros instead.
690
691 These APIs are implemented in Objects/unicodectype.c.
692
693*/
694
695extern DL_IMPORT(int) _PyUnicode_IsLowercase(
696 register const Py_UNICODE ch /* Unicode character */
697 );
698
699extern DL_IMPORT(int) _PyUnicode_IsUppercase(
700 register const Py_UNICODE ch /* Unicode character */
701 );
702
703extern DL_IMPORT(int) _PyUnicode_IsTitlecase(
704 register const Py_UNICODE ch /* Unicode character */
705 );
706
707extern DL_IMPORT(int) _PyUnicode_IsWhitespace(
708 register const Py_UNICODE ch /* Unicode character */
709 );
710
711extern DL_IMPORT(int) _PyUnicode_IsLinebreak(
712 register const Py_UNICODE ch /* Unicode character */
713 );
714
715extern DL_IMPORT(Py_UNICODE) _PyUnicode_ToLowercase(
716 register const Py_UNICODE ch /* Unicode character */
717 );
718
719extern DL_IMPORT(Py_UNICODE) _PyUnicode_ToUppercase(
720 register const Py_UNICODE ch /* Unicode character */
721 );
722
723extern DL_IMPORT(Py_UNICODE) _PyUnicode_ToTitlecase(
724 register const Py_UNICODE ch /* Unicode character */
725 );
726
727extern DL_IMPORT(int) _PyUnicode_ToDecimalDigit(
728 register const Py_UNICODE ch /* Unicode character */
729 );
730
731extern DL_IMPORT(int) _PyUnicode_ToDigit(
732 register const Py_UNICODE ch /* Unicode character */
733 );
734
735extern DL_IMPORT(double) _PyUnicode_ToNumeric(
736 register const Py_UNICODE ch /* Unicode character */
737 );
738
739extern DL_IMPORT(int) _PyUnicode_IsDecimalDigit(
740 register const Py_UNICODE ch /* Unicode character */
741 );
742
743extern DL_IMPORT(int) _PyUnicode_IsDigit(
744 register const Py_UNICODE ch /* Unicode character */
745 );
746
747extern DL_IMPORT(int) _PyUnicode_IsNumeric(
748 register const Py_UNICODE ch /* Unicode character */
749 );
750
751#ifdef __cplusplus
752}
753#endif
754#endif /* !Py_UNICODEOBJECT_H */