| #ifndef Py_UNICODEOBJECT_H | 
 | #define Py_UNICODEOBJECT_H | 
 |  | 
 | #include <stdarg.h> | 
 |  | 
 | /* | 
 |  | 
 | Unicode implementation based on original code by Fredrik Lundh, | 
 | modified by Marc-Andre Lemburg (mal@lemburg.com) according to the | 
 | Unicode Integration Proposal. (See | 
 | http://www.egenix.com/files/python/unicode-proposal.txt). | 
 |  | 
 | Copyright (c) Corporation for National Research Initiatives. | 
 |  | 
 |  | 
 |  Original header: | 
 |  -------------------------------------------------------------------- | 
 |  | 
 |  * Yet another Unicode string type for Python.  This type supports the | 
 |  * 16-bit Basic Multilingual Plane (BMP) only. | 
 |  * | 
 |  * Written by Fredrik Lundh, January 1999. | 
 |  * | 
 |  * Copyright (c) 1999 by Secret Labs AB. | 
 |  * Copyright (c) 1999 by Fredrik Lundh. | 
 |  * | 
 |  * fredrik@pythonware.com | 
 |  * http://www.pythonware.com | 
 |  * | 
 |  * -------------------------------------------------------------------- | 
 |  * This Unicode String Type is | 
 |  * | 
 |  * Copyright (c) 1999 by Secret Labs AB | 
 |  * Copyright (c) 1999 by Fredrik Lundh | 
 |  * | 
 |  * By obtaining, using, and/or copying this software and/or its | 
 |  * associated documentation, you agree that you have read, understood, | 
 |  * and will comply with the following terms and conditions: | 
 |  * | 
 |  * Permission to use, copy, modify, and distribute this software and its | 
 |  * associated documentation for any purpose and without fee is hereby | 
 |  * granted, provided that the above copyright notice appears in all | 
 |  * copies, and that both that copyright notice and this permission notice | 
 |  * appear in supporting documentation, and that the name of Secret Labs | 
 |  * AB or the author not be used in advertising or publicity pertaining to | 
 |  * distribution of the software without specific, written prior | 
 |  * permission. | 
 |  * | 
 |  * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO | 
 |  * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND | 
 |  * FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR | 
 |  * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | 
 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | 
 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT | 
 |  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | 
 |  * -------------------------------------------------------------------- */ | 
 |  | 
 | #include <ctype.h> | 
 |  | 
 | /* === Internal API ======================================================= */ | 
 |  | 
 | /* --- Internal Unicode Format -------------------------------------------- */ | 
 |  | 
 | /* Python 3.x requires unicode */ | 
 | #define Py_USING_UNICODE | 
 |  | 
 | #ifndef SIZEOF_WCHAR_T | 
 | #error Must define SIZEOF_WCHAR_T | 
 | #endif | 
 |  | 
 | #define Py_UNICODE_SIZE SIZEOF_WCHAR_T | 
 |  | 
 | /* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE. | 
 |    Otherwise, Unicode strings are stored as UCS-2 (with limited support | 
 |    for UTF-16) */ | 
 |  | 
 | #if Py_UNICODE_SIZE >= 4 | 
 | #define Py_UNICODE_WIDE | 
 | #endif | 
 |  | 
 | /* Set these flags if the platform has "wchar.h" and the | 
 |    wchar_t type is a 16-bit unsigned type */ | 
 | /* #define HAVE_WCHAR_H */ | 
 | /* #define HAVE_USABLE_WCHAR_T */ | 
 |  | 
 | /* If the compiler provides a wchar_t type we try to support it | 
 |    through the interface functions PyUnicode_FromWideChar(), | 
 |    PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */ | 
 |  | 
 | #ifdef HAVE_USABLE_WCHAR_T | 
 | # ifndef HAVE_WCHAR_H | 
 | #  define HAVE_WCHAR_H | 
 | # endif | 
 | #endif | 
 |  | 
 | #ifdef HAVE_WCHAR_H | 
 | #  include <wchar.h> | 
 | #endif | 
 |  | 
 | /* Py_UCS4, Py_UCS2 and Py_UCS1 are typedefs for the 32-bit, 16-bit and | 
 |    8-bit unsigned integer types used for the respective unicode | 
 |    representations. */ | 
 | typedef uint32_t Py_UCS4; | 
 | typedef uint16_t Py_UCS2; | 
 | typedef uint8_t Py_UCS1; | 
 |  | 
 | #ifdef __cplusplus | 
 | extern "C" { | 
 | #endif | 
 |  | 
 |  | 
 | PyAPI_DATA(PyTypeObject) PyUnicode_Type; | 
 | PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type; | 
 |  | 
 | #define PyUnicode_Check(op) \ | 
 |                  PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS) | 
 | #define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type) | 
 |  | 
 | /* --- Constants ---------------------------------------------------------- */ | 
 |  | 
 | /* This Unicode character will be used as replacement character during | 
 |    decoding if the errors argument is set to "replace". Note: the | 
 |    Unicode character U+FFFD is the official REPLACEMENT CHARACTER in | 
 |    Unicode 3.0. */ | 
 |  | 
 | #define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD) | 
 |  | 
 | /* === Public API ========================================================= */ | 
 |  | 
 | /* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */ | 
 | PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize( | 
 |     const char *u,             /* UTF-8 encoded string */ | 
 |     Py_ssize_t size            /* size of buffer */ | 
 |     ); | 
 |  | 
 | /* Similar to PyUnicode_FromUnicode(), but u points to null-terminated | 
 |    UTF-8 encoded bytes.  The size is determined with strlen(). */ | 
 | PyAPI_FUNC(PyObject*) PyUnicode_FromString( | 
 |     const char *u              /* UTF-8 encoded string */ | 
 |     ); | 
 |  | 
 | #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 | 
 | PyAPI_FUNC(PyObject*) PyUnicode_Substring( | 
 |     PyObject *str,              /* Unicode object to take the substring of */ | 
 |     Py_ssize_t start,           /* start index */ | 
 |     Py_ssize_t end);            /* end index -- presumably exclusive; confirm */ | 
 | #endif | 
 |  | 
 | #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 | 
 | /* Copy the string into a UCS4 buffer including the null character if copy_null | 
 |    is set. Return NULL and raise an exception on error. Raise a SystemError if | 
 |    the buffer is smaller than the string. Return buffer on success. | 
 |  | 
 |    buflen is the length of the buffer in (Py_UCS4) characters. */ | 
 | PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4( | 
 |     PyObject *unicode, | 
 |     Py_UCS4* buffer, | 
 |     Py_ssize_t buflen, | 
 |     int copy_null); | 
 |  | 
 | /* Copy the string into a UCS4 buffer. A new buffer is allocated using | 
 |    PyMem_Malloc; if this fails, NULL is returned with a memory error | 
 |    exception set. */ | 
 | PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode); | 
 | #endif | 
 |  | 
 | #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 | 
 | /* Get the length of the Unicode object. */ | 
 |  | 
 | PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength( | 
 |     PyObject *unicode | 
 | ); | 
 | #endif | 
 |  | 
 | /* Get the number of Py_UNICODE units in the | 
 |    string representation. */ | 
 |  | 
 | PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize( | 
 |     PyObject *unicode           /* Unicode object */ | 
 |     ) Py_DEPRECATED(3.3); | 
 |  | 
 | #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 | 
 | /* Read a character from the string. */ | 
 |  | 
 | PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar( | 
 |     PyObject *unicode, | 
 |     Py_ssize_t index | 
 |     ); | 
 |  | 
 | /* Write a character to the string. The string must have been created through | 
 |    PyUnicode_New, must not be shared, and must not have been hashed yet. | 
 |  | 
 |    Return 0 on success, -1 on error. */ | 
 |  | 
 | PyAPI_FUNC(int) PyUnicode_WriteChar( | 
 |     PyObject *unicode, | 
 |     Py_ssize_t index, | 
 |     Py_UCS4 character | 
 |     ); | 
 | #endif | 
 |  | 
 | /* Resize a Unicode object. The length is the number of characters, except | 
 |    if the kind of the string is PyUnicode_WCHAR_KIND: in this case, the length | 
 |    is the number of Py_UNICODE characters. | 
 |  | 
 |    On success, *unicode is modified to point to the new (resized) object | 
 |    and 0 is returned. | 
 |  | 
 |    Try to resize the string in place (which is usually faster than allocating | 
 |    a new string and copying characters), or create a new string. | 
 |  | 
 |    Error handling is implemented as follows: an exception is set, -1 | 
 |    is returned and *unicode is left untouched. | 
 |  | 
 |    WARNING: The function doesn't check string content, the result may not be a | 
 |             string in canonical representation. */ | 
 |  | 
 | PyAPI_FUNC(int) PyUnicode_Resize( | 
 |     PyObject **unicode,         /* Pointer to the Unicode object */ | 
 |     Py_ssize_t length           /* New length */ | 
 |     ); | 
 |  | 
 | /* Decode obj to a Unicode object. | 
 |  | 
 |    bytes, bytearray and other bytes-like objects are decoded according to the | 
 |    given encoding and error handler. The encoding and error handler can be | 
 |    NULL to have the interface use UTF-8 and "strict". | 
 |  | 
 |    All other objects (including Unicode objects) raise an exception. | 
 |  | 
 |    The API returns NULL in case of an error. The caller is responsible | 
 |    for decref'ing the returned objects. | 
 |  | 
 | */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject( | 
 |     PyObject *obj,              /* Object */ | 
 |     const char *encoding,       /* encoding */ | 
 |     const char *errors          /* error handling */ | 
 |     ); | 
 |  | 
 | /* Copy an instance of a Unicode subtype to a new true Unicode object if | 
 |    necessary. If obj is already a true Unicode object (not a subtype), return | 
 |    the reference with *incremented* refcount. | 
 |  | 
 |    The API returns NULL in case of an error. The caller is responsible | 
 |    for decref'ing the returned objects. | 
 |  | 
 | */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_FromObject( | 
 |     PyObject *obj      /* Object */ | 
 |     ); | 
 |  | 
 | PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV( | 
 |     const char *format,   /* ASCII-encoded string  */ | 
 |     va_list vargs | 
 |     ); | 
 | PyAPI_FUNC(PyObject *) PyUnicode_FromFormat( | 
 |     const char *format,   /* ASCII-encoded string  */ | 
 |     ... | 
 |     ); | 
 |  | 
 | PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **); | 
 | PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **); | 
 | PyAPI_FUNC(PyObject *) PyUnicode_InternFromString( | 
 |     const char *u              /* UTF-8 encoded string */ | 
 |     ); | 
 |  | 
 | /* Use only if you know it's a string: op is cast to PyASCIIObject* without | 
 |    any type check and the interned field of its state is read. */ | 
 | #define PyUnicode_CHECK_INTERNED(op) \ | 
 |     (((PyASCIIObject *)(op))->state.interned) | 
 |  | 
 | /* --- wchar_t support for platforms which support it --------------------- */ | 
 |  | 
 | #ifdef HAVE_WCHAR_H | 
 |  | 
 | /* Create a Unicode Object from the wchar_t buffer w of the given | 
 |    size. | 
 |  | 
 |    The buffer is copied into the new object. */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar( | 
 |     const wchar_t *w,           /* wchar_t buffer */ | 
 |     Py_ssize_t size             /* size of buffer */ | 
 |     ); | 
 |  | 
 | /* Copies the Unicode Object contents into the wchar_t buffer w.  At | 
 |    most size wchar_t characters are copied. | 
 |  | 
 |    Note that the resulting wchar_t string may or may not be | 
 |    0-terminated.  It is the responsibility of the caller to make sure | 
 |    that the wchar_t string is 0-terminated in case this is required by | 
 |    the application. | 
 |  | 
 |    Returns the number of wchar_t characters copied (excluding a | 
 |    possibly trailing 0-termination character) or -1 in case of an | 
 |    error. */ | 
 |  | 
 | PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar( | 
 |     PyObject *unicode,          /* Unicode object */ | 
 |     wchar_t *w,                 /* wchar_t buffer */ | 
 |     Py_ssize_t size             /* size of buffer */ | 
 |     ); | 
 |  | 
 | /* Convert the Unicode object to a wide character string. The output string | 
 |    always ends with a null character. If size is not NULL, write the number of | 
 |    wide characters (excluding the null character) into *size. | 
 |  | 
 |    Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it) | 
 |    on success. On error, returns NULL and raises a MemoryError; *size is | 
 |    undefined in that case. */ | 
 |  | 
 | PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString( | 
 |     PyObject *unicode,          /* Unicode object */ | 
 |     Py_ssize_t *size            /* number of characters of the result */ | 
 |     ); | 
 |  | 
 | #endif | 
 |  | 
 | /* --- Unicode ordinals --------------------------------------------------- */ | 
 |  | 
 | /* Create a Unicode Object from the given Unicode code point ordinal. | 
 |  | 
 |    The ordinal must be in range(0x110000). A ValueError is | 
 |    raised in case it is not. | 
 |  | 
 | */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal); | 
 |  | 
 | /* --- Free-list management ----------------------------------------------- */ | 
 |  | 
 | /* Clear the free list used by the Unicode implementation. | 
 |  | 
 |    This can be used to release memory used for objects on the free | 
 |    list back to the Python memory allocator. | 
 |  | 
 | */ | 
 |  | 
 | PyAPI_FUNC(int) PyUnicode_ClearFreeList(void); | 
 |  | 
 | /* === Builtin Codecs ===================================================== | 
 |  | 
 |    Many of these APIs take two arguments encoding and errors. These | 
 |    parameters encoding and errors have the same semantics as the ones | 
 |    of the builtin str() API. | 
 |  | 
 |    Setting encoding to NULL causes the default encoding (UTF-8) to be used. | 
 |  | 
 |    Error handling is set by errors which may also be set to NULL | 
 |    meaning to use the default handling defined for the codec. Default | 
 |    error handling for all builtin codecs is "strict" (ValueErrors are | 
 |    raised). | 
 |  | 
 |    The codecs all use a similar interface. Only deviations from the | 
 |    generic ones are documented. | 
 |  | 
 | */ | 
 |  | 
 | /* --- Manage the default encoding ---------------------------------------- */ | 
 |  | 
 | /* Returns "utf-8".  */ | 
 | PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void); | 
 |  | 
 | /* --- Generic Codecs ----------------------------------------------------- */ | 
 |  | 
 | /* Create a Unicode object by decoding the encoded string s of the | 
 |    given size. */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_Decode( | 
 |     const char *s,              /* encoded string */ | 
 |     Py_ssize_t size,            /* size of buffer */ | 
 |     const char *encoding,       /* encoding */ | 
 |     const char *errors          /* error handling */ | 
 |     ); | 
 |  | 
 | /* Decode a Unicode object unicode and return the result as Python | 
 |    object. | 
 |  | 
 |    This API is DEPRECATED. The only supported standard encoding is rot13. | 
 |    Use PyCodec_Decode() to decode with rot13 and non-standard codecs | 
 |    that decode from str. */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject( | 
 |     PyObject *unicode,          /* Unicode object */ | 
 |     const char *encoding,       /* encoding */ | 
 |     const char *errors          /* error handling */ | 
 |     ) Py_DEPRECATED(3.6); | 
 |  | 
 | /* Decode a Unicode object unicode and return the result as Unicode | 
 |    object. | 
 |  | 
 |    This API is DEPRECATED. The only supported standard encoding is rot13. | 
 |    Use PyCodec_Decode() to decode with rot13 and non-standard codecs | 
 |    that decode from str to str. */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode( | 
 |     PyObject *unicode,          /* Unicode object */ | 
 |     const char *encoding,       /* encoding */ | 
 |     const char *errors          /* error handling */ | 
 |     ) Py_DEPRECATED(3.6); | 
 |  | 
 | /* Encodes a Unicode object and returns the result as Python | 
 |    object. | 
 |  | 
 |    This API is DEPRECATED.  It is superseded by PyUnicode_AsEncodedString() | 
 |    since all standard encodings (except rot13) encode str to bytes. | 
 |    Use PyCodec_Encode() for encoding with rot13 and non-standard codecs | 
 |    that encode from str to non-bytes. */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject( | 
 |     PyObject *unicode,          /* Unicode object */ | 
 |     const char *encoding,       /* encoding */ | 
 |     const char *errors          /* error handling */ | 
 |     ) Py_DEPRECATED(3.6); | 
 |  | 
 | /* Encodes a Unicode object and returns the result as Python string | 
 |    object. */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString( | 
 |     PyObject *unicode,          /* Unicode object */ | 
 |     const char *encoding,       /* encoding */ | 
 |     const char *errors          /* error handling */ | 
 |     ); | 
 |  | 
 | /* Encodes a Unicode object and returns the result as Unicode | 
 |    object. | 
 |  | 
 |    This API is DEPRECATED.  The only supported standard encoding is rot13. | 
 |    Use PyCodec_Encode() to encode with rot13 and non-standard codecs | 
 |    that encode from str to str. */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode( | 
 |     PyObject *unicode,          /* Unicode object */ | 
 |     const char *encoding,       /* encoding */ | 
 |     const char *errors          /* error handling */ | 
 |     ) Py_DEPRECATED(3.6); | 
 |  | 
 | /* Build an encoding map. */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap( | 
 |     PyObject* string            /* 256 character map */ | 
 |    ); | 
 |  | 
 | /* --- UTF-7 Codecs ------------------------------------------------------- */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7( | 
 |     const char *string,         /* UTF-7 encoded string */ | 
 |     Py_ssize_t length,          /* size of string */ | 
 |     const char *errors          /* error handling */ | 
 |     ); | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful( | 
 |     const char *string,         /* UTF-7 encoded string */ | 
 |     Py_ssize_t length,          /* size of string */ | 
 |     const char *errors,         /* error handling */ | 
 |     Py_ssize_t *consumed        /* bytes consumed */ | 
 |     ); | 
 |  | 
 | /* --- UTF-8 Codecs ------------------------------------------------------- */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8( | 
 |     const char *string,         /* UTF-8 encoded string */ | 
 |     Py_ssize_t length,          /* size of string */ | 
 |     const char *errors          /* error handling */ | 
 |     ); | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful( | 
 |     const char *string,         /* UTF-8 encoded string */ | 
 |     Py_ssize_t length,          /* size of string */ | 
 |     const char *errors,         /* error handling */ | 
 |     Py_ssize_t *consumed        /* bytes consumed */ | 
 |     ); | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String( | 
 |     PyObject *unicode           /* Unicode object */ | 
 |     ); | 
 |  | 
 | /* --- UTF-32 Codecs ------------------------------------------------------ */ | 
 |  | 
 | /* Decodes length bytes from a UTF-32 encoded buffer string and returns | 
 |    the corresponding Unicode object. | 
 |  | 
 |    errors (if non-NULL) defines the error handling. It defaults | 
 |    to "strict". | 
 |  | 
 |    If byteorder is non-NULL, the decoder starts decoding using the | 
 |    given byte order: | 
 |  | 
 |     *byteorder == -1: little endian | 
 |     *byteorder == 0:  native order | 
 |     *byteorder == 1:  big endian | 
 |  | 
 |    In native mode, the first four bytes of the stream are checked for a | 
 |    BOM mark. If found, the BOM mark is analysed, the byte order | 
 |    adjusted and the BOM skipped.  In the other modes, no BOM mark | 
 |    interpretation is done. After completion, *byteorder is set to the | 
 |    current byte order at the end of input data. | 
 |  | 
 |    If byteorder is NULL, the codec starts in native order mode. | 
 |  | 
 | */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32( | 
 |     const char *string,         /* UTF-32 encoded string */ | 
 |     Py_ssize_t length,          /* size of string */ | 
 |     const char *errors,         /* error handling */ | 
 |     int *byteorder              /* pointer to byteorder to use | 
 |                                    0=native;-1=LE,1=BE; updated on | 
 |                                    exit */ | 
 |     ); | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful( | 
 |     const char *string,         /* UTF-32 encoded string */ | 
 |     Py_ssize_t length,          /* size of string */ | 
 |     const char *errors,         /* error handling */ | 
 |     int *byteorder,             /* pointer to byteorder to use | 
 |                                    0=native;-1=LE,1=BE; updated on | 
 |                                    exit */ | 
 |     Py_ssize_t *consumed        /* bytes consumed */ | 
 |     ); | 
 |  | 
 | /* Returns a Python string using the UTF-32 encoding in native byte | 
 |    order. The string always starts with a BOM mark.  */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String( | 
 |     PyObject *unicode           /* Unicode object */ | 
 |     ); | 
 |  | 
 | /* Returns a Python string object holding the UTF-32 encoded value of | 
 |    the Unicode data. | 
 |  | 
 |    The output byte order is selected by byteorder: | 
 |  | 
 |    byteorder == -1: little endian | 
 |    byteorder == 0:  native byte order (writes a BOM mark) | 
 |    byteorder == 1:  big endian | 
 |  | 
 |    If byteorder is 0, the output string will always start with the | 
 |    Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is | 
 |    prepended. | 
 |  | 
 |    NOTE(review): no declaration directly follows this comment; it appears | 
 |    to describe a UTF-32 encoder that takes byteorder by value -- confirm | 
 |    which function it documents. | 
 |  | 
 | */ | 
 |  | 
 | /* --- UTF-16 Codecs ------------------------------------------------------ */ | 
 |  | 
 | /* Decodes length bytes from a UTF-16 encoded buffer string and returns | 
 |    the corresponding Unicode object. | 
 |  | 
 |    errors (if non-NULL) defines the error handling. It defaults | 
 |    to "strict". | 
 |  | 
 |    If byteorder is non-NULL, the decoder starts decoding using the | 
 |    given byte order: | 
 |  | 
 |     *byteorder == -1: little endian | 
 |     *byteorder == 0:  native order | 
 |     *byteorder == 1:  big endian | 
 |  | 
 |    In native mode, the first two bytes of the stream are checked for a | 
 |    BOM mark. If found, the BOM mark is analysed, the byte order | 
 |    adjusted and the BOM skipped.  In the other modes, no BOM mark | 
 |    interpretation is done. After completion, *byteorder is set to the | 
 |    current byte order at the end of input data. | 
 |  | 
 |    If byteorder is NULL, the codec starts in native order mode. | 
 |  | 
 | */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16( | 
 |     const char *string,         /* UTF-16 encoded string */ | 
 |     Py_ssize_t length,          /* size of string */ | 
 |     const char *errors,         /* error handling */ | 
 |     int *byteorder              /* pointer to byteorder to use | 
 |                                    0=native;-1=LE,1=BE; updated on | 
 |                                    exit */ | 
 |     ); | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful( | 
 |     const char *string,         /* UTF-16 encoded string */ | 
 |     Py_ssize_t length,          /* size of string */ | 
 |     const char *errors,         /* error handling */ | 
 |     int *byteorder,             /* pointer to byteorder to use | 
 |                                    0=native;-1=LE,1=BE; updated on | 
 |                                    exit */ | 
 |     Py_ssize_t *consumed        /* bytes consumed */ | 
 |     ); | 
 |  | 
 | /* Returns a Python string using the UTF-16 encoding in native byte | 
 |    order. The string always starts with a BOM mark.  */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String( | 
 |     PyObject *unicode           /* Unicode object */ | 
 |     ); | 
 |  | 
 | /* --- Unicode-Escape Codecs ---------------------------------------------- */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape( | 
 |     const char *string,         /* Unicode-Escape encoded string */ | 
 |     Py_ssize_t length,          /* size of string */ | 
 |     const char *errors          /* error handling */ | 
 |     ); | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString( | 
 |     PyObject *unicode           /* Unicode object */ | 
 |     ); | 
 |  | 
 | /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape( | 
 |     const char *string,         /* Raw-Unicode-Escape encoded string */ | 
 |     Py_ssize_t length,          /* size of string */ | 
 |     const char *errors          /* error handling */ | 
 |     ); | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString( | 
 |     PyObject *unicode           /* Unicode object */ | 
 |     ); | 
 |  | 
 | /* --- Latin-1 Codecs ----------------------------------------------------- | 
 |  | 
 |    Note: Latin-1 corresponds to the first 256 Unicode ordinals. */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1( | 
 |     const char *string,         /* Latin-1 encoded string */ | 
 |     Py_ssize_t length,          /* size of string */ | 
 |     const char *errors          /* error handling */ | 
 |     ); | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String( | 
 |     PyObject *unicode           /* Unicode object */ | 
 |     ); | 
 |  | 
 | /* --- ASCII Codecs ------------------------------------------------------- | 
 |  | 
 |    Only 7-bit ASCII data is accepted. All other codes generate errors. | 
 |  | 
 | */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII( | 
 |     const char *string,         /* ASCII encoded string */ | 
 |     Py_ssize_t length,          /* size of string */ | 
 |     const char *errors          /* error handling */ | 
 |     ); | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString( | 
 |     PyObject *unicode           /* Unicode object */ | 
 |     ); | 
 |  | 
 | /* --- Character Map Codecs ----------------------------------------------- | 
 |  | 
 |    This codec uses mappings to encode and decode characters. | 
 |  | 
 |    Decoding mappings must map byte ordinals (integers in the range from 0 to | 
 |    255) to Unicode strings, integers (which are then interpreted as Unicode | 
 |    ordinals) or None.  Unmapped data bytes (ones which cause a LookupError) | 
 |    as well as mapped to None, 0xFFFE or '\ufffe' are treated as "undefined | 
 |    mapping" and cause an error. | 
 |  | 
 |    Encoding mappings must map Unicode ordinal integers to bytes objects, | 
 |    integers in the range from 0 to 255 or None.  Unmapped character | 
 |    ordinals (ones which cause a LookupError) as well as mapped to | 
 |    None are treated as "undefined mapping" and cause an error. | 
 |  | 
 | */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap( | 
 |     const char *string,         /* Encoded string */ | 
 |     Py_ssize_t length,          /* size of string */ | 
 |     PyObject *mapping,          /* decoding mapping */ | 
 |     const char *errors          /* error handling */ | 
 |     ); | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString( | 
 |     PyObject *unicode,          /* Unicode object */ | 
 |     PyObject *mapping           /* encoding mapping */ | 
 |     ); | 
 |  | 
 | /* --- MBCS codecs for Windows -------------------------------------------- */ | 
 |  | 
 | #ifdef MS_WINDOWS | 
 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS( | 
 |     const char *string,         /* MBCS encoded string */ | 
 |     Py_ssize_t length,          /* size of string */ | 
 |     const char *errors          /* error handling */ | 
 |     ); | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful( | 
 |     const char *string,         /* MBCS encoded string */ | 
 |     Py_ssize_t length,          /* size of string */ | 
 |     const char *errors,         /* error handling */ | 
 |     Py_ssize_t *consumed        /* bytes consumed */ | 
 |     ); | 
 |  | 
 | #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 | 
 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful( | 
 |     int code_page,              /* code page number */ | 
 |     const char *string,         /* encoded string */ | 
 |     Py_ssize_t length,          /* size of string */ | 
 |     const char *errors,         /* error handling */ | 
 |     Py_ssize_t *consumed        /* bytes consumed */ | 
 |     ); | 
 | #endif | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString( | 
 |     PyObject *unicode           /* Unicode object */ | 
 |     ); | 
 |  | 
 | #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 | 
 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage( | 
 |     int code_page,              /* code page number */ | 
 |     PyObject *unicode,          /* Unicode object */ | 
 |     const char *errors          /* error handling */ | 
 |     ); | 
 | #endif | 
 |  | 
 | #endif /* MS_WINDOWS */ | 
 |  | 
 | /* --- Locale encoding --------------------------------------------------- */ | 
 |  | 
 | #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 | 
 | /* Decode a string from the current locale encoding. The error handler is | 
 |    selected by *errors*: the decoder is strict with "strict", while | 
 |    "surrogateescape" (PEP 383) escapes undecodable bytes. With | 
 |    "surrogateescape", a byte sequence that could be decoded as a surrogate | 
 |    character is escaped instead of being decoded. *str* must end with a null | 
 |    character but cannot contain embedded null characters. | 
 |  | 
 |    NOTE(review): the accepted *errors* values are inferred from an older | 
 |    description written in terms of an integer surrogateescape flag, which is | 
 |    not a parameter of this function; presumably NULL means "strict" -- | 
 |    confirm against the implementation. */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize( | 
 |     const char *str, | 
 |     Py_ssize_t len, | 
 |     const char *errors); | 
 |  | 
 | /* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string | 
 |    length using strlen(). */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale( | 
 |     const char *str, | 
 |     const char *errors); | 
 |  | 
 | /* Encode a Unicode object to the current locale encoding. The error handler | 
 |    is selected by *errors*: the encoder is strict with "strict", otherwise | 
 |    the "surrogateescape" error handler is used. Return a bytes object. The | 
 |    string cannot contain embedded null characters. | 
 |  | 
 |    NOTE(review): the accepted *errors* values are inferred from an older | 
 |    description written in terms of an integer surrogateescape flag, which is | 
 |    not a parameter of this function; presumably NULL means "strict" -- | 
 |    confirm against the implementation. */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale( | 
 |     PyObject *unicode, | 
 |     const char *errors | 
 |     ); | 
 | #endif | 
 |  | 
 | /* --- File system encoding ---------------------------------------------- */ | 
 |  | 
 | /* ParseTuple converter: encode str objects to bytes using | 
 |    PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */ | 
 |  | 
 | PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*); | 
 |  | 
 | /* ParseTuple converter: decode bytes objects to unicode using | 
 |    PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */ | 
 |  | 
 | PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*); | 
 |  | 
 | /* Decode a null-terminated string using Py_FileSystemDefaultEncoding | 
 |    and the "surrogateescape" error handler. | 
 |  | 
 |    If Py_FileSystemDefaultEncoding is not set, fall back to the locale | 
 |    encoding. | 
 |  | 
 |    Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known. | 
 | */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault( | 
 |     const char *s               /* encoded string */ | 
 |     ); | 
 |  | 
 | /* Decode a string using Py_FileSystemDefaultEncoding | 
 |    and the "surrogateescape" error handler. | 
 |  | 
 |    If Py_FileSystemDefaultEncoding is not set, fall back to the locale | 
 |    encoding. | 
 | */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize( | 
 |     const char *s,               /* encoded string */ | 
 |     Py_ssize_t size              /* size */ | 
 |     ); | 
 |  | 
 | /* Encode a Unicode object to Py_FileSystemDefaultEncoding with the | 
 |    "surrogateescape" error handler, and return bytes. | 
 |  | 
 |    If Py_FileSystemDefaultEncoding is not set, fall back to the locale | 
 |    encoding. | 
 | */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault( | 
 |     PyObject *unicode | 
 |     ); | 
 |  | 
 | /* --- Methods & Slots ---------------------------------------------------- | 
 |  | 
 |    These are capable of handling Unicode objects and strings on input | 
 |    (we refer to them as strings in the descriptions) and return | 
 |    Unicode objects or integers as appropriate. */ | 
 |  | 
 | /* Concatenate two strings, giving a new Unicode string. */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_Concat( | 
 |     PyObject *left,             /* Left string */ | 
 |     PyObject *right             /* Right string */ | 
 |     ); | 
 |  | 
 | /* Concatenate two strings and store the result in *pleft | 
 |    (sets *pleft to NULL on error). */ | 
 |  | 
 | PyAPI_FUNC(void) PyUnicode_Append( | 
 |     PyObject **pleft,           /* Pointer to left string */ | 
 |     PyObject *right             /* Right string */ | 
 |     ); | 
 |  | 
 | /* Concatenate two strings, store the result in *pleft and release the | 
 |    reference to the right object | 
 |    (sets *pleft to NULL on error). */ | 
 |  | 
 | PyAPI_FUNC(void) PyUnicode_AppendAndDel( | 
 |     PyObject **pleft,           /* Pointer to left string */ | 
 |     PyObject *right             /* Right string */ | 
 |     ); | 
 |  | 
 | /* Split a string giving a list of Unicode strings. | 
 |  | 
 |    If sep is NULL, splitting will be done at all whitespace | 
 |    substrings. Otherwise, splits occur at the given separator. | 
 |  | 
 |    At most maxsplit splits will be done. If negative, no limit is set. | 
 |  | 
 |    Separators are not included in the resulting list. | 
 |  | 
 | */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_Split( | 
 |     PyObject *s,                /* String to split */ | 
 |     PyObject *sep,              /* String separator */ | 
 |     Py_ssize_t maxsplit         /* Maxsplit count */ | 
 |     ); | 
 |  | 
 | /* Split a string at line breaks, giving a list of Unicode strings. | 
 |  | 
 |    CRLF is considered to be one line break. Line breaks are not | 
 |    included in the resulting list unless keepends is true. */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_Splitlines( | 
 |     PyObject *s,                /* String to split */ | 
 |     int keepends                /* If true, line end markers are included */ | 
 |     ); | 
 |  | 
 | /* Partition a string using a given separator. */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_Partition( | 
 |     PyObject *s,                /* String to partition */ | 
 |     PyObject *sep               /* String separator */ | 
 |     ); | 
 |  | 
 | /* Partition a string using a given separator, searching from the end of the | 
 |    string. */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_RPartition( | 
 |     PyObject *s,                /* String to partition */ | 
 |     PyObject *sep               /* String separator */ | 
 |     ); | 
 |  | 
 | /* Split a string giving a list of Unicode strings. Unlike | 
 |    PyUnicode_Split, PyUnicode_RSplit splits from the end of the string. | 
 |  | 
 |    If sep is NULL, splitting will be done at all whitespace | 
 |    substrings. Otherwise, splits occur at the given separator. | 
 |  | 
 |    At most maxsplit splits will be done. If maxsplit is negative, | 
 |    no limit is set. | 
 |  | 
 |    Separators are not included in the resulting list. | 
 |  | 
 | */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_RSplit( | 
 |     PyObject *s,                /* String to split */ | 
 |     PyObject *sep,              /* String separator */ | 
 |     Py_ssize_t maxsplit         /* Maxsplit count */ | 
 |     ); | 
 |  | 
 | /* Translate a string by applying a character mapping table to it and | 
 |    return the resulting Unicode object. | 
 |  | 
 |    The mapping table must map Unicode ordinal integers to Unicode strings, | 
 |    Unicode ordinal integers or None (causing deletion of the character). | 
 |  | 
 |    Mapping tables may be dictionaries or sequences. Unmapped character | 
 |    ordinals (ones which cause a LookupError) are left untouched and | 
 |    are copied as-is. | 
 |  | 
 | */ | 
 |  | 
 | PyAPI_FUNC(PyObject *) PyUnicode_Translate( | 
 |     PyObject *str,              /* String */ | 
 |     PyObject *table,            /* Translate table */ | 
 |     const char *errors          /* error handling */ | 
 |     ); | 
 |  | 
 | /* Join a sequence of strings using the given separator and return | 
 |    the resulting Unicode string. */ | 
 |  | 
 | PyAPI_FUNC(PyObject*) PyUnicode_Join( | 
 |     PyObject *separator,        /* Separator string */ | 
 |     PyObject *seq               /* Sequence object */ | 
 |     ); | 
 |  | 
 | /* Return 1 if substr matches str[start:end] at the given tail end, 0 | 
 |    otherwise. Return -1 if an error occurred. */ | 
 |  | 
 | PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch( | 
 |     PyObject *str,              /* String */ | 
 |     PyObject *substr,           /* Prefix or Suffix string */ | 
 |     Py_ssize_t start,           /* Start index */ | 
 |     Py_ssize_t end,             /* Stop index */ | 
 |     int direction               /* Tail end: -1 prefix, +1 suffix */ | 
 |     ); | 
 |  | 
 | /* Return the first position of substr in str[start:end] using the | 
 |    given search direction or -1 if not found. -2 is returned in case | 
 |    an error occurred and an exception is set. */ | 
 |  | 
 | PyAPI_FUNC(Py_ssize_t) PyUnicode_Find( | 
 |     PyObject *str,              /* String */ | 
 |     PyObject *substr,           /* Substring to find */ | 
 |     Py_ssize_t start,           /* Start index */ | 
 |     Py_ssize_t end,             /* Stop index */ | 
 |     int direction               /* Find direction: +1 forward, -1 backward */ | 
 |     ); | 
 |  | 
 | #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 | 
 | /* Like PyUnicode_Find, but search for a single character only. | 
 |    Only declared for limited API versions 0x03030000 and later. */ | 
 | PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar( | 
 |     PyObject *str,              /* String */ | 
 |     Py_UCS4 ch,                 /* Character to find */ | 
 |     Py_ssize_t start,           /* Start index */ | 
 |     Py_ssize_t end,             /* Stop index */ | 
 |     int direction               /* Find direction: +1 forward, -1 backward */ | 
 |     ); | 
 | #endif | 
 |  | 
 | /* Count the number of occurrences of substr in str[start:end]. | 
 |    Return -1 if an error occurred. */ | 
 |  | 
 | PyAPI_FUNC(Py_ssize_t) PyUnicode_Count( | 
 |     PyObject *str,              /* String */ | 
 |     PyObject *substr,           /* Substring to count */ | 
 |     Py_ssize_t start,           /* Start index */ | 
 |     Py_ssize_t end              /* Stop index */ | 
 |     ); | 
 |  | 
 | /* Replace at most maxcount occurrences of substr in str with replstr | 
 |    and return the resulting Unicode object. */ | 
 |  | 
 | PyAPI_FUNC(PyObject *) PyUnicode_Replace( | 
 |     PyObject *str,              /* String */ | 
 |     PyObject *substr,           /* Substring to find */ | 
 |     PyObject *replstr,          /* Substring to replace */ | 
 |     Py_ssize_t maxcount         /* Max. number of replacements to apply; | 
 |                                    -1 = all */ | 
 |     ); | 
 |  | 
 | /* Compare two strings and return -1, 0, 1 for less than, equal, | 
 |    greater than resp. | 
 |    Raise an exception and return -1 on error. */ | 
 |  | 
 | PyAPI_FUNC(int) PyUnicode_Compare( | 
 |     PyObject *left,             /* Left string */ | 
 |     PyObject *right             /* Right string */ | 
 |     ); | 
 |  | 
 | /* Compare a Unicode object with C string and return -1, 0, 1 for less than, | 
 |    equal, and greater than, respectively.  It is best to pass only | 
 |    ASCII-encoded strings, but the function interprets the input string as | 
 |    ISO-8859-1 if it contains non-ASCII characters. | 
 |    This function does not raise exceptions. */ | 
 |  | 
 | PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString( | 
 |     PyObject *left, | 
 |     const char *right           /* ASCII-encoded string */ | 
 |     ); | 
 |  | 
 | /* Rich compare two strings and return one of the following: | 
 |  | 
 |    - NULL in case an exception was raised | 
 |    - Py_True or Py_False for successful comparisons | 
 |    - Py_NotImplemented in case the type combination is unknown | 
 |  | 
 |    Possible values for op: | 
 |  | 
 |      Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE | 
 |  | 
 | */ | 
 |  | 
 | PyAPI_FUNC(PyObject *) PyUnicode_RichCompare( | 
 |     PyObject *left,             /* Left string */ | 
 |     PyObject *right,            /* Right string */ | 
 |     int op                      /* Operation: Py_EQ, Py_NE, Py_GT, etc. */ | 
 |     ); | 
 |  | 
 | /* Apply an argument tuple or dictionary to a format string and return | 
 |    the resulting Unicode string. */ | 
 |  | 
 | PyAPI_FUNC(PyObject *) PyUnicode_Format( | 
 |     PyObject *format,           /* Format string */ | 
 |     PyObject *args              /* Argument tuple or dictionary */ | 
 |     ); | 
 |  | 
 | /* Check whether element is contained in container and return 1 or 0 | 
 |    accordingly. | 
 |  | 
 |    element has to coerce to a one element Unicode string. -1 is | 
 |    returned in case of an error. */ | 
 |  | 
 | PyAPI_FUNC(int) PyUnicode_Contains( | 
 |     PyObject *container,        /* Container string */ | 
 |     PyObject *element           /* Element string */ | 
 |     ); | 
 |  | 
 | /* Check whether s is a valid identifier. Return 1 if it is, | 
 |    0 otherwise. */ | 
 |  | 
 | PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s); | 
 |  | 
 | /* === Characters Type APIs =============================================== */ | 
 |  | 
 | #if defined(Py_DEBUG) && !defined(Py_LIMITED_API) | 
 | PyAPI_FUNC(int) _PyUnicode_CheckConsistency( | 
 |     PyObject *op, | 
 |     int check_content); | 
 | #elif !defined(NDEBUG) | 
 | /* Fallback for asserts that call _PyUnicode_CheckConsistency() when the | 
 |  * real function is not declared above but asserts are still enabled | 
 |  * (NDEBUG is not defined), e.g. when building without Py_DEBUG. The macro | 
 |  * only evaluates PyUnicode_Check(op); check_content is ignored. */ | 
 | #define _PyUnicode_CheckConsistency(op, check_content) PyUnicode_Check(op) | 
 | #endif | 
 |  | 
 | #ifndef Py_LIMITED_API | 
 | #  define Py_CPYTHON_UNICODEOBJECT_H | 
 | #  include  "cpython/unicodeobject.h" | 
 | #  undef Py_CPYTHON_UNICODEOBJECT_H | 
 | #endif | 
 |  | 
 | #ifdef __cplusplus | 
 | } | 
 | #endif | 
 | #endif /* !Py_UNICODEOBJECT_H */ |