Guido van Rossum | 3094484 | 2000-03-10 22:32:23 +0000 | [diff] [blame] | 1 | #ifndef Py_CODECREGISTRY_H |
| 2 | #define Py_CODECREGISTRY_H |
| 3 | #ifdef __cplusplus |
| 4 | extern "C" { |
| 5 | #endif |
| 6 | |
| 7 | /* ------------------------------------------------------------------------ |
| 8 | |
| 9 | Python Codec Registry and support functions |
| 10 | |
| 11 | |
| 12 | Written by Marc-Andre Lemburg (mal@lemburg.com). |
| 13 | |
Guido van Rossum | 16b1ad9 | 2000-08-03 16:24:25 +0000 | [diff] [blame] | 14 | Copyright (c) Corporation for National Research Initiatives. |
Guido van Rossum | 3094484 | 2000-03-10 22:32:23 +0000 | [diff] [blame] | 15 | |
| 16 | ------------------------------------------------------------------------ */ |
| 17 | |
Fred Drake | 3ac3edc | 2000-05-09 19:51:10 +0000 | [diff] [blame] | 18 | /* Register a new codec search function. |
| 19 | |
| 20 | As a side effect, this tries to load the encodings package, if not |
| 21 | yet done, to make sure that it is always first in the list of |
| 22 | search functions. |
| 23 | |
| 24 | The search_function's refcount is incremented by this function. */ |
| 25 | |
Mark Hammond | 91a681d | 2002-08-12 07:21:58 +0000 | [diff] [blame] | 26 | PyAPI_FUNC(int) PyCodec_Register( |
Guido van Rossum | 3094484 | 2000-03-10 22:32:23 +0000 | [diff] [blame] | 27 | PyObject *search_function |
| 28 | ); |
| 29 | |
Hai Shi | d332e7b | 2020-09-29 05:41:11 +0800 | [diff] [blame^] | 30 | /* Unregister a codec search function and clear the registry's cache. |
| 31 | If the search function is not registered, do nothing. |
| 32 | Return 0 on success. Raise an exception and return -1 on error. */ |
| 33 | |
| 34 | PyAPI_FUNC(int) PyCodec_Unregister( |
| 35 | PyObject *search_function |
| 36 | ); |
| 37 | |
Marc-André Lemburg | b2750b5 | 2008-06-06 12:18:17 +0000 | [diff] [blame] | 38 | /* Codec registry lookup API. |
Fred Drake | 3ac3edc | 2000-05-09 19:51:10 +0000 | [diff] [blame] | 39 | |
Thomas Wouters | a977329 | 2006-04-21 09:43:23 +0000 | [diff] [blame] | 40 | Looks up the given encoding and returns a CodecInfo object with |
| 41 | function attributes which implement the different aspects of |
| 42 | processing the encoding. |
Fred Drake | 3ac3edc | 2000-05-09 19:51:10 +0000 | [diff] [blame] | 43 | |
| 44 | The encoding string is converted to all lower-case characters |
| 45 | before it is looked up. This makes encodings looked up through this |
| 46 | mechanism effectively case-insensitive. |
| 47 | |
Thomas Wouters | a977329 | 2006-04-21 09:43:23 +0000 | [diff] [blame] | 48 | If no codec is found, a LookupError is set and NULL is returned. |
Fred Drake | 3ac3edc | 2000-05-09 19:51:10 +0000 | [diff] [blame] | 49 | |
| 50 | As a side effect, this tries to load the encodings package, if not |
| 51 | yet done. This is part of the lazy load strategy for the encodings |
| 52 | package. |
| 53 | |
| 54 | */ |
| 55 | |
Martin v. Löwis | 4d0d471 | 2010-12-03 20:14:31 +0000 | [diff] [blame] | 56 | #ifndef Py_LIMITED_API |
Mark Hammond | 91a681d | 2002-08-12 07:21:58 +0000 | [diff] [blame] | 57 | PyAPI_FUNC(PyObject *) _PyCodec_Lookup( |
Guido van Rossum | 3094484 | 2000-03-10 22:32:23 +0000 | [diff] [blame] | 58 | const char *encoding |
| 59 | ); |
Nick Coghlan | 8fad167 | 2014-09-15 23:50:44 +1200 | [diff] [blame] | 60 | |
| 61 | PyAPI_FUNC(int) _PyCodec_Forget( |
| 62 | const char *encoding |
| 63 | ); |
Martin v. Löwis | 4d0d471 | 2010-12-03 20:14:31 +0000 | [diff] [blame] | 64 | #endif |
Guido van Rossum | 3094484 | 2000-03-10 22:32:23 +0000 | [diff] [blame] | 65 | |
Marc-André Lemburg | b2750b5 | 2008-06-06 12:18:17 +0000 | [diff] [blame] | 66 | /* Codec registry encoding check API. |
| 67 | |
| 68 | Returns 1/0 depending on whether there is a registered codec for |
| 69 | the given encoding. |
| 70 | |
| 71 | */ |
| 72 | |
| 73 | PyAPI_FUNC(int) PyCodec_KnownEncoding( |
| 74 | const char *encoding |
| 75 | ); |
| 76 | |
Fred Drake | 3ac3edc | 2000-05-09 19:51:10 +0000 | [diff] [blame] | 77 | /* Generic codec based encoding API. |
Guido van Rossum | 3094484 | 2000-03-10 22:32:23 +0000 | [diff] [blame] | 78 | |
Fred Drake | 3ac3edc | 2000-05-09 19:51:10 +0000 | [diff] [blame] | 79 | object is passed through the encoder function found for the given |
| 80 | encoding using the error handling method defined by errors. errors |
| 81 | may be NULL to use the default method defined for the codec. |
Serhiy Storchaka | 009b811 | 2015-03-18 21:53:15 +0200 | [diff] [blame] | 82 | |
Fred Drake | 3ac3edc | 2000-05-09 19:51:10 +0000 | [diff] [blame] | 83 | Raises a LookupError in case no encoder can be found. |
Guido van Rossum | 3094484 | 2000-03-10 22:32:23 +0000 | [diff] [blame] | 84 | |
Fred Drake | 3ac3edc | 2000-05-09 19:51:10 +0000 | [diff] [blame] | 85 | */ |
Guido van Rossum | 3094484 | 2000-03-10 22:32:23 +0000 | [diff] [blame] | 86 | |
Mark Hammond | 91a681d | 2002-08-12 07:21:58 +0000 | [diff] [blame] | 87 | PyAPI_FUNC(PyObject *) PyCodec_Encode( |
Guido van Rossum | 3094484 | 2000-03-10 22:32:23 +0000 | [diff] [blame] | 88 | PyObject *object, |
| 89 | const char *encoding, |
| 90 | const char *errors |
| 91 | ); |
| 92 | |
Fred Drake | 3ac3edc | 2000-05-09 19:51:10 +0000 | [diff] [blame] | 93 | /* Generic codec based decoding API. |
| 94 | |
| 95 | object is passed through the decoder function found for the given |
| 96 | encoding using the error handling method defined by errors. errors |
| 97 | may be NULL to use the default method defined for the codec. |
Serhiy Storchaka | 009b811 | 2015-03-18 21:53:15 +0200 | [diff] [blame] | 98 | |
Fred Drake | 3ac3edc | 2000-05-09 19:51:10 +0000 | [diff] [blame] | 99 | Raises a LookupError in case no decoder can be found. |
| 100 | |
| 101 | */ |
| 102 | |
Mark Hammond | 91a681d | 2002-08-12 07:21:58 +0000 | [diff] [blame] | 103 | PyAPI_FUNC(PyObject *) PyCodec_Decode( |
Guido van Rossum | 3094484 | 2000-03-10 22:32:23 +0000 | [diff] [blame] | 104 | PyObject *object, |
| 105 | const char *encoding, |
| 106 | const char *errors |
| 107 | ); |
| 108 | |
Martin v. Löwis | 1c0689c | 2014-01-03 21:36:49 +0100 | [diff] [blame] | 109 | #ifndef Py_LIMITED_API |
Nick Coghlan | c72e4e6 | 2013-11-22 22:39:36 +1000 | [diff] [blame] | 110 | /* Text codec specific encoding and decoding API. |
| 111 | |
| 112 | Checks the encoding against a list of codecs which do not |
| 113 | implement a str<->bytes encoding before attempting the |
| 114 | operation. |
| 115 | |
| 116 | Please note that these APIs are internal and should not |
| 117 | be used in Python C extensions. |
| 118 | |
Nick Coghlan | a9b1524 | 2014-02-04 22:11:18 +1000 | [diff] [blame] | 119 | XXX (ncoghlan): should we make these, or something like them, public |
| 120 | in Python 3.5+? |
| 121 | |
Nick Coghlan | c72e4e6 | 2013-11-22 22:39:36 +1000 | [diff] [blame] | 122 | */ |
Nick Coghlan | a9b1524 | 2014-02-04 22:11:18 +1000 | [diff] [blame] | 123 | PyAPI_FUNC(PyObject *) _PyCodec_LookupTextEncoding( |
| 124 | const char *encoding, |
| 125 | const char *alternate_command |
| 126 | ); |
Nick Coghlan | c72e4e6 | 2013-11-22 22:39:36 +1000 | [diff] [blame] | 127 | |
| 128 | PyAPI_FUNC(PyObject *) _PyCodec_EncodeText( |
| 129 | PyObject *object, |
| 130 | const char *encoding, |
| 131 | const char *errors |
| 132 | ); |
| 133 | |
| 134 | PyAPI_FUNC(PyObject *) _PyCodec_DecodeText( |
| 135 | PyObject *object, |
| 136 | const char *encoding, |
| 137 | const char *errors |
| 138 | ); |
Nick Coghlan | a9b1524 | 2014-02-04 22:11:18 +1000 | [diff] [blame] | 139 | |
| 140 | /* These two aren't actually text encoding specific, but _io.TextIOWrapper |
| 141 | * is the only current API consumer. |
| 142 | */ |
| 143 | PyAPI_FUNC(PyObject *) _PyCodecInfo_GetIncrementalDecoder( |
| 144 | PyObject *codec_info, |
| 145 | const char *errors |
| 146 | ); |
| 147 | |
| 148 | PyAPI_FUNC(PyObject *) _PyCodecInfo_GetIncrementalEncoder( |
| 149 | PyObject *codec_info, |
| 150 | const char *errors |
| 151 | ); |
Nick Coghlan | c72e4e6 | 2013-11-22 22:39:36 +1000 | [diff] [blame] | 152 | #endif |
| 153 | |
| 154 | |
| 155 | |
Serhiy Storchaka | 009b811 | 2015-03-18 21:53:15 +0200 | [diff] [blame] | 156 | /* --- Codec Lookup APIs -------------------------------------------------- |
Fred Drake | 3ac3edc | 2000-05-09 19:51:10 +0000 | [diff] [blame] | 157 | |
| 158 | All APIs return a codec object with incremented refcount and are |
| 159 | based on _PyCodec_Lookup(). The same comments with respect to the |
| 160 | encoding name also apply to these APIs. |
| 161 | |
| 162 | */ |
| 163 | |
| 164 | /* Get an encoder function for the given encoding. */ |
| 165 | |
Mark Hammond | 91a681d | 2002-08-12 07:21:58 +0000 | [diff] [blame] | 166 | PyAPI_FUNC(PyObject *) PyCodec_Encoder( |
Fred Drake | 3ac3edc | 2000-05-09 19:51:10 +0000 | [diff] [blame] | 167 | const char *encoding |
| 168 | ); |
| 169 | |
| 170 | /* Get a decoder function for the given encoding. */ |
| 171 | |
Mark Hammond | 91a681d | 2002-08-12 07:21:58 +0000 | [diff] [blame] | 172 | PyAPI_FUNC(PyObject *) PyCodec_Decoder( |
Fred Drake | 3ac3edc | 2000-05-09 19:51:10 +0000 | [diff] [blame] | 173 | const char *encoding |
| 174 | ); |
| 175 | |
Martin Panter | 7462b649 | 2015-11-02 03:37:02 +0000 | [diff] [blame] | 176 | /* Get an IncrementalEncoder object for the given encoding. */ |
Thomas Wouters | a977329 | 2006-04-21 09:43:23 +0000 | [diff] [blame] | 177 | |
| 178 | PyAPI_FUNC(PyObject *) PyCodec_IncrementalEncoder( |
| 179 | const char *encoding, |
| 180 | const char *errors |
| 181 | ); |
| 182 | |
Martin Panter | 7462b649 | 2015-11-02 03:37:02 +0000 | [diff] [blame] | 183 | /* Get an IncrementalDecoder object for the given encoding. */ |
Thomas Wouters | a977329 | 2006-04-21 09:43:23 +0000 | [diff] [blame] | 184 | |
| 185 | PyAPI_FUNC(PyObject *) PyCodec_IncrementalDecoder( |
| 186 | const char *encoding, |
| 187 | const char *errors |
| 188 | ); |
| 189 | |
Fred Drake | 3ac3edc | 2000-05-09 19:51:10 +0000 | [diff] [blame] | 190 | /* Get a StreamReader factory function for the given encoding. */ |
| 191 | |
Mark Hammond | 91a681d | 2002-08-12 07:21:58 +0000 | [diff] [blame] | 192 | PyAPI_FUNC(PyObject *) PyCodec_StreamReader( |
Fred Drake | 3ac3edc | 2000-05-09 19:51:10 +0000 | [diff] [blame] | 193 | const char *encoding, |
| 194 | PyObject *stream, |
| 195 | const char *errors |
| 196 | ); |
| 197 | |
| 198 | /* Get a StreamWriter factory function for the given encoding. */ |
| 199 | |
Mark Hammond | 91a681d | 2002-08-12 07:21:58 +0000 | [diff] [blame] | 200 | PyAPI_FUNC(PyObject *) PyCodec_StreamWriter( |
Fred Drake | 3ac3edc | 2000-05-09 19:51:10 +0000 | [diff] [blame] | 201 | const char *encoding, |
| 202 | PyObject *stream, |
| 203 | const char *errors |
| 204 | ); |
| 205 | |
Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 206 | /* Unicode encoding error handling callback registry API */ |
| 207 | |
Georg Brandl | bab3378 | 2010-11-20 13:44:41 +0000 | [diff] [blame] | 208 | /* Register the error handling callback function error under the given |
Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 209 | name. The callback will be called by the codec when it encounters |
| 210 | unencodable characters/undecodable bytes and doesn't know the |
| 211 | callback name, when name is specified as the error parameter |
| 212 | in the call to the encode/decode function. |
| 213 | Return 0 on success, -1 on error */ |
| 214 | PyAPI_FUNC(int) PyCodec_RegisterError(const char *name, PyObject *error); |
| 215 | |
Georg Brandl | bab3378 | 2010-11-20 13:44:41 +0000 | [diff] [blame] | 216 | /* Look up the error handling callback function registered under the given |
| 217 | name. As a special case NULL can be passed, in which case |
Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 218 | the error handling callback for "strict" will be returned. */ |
| 219 | PyAPI_FUNC(PyObject *) PyCodec_LookupError(const char *name); |
| 220 | |
| 221 | /* raise exc as an exception */ |
| 222 | PyAPI_FUNC(PyObject *) PyCodec_StrictErrors(PyObject *exc); |
| 223 | |
| 224 | /* ignore the unicode error, skipping the faulty input */ |
| 225 | PyAPI_FUNC(PyObject *) PyCodec_IgnoreErrors(PyObject *exc); |
| 226 | |
Georg Brandl | bab3378 | 2010-11-20 13:44:41 +0000 | [diff] [blame] | 227 | /* replace the unicode encode error with ? or U+FFFD */ |
Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 228 | PyAPI_FUNC(PyObject *) PyCodec_ReplaceErrors(PyObject *exc); |
| 229 | |
| 230 | /* replace the unicode encode error with XML character references */ |
| 231 | PyAPI_FUNC(PyObject *) PyCodec_XMLCharRefReplaceErrors(PyObject *exc); |
| 232 | |
| 233 | /* replace the unicode encode error with backslash escapes (\x, \u and \U) */ |
| 234 | PyAPI_FUNC(PyObject *) PyCodec_BackslashReplaceErrors(PyObject *exc); |
| 235 | |
Serhiy Storchaka | 34d0ac8 | 2016-12-27 14:57:39 +0200 | [diff] [blame] | 236 | #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000 |
Serhiy Storchaka | 166ebc4 | 2014-11-25 13:57:17 +0200 | [diff] [blame] | 237 | /* replace the unicode encode error with backslash escapes (\N, \x, \u and \U) */ |
| 238 | PyAPI_FUNC(PyObject *) PyCodec_NameReplaceErrors(PyObject *exc); |
Serhiy Storchaka | 34d0ac8 | 2016-12-27 14:57:39 +0200 | [diff] [blame] | 239 | #endif |
Serhiy Storchaka | 166ebc4 | 2014-11-25 13:57:17 +0200 | [diff] [blame] | 240 | |
Serhiy Storchaka | 34d0ac8 | 2016-12-27 14:57:39 +0200 | [diff] [blame] | 241 | #ifndef Py_LIMITED_API |
Antoine Pitrou | e606983 | 2011-10-15 16:38:20 +0200 | [diff] [blame] | 242 | PyAPI_DATA(const char *) Py_hexdigits; |
Serhiy Storchaka | 34d0ac8 | 2016-12-27 14:57:39 +0200 | [diff] [blame] | 243 | #endif |
Victor Stinner | f5cff56 | 2011-10-14 02:13:11 +0200 | [diff] [blame] | 244 | |
Guido van Rossum | 3094484 | 2000-03-10 22:32:23 +0000 | [diff] [blame] | 245 | #ifdef __cplusplus |
| 246 | } |
| 247 | #endif |
| 248 | #endif /* !Py_CODECREGISTRY_H */ |