| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 1 | /* ------------------------------------------------------------------------ | 
 | 2 |  | 
 | 3 |    Python Codec Registry and support functions | 
 | 4 |  | 
 | 5 | Written by Marc-Andre Lemburg (mal@lemburg.com). | 
 | 6 |  | 
| Guido van Rossum | 16b1ad9 | 2000-08-03 16:24:25 +0000 | [diff] [blame] | 7 | Copyright (c) Corporation for National Research Initiatives. | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 8 |  | 
 | 9 |    ------------------------------------------------------------------------ */ | 
 | 10 |  | 
 | 11 | #include "Python.h" | 
 | 12 | #include <ctype.h> | 
 | 13 |  | 
/* The sixteen lower-case hexadecimal digit characters, ordered so that the
   character at position n is the digit for the value n (0..15). */
const char *Py_hexdigits = "0123456789abcdef";
 | 15 |  | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 16 | /* --- Codec Registry ----------------------------------------------------- */ | 
 | 17 |  | 
 | 18 | /* Import the standard encodings package which will register the first | 
| Guido van Rossum | 98297ee | 2007-11-06 21:34:58 +0000 | [diff] [blame] | 19 |    codec search function. | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 20 |  | 
 | 21 |    This is done in a lazy way so that the Unicode implementation does | 
 | 22 |    not downgrade startup time of scripts not needing it. | 
 | 23 |  | 
| Guido van Rossum | b95de4f | 2000-03-31 17:25:23 +0000 | [diff] [blame] | 24 |    ImportErrors are silently ignored by this function. Only one try is | 
 | 25 |    made. | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 26 |  | 
 | 27 | */ | 
 | 28 |  | 
| Gustavo Niemeyer | 5ddd4c3 | 2003-03-19 00:35:36 +0000 | [diff] [blame] | 29 | static int _PyCodecRegistry_Init(void); /* Forward */ | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 30 |  | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 31 | int PyCodec_Register(PyObject *search_function) | 
 | 32 | { | 
| Nicholas Bastin | e5662ae | 2004-03-24 22:22:12 +0000 | [diff] [blame] | 33 |     PyInterpreterState *interp = PyThreadState_GET()->interp; | 
| Gustavo Niemeyer | 5ddd4c3 | 2003-03-19 00:35:36 +0000 | [diff] [blame] | 34 |     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 35 |         goto onError; | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 36 |     if (search_function == NULL) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 37 |         PyErr_BadArgument(); | 
 | 38 |         goto onError; | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 39 |     } | 
 | 40 |     if (!PyCallable_Check(search_function)) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 41 |         PyErr_SetString(PyExc_TypeError, "argument must be callable"); | 
 | 42 |         goto onError; | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 43 |     } | 
| Gustavo Niemeyer | 5ddd4c3 | 2003-03-19 00:35:36 +0000 | [diff] [blame] | 44 |     return PyList_Append(interp->codec_search_path, search_function); | 
| Guido van Rossum | b95de4f | 2000-03-31 17:25:23 +0000 | [diff] [blame] | 45 |  | 
 | 46 |  onError: | 
 | 47 |     return -1; | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 48 | } | 
 | 49 |  | 
| Guido van Rossum | 9e896b3 | 2000-04-05 20:11:21 +0000 | [diff] [blame] | 50 | /* Convert a string to a normalized Python string: all characters are | 
 | 51 |    converted to lower case, spaces are replaced with underscores. */ | 
 | 52 |  | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 53 | static | 
| Guido van Rossum | 9e896b3 | 2000-04-05 20:11:21 +0000 | [diff] [blame] | 54 | PyObject *normalizestring(const char *string) | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 55 | { | 
| Antoine Pitrou | 9ed5f27 | 2013-08-13 20:18:52 +0200 | [diff] [blame] | 56 |     size_t i; | 
| Guido van Rossum | 582acec | 2000-06-28 22:07:35 +0000 | [diff] [blame] | 57 |     size_t len = strlen(string); | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 58 |     char *p; | 
 | 59 |     PyObject *v; | 
| Guido van Rossum | 21431e8 | 2007-10-19 21:48:41 +0000 | [diff] [blame] | 60 |  | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 61 |     if (len > PY_SSIZE_T_MAX) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 62 |         PyErr_SetString(PyExc_OverflowError, "string is too large"); | 
 | 63 |         return NULL; | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 64 |     } | 
| Guido van Rossum | 21431e8 | 2007-10-19 21:48:41 +0000 | [diff] [blame] | 65 |  | 
 | 66 |     p = PyMem_Malloc(len + 1); | 
 | 67 |     if (p == NULL) | 
| Victor Stinner | cc35159 | 2013-07-12 00:02:55 +0200 | [diff] [blame] | 68 |         return PyErr_NoMemory(); | 
| Guido van Rossum | 9e896b3 | 2000-04-05 20:11:21 +0000 | [diff] [blame] | 69 |     for (i = 0; i < len; i++) { | 
| Antoine Pitrou | 9ed5f27 | 2013-08-13 20:18:52 +0200 | [diff] [blame] | 70 |         char ch = string[i]; | 
| Guido van Rossum | 9e896b3 | 2000-04-05 20:11:21 +0000 | [diff] [blame] | 71 |         if (ch == ' ') | 
 | 72 |             ch = '-'; | 
 | 73 |         else | 
| Antoine Pitrou | cf9d3c0 | 2011-07-24 02:27:04 +0200 | [diff] [blame] | 74 |             ch = Py_TOLOWER(Py_CHARMASK(ch)); | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 75 |         p[i] = ch; | 
| Guido van Rossum | 9e896b3 | 2000-04-05 20:11:21 +0000 | [diff] [blame] | 76 |     } | 
| Guido van Rossum | 21431e8 | 2007-10-19 21:48:41 +0000 | [diff] [blame] | 77 |     p[i] = '\0'; | 
 | 78 |     v = PyUnicode_FromString(p); | 
 | 79 |     if (v == NULL) | 
 | 80 |         return NULL; | 
 | 81 |     PyMem_Free(p); | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 82 |     return v; | 
 | 83 | } | 
 | 84 |  | 
 | 85 | /* Lookup the given encoding and return a tuple providing the codec | 
 | 86 |    facilities. | 
 | 87 |  | 
 | 88 |    The encoding string is looked up converted to all lower-case | 
 | 89 |    characters. This makes encodings looked up through this mechanism | 
 | 90 |    effectively case-insensitive. | 
 | 91 |  | 
| Guido van Rossum | 98297ee | 2007-11-06 21:34:58 +0000 | [diff] [blame] | 92 |    If no codec is found, a LookupError is set and NULL returned. | 
| Guido van Rossum | b95de4f | 2000-03-31 17:25:23 +0000 | [diff] [blame] | 93 |  | 
 | 94 |    As side effect, this tries to load the encodings package, if not | 
 | 95 |    yet done. This is part of the lazy load strategy for the encodings | 
 | 96 |    package. | 
 | 97 |  | 
 | 98 | */ | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 99 |  | 
 | 100 | PyObject *_PyCodec_Lookup(const char *encoding) | 
 | 101 | { | 
| Gustavo Niemeyer | 5ddd4c3 | 2003-03-19 00:35:36 +0000 | [diff] [blame] | 102 |     PyInterpreterState *interp; | 
| Guido van Rossum | 5ba3c84 | 2000-03-24 20:52:23 +0000 | [diff] [blame] | 103 |     PyObject *result, *args = NULL, *v; | 
| Thomas Wouters | 477c8d5 | 2006-05-27 19:21:47 +0000 | [diff] [blame] | 104 |     Py_ssize_t i, len; | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 105 |  | 
| Fred Drake | 766de83 | 2000-05-09 19:55:59 +0000 | [diff] [blame] | 106 |     if (encoding == NULL) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 107 |         PyErr_BadArgument(); | 
 | 108 |         goto onError; | 
| Fred Drake | 766de83 | 2000-05-09 19:55:59 +0000 | [diff] [blame] | 109 |     } | 
| Gustavo Niemeyer | 5ddd4c3 | 2003-03-19 00:35:36 +0000 | [diff] [blame] | 110 |  | 
| Nicholas Bastin | e5662ae | 2004-03-24 22:22:12 +0000 | [diff] [blame] | 111 |     interp = PyThreadState_GET()->interp; | 
| Gustavo Niemeyer | 5ddd4c3 | 2003-03-19 00:35:36 +0000 | [diff] [blame] | 112 |     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 113 |         goto onError; | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 114 |  | 
| Guido van Rossum | 9e896b3 | 2000-04-05 20:11:21 +0000 | [diff] [blame] | 115 |     /* Convert the encoding to a normalized Python string: all | 
| Thomas Wouters | 7e47402 | 2000-07-16 12:04:32 +0000 | [diff] [blame] | 116 |        characters are converted to lower case, spaces and hyphens are | 
| Guido van Rossum | 9e896b3 | 2000-04-05 20:11:21 +0000 | [diff] [blame] | 117 |        replaced with underscores. */ | 
 | 118 |     v = normalizestring(encoding); | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 119 |     if (v == NULL) | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 120 |         goto onError; | 
| Guido van Rossum | 21431e8 | 2007-10-19 21:48:41 +0000 | [diff] [blame] | 121 |     PyUnicode_InternInPlace(&v); | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 122 |  | 
 | 123 |     /* First, try to lookup the name in the registry dictionary */ | 
| Gustavo Niemeyer | 5ddd4c3 | 2003-03-19 00:35:36 +0000 | [diff] [blame] | 124 |     result = PyDict_GetItem(interp->codec_search_cache, v); | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 125 |     if (result != NULL) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 126 |         Py_INCREF(result); | 
 | 127 |         Py_DECREF(v); | 
 | 128 |         return result; | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 129 |     } | 
| Guido van Rossum | 98297ee | 2007-11-06 21:34:58 +0000 | [diff] [blame] | 130 |  | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 131 |     /* Next, scan the search functions in order of registration */ | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 132 |     args = PyTuple_New(1); | 
 | 133 |     if (args == NULL) | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 134 |         goto onError; | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 135 |     PyTuple_SET_ITEM(args,0,v); | 
| Guido van Rossum | 5ba3c84 | 2000-03-24 20:52:23 +0000 | [diff] [blame] | 136 |  | 
| Gustavo Niemeyer | 5ddd4c3 | 2003-03-19 00:35:36 +0000 | [diff] [blame] | 137 |     len = PyList_Size(interp->codec_search_path); | 
| Guido van Rossum | 5ba3c84 | 2000-03-24 20:52:23 +0000 | [diff] [blame] | 138 |     if (len < 0) | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 139 |         goto onError; | 
| Guido van Rossum | b95de4f | 2000-03-31 17:25:23 +0000 | [diff] [blame] | 140 |     if (len == 0) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 141 |         PyErr_SetString(PyExc_LookupError, | 
 | 142 |                         "no codec search functions registered: " | 
 | 143 |                         "can't find encoding"); | 
 | 144 |         goto onError; | 
| Guido van Rossum | b95de4f | 2000-03-31 17:25:23 +0000 | [diff] [blame] | 145 |     } | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 146 |  | 
 | 147 |     for (i = 0; i < len; i++) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 148 |         PyObject *func; | 
| Guido van Rossum | 98297ee | 2007-11-06 21:34:58 +0000 | [diff] [blame] | 149 |  | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 150 |         func = PyList_GetItem(interp->codec_search_path, i); | 
 | 151 |         if (func == NULL) | 
 | 152 |             goto onError; | 
 | 153 |         result = PyEval_CallObject(func, args); | 
 | 154 |         if (result == NULL) | 
 | 155 |             goto onError; | 
 | 156 |         if (result == Py_None) { | 
 | 157 |             Py_DECREF(result); | 
 | 158 |             continue; | 
 | 159 |         } | 
 | 160 |         if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) { | 
 | 161 |             PyErr_SetString(PyExc_TypeError, | 
 | 162 |                             "codec search functions must return 4-tuples"); | 
 | 163 |             Py_DECREF(result); | 
 | 164 |             goto onError; | 
 | 165 |         } | 
 | 166 |         break; | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 167 |     } | 
 | 168 |     if (i == len) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 169 |         /* XXX Perhaps we should cache misses too ? */ | 
 | 170 |         PyErr_Format(PyExc_LookupError, | 
| Martin v. Löwis | eb42b02 | 2002-09-26 16:01:24 +0000 | [diff] [blame] | 171 |                      "unknown encoding: %s", encoding); | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 172 |         goto onError; | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 173 |     } | 
 | 174 |  | 
 | 175 |     /* Cache and return the result */ | 
| Neal Norwitz | 9edcc2e | 2007-08-11 04:58:26 +0000 | [diff] [blame] | 176 |     if (PyDict_SetItem(interp->codec_search_cache, v, result) < 0) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 177 |         Py_DECREF(result); | 
 | 178 |         goto onError; | 
| Neal Norwitz | 9edcc2e | 2007-08-11 04:58:26 +0000 | [diff] [blame] | 179 |     } | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 180 |     Py_DECREF(args); | 
 | 181 |     return result; | 
 | 182 |  | 
 | 183 |  onError: | 
 | 184 |     Py_XDECREF(args); | 
 | 185 |     return NULL; | 
 | 186 | } | 
 | 187 |  | 
| Nick Coghlan | 8fad167 | 2014-09-15 23:50:44 +1200 | [diff] [blame^] | 188 | int _PyCodec_Forget(const char *encoding) | 
 | 189 | { | 
 | 190 |     PyInterpreterState *interp; | 
 | 191 |     PyObject *v; | 
 | 192 |     int result; | 
 | 193 |  | 
 | 194 |     interp = PyThreadState_GET()->interp; | 
 | 195 |     if (interp->codec_search_path == NULL) { | 
 | 196 |         return -1; | 
 | 197 |     } | 
 | 198 |  | 
 | 199 |     /* Convert the encoding to a normalized Python string: all | 
 | 200 |        characters are converted to lower case, spaces and hyphens are | 
 | 201 |        replaced with underscores. */ | 
 | 202 |     v = normalizestring(encoding); | 
 | 203 |     if (v == NULL) { | 
 | 204 |         return -1; | 
 | 205 |     } | 
 | 206 |  | 
 | 207 |     /* Drop the named codec from the internal cache */ | 
 | 208 |     result = PyDict_DelItem(interp->codec_search_cache, v); | 
 | 209 |     Py_DECREF(v); | 
 | 210 |  | 
 | 211 |     return result; | 
 | 212 | } | 
 | 213 |  | 
| Marc-André Lemburg | b2750b5 | 2008-06-06 12:18:17 +0000 | [diff] [blame] | 214 | /* Codec registry encoding check API. */ | 
 | 215 |  | 
 | 216 | int PyCodec_KnownEncoding(const char *encoding) | 
 | 217 | { | 
 | 218 |     PyObject *codecs; | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 219 |  | 
| Marc-André Lemburg | b2750b5 | 2008-06-06 12:18:17 +0000 | [diff] [blame] | 220 |     codecs = _PyCodec_Lookup(encoding); | 
 | 221 |     if (!codecs) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 222 |         PyErr_Clear(); | 
 | 223 |         return 0; | 
| Marc-André Lemburg | b2750b5 | 2008-06-06 12:18:17 +0000 | [diff] [blame] | 224 |     } | 
 | 225 |     else { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 226 |         Py_DECREF(codecs); | 
 | 227 |         return 1; | 
| Marc-André Lemburg | b2750b5 | 2008-06-06 12:18:17 +0000 | [diff] [blame] | 228 |     } | 
 | 229 | } | 
 | 230 |  | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 231 | static | 
 | 232 | PyObject *args_tuple(PyObject *object, | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 233 |                      const char *errors) | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 234 | { | 
 | 235 |     PyObject *args; | 
| Guido van Rossum | 98297ee | 2007-11-06 21:34:58 +0000 | [diff] [blame] | 236 |  | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 237 |     args = PyTuple_New(1 + (errors != NULL)); | 
 | 238 |     if (args == NULL) | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 239 |         return NULL; | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 240 |     Py_INCREF(object); | 
 | 241 |     PyTuple_SET_ITEM(args,0,object); | 
 | 242 |     if (errors) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 243 |         PyObject *v; | 
| Guido van Rossum | 98297ee | 2007-11-06 21:34:58 +0000 | [diff] [blame] | 244 |  | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 245 |         v = PyUnicode_FromString(errors); | 
 | 246 |         if (v == NULL) { | 
 | 247 |             Py_DECREF(args); | 
 | 248 |             return NULL; | 
 | 249 |         } | 
 | 250 |         PyTuple_SET_ITEM(args, 1, v); | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 251 |     } | 
 | 252 |     return args; | 
 | 253 | } | 
 | 254 |  | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 255 | /* Helper function to get a codec item */ | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 256 |  | 
 | 257 | static | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 258 | PyObject *codec_getitem(const char *encoding, int index) | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 259 | { | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 260 |     PyObject *codecs; | 
 | 261 |     PyObject *v; | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 262 |  | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 263 |     codecs = _PyCodec_Lookup(encoding); | 
 | 264 |     if (codecs == NULL) | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 265 |         return NULL; | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 266 |     v = PyTuple_GET_ITEM(codecs, index); | 
 | 267 |     Py_DECREF(codecs); | 
 | 268 |     Py_INCREF(v); | 
 | 269 |     return v; | 
 | 270 | } | 
 | 271 |  | 
| Nick Coghlan | a9b1524 | 2014-02-04 22:11:18 +1000 | [diff] [blame] | 272 | /* Helper functions to create an incremental codec. */ | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 273 | static | 
| Nick Coghlan | a9b1524 | 2014-02-04 22:11:18 +1000 | [diff] [blame] | 274 | PyObject *codec_makeincrementalcodec(PyObject *codec_info, | 
 | 275 |                                      const char *errors, | 
 | 276 |                                      const char *attrname) | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 277 | { | 
| Nick Coghlan | a9b1524 | 2014-02-04 22:11:18 +1000 | [diff] [blame] | 278 |     PyObject *ret, *inccodec; | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 279 |  | 
| Nick Coghlan | a9b1524 | 2014-02-04 22:11:18 +1000 | [diff] [blame] | 280 |     inccodec = PyObject_GetAttrString(codec_info, attrname); | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 281 |     if (inccodec == NULL) | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 282 |         return NULL; | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 283 |     if (errors) | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 284 |         ret = PyObject_CallFunction(inccodec, "s", errors); | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 285 |     else | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 286 |         ret = PyObject_CallFunction(inccodec, NULL); | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 287 |     Py_DECREF(inccodec); | 
 | 288 |     return ret; | 
 | 289 | } | 
 | 290 |  | 
| Nick Coghlan | a9b1524 | 2014-02-04 22:11:18 +1000 | [diff] [blame] | 291 | static | 
 | 292 | PyObject *codec_getincrementalcodec(const char *encoding, | 
 | 293 |                                     const char *errors, | 
 | 294 |                                     const char *attrname) | 
 | 295 | { | 
 | 296 |     PyObject *codec_info, *ret; | 
 | 297 |  | 
 | 298 |     codec_info = _PyCodec_Lookup(encoding); | 
 | 299 |     if (codec_info == NULL) | 
 | 300 |         return NULL; | 
 | 301 |     ret = codec_makeincrementalcodec(codec_info, errors, attrname); | 
 | 302 |     Py_DECREF(codec_info); | 
 | 303 |     return ret; | 
 | 304 | } | 
 | 305 |  | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 306 | /* Helper function to create a stream codec. */ | 
 | 307 |  | 
 | 308 | static | 
 | 309 | PyObject *codec_getstreamcodec(const char *encoding, | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 310 |                                PyObject *stream, | 
 | 311 |                                const char *errors, | 
 | 312 |                                const int index) | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 313 | { | 
| Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 314 |     PyObject *codecs, *streamcodec, *codeccls; | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 315 |  | 
 | 316 |     codecs = _PyCodec_Lookup(encoding); | 
 | 317 |     if (codecs == NULL) | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 318 |         return NULL; | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 319 |  | 
| Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 320 |     codeccls = PyTuple_GET_ITEM(codecs, index); | 
 | 321 |     if (errors != NULL) | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 322 |         streamcodec = PyObject_CallFunction(codeccls, "Os", stream, errors); | 
| Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 323 |     else | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 324 |         streamcodec = PyObject_CallFunction(codeccls, "O", stream); | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 325 |     Py_DECREF(codecs); | 
 | 326 |     return streamcodec; | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 327 | } | 
 | 328 |  | 
| Nick Coghlan | a9b1524 | 2014-02-04 22:11:18 +1000 | [diff] [blame] | 329 | /* Helpers to work with the result of _PyCodec_Lookup | 
 | 330 |  | 
 | 331 |  */ | 
 | 332 | PyObject *_PyCodecInfo_GetIncrementalDecoder(PyObject *codec_info, | 
 | 333 |                                              const char *errors) | 
 | 334 | { | 
 | 335 |     return codec_makeincrementalcodec(codec_info, errors, | 
 | 336 |                                       "incrementaldecoder"); | 
 | 337 | } | 
 | 338 |  | 
 | 339 | PyObject *_PyCodecInfo_GetIncrementalEncoder(PyObject *codec_info, | 
 | 340 |                                              const char *errors) | 
 | 341 | { | 
 | 342 |     return codec_makeincrementalcodec(codec_info, errors, | 
 | 343 |                                       "incrementalencoder"); | 
 | 344 | } | 
 | 345 |  | 
 | 346 |  | 
| Guido van Rossum | 98297ee | 2007-11-06 21:34:58 +0000 | [diff] [blame] | 347 | /* Convenience APIs to query the Codec registry. | 
 | 348 |  | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 349 |    All APIs return a codec object with incremented refcount. | 
| Guido van Rossum | 98297ee | 2007-11-06 21:34:58 +0000 | [diff] [blame] | 350 |  | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 351 |  */ | 
 | 352 |  | 
 | 353 | PyObject *PyCodec_Encoder(const char *encoding) | 
 | 354 | { | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 355 |     return codec_getitem(encoding, 0); | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 356 | } | 
 | 357 |  | 
 | 358 | PyObject *PyCodec_Decoder(const char *encoding) | 
 | 359 | { | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 360 |     return codec_getitem(encoding, 1); | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 361 | } | 
 | 362 |  | 
| Thomas Wouters | a977329 | 2006-04-21 09:43:23 +0000 | [diff] [blame] | 363 | PyObject *PyCodec_IncrementalEncoder(const char *encoding, | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 364 |                                      const char *errors) | 
| Thomas Wouters | a977329 | 2006-04-21 09:43:23 +0000 | [diff] [blame] | 365 | { | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 366 |     return codec_getincrementalcodec(encoding, errors, "incrementalencoder"); | 
| Thomas Wouters | a977329 | 2006-04-21 09:43:23 +0000 | [diff] [blame] | 367 | } | 
 | 368 |  | 
 | 369 | PyObject *PyCodec_IncrementalDecoder(const char *encoding, | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 370 |                                      const char *errors) | 
| Thomas Wouters | a977329 | 2006-04-21 09:43:23 +0000 | [diff] [blame] | 371 | { | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 372 |     return codec_getincrementalcodec(encoding, errors, "incrementaldecoder"); | 
| Thomas Wouters | a977329 | 2006-04-21 09:43:23 +0000 | [diff] [blame] | 373 | } | 
 | 374 |  | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 375 | PyObject *PyCodec_StreamReader(const char *encoding, | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 376 |                                PyObject *stream, | 
 | 377 |                                const char *errors) | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 378 | { | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 379 |     return codec_getstreamcodec(encoding, stream, errors, 2); | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 380 | } | 
 | 381 |  | 
 | 382 | PyObject *PyCodec_StreamWriter(const char *encoding, | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 383 |                                PyObject *stream, | 
 | 384 |                                const char *errors) | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 385 | { | 
| Thomas Wouters | 49fd7fa | 2006-04-21 10:40:58 +0000 | [diff] [blame] | 386 |     return codec_getstreamcodec(encoding, stream, errors, 3); | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 387 | } | 
 | 388 |  | 
/* Helper that tries to make the reported exception chain name the codec
 * and operation that triggered the failure, without changing the type of
 * the exception raised.
 *
 * operation is the verb placed at the start of the message and encoding
 * is the codec name quoted inside it.
 */
static void
wrap_codec_error(const char *operation,
                 const char *encoding)
{
    /* _PyErr_TrySetFromCause swaps the active exception for a suitably
     * updated clone when it can and otherwise leaves the original in
     * place; its return value is not needed here.
     */
    (void)_PyErr_TrySetFromCause("%s with '%s' codec failed",
                                 operation, encoding);
}
 | 404 |  | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 405 | /* Encode an object (e.g. an Unicode object) using the given encoding | 
 | 406 |    and return the resulting encoded object (usually a Python string). | 
 | 407 |  | 
 | 408 |    errors is passed to the encoder factory as argument if non-NULL. */ | 
 | 409 |  | 
| Nick Coghlan | c72e4e6 | 2013-11-22 22:39:36 +1000 | [diff] [blame] | 410 | static PyObject * | 
 | 411 | _PyCodec_EncodeInternal(PyObject *object, | 
 | 412 |                         PyObject *encoder, | 
 | 413 |                         const char *encoding, | 
 | 414 |                         const char *errors) | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 415 | { | 
| Neal Norwitz | 3715c3e | 2005-11-24 22:09:18 +0000 | [diff] [blame] | 416 |     PyObject *args = NULL, *result = NULL; | 
| Guido van Rossum | 98297ee | 2007-11-06 21:34:58 +0000 | [diff] [blame] | 417 |     PyObject *v = NULL; | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 418 |  | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 419 |     args = args_tuple(object, errors); | 
 | 420 |     if (args == NULL) | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 421 |         goto onError; | 
| Guido van Rossum | 98297ee | 2007-11-06 21:34:58 +0000 | [diff] [blame] | 422 |  | 
 | 423 |     result = PyEval_CallObject(encoder, args); | 
| Nick Coghlan | c4c2580 | 2013-11-15 21:47:37 +1000 | [diff] [blame] | 424 |     if (result == NULL) { | 
 | 425 |         wrap_codec_error("encoding", encoding); | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 426 |         goto onError; | 
| Nick Coghlan | c4c2580 | 2013-11-15 21:47:37 +1000 | [diff] [blame] | 427 |     } | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 428 |  | 
| Guido van Rossum | 98297ee | 2007-11-06 21:34:58 +0000 | [diff] [blame] | 429 |     if (!PyTuple_Check(result) || | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 430 |         PyTuple_GET_SIZE(result) != 2) { | 
 | 431 |         PyErr_SetString(PyExc_TypeError, | 
 | 432 |                         "encoder must return a tuple (object, integer)"); | 
 | 433 |         goto onError; | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 434 |     } | 
| Marc-André Lemburg | b2750b5 | 2008-06-06 12:18:17 +0000 | [diff] [blame] | 435 |     v = PyTuple_GET_ITEM(result,0); | 
 | 436 |     Py_INCREF(v); | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 437 |     /* We don't check or use the second (integer) entry. */ | 
 | 438 |  | 
| Marc-André Lemburg | b2750b5 | 2008-06-06 12:18:17 +0000 | [diff] [blame] | 439 |     Py_DECREF(args); | 
 | 440 |     Py_DECREF(encoder); | 
 | 441 |     Py_DECREF(result); | 
 | 442 |     return v; | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 443 |  | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 444 |  onError: | 
| Neal Norwitz | 3715c3e | 2005-11-24 22:09:18 +0000 | [diff] [blame] | 445 |     Py_XDECREF(result); | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 446 |     Py_XDECREF(args); | 
 | 447 |     Py_XDECREF(encoder); | 
| Marc-André Lemburg | b2750b5 | 2008-06-06 12:18:17 +0000 | [diff] [blame] | 448 |     return NULL; | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 449 | } | 
 | 450 |  | 
 | 451 | /* Decode an object (usually a Python string) using the given encoding | 
 | 452 |    and return an equivalent object (e.g. an Unicode object). | 
 | 453 |  | 
 | 454 |    errors is passed to the decoder factory as argument if non-NULL. */ | 
 | 455 |  | 
| Nick Coghlan | c72e4e6 | 2013-11-22 22:39:36 +1000 | [diff] [blame] | 456 | static PyObject * | 
 | 457 | _PyCodec_DecodeInternal(PyObject *object, | 
 | 458 |                         PyObject *decoder, | 
 | 459 |                         const char *encoding, | 
 | 460 |                         const char *errors) | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 461 | { | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 462 |     PyObject *args = NULL, *result = NULL; | 
 | 463 |     PyObject *v; | 
 | 464 |  | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 465 |     args = args_tuple(object, errors); | 
 | 466 |     if (args == NULL) | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 467 |         goto onError; | 
| Guido van Rossum | 98297ee | 2007-11-06 21:34:58 +0000 | [diff] [blame] | 468 |  | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 469 |     result = PyEval_CallObject(decoder,args); | 
| Nick Coghlan | c4c2580 | 2013-11-15 21:47:37 +1000 | [diff] [blame] | 470 |     if (result == NULL) { | 
 | 471 |         wrap_codec_error("decoding", encoding); | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 472 |         goto onError; | 
| Nick Coghlan | c4c2580 | 2013-11-15 21:47:37 +1000 | [diff] [blame] | 473 |     } | 
| Guido van Rossum | 98297ee | 2007-11-06 21:34:58 +0000 | [diff] [blame] | 474 |     if (!PyTuple_Check(result) || | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 475 |         PyTuple_GET_SIZE(result) != 2) { | 
 | 476 |         PyErr_SetString(PyExc_TypeError, | 
 | 477 |                         "decoder must return a tuple (object,integer)"); | 
 | 478 |         goto onError; | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 479 |     } | 
 | 480 |     v = PyTuple_GET_ITEM(result,0); | 
 | 481 |     Py_INCREF(v); | 
 | 482 |     /* We don't check or use the second (integer) entry. */ | 
 | 483 |  | 
 | 484 |     Py_DECREF(args); | 
 | 485 |     Py_DECREF(decoder); | 
 | 486 |     Py_DECREF(result); | 
 | 487 |     return v; | 
| Guido van Rossum | 98297ee | 2007-11-06 21:34:58 +0000 | [diff] [blame] | 488 |  | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 489 |  onError: | 
 | 490 |     Py_XDECREF(args); | 
 | 491 |     Py_XDECREF(decoder); | 
 | 492 |     Py_XDECREF(result); | 
 | 493 |     return NULL; | 
 | 494 | } | 
 | 495 |  | 
| Nick Coghlan | c72e4e6 | 2013-11-22 22:39:36 +1000 | [diff] [blame] | 496 | /* Generic encoding/decoding API */ | 
 | 497 | PyObject *PyCodec_Encode(PyObject *object, | 
 | 498 |                          const char *encoding, | 
 | 499 |                          const char *errors) | 
 | 500 | { | 
 | 501 |     PyObject *encoder; | 
 | 502 |  | 
 | 503 |     encoder = PyCodec_Encoder(encoding); | 
 | 504 |     if (encoder == NULL) | 
 | 505 |         return NULL; | 
 | 506 |  | 
 | 507 |     return _PyCodec_EncodeInternal(object, encoder, encoding, errors); | 
 | 508 | } | 
 | 509 |  | 
 | 510 | PyObject *PyCodec_Decode(PyObject *object, | 
 | 511 |                          const char *encoding, | 
 | 512 |                          const char *errors) | 
 | 513 | { | 
 | 514 |     PyObject *decoder; | 
 | 515 |  | 
 | 516 |     decoder = PyCodec_Decoder(encoding); | 
 | 517 |     if (decoder == NULL) | 
 | 518 |         return NULL; | 
 | 519 |  | 
 | 520 |     return _PyCodec_DecodeInternal(object, decoder, encoding, errors); | 
 | 521 | } | 
 | 522 |  | 
 | 523 | /* Text encoding/decoding API */ | 
| Nick Coghlan | a9b1524 | 2014-02-04 22:11:18 +1000 | [diff] [blame] | 524 | PyObject * _PyCodec_LookupTextEncoding(const char *encoding, | 
 | 525 |                                        const char *alternate_command) | 
| Nick Coghlan | c72e4e6 | 2013-11-22 22:39:36 +1000 | [diff] [blame] | 526 | { | 
 | 527 |     _Py_IDENTIFIER(_is_text_encoding); | 
 | 528 |     PyObject *codec; | 
 | 529 |     PyObject *attr; | 
| Nick Coghlan | c72e4e6 | 2013-11-22 22:39:36 +1000 | [diff] [blame] | 530 |     int is_text_codec; | 
 | 531 |  | 
 | 532 |     codec = _PyCodec_Lookup(encoding); | 
 | 533 |     if (codec == NULL) | 
 | 534 |         return NULL; | 
 | 535 |  | 
 | 536 |     /* Backwards compatibility: assume any raw tuple describes a text | 
 | 537 |      * encoding, and the same for anything lacking the private | 
 | 538 |      * attribute. | 
 | 539 |      */ | 
 | 540 |     if (!PyTuple_CheckExact(codec)) { | 
 | 541 |         attr = _PyObject_GetAttrId(codec, &PyId__is_text_encoding); | 
 | 542 |         if (attr == NULL) { | 
 | 543 |             if (PyErr_ExceptionMatches(PyExc_AttributeError)) { | 
 | 544 |                 PyErr_Clear(); | 
 | 545 |             } else { | 
 | 546 |                 Py_DECREF(codec); | 
 | 547 |                 return NULL; | 
 | 548 |             } | 
 | 549 |         } else { | 
 | 550 |             is_text_codec = PyObject_IsTrue(attr); | 
 | 551 |             Py_DECREF(attr); | 
 | 552 |             if (!is_text_codec) { | 
 | 553 |                 Py_DECREF(codec); | 
 | 554 |                 PyErr_Format(PyExc_LookupError, | 
 | 555 |                              "'%.400s' is not a text encoding; " | 
| Nick Coghlan | a9b1524 | 2014-02-04 22:11:18 +1000 | [diff] [blame] | 556 |                              "use %s to handle arbitrary codecs", | 
 | 557 |                              encoding, alternate_command); | 
| Nick Coghlan | c72e4e6 | 2013-11-22 22:39:36 +1000 | [diff] [blame] | 558 |                 return NULL; | 
 | 559 |             } | 
 | 560 |         } | 
 | 561 |     } | 
 | 562 |  | 
| Nick Coghlan | a9b1524 | 2014-02-04 22:11:18 +1000 | [diff] [blame] | 563 |     /* This appears to be a valid text encoding */ | 
 | 564 |     return codec; | 
 | 565 | } | 
 | 566 |  | 
 | 567 |  | 
 | 568 | static | 
 | 569 | PyObject *codec_getitem_checked(const char *encoding, | 
 | 570 |                                 const char *alternate_command, | 
 | 571 |                                 int index) | 
 | 572 | { | 
 | 573 |     PyObject *codec; | 
 | 574 |     PyObject *v; | 
 | 575 |  | 
 | 576 |     codec = _PyCodec_LookupTextEncoding(encoding, alternate_command); | 
 | 577 |     if (codec == NULL) | 
 | 578 |         return NULL; | 
 | 579 |  | 
| Nick Coghlan | c72e4e6 | 2013-11-22 22:39:36 +1000 | [diff] [blame] | 580 |     v = PyTuple_GET_ITEM(codec, index); | 
| Nick Coghlan | c72e4e6 | 2013-11-22 22:39:36 +1000 | [diff] [blame] | 581 |     Py_INCREF(v); | 
| Nick Coghlan | a9b1524 | 2014-02-04 22:11:18 +1000 | [diff] [blame] | 582 |     Py_DECREF(codec); | 
| Nick Coghlan | c72e4e6 | 2013-11-22 22:39:36 +1000 | [diff] [blame] | 583 |     return v; | 
 | 584 | } | 
 | 585 |  | 
 | 586 | static PyObject * _PyCodec_TextEncoder(const char *encoding) | 
 | 587 | { | 
| Nick Coghlan | a9b1524 | 2014-02-04 22:11:18 +1000 | [diff] [blame] | 588 |     return codec_getitem_checked(encoding, "codecs.encode()", 0); | 
| Nick Coghlan | c72e4e6 | 2013-11-22 22:39:36 +1000 | [diff] [blame] | 589 | } | 
 | 590 |  | 
 | 591 | static PyObject * _PyCodec_TextDecoder(const char *encoding) | 
 | 592 | { | 
| Nick Coghlan | a9b1524 | 2014-02-04 22:11:18 +1000 | [diff] [blame] | 593 |     return codec_getitem_checked(encoding, "codecs.decode()", 1); | 
| Nick Coghlan | c72e4e6 | 2013-11-22 22:39:36 +1000 | [diff] [blame] | 594 | } | 
 | 595 |  | 
 | 596 | PyObject *_PyCodec_EncodeText(PyObject *object, | 
 | 597 |                               const char *encoding, | 
 | 598 |                               const char *errors) | 
 | 599 | { | 
 | 600 |     PyObject *encoder; | 
 | 601 |  | 
 | 602 |     encoder = _PyCodec_TextEncoder(encoding); | 
 | 603 |     if (encoder == NULL) | 
 | 604 |         return NULL; | 
 | 605 |  | 
 | 606 |     return _PyCodec_EncodeInternal(object, encoder, encoding, errors); | 
 | 607 | } | 
 | 608 |  | 
 | 609 | PyObject *_PyCodec_DecodeText(PyObject *object, | 
 | 610 |                               const char *encoding, | 
 | 611 |                               const char *errors) | 
 | 612 | { | 
 | 613 |     PyObject *decoder; | 
 | 614 |  | 
 | 615 |     decoder = _PyCodec_TextDecoder(encoding); | 
 | 616 |     if (decoder == NULL) | 
 | 617 |         return NULL; | 
 | 618 |  | 
 | 619 |     return _PyCodec_DecodeInternal(object, decoder, encoding, errors); | 
 | 620 | } | 
 | 621 |  | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 622 | /* Register the error handling callback function error under the name | 
 | 623 |    name. This function will be called by the codec when it encounters | 
 | 624 |    an unencodable characters/undecodable bytes and doesn't know the | 
 | 625 |    callback name, when name is specified as the error parameter | 
 | 626 |    in the call to the encode/decode function. | 
 | 627 |    Return 0 on success, -1 on error */ | 
 | 628 | int PyCodec_RegisterError(const char *name, PyObject *error) | 
 | 629 | { | 
| Nicholas Bastin | e5662ae | 2004-03-24 22:22:12 +0000 | [diff] [blame] | 630 |     PyInterpreterState *interp = PyThreadState_GET()->interp; | 
| Gustavo Niemeyer | 5ddd4c3 | 2003-03-19 00:35:36 +0000 | [diff] [blame] | 631 |     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 632 |         return -1; | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 633 |     if (!PyCallable_Check(error)) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 634 |         PyErr_SetString(PyExc_TypeError, "handler must be callable"); | 
 | 635 |         return -1; | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 636 |     } | 
| Gustavo Niemeyer | 5ddd4c3 | 2003-03-19 00:35:36 +0000 | [diff] [blame] | 637 |     return PyDict_SetItemString(interp->codec_error_registry, | 
| Serhiy Storchaka | c679227 | 2013-10-19 21:03:34 +0300 | [diff] [blame] | 638 |                                 name, error); | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 639 | } | 
 | 640 |  | 
 | 641 | /* Lookup the error handling callback function registered under the | 
 | 642 |    name error. As a special case NULL can be passed, in which case | 
 | 643 |    the error handling callback for strict encoding will be returned. */ | 
 | 644 | PyObject *PyCodec_LookupError(const char *name) | 
 | 645 | { | 
 | 646 |     PyObject *handler = NULL; | 
 | 647 |  | 
| Nicholas Bastin | e5662ae | 2004-03-24 22:22:12 +0000 | [diff] [blame] | 648 |     PyInterpreterState *interp = PyThreadState_GET()->interp; | 
| Gustavo Niemeyer | 5ddd4c3 | 2003-03-19 00:35:36 +0000 | [diff] [blame] | 649 |     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 650 |         return NULL; | 
| Gustavo Niemeyer | 5ddd4c3 | 2003-03-19 00:35:36 +0000 | [diff] [blame] | 651 |  | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 652 |     if (name==NULL) | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 653 |         name = "strict"; | 
| Serhiy Storchaka | c679227 | 2013-10-19 21:03:34 +0300 | [diff] [blame] | 654 |     handler = PyDict_GetItemString(interp->codec_error_registry, name); | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 655 |     if (!handler) | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 656 |         PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name); | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 657 |     else | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 658 |         Py_INCREF(handler); | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 659 |     return handler; | 
 | 660 | } | 
 | 661 |  | 
 | 662 | static void wrong_exception_type(PyObject *exc) | 
 | 663 | { | 
| Martin v. Löwis | bd928fe | 2011-10-14 10:20:37 +0200 | [diff] [blame] | 664 |     _Py_IDENTIFIER(__class__); | 
 | 665 |     _Py_IDENTIFIER(__name__); | 
| Martin v. Löwis | 1ee1b6f | 2011-10-10 18:11:30 +0200 | [diff] [blame] | 666 |     PyObject *type = _PyObject_GetAttrId(exc, &PyId___class__); | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 667 |     if (type != NULL) { | 
| Martin v. Löwis | 1ee1b6f | 2011-10-10 18:11:30 +0200 | [diff] [blame] | 668 |         PyObject *name = _PyObject_GetAttrId(type, &PyId___name__); | 
| Walter Dörwald | 573c08c | 2007-05-25 15:46:59 +0000 | [diff] [blame] | 669 |         Py_DECREF(type); | 
 | 670 |         if (name != NULL) { | 
 | 671 |             PyErr_Format(PyExc_TypeError, | 
 | 672 |                          "don't know how to handle %S in error callback", name); | 
 | 673 |             Py_DECREF(name); | 
 | 674 |         } | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 675 |     } | 
 | 676 | } | 
 | 677 |  | 
 | 678 | PyObject *PyCodec_StrictErrors(PyObject *exc) | 
 | 679 | { | 
| Brett Cannon | bf36409 | 2006-03-01 04:25:17 +0000 | [diff] [blame] | 680 |     if (PyExceptionInstance_Check(exc)) | 
 | 681 |         PyErr_SetObject(PyExceptionInstance_Class(exc), exc); | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 682 |     else | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 683 |         PyErr_SetString(PyExc_TypeError, "codec must pass exception instance"); | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 684 |     return NULL; | 
 | 685 | } | 
 | 686 |  | 
 | 687 |  | 
 | 688 | PyObject *PyCodec_IgnoreErrors(PyObject *exc) | 
 | 689 | { | 
| Martin v. Löwis | 18e1655 | 2006-02-15 17:27:45 +0000 | [diff] [blame] | 690 |     Py_ssize_t end; | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 691 |     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 692 |         if (PyUnicodeEncodeError_GetEnd(exc, &end)) | 
 | 693 |             return NULL; | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 694 |     } | 
 | 695 |     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 696 |         if (PyUnicodeDecodeError_GetEnd(exc, &end)) | 
 | 697 |             return NULL; | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 698 |     } | 
 | 699 |     else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 700 |         if (PyUnicodeTranslateError_GetEnd(exc, &end)) | 
 | 701 |             return NULL; | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 702 |     } | 
 | 703 |     else { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 704 |         wrong_exception_type(exc); | 
 | 705 |         return NULL; | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 706 |     } | 
| Victor Stinner | ee45009 | 2011-12-01 02:52:11 +0100 | [diff] [blame] | 707 |     return Py_BuildValue("(Nn)", PyUnicode_New(0, 0), end); | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 708 | } | 
 | 709 |  | 
 | 710 |  | 
 | 711 | PyObject *PyCodec_ReplaceErrors(PyObject *exc) | 
 | 712 | { | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 713 |     Py_ssize_t start, end, i, len; | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 714 |  | 
 | 715 |     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 716 |         PyObject *res; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 717 |         int kind; | 
 | 718 |         void *data; | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 719 |         if (PyUnicodeEncodeError_GetStart(exc, &start)) | 
 | 720 |             return NULL; | 
 | 721 |         if (PyUnicodeEncodeError_GetEnd(exc, &end)) | 
 | 722 |             return NULL; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 723 |         len = end - start; | 
 | 724 |         res = PyUnicode_New(len, '?'); | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 725 |         if (res == NULL) | 
 | 726 |             return NULL; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 727 |         kind = PyUnicode_KIND(res); | 
 | 728 |         data = PyUnicode_DATA(res); | 
 | 729 |         for (i = 0; i < len; ++i) | 
 | 730 |             PyUnicode_WRITE(kind, data, i, '?'); | 
| Victor Stinner | 8f82506 | 2012-04-27 13:55:39 +0200 | [diff] [blame] | 731 |         assert(_PyUnicode_CheckConsistency(res, 1)); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 732 |         return Py_BuildValue("(Nn)", res, end); | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 733 |     } | 
 | 734 |     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 735 |         if (PyUnicodeDecodeError_GetEnd(exc, &end)) | 
 | 736 |             return NULL; | 
| Victor Stinner | 1a15aba | 2011-10-02 19:00:15 +0200 | [diff] [blame] | 737 |         return Py_BuildValue("(Cn)", | 
 | 738 |                              (int)Py_UNICODE_REPLACEMENT_CHARACTER, | 
 | 739 |                              end); | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 740 |     } | 
 | 741 |     else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 742 |         PyObject *res; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 743 |         int kind; | 
 | 744 |         void *data; | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 745 |         if (PyUnicodeTranslateError_GetStart(exc, &start)) | 
 | 746 |             return NULL; | 
 | 747 |         if (PyUnicodeTranslateError_GetEnd(exc, &end)) | 
 | 748 |             return NULL; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 749 |         len = end - start; | 
 | 750 |         res = PyUnicode_New(len, Py_UNICODE_REPLACEMENT_CHARACTER); | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 751 |         if (res == NULL) | 
 | 752 |             return NULL; | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 753 |         kind = PyUnicode_KIND(res); | 
 | 754 |         data = PyUnicode_DATA(res); | 
 | 755 |         for (i=0; i < len; i++) | 
 | 756 |             PyUnicode_WRITE(kind, data, i, Py_UNICODE_REPLACEMENT_CHARACTER); | 
| Victor Stinner | 8f82506 | 2012-04-27 13:55:39 +0200 | [diff] [blame] | 757 |         assert(_PyUnicode_CheckConsistency(res, 1)); | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 758 |         return Py_BuildValue("(Nn)", res, end); | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 759 |     } | 
 | 760 |     else { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 761 |         wrong_exception_type(exc); | 
 | 762 |         return NULL; | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 763 |     } | 
 | 764 | } | 
 | 765 |  | 
 | 766 | PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc) | 
 | 767 | { | 
 | 768 |     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 769 |         PyObject *restuple; | 
 | 770 |         PyObject *object; | 
| Victor Stinner | b31f1bc | 2011-11-04 21:29:10 +0100 | [diff] [blame] | 771 |         Py_ssize_t i; | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 772 |         Py_ssize_t start; | 
 | 773 |         Py_ssize_t end; | 
 | 774 |         PyObject *res; | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 775 |         unsigned char *outp; | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 776 |         int ressize; | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 777 |         Py_UCS4 ch; | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 778 |         if (PyUnicodeEncodeError_GetStart(exc, &start)) | 
 | 779 |             return NULL; | 
 | 780 |         if (PyUnicodeEncodeError_GetEnd(exc, &end)) | 
 | 781 |             return NULL; | 
 | 782 |         if (!(object = PyUnicodeEncodeError_GetObject(exc))) | 
 | 783 |             return NULL; | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 784 |         for (i = start, ressize = 0; i < end; ++i) { | 
 | 785 |             /* object is guaranteed to be "ready" */ | 
 | 786 |             ch = PyUnicode_READ_CHAR(object, i); | 
 | 787 |             if (ch<10) | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 788 |                 ressize += 2+1+1; | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 789 |             else if (ch<100) | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 790 |                 ressize += 2+2+1; | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 791 |             else if (ch<1000) | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 792 |                 ressize += 2+3+1; | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 793 |             else if (ch<10000) | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 794 |                 ressize += 2+4+1; | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 795 |             else if (ch<100000) | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 796 |                 ressize += 2+5+1; | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 797 |             else if (ch<1000000) | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 798 |                 ressize += 2+6+1; | 
 | 799 |             else | 
 | 800 |                 ressize += 2+7+1; | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 801 |         } | 
 | 802 |         /* allocate replacement */ | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 803 |         res = PyUnicode_New(ressize, 127); | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 804 |         if (res == NULL) { | 
 | 805 |             Py_DECREF(object); | 
 | 806 |             return NULL; | 
 | 807 |         } | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 808 |         outp = PyUnicode_1BYTE_DATA(res); | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 809 |         /* generate replacement */ | 
| Victor Stinner | b31f1bc | 2011-11-04 21:29:10 +0100 | [diff] [blame] | 810 |         for (i = start; i < end; ++i) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 811 |             int digits; | 
 | 812 |             int base; | 
| Martin v. Löwis | 8ba7930 | 2011-11-04 12:26:49 +0100 | [diff] [blame] | 813 |             ch = PyUnicode_READ_CHAR(object, i); | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 814 |             *outp++ = '&'; | 
 | 815 |             *outp++ = '#'; | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 816 |             if (ch<10) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 817 |                 digits = 1; | 
 | 818 |                 base = 1; | 
 | 819 |             } | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 820 |             else if (ch<100) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 821 |                 digits = 2; | 
 | 822 |                 base = 10; | 
 | 823 |             } | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 824 |             else if (ch<1000) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 825 |                 digits = 3; | 
 | 826 |                 base = 100; | 
 | 827 |             } | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 828 |             else if (ch<10000) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 829 |                 digits = 4; | 
 | 830 |                 base = 1000; | 
 | 831 |             } | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 832 |             else if (ch<100000) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 833 |                 digits = 5; | 
 | 834 |                 base = 10000; | 
 | 835 |             } | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 836 |             else if (ch<1000000) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 837 |                 digits = 6; | 
 | 838 |                 base = 100000; | 
 | 839 |             } | 
 | 840 |             else { | 
 | 841 |                 digits = 7; | 
 | 842 |                 base = 1000000; | 
 | 843 |             } | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 844 |             while (digits-->0) { | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 845 |                 *outp++ = '0' + ch/base; | 
 | 846 |                 ch %= base; | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 847 |                 base /= 10; | 
 | 848 |             } | 
 | 849 |             *outp++ = ';'; | 
 | 850 |         } | 
| Victor Stinner | 8f82506 | 2012-04-27 13:55:39 +0200 | [diff] [blame] | 851 |         assert(_PyUnicode_CheckConsistency(res, 1)); | 
 | 852 |         restuple = Py_BuildValue("(Nn)", res, end); | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 853 |         Py_DECREF(object); | 
 | 854 |         return restuple; | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 855 |     } | 
 | 856 |     else { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 857 |         wrong_exception_type(exc); | 
 | 858 |         return NULL; | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 859 |     } | 
 | 860 | } | 
 | 861 |  | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 862 | PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) | 
 | 863 | { | 
 | 864 |     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 865 |         PyObject *restuple; | 
 | 866 |         PyObject *object; | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 867 |         Py_ssize_t i; | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 868 |         Py_ssize_t start; | 
 | 869 |         Py_ssize_t end; | 
 | 870 |         PyObject *res; | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 871 |         unsigned char *outp; | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 872 |         int ressize; | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 873 |         Py_UCS4 c; | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 874 |         if (PyUnicodeEncodeError_GetStart(exc, &start)) | 
 | 875 |             return NULL; | 
 | 876 |         if (PyUnicodeEncodeError_GetEnd(exc, &end)) | 
 | 877 |             return NULL; | 
 | 878 |         if (!(object = PyUnicodeEncodeError_GetObject(exc))) | 
 | 879 |             return NULL; | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 880 |         for (i = start, ressize = 0; i < end; ++i) { | 
 | 881 |             /* object is guaranteed to be "ready" */ | 
 | 882 |             c = PyUnicode_READ_CHAR(object, i); | 
 | 883 |             if (c >= 0x10000) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 884 |                 ressize += 1+1+8; | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 885 |             } | 
 | 886 |             else if (c >= 0x100) { | 
 | 887 |                 ressize += 1+1+4; | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 888 |             } | 
 | 889 |             else | 
 | 890 |                 ressize += 1+1+2; | 
 | 891 |         } | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 892 |         res = PyUnicode_New(ressize, 127); | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 893 |         if (res==NULL) | 
 | 894 |             return NULL; | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 895 |         for (i = start, outp = PyUnicode_1BYTE_DATA(res); | 
 | 896 |             i < end; ++i) { | 
 | 897 |             c = PyUnicode_READ_CHAR(object, i); | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 898 |             *outp++ = '\\'; | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 899 |             if (c >= 0x00010000) { | 
 | 900 |                 *outp++ = 'U'; | 
| Victor Stinner | f5cff56 | 2011-10-14 02:13:11 +0200 | [diff] [blame] | 901 |                 *outp++ = Py_hexdigits[(c>>28)&0xf]; | 
 | 902 |                 *outp++ = Py_hexdigits[(c>>24)&0xf]; | 
 | 903 |                 *outp++ = Py_hexdigits[(c>>20)&0xf]; | 
 | 904 |                 *outp++ = Py_hexdigits[(c>>16)&0xf]; | 
 | 905 |                 *outp++ = Py_hexdigits[(c>>12)&0xf]; | 
 | 906 |                 *outp++ = Py_hexdigits[(c>>8)&0xf]; | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 907 |             } | 
| Antoine Pitrou | e4a1892 | 2010-09-09 20:30:23 +0000 | [diff] [blame] | 908 |             else if (c >= 0x100) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 909 |                 *outp++ = 'u'; | 
| Victor Stinner | f5cff56 | 2011-10-14 02:13:11 +0200 | [diff] [blame] | 910 |                 *outp++ = Py_hexdigits[(c>>12)&0xf]; | 
 | 911 |                 *outp++ = Py_hexdigits[(c>>8)&0xf]; | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 912 |             } | 
 | 913 |             else | 
 | 914 |                 *outp++ = 'x'; | 
| Victor Stinner | f5cff56 | 2011-10-14 02:13:11 +0200 | [diff] [blame] | 915 |             *outp++ = Py_hexdigits[(c>>4)&0xf]; | 
 | 916 |             *outp++ = Py_hexdigits[c&0xf]; | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 917 |         } | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 918 |  | 
| Victor Stinner | 8f82506 | 2012-04-27 13:55:39 +0200 | [diff] [blame] | 919 |         assert(_PyUnicode_CheckConsistency(res, 1)); | 
 | 920 |         restuple = Py_BuildValue("(Nn)", res, end); | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 921 |         Py_DECREF(object); | 
 | 922 |         return restuple; | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 923 |     } | 
 | 924 |     else { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 925 |         wrong_exception_type(exc); | 
 | 926 |         return NULL; | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 927 |     } | 
 | 928 | } | 
 | 929 |  | 
/* Identifiers for the encodings that get_standard_encoding() can report. */
#define ENC_UTF8        0
#define ENC_UTF16BE     1
#define ENC_UTF16LE     2
#define ENC_UTF32BE     3
#define ENC_UTF32LE     4

/* Map an encoding name onto one of the ENC_* constants.

   Recognised spellings are "utf", an optional '-' or '_', "16" or "32",
   and then either nothing (native byte order, chosen by WORDS_BIGENDIAN)
   or an optional '-' or '_' followed by "be" or "le".  The "utf", "be"
   and "le" parts are matched case-insensitively.

   *bytelength is set to 2 for UTF-16, 4 for UTF-32 and 3 for UTF-8.

   Every name that does not match one of the UTF-16/UTF-32 spellings is
   reported as ENC_UTF8 with *bytelength == 3.

   encoding must be a NUL-terminated string; no byte after the terminator
   is ever read. */
static int
get_standard_encoding(const char *encoding, int *bytelength)
{
    if (Py_TOLOWER(encoding[0]) == 'u' &&
        Py_TOLOWER(encoding[1]) == 't' &&
        Py_TOLOWER(encoding[2]) == 'f') {
        const char *p = encoding + 3;
        int width = 0;

        if (*p == '-' || *p == '_')
            p++;
        if (p[0] == '1' && p[1] == '6')
            width = 2;
        else if (p[0] == '3' && p[1] == '2')
            width = 4;

        if (width != 0) {
            const int big = (width == 2) ? ENC_UTF16BE : ENC_UTF32BE;
            const int little = (width == 2) ? ENC_UTF16LE : ENC_UTF32LE;
            int order;

            p += 2;
            *bytelength = width;
            if (*p == '\0') {
                /* No byte-order suffix: use the platform's native order. */
#ifdef WORDS_BIGENDIAN
                return big;
#else
                return little;
#endif
            }
            if (*p == '-' || *p == '_')
                p++;
            /* Look at p[0] first: it is non-NUL whenever it matches, which
               makes p[1] (and then p[2]) safe to read.  Testing p[1] first
               would read past the terminator for names like "utf-16-". */
            order = Py_TOLOWER(p[0]);
            if ((order == 'b' || order == 'l') &&
                Py_TOLOWER(p[1]) == 'e' && p[2] == '\0') {
                return (order == 'b') ? big : little;
            }
            /* Unrecognised suffix: fall through to the UTF-8 default. */
        }
    }
    /* utf-8 */
    *bytelength = 3;
    return ENC_UTF8;
}
 | 988 |  | 
| Martin v. Löwis | aef3fb0 | 2009-05-02 19:27:30 +0000 | [diff] [blame] | 989 | /* This handler is declared static until someone demonstrates | 
 | 990 |    a need to call it directly. */ | 
 | 991 | static PyObject * | 
| Martin v. Löwis | e0a2b72 | 2009-05-10 08:08:56 +0000 | [diff] [blame] | 992 | PyCodec_SurrogatePassErrors(PyObject *exc) | 
| Martin v. Löwis | db12d45 | 2009-05-02 18:52:14 +0000 | [diff] [blame] | 993 | { | 
 | 994 |     PyObject *restuple; | 
 | 995 |     PyObject *object; | 
| Serhiy Storchaka | 58cf607 | 2013-11-19 11:32:41 +0200 | [diff] [blame] | 996 |     PyObject *encode; | 
 | 997 |     char *encoding; | 
 | 998 |     int code; | 
 | 999 |     int bytelength; | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 1000 |     Py_ssize_t i; | 
| Martin v. Löwis | db12d45 | 2009-05-02 18:52:14 +0000 | [diff] [blame] | 1001 |     Py_ssize_t start; | 
 | 1002 |     Py_ssize_t end; | 
 | 1003 |     PyObject *res; | 
 | 1004 |     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { | 
| Serhiy Storchaka | 58cf607 | 2013-11-19 11:32:41 +0200 | [diff] [blame] | 1005 |         unsigned char *outp; | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 1006 |         if (PyUnicodeEncodeError_GetStart(exc, &start)) | 
 | 1007 |             return NULL; | 
 | 1008 |         if (PyUnicodeEncodeError_GetEnd(exc, &end)) | 
 | 1009 |             return NULL; | 
 | 1010 |         if (!(object = PyUnicodeEncodeError_GetObject(exc))) | 
 | 1011 |             return NULL; | 
| Serhiy Storchaka | 58cf607 | 2013-11-19 11:32:41 +0200 | [diff] [blame] | 1012 |         if (!(encode = PyUnicodeEncodeError_GetEncoding(exc))) { | 
 | 1013 |             Py_DECREF(object); | 
 | 1014 |             return NULL; | 
 | 1015 |         } | 
 | 1016 |         if (!(encoding = PyUnicode_AsUTF8(encode))) { | 
 | 1017 |             Py_DECREF(object); | 
 | 1018 |             Py_DECREF(encode); | 
 | 1019 |             return NULL; | 
 | 1020 |         } | 
 | 1021 |         code = get_standard_encoding(encoding, &bytelength); | 
 | 1022 |         Py_DECREF(encode); | 
 | 1023 |  | 
 | 1024 |         res = PyBytes_FromStringAndSize(NULL, bytelength*(end-start)); | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 1025 |         if (!res) { | 
 | 1026 |             Py_DECREF(object); | 
 | 1027 |             return NULL; | 
 | 1028 |         } | 
| Serhiy Storchaka | 58cf607 | 2013-11-19 11:32:41 +0200 | [diff] [blame] | 1029 |         outp = (unsigned char*)PyBytes_AsString(res); | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 1030 |         for (i = start; i < end; i++) { | 
 | 1031 |             /* object is guaranteed to be "ready" */ | 
 | 1032 |             Py_UCS4 ch = PyUnicode_READ_CHAR(object, i); | 
| Victor Stinner | 76df43d | 2012-10-30 01:42:39 +0100 | [diff] [blame] | 1033 |             if (!Py_UNICODE_IS_SURROGATE(ch)) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 1034 |                 /* Not a surrogate, fail with original exception */ | 
 | 1035 |                 PyErr_SetObject(PyExceptionInstance_Class(exc), exc); | 
 | 1036 |                 Py_DECREF(res); | 
 | 1037 |                 Py_DECREF(object); | 
 | 1038 |                 return NULL; | 
 | 1039 |             } | 
| Serhiy Storchaka | 58cf607 | 2013-11-19 11:32:41 +0200 | [diff] [blame] | 1040 |             switch (code) { | 
 | 1041 |             case ENC_UTF8: | 
 | 1042 |                 *outp++ = (unsigned char)(0xe0 | (ch >> 12)); | 
 | 1043 |                 *outp++ = (unsigned char)(0x80 | ((ch >> 6) & 0x3f)); | 
 | 1044 |                 *outp++ = (unsigned char)(0x80 | (ch & 0x3f)); | 
 | 1045 |                 break; | 
 | 1046 |             case ENC_UTF16LE: | 
 | 1047 |                 *outp++ = (unsigned char) ch; | 
 | 1048 |                 *outp++ = (unsigned char)(ch >> 8); | 
 | 1049 |                 break; | 
 | 1050 |             case ENC_UTF16BE: | 
 | 1051 |                 *outp++ = (unsigned char)(ch >> 8); | 
 | 1052 |                 *outp++ = (unsigned char) ch; | 
 | 1053 |                 break; | 
 | 1054 |             case ENC_UTF32LE: | 
 | 1055 |                 *outp++ = (unsigned char) ch; | 
 | 1056 |                 *outp++ = (unsigned char)(ch >> 8); | 
 | 1057 |                 *outp++ = (unsigned char)(ch >> 16); | 
 | 1058 |                 *outp++ = (unsigned char)(ch >> 24); | 
 | 1059 |                 break; | 
 | 1060 |             case ENC_UTF32BE: | 
 | 1061 |                 *outp++ = (unsigned char)(ch >> 24); | 
 | 1062 |                 *outp++ = (unsigned char)(ch >> 16); | 
 | 1063 |                 *outp++ = (unsigned char)(ch >> 8); | 
 | 1064 |                 *outp++ = (unsigned char) ch; | 
 | 1065 |                 break; | 
 | 1066 |             } | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 1067 |         } | 
 | 1068 |         restuple = Py_BuildValue("(On)", res, end); | 
 | 1069 |         Py_DECREF(res); | 
 | 1070 |         Py_DECREF(object); | 
 | 1071 |         return restuple; | 
| Martin v. Löwis | db12d45 | 2009-05-02 18:52:14 +0000 | [diff] [blame] | 1072 |     } | 
 | 1073 |     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 1074 |         unsigned char *p; | 
| Victor Stinner | c06bb7a | 2011-11-04 21:36:35 +0100 | [diff] [blame] | 1075 |         Py_UCS4 ch = 0; | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 1076 |         if (PyUnicodeDecodeError_GetStart(exc, &start)) | 
 | 1077 |             return NULL; | 
| Serhiy Storchaka | 58cf607 | 2013-11-19 11:32:41 +0200 | [diff] [blame] | 1078 |         if (PyUnicodeDecodeError_GetEnd(exc, &end)) | 
 | 1079 |             return NULL; | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 1080 |         if (!(object = PyUnicodeDecodeError_GetObject(exc))) | 
 | 1081 |             return NULL; | 
 | 1082 |         if (!(p = (unsigned char*)PyBytes_AsString(object))) { | 
 | 1083 |             Py_DECREF(object); | 
 | 1084 |             return NULL; | 
 | 1085 |         } | 
| Serhiy Storchaka | 58cf607 | 2013-11-19 11:32:41 +0200 | [diff] [blame] | 1086 |         if (!(encode = PyUnicodeDecodeError_GetEncoding(exc))) { | 
 | 1087 |             Py_DECREF(object); | 
 | 1088 |             return NULL; | 
 | 1089 |         } | 
 | 1090 |         if (!(encoding = PyUnicode_AsUTF8(encode))) { | 
 | 1091 |             Py_DECREF(object); | 
 | 1092 |             Py_DECREF(encode); | 
 | 1093 |             return NULL; | 
 | 1094 |         } | 
 | 1095 |         code = get_standard_encoding(encoding, &bytelength); | 
 | 1096 |         Py_DECREF(encode); | 
 | 1097 |  | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 1098 |         /* Try decoding a single surrogate character. If | 
 | 1099 |            there are more, let the codec call us again. */ | 
 | 1100 |         p += start; | 
| Serhiy Storchaka | 58cf607 | 2013-11-19 11:32:41 +0200 | [diff] [blame] | 1101 |         if (PyBytes_GET_SIZE(object) - start >= bytelength) { | 
 | 1102 |             switch (code) { | 
 | 1103 |             case ENC_UTF8: | 
 | 1104 |                 if ((p[0] & 0xf0) == 0xe0 && | 
 | 1105 |                     (p[1] & 0xc0) == 0x80 && | 
 | 1106 |                     (p[2] & 0xc0) == 0x80) { | 
 | 1107 |                     /* it's a three-byte code */ | 
 | 1108 |                     ch = ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f); | 
 | 1109 |                 } | 
 | 1110 |                 break; | 
 | 1111 |             case ENC_UTF16LE: | 
 | 1112 |                 ch = p[1] << 8 | p[0]; | 
 | 1113 |                 break; | 
 | 1114 |             case ENC_UTF16BE: | 
 | 1115 |                 ch = p[0] << 8 | p[1]; | 
 | 1116 |                 break; | 
 | 1117 |             case ENC_UTF32LE: | 
 | 1118 |                 ch = (p[3] << 24) | (p[2] << 16) | (p[1] << 8) | p[0]; | 
 | 1119 |                 break; | 
 | 1120 |             case ENC_UTF32BE: | 
 | 1121 |                 ch = (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]; | 
 | 1122 |                 break; | 
 | 1123 |             } | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 1124 |         } | 
| Serhiy Storchaka | 58cf607 | 2013-11-19 11:32:41 +0200 | [diff] [blame] | 1125 |  | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 1126 |         Py_DECREF(object); | 
| Serhiy Storchaka | 58cf607 | 2013-11-19 11:32:41 +0200 | [diff] [blame] | 1127 |         if (!Py_UNICODE_IS_SURROGATE(ch)) { | 
 | 1128 |             /* it's not a surrogate - fail */ | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 1129 |             PyErr_SetObject(PyExceptionInstance_Class(exc), exc); | 
 | 1130 |             return NULL; | 
 | 1131 |         } | 
| Victor Stinner | c06bb7a | 2011-11-04 21:36:35 +0100 | [diff] [blame] | 1132 |         res = PyUnicode_FromOrdinal(ch); | 
 | 1133 |         if (res == NULL) | 
 | 1134 |             return NULL; | 
| Serhiy Storchaka | 58cf607 | 2013-11-19 11:32:41 +0200 | [diff] [blame] | 1135 |         return Py_BuildValue("(Nn)", res, start + bytelength); | 
| Martin v. Löwis | db12d45 | 2009-05-02 18:52:14 +0000 | [diff] [blame] | 1136 |     } | 
 | 1137 |     else { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 1138 |         wrong_exception_type(exc); | 
 | 1139 |         return NULL; | 
| Martin v. Löwis | db12d45 | 2009-05-02 18:52:14 +0000 | [diff] [blame] | 1140 |     } | 
 | 1141 | } | 
 | 1142 |  | 
| Martin v. Löwis | 011e842 | 2009-05-05 04:43:17 +0000 | [diff] [blame] | 1143 | static PyObject * | 
| Martin v. Löwis | 43c5778 | 2009-05-10 08:15:24 +0000 | [diff] [blame] | 1144 | PyCodec_SurrogateEscapeErrors(PyObject *exc) | 
| Martin v. Löwis | 011e842 | 2009-05-05 04:43:17 +0000 | [diff] [blame] | 1145 | { | 
 | 1146 |     PyObject *restuple; | 
 | 1147 |     PyObject *object; | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 1148 |     Py_ssize_t i; | 
| Martin v. Löwis | 011e842 | 2009-05-05 04:43:17 +0000 | [diff] [blame] | 1149 |     Py_ssize_t start; | 
 | 1150 |     Py_ssize_t end; | 
 | 1151 |     PyObject *res; | 
 | 1152 |     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 1153 |         char *outp; | 
 | 1154 |         if (PyUnicodeEncodeError_GetStart(exc, &start)) | 
 | 1155 |             return NULL; | 
 | 1156 |         if (PyUnicodeEncodeError_GetEnd(exc, &end)) | 
 | 1157 |             return NULL; | 
 | 1158 |         if (!(object = PyUnicodeEncodeError_GetObject(exc))) | 
 | 1159 |             return NULL; | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 1160 |         res = PyBytes_FromStringAndSize(NULL, end-start); | 
 | 1161 |         if (!res) { | 
 | 1162 |             Py_DECREF(object); | 
 | 1163 |             return NULL; | 
 | 1164 |         } | 
 | 1165 |         outp = PyBytes_AsString(res); | 
| Martin v. Löwis | b09af03 | 2011-11-04 11:16:41 +0100 | [diff] [blame] | 1166 |         for (i = start; i < end; i++) { | 
 | 1167 |             /* object is guaranteed to be "ready" */ | 
 | 1168 |             Py_UCS4 ch = PyUnicode_READ_CHAR(object, i); | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 1169 |             if (ch < 0xdc80 || ch > 0xdcff) { | 
 | 1170 |                 /* Not a UTF-8b surrogate, fail with original exception */ | 
 | 1171 |                 PyErr_SetObject(PyExceptionInstance_Class(exc), exc); | 
 | 1172 |                 Py_DECREF(res); | 
 | 1173 |                 Py_DECREF(object); | 
 | 1174 |                 return NULL; | 
 | 1175 |             } | 
 | 1176 |             *outp++ = ch - 0xdc00; | 
 | 1177 |         } | 
 | 1178 |         restuple = Py_BuildValue("(On)", res, end); | 
 | 1179 |         Py_DECREF(res); | 
 | 1180 |         Py_DECREF(object); | 
 | 1181 |         return restuple; | 
| Martin v. Löwis | 011e842 | 2009-05-05 04:43:17 +0000 | [diff] [blame] | 1182 |     } | 
 | 1183 |     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) { | 
| Victor Stinner | c06bb7a | 2011-11-04 21:36:35 +0100 | [diff] [blame] | 1184 |         PyObject *str; | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 1185 |         unsigned char *p; | 
| Victor Stinner | c06bb7a | 2011-11-04 21:36:35 +0100 | [diff] [blame] | 1186 |         Py_UCS2 ch[4]; /* decode up to 4 bad bytes. */ | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 1187 |         int consumed = 0; | 
 | 1188 |         if (PyUnicodeDecodeError_GetStart(exc, &start)) | 
 | 1189 |             return NULL; | 
 | 1190 |         if (PyUnicodeDecodeError_GetEnd(exc, &end)) | 
 | 1191 |             return NULL; | 
 | 1192 |         if (!(object = PyUnicodeDecodeError_GetObject(exc))) | 
 | 1193 |             return NULL; | 
 | 1194 |         if (!(p = (unsigned char*)PyBytes_AsString(object))) { | 
 | 1195 |             Py_DECREF(object); | 
 | 1196 |             return NULL; | 
 | 1197 |         } | 
 | 1198 |         while (consumed < 4 && consumed < end-start) { | 
 | 1199 |             /* Refuse to escape ASCII bytes. */ | 
 | 1200 |             if (p[start+consumed] < 128) | 
 | 1201 |                 break; | 
 | 1202 |             ch[consumed] = 0xdc00 + p[start+consumed]; | 
 | 1203 |             consumed++; | 
 | 1204 |         } | 
 | 1205 |         Py_DECREF(object); | 
 | 1206 |         if (!consumed) { | 
 | 1207 |             /* codec complained about ASCII byte. */ | 
 | 1208 |             PyErr_SetObject(PyExceptionInstance_Class(exc), exc); | 
 | 1209 |             return NULL; | 
 | 1210 |         } | 
| Victor Stinner | c06bb7a | 2011-11-04 21:36:35 +0100 | [diff] [blame] | 1211 |         str = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, ch, consumed); | 
 | 1212 |         if (str == NULL) | 
 | 1213 |             return NULL; | 
 | 1214 |         return Py_BuildValue("(Nn)", str, start+consumed); | 
| Martin v. Löwis | 011e842 | 2009-05-05 04:43:17 +0000 | [diff] [blame] | 1215 |     } | 
 | 1216 |     else { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 1217 |         wrong_exception_type(exc); | 
 | 1218 |         return NULL; | 
| Martin v. Löwis | 011e842 | 2009-05-05 04:43:17 +0000 | [diff] [blame] | 1219 |     } | 
 | 1220 | } | 
 | 1221 |  | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 1222 |  | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 1223 | static PyObject *strict_errors(PyObject *self, PyObject *exc) | 
 | 1224 | { | 
 | 1225 |     return PyCodec_StrictErrors(exc); | 
 | 1226 | } | 
 | 1227 |  | 
 | 1228 |  | 
 | 1229 | static PyObject *ignore_errors(PyObject *self, PyObject *exc) | 
 | 1230 | { | 
 | 1231 |     return PyCodec_IgnoreErrors(exc); | 
 | 1232 | } | 
 | 1233 |  | 
 | 1234 |  | 
 | 1235 | static PyObject *replace_errors(PyObject *self, PyObject *exc) | 
 | 1236 | { | 
 | 1237 |     return PyCodec_ReplaceErrors(exc); | 
 | 1238 | } | 
 | 1239 |  | 
 | 1240 |  | 
 | 1241 | static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc) | 
 | 1242 | { | 
 | 1243 |     return PyCodec_XMLCharRefReplaceErrors(exc); | 
 | 1244 | } | 
 | 1245 |  | 
 | 1246 |  | 
 | 1247 | static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc) | 
 | 1248 | { | 
 | 1249 |     return PyCodec_BackslashReplaceErrors(exc); | 
 | 1250 | } | 
 | 1251 |  | 
| Martin v. Löwis | e0a2b72 | 2009-05-10 08:08:56 +0000 | [diff] [blame] | 1252 | static PyObject *surrogatepass_errors(PyObject *self, PyObject *exc) | 
| Martin v. Löwis | db12d45 | 2009-05-02 18:52:14 +0000 | [diff] [blame] | 1253 | { | 
| Martin v. Löwis | e0a2b72 | 2009-05-10 08:08:56 +0000 | [diff] [blame] | 1254 |     return PyCodec_SurrogatePassErrors(exc); | 
| Martin v. Löwis | db12d45 | 2009-05-02 18:52:14 +0000 | [diff] [blame] | 1255 | } | 
 | 1256 |  | 
| Martin v. Löwis | 43c5778 | 2009-05-10 08:15:24 +0000 | [diff] [blame] | 1257 | static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc) | 
| Martin v. Löwis | 011e842 | 2009-05-05 04:43:17 +0000 | [diff] [blame] | 1258 | { | 
| Martin v. Löwis | 43c5778 | 2009-05-10 08:15:24 +0000 | [diff] [blame] | 1259 |     return PyCodec_SurrogateEscapeErrors(exc); | 
| Martin v. Löwis | 011e842 | 2009-05-05 04:43:17 +0000 | [diff] [blame] | 1260 | } | 
 | 1261 |  | 
| Gustavo Niemeyer | 5ddd4c3 | 2003-03-19 00:35:36 +0000 | [diff] [blame] | 1262 | static int _PyCodecRegistry_Init(void) | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 1263 | { | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 1264 |     static struct { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 1265 |         char *name; | 
 | 1266 |         PyMethodDef def; | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 1267 |     } methods[] = | 
 | 1268 |     { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 1269 |         { | 
 | 1270 |             "strict", | 
 | 1271 |             { | 
 | 1272 |                 "strict_errors", | 
 | 1273 |                 strict_errors, | 
 | 1274 |                 METH_O, | 
 | 1275 |                 PyDoc_STR("Implements the 'strict' error handling, which " | 
 | 1276 |                           "raises a UnicodeError on coding errors.") | 
 | 1277 |             } | 
 | 1278 |         }, | 
 | 1279 |         { | 
 | 1280 |             "ignore", | 
 | 1281 |             { | 
 | 1282 |                 "ignore_errors", | 
 | 1283 |                 ignore_errors, | 
 | 1284 |                 METH_O, | 
 | 1285 |                 PyDoc_STR("Implements the 'ignore' error handling, which " | 
 | 1286 |                           "ignores malformed data and continues.") | 
 | 1287 |             } | 
 | 1288 |         }, | 
 | 1289 |         { | 
 | 1290 |             "replace", | 
 | 1291 |             { | 
 | 1292 |                 "replace_errors", | 
 | 1293 |                 replace_errors, | 
 | 1294 |                 METH_O, | 
 | 1295 |                 PyDoc_STR("Implements the 'replace' error handling, which " | 
 | 1296 |                           "replaces malformed data with a replacement marker.") | 
 | 1297 |             } | 
 | 1298 |         }, | 
 | 1299 |         { | 
 | 1300 |             "xmlcharrefreplace", | 
 | 1301 |             { | 
 | 1302 |                 "xmlcharrefreplace_errors", | 
 | 1303 |                 xmlcharrefreplace_errors, | 
 | 1304 |                 METH_O, | 
 | 1305 |                 PyDoc_STR("Implements the 'xmlcharrefreplace' error handling, " | 
 | 1306 |                           "which replaces an unencodable character with the " | 
 | 1307 |                           "appropriate XML character reference.") | 
 | 1308 |             } | 
 | 1309 |         }, | 
 | 1310 |         { | 
 | 1311 |             "backslashreplace", | 
 | 1312 |             { | 
 | 1313 |                 "backslashreplace_errors", | 
 | 1314 |                 backslashreplace_errors, | 
 | 1315 |                 METH_O, | 
 | 1316 |                 PyDoc_STR("Implements the 'backslashreplace' error handling, " | 
 | 1317 |                           "which replaces an unencodable character with a " | 
 | 1318 |                           "backslashed escape sequence.") | 
 | 1319 |             } | 
 | 1320 |         }, | 
 | 1321 |         { | 
 | 1322 |             "surrogatepass", | 
 | 1323 |             { | 
 | 1324 |                 "surrogatepass", | 
 | 1325 |                 surrogatepass_errors, | 
 | 1326 |                 METH_O | 
 | 1327 |             } | 
 | 1328 |         }, | 
 | 1329 |         { | 
 | 1330 |             "surrogateescape", | 
 | 1331 |             { | 
 | 1332 |                 "surrogateescape", | 
 | 1333 |                 surrogateescape_errors, | 
 | 1334 |                 METH_O | 
 | 1335 |             } | 
 | 1336 |         } | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 1337 |     }; | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 1338 |  | 
| Nicholas Bastin | e5662ae | 2004-03-24 22:22:12 +0000 | [diff] [blame] | 1339 |     PyInterpreterState *interp = PyThreadState_GET()->interp; | 
| Gustavo Niemeyer | 5ddd4c3 | 2003-03-19 00:35:36 +0000 | [diff] [blame] | 1340 |     PyObject *mod; | 
| Neal Norwitz | 739a8f8 | 2004-07-08 01:55:58 +0000 | [diff] [blame] | 1341 |     unsigned i; | 
| Gustavo Niemeyer | 5ddd4c3 | 2003-03-19 00:35:36 +0000 | [diff] [blame] | 1342 |  | 
 | 1343 |     if (interp->codec_search_path != NULL) | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 1344 |         return 0; | 
| Gustavo Niemeyer | 5ddd4c3 | 2003-03-19 00:35:36 +0000 | [diff] [blame] | 1345 |  | 
 | 1346 |     interp->codec_search_path = PyList_New(0); | 
 | 1347 |     interp->codec_search_cache = PyDict_New(); | 
 | 1348 |     interp->codec_error_registry = PyDict_New(); | 
 | 1349 |  | 
 | 1350 |     if (interp->codec_error_registry) { | 
| Victor Stinner | 6394188 | 2011-09-29 00:42:28 +0200 | [diff] [blame] | 1351 |         for (i = 0; i < Py_ARRAY_LENGTH(methods); ++i) { | 
| Andrew Svetlov | 3ba3a3e | 2012-12-25 13:32:35 +0200 | [diff] [blame] | 1352 |             PyObject *func = PyCFunction_NewEx(&methods[i].def, NULL, NULL); | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 1353 |             int res; | 
 | 1354 |             if (!func) | 
 | 1355 |                 Py_FatalError("can't initialize codec error registry"); | 
 | 1356 |             res = PyCodec_RegisterError(methods[i].name, func); | 
 | 1357 |             Py_DECREF(func); | 
 | 1358 |             if (res) | 
 | 1359 |                 Py_FatalError("can't initialize codec error registry"); | 
 | 1360 |         } | 
| Walter Dörwald | 3aeb632 | 2002-09-02 13:14:32 +0000 | [diff] [blame] | 1361 |     } | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 1362 |  | 
| Gustavo Niemeyer | 5ddd4c3 | 2003-03-19 00:35:36 +0000 | [diff] [blame] | 1363 |     if (interp->codec_search_path == NULL || | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 1364 |         interp->codec_search_cache == NULL || | 
 | 1365 |         interp->codec_error_registry == NULL) | 
 | 1366 |         Py_FatalError("can't initialize codec registry"); | 
| Gustavo Niemeyer | 5ddd4c3 | 2003-03-19 00:35:36 +0000 | [diff] [blame] | 1367 |  | 
| Christian Heimes | 819b8bf | 2008-01-03 23:05:47 +0000 | [diff] [blame] | 1368 |     mod = PyImport_ImportModuleNoBlock("encodings"); | 
| Gustavo Niemeyer | 5ddd4c3 | 2003-03-19 00:35:36 +0000 | [diff] [blame] | 1369 |     if (mod == NULL) { | 
| Antoine Pitrou | f95a1b3 | 2010-05-09 15:52:27 +0000 | [diff] [blame] | 1370 |         return -1; | 
| Gustavo Niemeyer | 5ddd4c3 | 2003-03-19 00:35:36 +0000 | [diff] [blame] | 1371 |     } | 
 | 1372 |     Py_DECREF(mod); | 
| Christian Heimes | 6a27efa | 2008-10-30 21:48:26 +0000 | [diff] [blame] | 1373 |     interp->codecs_initialized = 1; | 
| Gustavo Niemeyer | 5ddd4c3 | 2003-03-19 00:35:36 +0000 | [diff] [blame] | 1374 |     return 0; | 
| Guido van Rossum | feee4b9 | 2000-03-10 22:57:27 +0000 | [diff] [blame] | 1375 | } |