Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 1 | /* |
| 2 | An implementation of the new I/O lib as defined by PEP 3116 - "New I/O" |
Brett Cannon | efb00c0 | 2012-02-29 18:31:31 -0500 | [diff] [blame] | 3 | |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 4 | Classes defined here: UnsupportedOperation, BlockingIOError. |
| 5 | Functions defined here: open(). |
Brett Cannon | efb00c0 | 2012-02-29 18:31:31 -0500 | [diff] [blame] | 6 | |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 7 | Mostly written by Amaury Forgeot d'Arc |
| 8 | */ |
| 9 | |
| 10 | #define PY_SSIZE_T_CLEAN |
| 11 | #include "Python.h" |
| 12 | #include "structmember.h" |
| 13 | #include "_iomodule.h" |
| 14 | |
| 15 | #ifdef HAVE_SYS_TYPES_H |
| 16 | #include <sys/types.h> |
| 17 | #endif /* HAVE_SYS_TYPES_H */ |
| 18 | |
| 19 | #ifdef HAVE_SYS_STAT_H |
| 20 | #include <sys/stat.h> |
| 21 | #endif /* HAVE_SYS_STAT_H */ |
| 22 | |
| 23 | |
| 24 | /* Various interned strings */ |
| 25 | |
| 26 | PyObject *_PyIO_str_close; |
| 27 | PyObject *_PyIO_str_closed; |
| 28 | PyObject *_PyIO_str_decode; |
| 29 | PyObject *_PyIO_str_encode; |
| 30 | PyObject *_PyIO_str_fileno; |
| 31 | PyObject *_PyIO_str_flush; |
| 32 | PyObject *_PyIO_str_getstate; |
| 33 | PyObject *_PyIO_str_isatty; |
| 34 | PyObject *_PyIO_str_newlines; |
| 35 | PyObject *_PyIO_str_nl; |
| 36 | PyObject *_PyIO_str_read; |
| 37 | PyObject *_PyIO_str_read1; |
| 38 | PyObject *_PyIO_str_readable; |
Victor Stinner | b57f108 | 2011-05-26 00:19:38 +0200 | [diff] [blame] | 39 | PyObject *_PyIO_str_readall; |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 40 | PyObject *_PyIO_str_readinto; |
| 41 | PyObject *_PyIO_str_readline; |
| 42 | PyObject *_PyIO_str_reset; |
| 43 | PyObject *_PyIO_str_seek; |
| 44 | PyObject *_PyIO_str_seekable; |
Antoine Pitrou | e450185 | 2009-05-14 18:55:55 +0000 | [diff] [blame] | 45 | PyObject *_PyIO_str_setstate; |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 46 | PyObject *_PyIO_str_tell; |
| 47 | PyObject *_PyIO_str_truncate; |
| 48 | PyObject *_PyIO_str_writable; |
| 49 | PyObject *_PyIO_str_write; |
| 50 | |
| 51 | PyObject *_PyIO_empty_str; |
| 52 | PyObject *_PyIO_empty_bytes; |
Antoine Pitrou | e450185 | 2009-05-14 18:55:55 +0000 | [diff] [blame] | 53 | PyObject *_PyIO_zero; |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 54 | |
| 55 | |
| 56 | PyDoc_STRVAR(module_doc, |
| 57 | "The io module provides the Python interfaces to stream handling. The\n" |
| 58 | "builtin open function is defined in this module.\n" |
| 59 | "\n" |
| 60 | "At the top of the I/O hierarchy is the abstract base class IOBase. It\n" |
| 61 | "defines the basic interface to a stream. Note, however, that there is no\n" |
Éric Araujo | fab9766 | 2012-02-26 02:14:08 +0100 | [diff] [blame] | 62 | "separation between reading and writing to streams; implementations are\n" |
Andrew Svetlov | 737fb89 | 2012-12-18 21:14:22 +0200 | [diff] [blame] | 63 | "allowed to raise an IOError if they do not support a given operation.\n" |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 64 | "\n" |
| 65 | "Extending IOBase is RawIOBase which deals simply with the reading and\n" |
Benjamin Peterson | 8f2b665 | 2009-04-05 00:46:27 +0000 | [diff] [blame] | 66 | "writing of raw bytes to a stream. FileIO subclasses RawIOBase to provide\n" |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 67 | "an interface to OS files.\n" |
| 68 | "\n" |
| 69 | "BufferedIOBase deals with buffering on a raw byte stream (RawIOBase). Its\n" |
| 70 | "subclasses, BufferedWriter, BufferedReader, and BufferedRWPair buffer\n" |
| 71 | "streams that are readable, writable, and both respectively.\n" |
| 72 | "BufferedRandom provides a buffered interface to random access\n" |
| 73 | "streams. BytesIO is a simple stream of in-memory bytes.\n" |
| 74 | "\n" |
| 75 | "Another IOBase subclass, TextIOBase, deals with the encoding and decoding\n" |
| 76 | "of streams into text. TextIOWrapper, which extends it, is a buffered text\n" |
| 77 | "interface to a buffered raw stream (`BufferedIOBase`). Finally, StringIO\n" |
| 78 | "is a in-memory stream for text.\n" |
| 79 | "\n" |
| 80 | "Argument names are not part of the specification, and only the arguments\n" |
| 81 | "of open() are intended to be used as keyword arguments.\n" |
| 82 | "\n" |
| 83 | "data:\n" |
| 84 | "\n" |
| 85 | "DEFAULT_BUFFER_SIZE\n" |
| 86 | "\n" |
| 87 | " An int containing the default buffer size used by the module's buffered\n" |
| 88 | " I/O classes. open() uses the file's blksize (as obtained by os.stat) if\n" |
| 89 | " possible.\n" |
| 90 | ); |
| 91 | |
| 92 | |
| 93 | /* |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 94 | * The main open() function |
| 95 | */ |
Serhiy Storchaka | f24131f | 2015-04-16 11:19:43 +0300 | [diff] [blame] | 96 | /*[clinic input] |
| 97 | module _io |
| 98 | |
| 99 | _io.open |
| 100 | file: object |
| 101 | mode: str = "r" |
| 102 | buffering: int = -1 |
Larry Hastings | dbfdc38 | 2015-05-04 06:59:46 -0700 | [diff] [blame] | 103 | encoding: str(accept={str, NoneType}) = NULL |
| 104 | errors: str(accept={str, NoneType}) = NULL |
| 105 | newline: str(accept={str, NoneType}) = NULL |
Serhiy Storchaka | f24131f | 2015-04-16 11:19:43 +0300 | [diff] [blame] | 106 | closefd: int(c_default="1") = True |
| 107 | opener: object = None |
| 108 | |
| 109 | Open file and return a stream. Raise IOError upon failure. |
| 110 | |
| 111 | file is either a text or byte string giving the name (and the path |
| 112 | if the file isn't in the current working directory) of the file to |
| 113 | be opened or an integer file descriptor of the file to be |
| 114 | wrapped. (If a file descriptor is given, it is closed when the |
| 115 | returned I/O object is closed, unless closefd is set to False.) |
| 116 | |
| 117 | mode is an optional string that specifies the mode in which the file |
| 118 | is opened. It defaults to 'r' which means open for reading in text |
| 119 | mode. Other common values are 'w' for writing (truncating the file if |
| 120 | it already exists), 'x' for creating and writing to a new file, and |
| 121 | 'a' for appending (which on some Unix systems, means that all writes |
| 122 | append to the end of the file regardless of the current seek position). |
| 123 | In text mode, if encoding is not specified the encoding used is platform |
| 124 | dependent: locale.getpreferredencoding(False) is called to get the |
| 125 | current locale encoding. (For reading and writing raw bytes use binary |
| 126 | mode and leave encoding unspecified.) The available modes are: |
| 127 | |
| 128 | ========= =============================================================== |
| 129 | Character Meaning |
| 130 | --------- --------------------------------------------------------------- |
| 131 | 'r' open for reading (default) |
| 132 | 'w' open for writing, truncating the file first |
| 133 | 'x' create a new file and open it for writing |
| 134 | 'a' open for writing, appending to the end of the file if it exists |
| 135 | 'b' binary mode |
| 136 | 't' text mode (default) |
| 137 | '+' open a disk file for updating (reading and writing) |
| 138 | 'U' universal newline mode (deprecated) |
| 139 | ========= =============================================================== |
| 140 | |
| 141 | The default mode is 'rt' (open for reading text). For binary random |
| 142 | access, the mode 'w+b' opens and truncates the file to 0 bytes, while |
| 143 | 'r+b' opens the file without truncation. The 'x' mode implies 'w' and |
| 144 | raises an `FileExistsError` if the file already exists. |
| 145 | |
| 146 | Python distinguishes between files opened in binary and text modes, |
| 147 | even when the underlying operating system doesn't. Files opened in |
| 148 | binary mode (appending 'b' to the mode argument) return contents as |
| 149 | bytes objects without any decoding. In text mode (the default, or when |
| 150 | 't' is appended to the mode argument), the contents of the file are |
| 151 | returned as strings, the bytes having been first decoded using a |
| 152 | platform-dependent encoding or using the specified encoding if given. |
| 153 | |
| 154 | 'U' mode is deprecated and will raise an exception in future versions |
| 155 | of Python. It has no effect in Python 3. Use newline to control |
| 156 | universal newlines mode. |
| 157 | |
| 158 | buffering is an optional integer used to set the buffering policy. |
| 159 | Pass 0 to switch buffering off (only allowed in binary mode), 1 to select |
| 160 | line buffering (only usable in text mode), and an integer > 1 to indicate |
| 161 | the size of a fixed-size chunk buffer. When no buffering argument is |
| 162 | given, the default buffering policy works as follows: |
| 163 | |
| 164 | * Binary files are buffered in fixed-size chunks; the size of the buffer |
| 165 | is chosen using a heuristic trying to determine the underlying device's |
| 166 | "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`. |
| 167 | On many systems, the buffer will typically be 4096 or 8192 bytes long. |
| 168 | |
| 169 | * "Interactive" text files (files for which isatty() returns True) |
| 170 | use line buffering. Other text files use the policy described above |
| 171 | for binary files. |
| 172 | |
| 173 | encoding is the name of the encoding used to decode or encode the |
| 174 | file. This should only be used in text mode. The default encoding is |
| 175 | platform dependent, but any encoding supported by Python can be |
| 176 | passed. See the codecs module for the list of supported encodings. |
| 177 | |
| 178 | errors is an optional string that specifies how encoding errors are to |
| 179 | be handled---this argument should not be used in binary mode. Pass |
| 180 | 'strict' to raise a ValueError exception if there is an encoding error |
| 181 | (the default of None has the same effect), or pass 'ignore' to ignore |
| 182 | errors. (Note that ignoring encoding errors can lead to data loss.) |
| 183 | See the documentation for codecs.register or run 'help(codecs.Codec)' |
| 184 | for a list of the permitted encoding error strings. |
| 185 | |
| 186 | newline controls how universal newlines works (it only applies to text |
| 187 | mode). It can be None, '', '\n', '\r', and '\r\n'. It works as |
| 188 | follows: |
| 189 | |
| 190 | * On input, if newline is None, universal newlines mode is |
| 191 | enabled. Lines in the input can end in '\n', '\r', or '\r\n', and |
| 192 | these are translated into '\n' before being returned to the |
| 193 | caller. If it is '', universal newline mode is enabled, but line |
| 194 | endings are returned to the caller untranslated. If it has any of |
| 195 | the other legal values, input lines are only terminated by the given |
| 196 | string, and the line ending is returned to the caller untranslated. |
| 197 | |
| 198 | * On output, if newline is None, any '\n' characters written are |
| 199 | translated to the system default line separator, os.linesep. If |
| 200 | newline is '' or '\n', no translation takes place. If newline is any |
| 201 | of the other legal values, any '\n' characters written are translated |
| 202 | to the given string. |
| 203 | |
| 204 | If closefd is False, the underlying file descriptor will be kept open |
| 205 | when the file is closed. This does not work when a file name is given |
| 206 | and must be True in that case. |
| 207 | |
| 208 | A custom opener can be used by passing a callable as *opener*. The |
| 209 | underlying file descriptor for the file object is then obtained by |
| 210 | calling *opener* with (*file*, *flags*). *opener* must return an open |
| 211 | file descriptor (passing os.open as *opener* results in functionality |
| 212 | similar to passing None). |
| 213 | |
| 214 | open() returns a file object whose type depends on the mode, and |
| 215 | through which the standard file operations such as reading and writing |
| 216 | are performed. When open() is used to open a file in a text mode ('w', |
| 217 | 'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open |
| 218 | a file in a binary mode, the returned class varies: in read binary |
| 219 | mode, it returns a BufferedReader; in write binary and append binary |
| 220 | modes, it returns a BufferedWriter, and in read/write mode, it returns |
| 221 | a BufferedRandom. |
| 222 | |
| 223 | It is also possible to use a string or bytearray as a file for both |
| 224 | reading and writing. For strings StringIO can be used like a file |
| 225 | opened in a text mode, and for bytes a BytesIO can be used like a file |
| 226 | opened in a binary mode. |
| 227 | [clinic start generated code]*/ |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 228 | |
| 229 | static PyObject * |
Serhiy Storchaka | f24131f | 2015-04-16 11:19:43 +0300 | [diff] [blame] | 230 | _io_open_impl(PyModuleDef *module, PyObject *file, const char *mode, |
| 231 | int buffering, const char *encoding, const char *errors, |
| 232 | const char *newline, int closefd, PyObject *opener) |
Larry Hastings | dbfdc38 | 2015-05-04 06:59:46 -0700 | [diff] [blame] | 233 | /*[clinic end generated code: output=7615d0d746eb14d2 input=f4e1ca75223987bc]*/ |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 234 | { |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 235 | unsigned i; |
| 236 | |
Charles-François Natali | dc3044c | 2012-01-09 22:40:02 +0100 | [diff] [blame] | 237 | int creating = 0, reading = 0, writing = 0, appending = 0, updating = 0; |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 238 | int text = 0, binary = 0, universal = 0; |
| 239 | |
Christian Heimes | 89ff3c7 | 2012-09-10 03:50:48 +0200 | [diff] [blame] | 240 | char rawmode[6], *m; |
Serhiy Storchaka | 56f6e76 | 2015-09-06 21:25:30 +0300 | [diff] [blame] | 241 | int line_buffering; |
| 242 | long isatty; |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 243 | |
Serhiy Storchaka | f10063e | 2014-06-09 13:32:34 +0300 | [diff] [blame] | 244 | PyObject *raw, *modeobj = NULL, *buffer, *wrapper, *result = NULL; |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 245 | |
Antoine Pitrou | de68722 | 2014-06-29 20:07:28 -0400 | [diff] [blame] | 246 | _Py_IDENTIFIER(_blksize); |
Martin v. Löwis | bd928fe | 2011-10-14 10:20:37 +0200 | [diff] [blame] | 247 | _Py_IDENTIFIER(isatty); |
Martin v. Löwis | 767046a | 2011-10-14 15:35:36 +0200 | [diff] [blame] | 248 | _Py_IDENTIFIER(mode); |
Serhiy Storchaka | f10063e | 2014-06-09 13:32:34 +0300 | [diff] [blame] | 249 | _Py_IDENTIFIER(close); |
Martin v. Löwis | afe55bb | 2011-10-09 10:38:36 +0200 | [diff] [blame] | 250 | |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 251 | if (!PyUnicode_Check(file) && |
Robert Collins | c94a1dc | 2015-07-26 06:43:13 +1200 | [diff] [blame] | 252 | !PyBytes_Check(file) && |
| 253 | !PyNumber_Check(file)) { |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 254 | PyErr_Format(PyExc_TypeError, "invalid file: %R", file); |
| 255 | return NULL; |
| 256 | } |
| 257 | |
| 258 | /* Decode mode */ |
| 259 | for (i = 0; i < strlen(mode); i++) { |
| 260 | char c = mode[i]; |
| 261 | |
| 262 | switch (c) { |
Charles-François Natali | dc3044c | 2012-01-09 22:40:02 +0100 | [diff] [blame] | 263 | case 'x': |
| 264 | creating = 1; |
| 265 | break; |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 266 | case 'r': |
| 267 | reading = 1; |
| 268 | break; |
| 269 | case 'w': |
| 270 | writing = 1; |
| 271 | break; |
| 272 | case 'a': |
| 273 | appending = 1; |
| 274 | break; |
| 275 | case '+': |
| 276 | updating = 1; |
| 277 | break; |
| 278 | case 't': |
| 279 | text = 1; |
| 280 | break; |
| 281 | case 'b': |
| 282 | binary = 1; |
| 283 | break; |
| 284 | case 'U': |
| 285 | universal = 1; |
| 286 | reading = 1; |
| 287 | break; |
| 288 | default: |
| 289 | goto invalid_mode; |
| 290 | } |
| 291 | |
| 292 | /* c must not be duplicated */ |
| 293 | if (strchr(mode+i+1, c)) { |
| 294 | invalid_mode: |
| 295 | PyErr_Format(PyExc_ValueError, "invalid mode: '%s'", mode); |
| 296 | return NULL; |
| 297 | } |
| 298 | |
| 299 | } |
| 300 | |
| 301 | m = rawmode; |
Charles-François Natali | dc3044c | 2012-01-09 22:40:02 +0100 | [diff] [blame] | 302 | if (creating) *(m++) = 'x'; |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 303 | if (reading) *(m++) = 'r'; |
| 304 | if (writing) *(m++) = 'w'; |
| 305 | if (appending) *(m++) = 'a'; |
| 306 | if (updating) *(m++) = '+'; |
| 307 | *m = '\0'; |
| 308 | |
| 309 | /* Parameters validation */ |
| 310 | if (universal) { |
Robert Collins | c94a1dc | 2015-07-26 06:43:13 +1200 | [diff] [blame] | 311 | if (creating || writing || appending || updating) { |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 312 | PyErr_SetString(PyExc_ValueError, |
Robert Collins | c94a1dc | 2015-07-26 06:43:13 +1200 | [diff] [blame] | 313 | "mode U cannot be combined with x', 'w', 'a', or '+'"); |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 314 | return NULL; |
| 315 | } |
Serhiy Storchaka | 6787a38 | 2013-11-23 22:12:06 +0200 | [diff] [blame] | 316 | if (PyErr_WarnEx(PyExc_DeprecationWarning, |
| 317 | "'U' mode is deprecated", 1) < 0) |
| 318 | return NULL; |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 319 | reading = 1; |
| 320 | } |
| 321 | |
| 322 | if (text && binary) { |
| 323 | PyErr_SetString(PyExc_ValueError, |
| 324 | "can't have text and binary mode at once"); |
| 325 | return NULL; |
| 326 | } |
| 327 | |
Charles-François Natali | dc3044c | 2012-01-09 22:40:02 +0100 | [diff] [blame] | 328 | if (creating + reading + writing + appending > 1) { |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 329 | PyErr_SetString(PyExc_ValueError, |
Charles-François Natali | dc3044c | 2012-01-09 22:40:02 +0100 | [diff] [blame] | 330 | "must have exactly one of create/read/write/append mode"); |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 331 | return NULL; |
| 332 | } |
| 333 | |
| 334 | if (binary && encoding != NULL) { |
| 335 | PyErr_SetString(PyExc_ValueError, |
| 336 | "binary mode doesn't take an encoding argument"); |
| 337 | return NULL; |
| 338 | } |
| 339 | |
| 340 | if (binary && errors != NULL) { |
| 341 | PyErr_SetString(PyExc_ValueError, |
| 342 | "binary mode doesn't take an errors argument"); |
| 343 | return NULL; |
| 344 | } |
| 345 | |
| 346 | if (binary && newline != NULL) { |
| 347 | PyErr_SetString(PyExc_ValueError, |
| 348 | "binary mode doesn't take a newline argument"); |
| 349 | return NULL; |
| 350 | } |
| 351 | |
| 352 | /* Create the Raw file stream */ |
| 353 | raw = PyObject_CallFunction((PyObject *)&PyFileIO_Type, |
Ross Lagerwall | 59142db | 2011-10-31 20:34:46 +0200 | [diff] [blame] | 354 | "OsiO", file, rawmode, closefd, opener); |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 355 | if (raw == NULL) |
| 356 | return NULL; |
Serhiy Storchaka | f10063e | 2014-06-09 13:32:34 +0300 | [diff] [blame] | 357 | result = raw; |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 358 | |
| 359 | modeobj = PyUnicode_FromString(mode); |
| 360 | if (modeobj == NULL) |
| 361 | goto error; |
| 362 | |
| 363 | /* buffering */ |
| 364 | { |
Martin v. Löwis | afe55bb | 2011-10-09 10:38:36 +0200 | [diff] [blame] | 365 | PyObject *res = _PyObject_CallMethodId(raw, &PyId_isatty, NULL); |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 366 | if (res == NULL) |
| 367 | goto error; |
| 368 | isatty = PyLong_AsLong(res); |
| 369 | Py_DECREF(res); |
| 370 | if (isatty == -1 && PyErr_Occurred()) |
| 371 | goto error; |
| 372 | } |
| 373 | |
| 374 | if (buffering == 1 || (buffering < 0 && isatty)) { |
| 375 | buffering = -1; |
| 376 | line_buffering = 1; |
| 377 | } |
| 378 | else |
| 379 | line_buffering = 0; |
| 380 | |
| 381 | if (buffering < 0) { |
Antoine Pitrou | de68722 | 2014-06-29 20:07:28 -0400 | [diff] [blame] | 382 | PyObject *blksize_obj; |
| 383 | blksize_obj = _PyObject_GetAttrId(raw, &PyId__blksize); |
| 384 | if (blksize_obj == NULL) |
| 385 | goto error; |
| 386 | buffering = PyLong_AsLong(blksize_obj); |
| 387 | Py_DECREF(blksize_obj); |
| 388 | if (buffering == -1 && PyErr_Occurred()) |
| 389 | goto error; |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 390 | } |
| 391 | if (buffering < 0) { |
| 392 | PyErr_SetString(PyExc_ValueError, |
| 393 | "invalid buffering size"); |
| 394 | goto error; |
| 395 | } |
| 396 | |
| 397 | /* if not buffering, returns the raw file object */ |
| 398 | if (buffering == 0) { |
| 399 | if (!binary) { |
| 400 | PyErr_SetString(PyExc_ValueError, |
| 401 | "can't have unbuffered text I/O"); |
| 402 | goto error; |
| 403 | } |
| 404 | |
| 405 | Py_DECREF(modeobj); |
Serhiy Storchaka | f10063e | 2014-06-09 13:32:34 +0300 | [diff] [blame] | 406 | return result; |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 407 | } |
| 408 | |
| 409 | /* wraps into a buffered file */ |
| 410 | { |
| 411 | PyObject *Buffered_class; |
| 412 | |
| 413 | if (updating) |
| 414 | Buffered_class = (PyObject *)&PyBufferedRandom_Type; |
Charles-François Natali | dc3044c | 2012-01-09 22:40:02 +0100 | [diff] [blame] | 415 | else if (creating || writing || appending) |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 416 | Buffered_class = (PyObject *)&PyBufferedWriter_Type; |
| 417 | else if (reading) |
| 418 | Buffered_class = (PyObject *)&PyBufferedReader_Type; |
| 419 | else { |
| 420 | PyErr_Format(PyExc_ValueError, |
| 421 | "unknown mode: '%s'", mode); |
| 422 | goto error; |
| 423 | } |
| 424 | |
| 425 | buffer = PyObject_CallFunction(Buffered_class, "Oi", raw, buffering); |
| 426 | } |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 427 | if (buffer == NULL) |
| 428 | goto error; |
Serhiy Storchaka | f10063e | 2014-06-09 13:32:34 +0300 | [diff] [blame] | 429 | result = buffer; |
| 430 | Py_DECREF(raw); |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 431 | |
| 432 | |
| 433 | /* if binary, returns the buffered file */ |
| 434 | if (binary) { |
| 435 | Py_DECREF(modeobj); |
Serhiy Storchaka | f10063e | 2014-06-09 13:32:34 +0300 | [diff] [blame] | 436 | return result; |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 437 | } |
| 438 | |
| 439 | /* wraps into a TextIOWrapper */ |
| 440 | wrapper = PyObject_CallFunction((PyObject *)&PyTextIOWrapper_Type, |
Robert Collins | c94a1dc | 2015-07-26 06:43:13 +1200 | [diff] [blame] | 441 | "Osssi", |
| 442 | buffer, |
| 443 | encoding, errors, newline, |
| 444 | line_buffering); |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 445 | if (wrapper == NULL) |
| 446 | goto error; |
Serhiy Storchaka | f10063e | 2014-06-09 13:32:34 +0300 | [diff] [blame] | 447 | result = wrapper; |
| 448 | Py_DECREF(buffer); |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 449 | |
Martin v. Löwis | 767046a | 2011-10-14 15:35:36 +0200 | [diff] [blame] | 450 | if (_PyObject_SetAttrId(wrapper, &PyId_mode, modeobj) < 0) |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 451 | goto error; |
| 452 | Py_DECREF(modeobj); |
Serhiy Storchaka | f10063e | 2014-06-09 13:32:34 +0300 | [diff] [blame] | 453 | return result; |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 454 | |
| 455 | error: |
Serhiy Storchaka | f10063e | 2014-06-09 13:32:34 +0300 | [diff] [blame] | 456 | if (result != NULL) { |
Benjamin Peterson | 4f654fb | 2014-07-04 17:00:25 -0700 | [diff] [blame] | 457 | PyObject *exc, *val, *tb, *close_result; |
Serhiy Storchaka | f10063e | 2014-06-09 13:32:34 +0300 | [diff] [blame] | 458 | PyErr_Fetch(&exc, &val, &tb); |
Benjamin Peterson | 4f654fb | 2014-07-04 17:00:25 -0700 | [diff] [blame] | 459 | close_result = _PyObject_CallMethodId(result, &PyId_close, NULL); |
Serhiy Storchaka | e2bd2a7 | 2014-10-08 22:31:52 +0300 | [diff] [blame] | 460 | _PyErr_ChainExceptions(exc, val, tb); |
| 461 | Py_XDECREF(close_result); |
Serhiy Storchaka | f10063e | 2014-06-09 13:32:34 +0300 | [diff] [blame] | 462 | Py_DECREF(result); |
| 463 | } |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 464 | Py_XDECREF(modeobj); |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 465 | return NULL; |
| 466 | } |
| 467 | |
| 468 | /* |
| 469 | * Private helpers for the io module. |
| 470 | */ |
| 471 | |
| 472 | Py_off_t |
| 473 | PyNumber_AsOff_t(PyObject *item, PyObject *err) |
| 474 | { |
| 475 | Py_off_t result; |
| 476 | PyObject *runerr; |
| 477 | PyObject *value = PyNumber_Index(item); |
| 478 | if (value == NULL) |
| 479 | return -1; |
| 480 | |
| 481 | /* We're done if PyLong_AsSsize_t() returns without error. */ |
| 482 | result = PyLong_AsOff_t(value); |
| 483 | if (result != -1 || !(runerr = PyErr_Occurred())) |
| 484 | goto finish; |
| 485 | |
| 486 | /* Error handling code -- only manage OverflowError differently */ |
| 487 | if (!PyErr_GivenExceptionMatches(runerr, PyExc_OverflowError)) |
| 488 | goto finish; |
| 489 | |
| 490 | PyErr_Clear(); |
| 491 | /* If no error-handling desired then the default clipping |
| 492 | is sufficient. |
| 493 | */ |
| 494 | if (!err) { |
| 495 | assert(PyLong_Check(value)); |
| 496 | /* Whether or not it is less than or equal to |
| 497 | zero is determined by the sign of ob_size |
| 498 | */ |
| 499 | if (_PyLong_Sign(value) < 0) |
| 500 | result = PY_OFF_T_MIN; |
| 501 | else |
| 502 | result = PY_OFF_T_MAX; |
| 503 | } |
| 504 | else { |
| 505 | /* Otherwise replace the error with caller's error object. */ |
| 506 | PyErr_Format(err, |
| 507 | "cannot fit '%.200s' into an offset-sized integer", |
| 508 | item->ob_type->tp_name); |
| 509 | } |
| 510 | |
| 511 | finish: |
| 512 | Py_DECREF(value); |
| 513 | return result; |
| 514 | } |
| 515 | |
Benjamin Peterson | bf5ff76 | 2009-12-13 19:25:34 +0000 | [diff] [blame] | 516 | |
| 517 | /* Basically the "n" format code with the ability to turn None into -1. */ |
Brett Cannon | efb00c0 | 2012-02-29 18:31:31 -0500 | [diff] [blame] | 518 | int |
Benjamin Peterson | bf5ff76 | 2009-12-13 19:25:34 +0000 | [diff] [blame] | 519 | _PyIO_ConvertSsize_t(PyObject *obj, void *result) { |
| 520 | Py_ssize_t limit; |
| 521 | if (obj == Py_None) { |
| 522 | limit = -1; |
| 523 | } |
| 524 | else if (PyNumber_Check(obj)) { |
| 525 | limit = PyNumber_AsSsize_t(obj, PyExc_OverflowError); |
| 526 | if (limit == -1 && PyErr_Occurred()) |
| 527 | return 0; |
| 528 | } |
| 529 | else { |
| 530 | PyErr_Format(PyExc_TypeError, |
| 531 | "integer argument expected, got '%.200s'", |
| 532 | Py_TYPE(obj)->tp_name); |
| 533 | return 0; |
| 534 | } |
| 535 | *((Py_ssize_t *)result) = limit; |
| 536 | return 1; |
| 537 | } |
| 538 | |
| 539 | |
Antoine Pitrou | 712cb73 | 2013-12-21 15:51:54 +0100 | [diff] [blame] | 540 | _PyIO_State * |
| 541 | _PyIO_get_module_state(void) |
| 542 | { |
| 543 | PyObject *mod = PyState_FindModule(&_PyIO_Module); |
| 544 | _PyIO_State *state; |
| 545 | if (mod == NULL || (state = IO_MOD_STATE(mod)) == NULL) { |
| 546 | PyErr_SetString(PyExc_RuntimeError, |
| 547 | "could not find io module state " |
| 548 | "(interpreter shutdown?)"); |
| 549 | return NULL; |
| 550 | } |
| 551 | return state; |
| 552 | } |
| 553 | |
Antoine Pitrou | 932ff83 | 2013-08-01 21:04:50 +0200 | [diff] [blame] | 554 | PyObject * |
| 555 | _PyIO_get_locale_module(_PyIO_State *state) |
| 556 | { |
| 557 | PyObject *mod; |
| 558 | if (state->locale_module != NULL) { |
| 559 | assert(PyWeakref_CheckRef(state->locale_module)); |
| 560 | mod = PyWeakref_GET_OBJECT(state->locale_module); |
| 561 | if (mod != Py_None) { |
| 562 | Py_INCREF(mod); |
| 563 | return mod; |
| 564 | } |
| 565 | Py_CLEAR(state->locale_module); |
| 566 | } |
Antoine Pitrou | fd4722c | 2013-10-12 00:13:50 +0200 | [diff] [blame] | 567 | mod = PyImport_ImportModule("_bootlocale"); |
Antoine Pitrou | 932ff83 | 2013-08-01 21:04:50 +0200 | [diff] [blame] | 568 | if (mod == NULL) |
| 569 | return NULL; |
| 570 | state->locale_module = PyWeakref_NewRef(mod, NULL); |
| 571 | if (state->locale_module == NULL) { |
| 572 | Py_DECREF(mod); |
| 573 | return NULL; |
| 574 | } |
| 575 | return mod; |
| 576 | } |
| 577 | |
| 578 | |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 579 | static int |
| 580 | iomodule_traverse(PyObject *mod, visitproc visit, void *arg) { |
| 581 | _PyIO_State *state = IO_MOD_STATE(mod); |
| 582 | if (!state->initialized) |
| 583 | return 0; |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 584 | if (state->locale_module != NULL) { |
| 585 | Py_VISIT(state->locale_module); |
| 586 | } |
| 587 | Py_VISIT(state->unsupported_operation); |
| 588 | return 0; |
| 589 | } |
| 590 | |
Benjamin Peterson | bf5ff76 | 2009-12-13 19:25:34 +0000 | [diff] [blame] | 591 | |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 592 | static int |
| 593 | iomodule_clear(PyObject *mod) { |
| 594 | _PyIO_State *state = IO_MOD_STATE(mod); |
| 595 | if (!state->initialized) |
| 596 | return 0; |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 597 | if (state->locale_module != NULL) |
| 598 | Py_CLEAR(state->locale_module); |
| 599 | Py_CLEAR(state->unsupported_operation); |
| 600 | return 0; |
| 601 | } |
| 602 | |
| 603 | static void |
| 604 | iomodule_free(PyObject *mod) { |
| 605 | iomodule_clear(mod); |
| 606 | } |
| 607 | |
Benjamin Peterson | bf5ff76 | 2009-12-13 19:25:34 +0000 | [diff] [blame] | 608 | |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 609 | /* |
| 610 | * Module definition |
| 611 | */ |
| 612 | |
Serhiy Storchaka | f24131f | 2015-04-16 11:19:43 +0300 | [diff] [blame] | 613 | #include "clinic/_iomodule.c.h" |
| 614 | |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 615 | static PyMethodDef module_methods[] = { |
Serhiy Storchaka | f24131f | 2015-04-16 11:19:43 +0300 | [diff] [blame] | 616 | _IO_OPEN_METHODDEF |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 617 | {NULL, NULL} |
| 618 | }; |
| 619 | |
| 620 | struct PyModuleDef _PyIO_Module = { |
| 621 | PyModuleDef_HEAD_INIT, |
| 622 | "io", |
| 623 | module_doc, |
| 624 | sizeof(_PyIO_State), |
| 625 | module_methods, |
| 626 | NULL, |
| 627 | iomodule_traverse, |
| 628 | iomodule_clear, |
| 629 | (freefunc)iomodule_free, |
| 630 | }; |
| 631 | |
| 632 | PyMODINIT_FUNC |
| 633 | PyInit__io(void) |
| 634 | { |
| 635 | PyObject *m = PyModule_Create(&_PyIO_Module); |
| 636 | _PyIO_State *state = NULL; |
| 637 | if (m == NULL) |
| 638 | return NULL; |
| 639 | state = IO_MOD_STATE(m); |
| 640 | state->initialized = 0; |
| 641 | |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 642 | #define ADD_TYPE(type, name) \ |
| 643 | if (PyType_Ready(type) < 0) \ |
| 644 | goto fail; \ |
| 645 | Py_INCREF(type); \ |
| 646 | if (PyModule_AddObject(m, name, (PyObject *)type) < 0) { \ |
| 647 | Py_DECREF(type); \ |
| 648 | goto fail; \ |
| 649 | } |
| 650 | |
| 651 | /* DEFAULT_BUFFER_SIZE */ |
| 652 | if (PyModule_AddIntMacro(m, DEFAULT_BUFFER_SIZE) < 0) |
| 653 | goto fail; |
| 654 | |
| 655 | /* UnsupportedOperation inherits from ValueError and IOError */ |
| 656 | state->unsupported_operation = PyObject_CallFunction( |
| 657 | (PyObject *)&PyType_Type, "s(OO){}", |
| 658 | "UnsupportedOperation", PyExc_ValueError, PyExc_IOError); |
| 659 | if (state->unsupported_operation == NULL) |
| 660 | goto fail; |
| 661 | Py_INCREF(state->unsupported_operation); |
| 662 | if (PyModule_AddObject(m, "UnsupportedOperation", |
| 663 | state->unsupported_operation) < 0) |
| 664 | goto fail; |
| 665 | |
Antoine Pitrou | 6b4883d | 2011-10-12 02:54:14 +0200 | [diff] [blame] | 666 | /* BlockingIOError, for compatibility */ |
| 667 | Py_INCREF(PyExc_BlockingIOError); |
| 668 | if (PyModule_AddObject(m, "BlockingIOError", |
| 669 | (PyObject *) PyExc_BlockingIOError) < 0) |
| 670 | goto fail; |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 671 | |
| 672 | /* Concrete base types of the IO ABCs. |
| 673 | (the ABCs themselves are declared through inheritance in io.py) |
| 674 | */ |
| 675 | ADD_TYPE(&PyIOBase_Type, "_IOBase"); |
| 676 | ADD_TYPE(&PyRawIOBase_Type, "_RawIOBase"); |
| 677 | ADD_TYPE(&PyBufferedIOBase_Type, "_BufferedIOBase"); |
| 678 | ADD_TYPE(&PyTextIOBase_Type, "_TextIOBase"); |
| 679 | |
| 680 | /* Implementation of concrete IO objects. */ |
| 681 | /* FileIO */ |
| 682 | PyFileIO_Type.tp_base = &PyRawIOBase_Type; |
| 683 | ADD_TYPE(&PyFileIO_Type, "FileIO"); |
| 684 | |
| 685 | /* BytesIO */ |
| 686 | PyBytesIO_Type.tp_base = &PyBufferedIOBase_Type; |
| 687 | ADD_TYPE(&PyBytesIO_Type, "BytesIO"); |
Antoine Pitrou | 972ee13 | 2010-09-06 18:48:21 +0000 | [diff] [blame] | 688 | if (PyType_Ready(&_PyBytesIOBuffer_Type) < 0) |
| 689 | goto fail; |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 690 | |
| 691 | /* StringIO */ |
| 692 | PyStringIO_Type.tp_base = &PyTextIOBase_Type; |
| 693 | ADD_TYPE(&PyStringIO_Type, "StringIO"); |
| 694 | |
| 695 | /* BufferedReader */ |
| 696 | PyBufferedReader_Type.tp_base = &PyBufferedIOBase_Type; |
| 697 | ADD_TYPE(&PyBufferedReader_Type, "BufferedReader"); |
| 698 | |
| 699 | /* BufferedWriter */ |
| 700 | PyBufferedWriter_Type.tp_base = &PyBufferedIOBase_Type; |
| 701 | ADD_TYPE(&PyBufferedWriter_Type, "BufferedWriter"); |
| 702 | |
| 703 | /* BufferedRWPair */ |
| 704 | PyBufferedRWPair_Type.tp_base = &PyBufferedIOBase_Type; |
| 705 | ADD_TYPE(&PyBufferedRWPair_Type, "BufferedRWPair"); |
| 706 | |
| 707 | /* BufferedRandom */ |
| 708 | PyBufferedRandom_Type.tp_base = &PyBufferedIOBase_Type; |
| 709 | ADD_TYPE(&PyBufferedRandom_Type, "BufferedRandom"); |
| 710 | |
| 711 | /* TextIOWrapper */ |
| 712 | PyTextIOWrapper_Type.tp_base = &PyTextIOBase_Type; |
| 713 | ADD_TYPE(&PyTextIOWrapper_Type, "TextIOWrapper"); |
| 714 | |
| 715 | /* IncrementalNewlineDecoder */ |
| 716 | ADD_TYPE(&PyIncrementalNewlineDecoder_Type, "IncrementalNewlineDecoder"); |
| 717 | |
| 718 | /* Interned strings */ |
Antoine Pitrou | fc1b6f0 | 2012-01-18 16:13:56 +0100 | [diff] [blame] | 719 | #define ADD_INTERNED(name) \ |
| 720 | if (!_PyIO_str_ ## name && \ |
| 721 | !(_PyIO_str_ ## name = PyUnicode_InternFromString(# name))) \ |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 722 | goto fail; |
Antoine Pitrou | fc1b6f0 | 2012-01-18 16:13:56 +0100 | [diff] [blame] | 723 | |
| 724 | ADD_INTERNED(close) |
| 725 | ADD_INTERNED(closed) |
| 726 | ADD_INTERNED(decode) |
| 727 | ADD_INTERNED(encode) |
| 728 | ADD_INTERNED(fileno) |
| 729 | ADD_INTERNED(flush) |
| 730 | ADD_INTERNED(getstate) |
| 731 | ADD_INTERNED(isatty) |
| 732 | ADD_INTERNED(newlines) |
| 733 | ADD_INTERNED(read) |
| 734 | ADD_INTERNED(read1) |
| 735 | ADD_INTERNED(readable) |
Antoine Pitrou | bb5b92d | 2012-01-18 16:19:19 +0100 | [diff] [blame] | 736 | ADD_INTERNED(readall) |
Antoine Pitrou | fc1b6f0 | 2012-01-18 16:13:56 +0100 | [diff] [blame] | 737 | ADD_INTERNED(readinto) |
| 738 | ADD_INTERNED(readline) |
| 739 | ADD_INTERNED(reset) |
| 740 | ADD_INTERNED(seek) |
| 741 | ADD_INTERNED(seekable) |
| 742 | ADD_INTERNED(setstate) |
| 743 | ADD_INTERNED(tell) |
| 744 | ADD_INTERNED(truncate) |
| 745 | ADD_INTERNED(write) |
| 746 | ADD_INTERNED(writable) |
| 747 | |
| 748 | if (!_PyIO_str_nl && |
| 749 | !(_PyIO_str_nl = PyUnicode_InternFromString("\n"))) |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 750 | goto fail; |
Antoine Pitrou | fc1b6f0 | 2012-01-18 16:13:56 +0100 | [diff] [blame] | 751 | |
| 752 | if (!_PyIO_empty_str && |
| 753 | !(_PyIO_empty_str = PyUnicode_FromStringAndSize(NULL, 0))) |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 754 | goto fail; |
Antoine Pitrou | fc1b6f0 | 2012-01-18 16:13:56 +0100 | [diff] [blame] | 755 | if (!_PyIO_empty_bytes && |
| 756 | !(_PyIO_empty_bytes = PyBytes_FromStringAndSize(NULL, 0))) |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 757 | goto fail; |
Antoine Pitrou | fc1b6f0 | 2012-01-18 16:13:56 +0100 | [diff] [blame] | 758 | if (!_PyIO_zero && |
| 759 | !(_PyIO_zero = PyLong_FromLong(0L))) |
Antoine Pitrou | e450185 | 2009-05-14 18:55:55 +0000 | [diff] [blame] | 760 | goto fail; |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 761 | |
| 762 | state->initialized = 1; |
| 763 | |
| 764 | return m; |
| 765 | |
| 766 | fail: |
Benjamin Peterson | 4fa88fa | 2009-03-04 00:14:51 +0000 | [diff] [blame] | 767 | Py_XDECREF(state->unsupported_operation); |
| 768 | Py_DECREF(m); |
| 769 | return NULL; |
| 770 | } |