blob: a67240a20fa632ff361b9db84088d1e64be5d84b [file] [log] [blame]
Guido van Rossum0612d842000-03-10 23:20:43 +00001""" codecs -- Python Codec Registry, API and helpers.
2
3
4Written by Marc-Andre Lemburg (mal@lemburg.com).
5
6(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
7
8"""#"
9
Marc-André Lemburgb28de0d2002-12-12 17:37:50 +000010import __builtin__, sys
Guido van Rossum0612d842000-03-10 23:20:43 +000011
12### Registry and builtin stateless codec functions
13
try:
    # Pull in the registry API and the builtin stateless codec functions
    # from the C implementation module.
    from _codecs import *
except ImportError as why:
    # "except ... as ..." parses on Python 2.6+ (already required by the
    # b"" literals used in this file) and on Python 3, unlike the old
    # comma form.
    raise SystemError('Failed to load the builtin codecs: %s' % why)
Guido van Rossum0612d842000-03-10 23:20:43 +000018
# Names exported by "from codecs import *".
__all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE",
           "BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
           "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE",
           "BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE",
           "strict_errors", "ignore_errors", "replace_errors",
           "xmlcharrefreplace_errors",
           "register_error", "lookup_error"]
Skip Montanaroe99d5ea2001-01-20 19:54:20 +000026
Guido van Rossum0612d842000-03-10 23:20:43 +000027### Constants
28
29#
Walter Dörwald474458d2002-06-04 15:16:29 +000030# Byte Order Mark (BOM = ZERO WIDTH NO-BREAK SPACE = U+FEFF)
31# and its possible byte string values
32# for UTF8/UTF16/UTF32 output and little/big endian machines
Guido van Rossum0612d842000-03-10 23:20:43 +000033#
Guido van Rossum0612d842000-03-10 23:20:43 +000034
# UTF-8 encoded BOM
BOM_UTF8 = '\xef\xbb\xbf'

# UTF-16 BOMs (little endian, then big endian) and their short aliases
BOM_UTF16_LE = '\xff\xfe'
BOM_UTF16_BE = '\xfe\xff'
BOM_LE = BOM_UTF16_LE
BOM_BE = BOM_UTF16_BE

# UTF-32 BOMs (little endian, then big endian)
BOM_UTF32_LE = '\xff\xfe\x00\x00'
BOM_UTF32_BE = '\x00\x00\xfe\xff'

# Native-endianness aliases, chosen from the byte order of this machine
if sys.byteorder != 'little':
    BOM_UTF16 = BOM_UTF16_BE
    BOM_UTF32 = BOM_UTF32_BE
else:
    BOM_UTF16 = BOM_UTF16_LE
    BOM_UTF32 = BOM_UTF32_LE
BOM = BOM_UTF16

# Old broken names (don't use in new code): the "32"/"64" names actually
# refer to the UTF-16/UTF-32 marks respectively.
BOM32_LE = BOM_UTF16_LE
BOM32_BE = BOM_UTF16_BE
BOM64_LE = BOM_UTF32_LE
BOM64_BE = BOM_UTF32_BE
Guido van Rossum0612d842000-03-10 23:20:43 +000071
72
73### Codec base classes (defining the API)
74
class CodecInfo(tuple):

    """ Codec description record.

        The instance is the 4-tuple (encode, decode, streamreader,
        streamwriter); the same four entries plus the incremental
        encoder/decoder factories and the codec name are also
        available as attributes.

    """
    def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
                incrementalencoder=None, incrementaldecoder=None, name=None):
        # Only the four classic entries take part in the tuple value; the
        # remaining ones are reachable as attributes only.
        info = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
        info.name = name
        info.encode, info.decode = encode, decode
        info.incrementalencoder = incrementalencoder
        info.incrementaldecoder = incrementaldecoder
        info.streamwriter, info.streamreader = streamwriter, streamreader
        return info

    def __repr__(self):
        klass = self.__class__
        return "<%s.%s object for encoding %s at 0x%x>" % (
            klass.__module__, klass.__name__, self.name, id(self))
91
class Codec:

    """ Defines the interface for stateless encoders/decoders.

        The .encode()/.decode() methods may use different error
        handling schemes by providing the errors argument. These
        string values are predefined:

         'strict' - raise a ValueError error (or a subclass)
         'ignore' - ignore the character and continue with the next
         'replace' - replace with a suitable replacement character;
                    Python will use the official U+FFFD REPLACEMENT
                    CHARACTER for the builtin Unicode codecs on
                    decoding and '?' on encoding.
         'xmlcharrefreplace' - Replace with the appropriate XML
                               character reference (only for encoding).
         'backslashreplace'  - Replace with backslashed escape sequences
                               (only for encoding).

        The set of allowed values can be extended via register_error.

        Both methods of this base class only raise NotImplementedError;
        concrete codecs must override them.

    """
    def encode(self, input, errors='strict'):

        """ Encodes the object input and returns a tuple (output
            object, length consumed).

            errors defines the error handling to apply. It defaults to
            'strict' handling.

            The method may not store state in the Codec instance. Use
            StreamWriter for codecs which have to keep state in order to
            make encoding efficient.

            The encoder must be able to handle zero length input and
            return an empty object of the output object type in this
            situation.

        """
        raise NotImplementedError

    def decode(self, input, errors='strict'):

        """ Decodes the object input and returns a tuple (output
            object, length consumed).

            input must be an object which provides the bf_getreadbuf
            buffer slot. Python strings, buffer objects and memory
            mapped files are examples of objects providing this slot.

            errors defines the error handling to apply. It defaults to
            'strict' handling.

            The method may not store state in the Codec instance. Use
            StreamReader for codecs which have to keep state in order to
            make decoding efficient.

            The decoder must be able to handle zero length input and
            return an empty object of the output object type in this
            situation.

        """
        raise NotImplementedError
155
class IncrementalEncoder(object):
    """
    An IncrementalEncoder encodes an input in multiple steps. The input can be
    passed piece by piece to the encode() method. The IncrementalEncoder remembers
    the state of the encoding process between calls to encode().

    This base class keeps no real state: encode() must be overridden,
    reset()/setstate() do nothing and getstate() always returns 0.
    """
    def __init__(self, errors='strict'):
        """
        Creates an IncrementalEncoder instance.

        The IncrementalEncoder may use different error handling schemes by
        providing the errors keyword argument. See the Codec class docstring
        for a list of possible values.
        """
        self.errors = errors
        # not used by this base class itself
        self.buffer = ""

    def encode(self, input, final=False):
        """
        Encodes input and returns the resulting object.

        Must be overridden; this implementation raises NotImplementedError.
        """
        raise NotImplementedError

    def reset(self):
        """
        Resets the encoder to the initial state.

        The base implementation does nothing.
        """

    def getstate(self):
        """
        Return the current state of the encoder.

        The base implementation always returns 0.
        """
        return 0

    def setstate(self, state):
        """
        Set the current state of the encoder. state must have been
        returned by getstate().

        The base implementation ignores state.
        """
195
class BufferedIncrementalEncoder(IncrementalEncoder):
    """
    Base class for incremental encoders that cannot always consume all of
    their input at once: whatever _buffer_encode() leaves unconsumed is
    kept in self.buffer and prepended to the input of the next encode()
    call.
    """
    def __init__(self, errors='strict'):
        IncrementalEncoder.__init__(self, errors)
        # input handed to encode() that has not been encoded yet
        self.buffer = ""

    def _buffer_encode(self, input, errors, final):
        # Hook for subclasses: encode input and return a tuple
        # (output, length consumed).
        raise NotImplementedError

    def encode(self, input, final=False):
        # prepend what was left over from the previous call
        pending = self.buffer + input
        output, used = self._buffer_encode(pending, self.errors, final)
        # whatever was not consumed waits for the next call
        self.buffer = pending[used:]
        return output

    def reset(self):
        IncrementalEncoder.reset(self)
        self.buffer = ""

    def getstate(self):
        # an empty buffer is reported as 0
        if self.buffer:
            return self.buffer
        return 0

    def setstate(self, state):
        # any false state (such as the 0 from getstate()) means "no buffer"
        if state:
            self.buffer = state
        else:
            self.buffer = ""
228
class IncrementalDecoder(object):
    """
    An IncrementalDecoder decodes an input in multiple steps. The input can be
    passed piece by piece to the decode() method. The IncrementalDecoder
    remembers the state of the decoding process between calls to decode().

    This base class keeps no real state: decode() must be overridden,
    reset()/setstate() do nothing and getstate() always returns (b"", 0).
    """
    def __init__(self, errors='strict'):
        """
        Creates an IncrementalDecoder instance.

        The IncrementalDecoder may use different error handling schemes by
        providing the errors keyword argument. See the Codec class docstring
        for a list of possible values.
        """
        self.errors = errors

    def decode(self, input, final=False):
        """
        Decodes input and returns the resulting object.

        Must be overridden; this implementation raises NotImplementedError.
        """
        raise NotImplementedError

    def reset(self):
        """
        Resets the decoder to the initial state.

        The base implementation does nothing.
        """

    def getstate(self):
        """
        Return the current state of the decoder.

        This must be a (buffered_input, additional_state_info) tuple.
        buffered_input must be a bytes object containing bytes that
        were passed to decode() that have not yet been converted.
        additional_state_info must be a non-negative integer
        representing the state of the decoder WITHOUT yet having
        processed the contents of buffered_input.  In the initial state
        and after reset(), getstate() must return (b"", 0).
        """
        return (b"", 0)

    def setstate(self, state):
        """
        Set the current state of the decoder.

        state must have been returned by getstate().  The effect of
        setstate((b"", 0)) must be equivalent to reset().

        The base implementation ignores state.
        """
277
class BufferedIncrementalDecoder(IncrementalDecoder):
    """
    Base class for incremental decoders that have to cope with incomplete
    byte sequences: whatever _buffer_decode() leaves unconsumed is kept in
    self.buffer and prepended to the input of the next decode() call.
    """
    def __init__(self, errors='strict'):
        IncrementalDecoder.__init__(self, errors)
        # input handed to decode() that has not been decoded yet
        self.buffer = ""

    def _buffer_decode(self, input, errors, final):
        # Hook for subclasses: decode input and return a tuple
        # (output, length consumed).
        raise NotImplementedError

    def decode(self, input, final=False):
        # prepend what was left over from the previous call
        pending = self.buffer + input
        output, used = self._buffer_decode(pending, self.errors, final)
        # whatever was not consumed waits for the next call
        self.buffer = pending[used:]
        return output

    def reset(self):
        IncrementalDecoder.reset(self)
        self.buffer = ""

    def getstate(self):
        # the second item (additional state info) is always 0
        state = (self.buffer, 0)
        return state

    def setstate(self, state):
        # only the buffered input is restored; the second item is ignored
        buffered = state[0]
        self.buffer = buffered
312
#
# The StreamWriter and StreamReader classes provide generic working
# interfaces which can be used to implement new encoding submodules
# very easily. See encodings/utf_8.py for an example of how this is
# done.
#
Guido van Rossum0612d842000-03-10 23:20:43 +0000319
class StreamWriter(Codec):

    def __init__(self, stream, errors='strict'):

        """ Creates a StreamWriter instance.

            stream must be a file-like object open for writing
            (binary) data.

            errors selects the error handling scheme used when
            encoding. These values are predefined:

             'strict' - raise a ValueError (or a subclass)
             'ignore' - ignore the character and continue with the next
             'replace'- replace with a suitable replacement character
             'xmlcharrefreplace' - Replace with the appropriate XML
                                   character reference.
             'backslashreplace'  - Replace with backslashed escape
                                   sequences (only for encoding).

            Further values can be made available via register_error.
        """
        self.stream = stream
        self.errors = errors

    def write(self, object):

        """ Encodes object with self.encode() and writes the result
            to self.stream.
        """
        # the consumed length reported by the encoder is not needed here
        encoded, _consumed = self.encode(object, self.errors)
        self.stream.write(encoded)

    def writelines(self, list):

        """ Joins the given strings and writes the result with a
            single call to .write().
        """
        joined = ''.join(list)
        self.write(joined)

    def reset(self):

        """ Flushes and resets the codec buffers used for keeping state.

            After this call the output should be in a clean state, so
            that fresh data can be appended without rescanning the
            whole stream to recover state.

            The base implementation has nothing to reset.

        """

    def __getattr__(self, name,
                    getattr=getattr):

        """ Anything not defined on the writer itself is looked up on
            the underlying stream.
        """
        target = self.stream
        return getattr(target, name)

    def __enter__(self):
        # the writer itself is the context object
        return self

    def __exit__(self, type, value, tb):
        # leaving the with-block closes the underlying stream
        self.stream.close()
385
Guido van Rossum0612d842000-03-10 23:20:43 +0000386###
387
class StreamReader(Codec):

    def __init__(self, stream, errors='strict'):

        """ Creates a StreamReader instance.

            stream must be a file-like object open for reading
            (binary) data.

            The StreamReader may use different error handling
            schemes by providing the errors keyword argument. These
            parameters are predefined:

             'strict' - raise a ValueError (or a subclass)
             'ignore' - ignore the character and continue with the next
             'replace'- replace with a suitable replacement character;

            The set of allowed parameter values can be extended via
            register_error.
        """
        self.stream = stream
        self.errors = errors
        # bytes read from the stream that could not be decoded yet
        self.bytebuffer = ""
        # decoded data not yet handed out to the caller.
        # For str->str decoding this will stay a str
        # For str->unicode decoding the first read will promote it to unicode
        self.charbuffer = ""
        # list of already split lines cached by readline(), or None; while
        # it is set, it replaces charbuffer
        self.linebuffer = None

    def decode(self, input, errors='strict'):
        # Must be overridden by the concrete codec; see Codec.decode().
        raise NotImplementedError

    def read(self, size=-1, chars=-1, firstline=False):

        """ Decodes data from the stream self.stream and returns the
            resulting object.

            chars indicates the number of characters to read from the
            stream. read() will never return more than chars
            characters, but it might return less, if there are not enough
            characters available.

            size indicates the approximate maximum number of bytes to
            read from the stream for decoding purposes. The decoder
            can modify this setting as appropriate. The default value
            -1 indicates to read and decode as much as possible.  size
            is intended to prevent having to decode huge files in one
            step.

            If firstline is true, and a UnicodeDecodeError happens
            after the first line terminator in the input only the first line
            will be returned, the rest of the input will be kept until the
            next call to read().

            The method should use a greedy read strategy meaning that
            it should read as much data as is allowed within the
            definition of the encoding and the given size, e.g.  if
            optional encoding endings or state markers are available
            on the stream, these should be read too.
        """
        # If we have lines cached, first merge them back into characters
        if self.linebuffer:
            self.charbuffer = "".join(self.linebuffer)
            self.linebuffer = None

        # read until we get the required number of characters (if available)
        while True:
            # can the request be satisfied from the character buffer?
            if chars >= 0:
                if len(self.charbuffer) >= chars:
                    break
            elif size >= 0:
                if len(self.charbuffer) >= size:
                    break
            # With neither limit given the caller asked for everything, so
            # characters left over from an earlier readline() must NOT end
            # the loop early: keep going until the stream is exhausted.
            # we need more data
            if size < 0:
                newdata = self.stream.read()
            else:
                newdata = self.stream.read(size)
            # decode bytes (those remaining from the last call included)
            data = self.bytebuffer + newdata
            try:
                newchars, decodedbytes = self.decode(data, self.errors)
            except UnicodeDecodeError as exc:
                if firstline:
                    # retry with the part in front of the offending bytes;
                    # the error is only swallowed when that part holds at
                    # least one complete line
                    newchars, decodedbytes = self.decode(data[:exc.start], self.errors)
                    lines = newchars.splitlines(True)
                    if len(lines) <= 1:
                        raise
                else:
                    raise
            # keep undecoded bytes until the next call
            self.bytebuffer = data[decodedbytes:]
            # put new characters in the character buffer
            self.charbuffer += newchars
            # there was no data available
            if not newdata:
                break
        if chars < 0:
            # Return everything we've got
            result = self.charbuffer
            self.charbuffer = ""
        else:
            # Return the first chars characters
            result = self.charbuffer[:chars]
            self.charbuffer = self.charbuffer[chars:]
        return result

    def readline(self, size=None, keepends=True):

        """ Read one line from the input stream and return the
            decoded data.

            size, if given, is passed as size argument to the
            read() method; in that case read() is called only once.

            If keepends is false, the line end is stripped from the
            returned line.

        """
        # If we have lines cached from an earlier read, return
        # them unconditionally
        if self.linebuffer:
            line = self.linebuffer[0]
            del self.linebuffer[0]
            if len(self.linebuffer) == 1:
                # revert to charbuffer mode; we might need more data
                # next time
                self.charbuffer = self.linebuffer[0]
                self.linebuffer = None
            if not keepends:
                line = line.splitlines(False)[0]
            return line

        # start with small reads; the size is doubled below (until it
        # reaches 8000) for as long as no line end shows up
        readsize = size or 72
        line = ""
        # If size is given, we call read() only once
        while True:
            data = self.read(readsize, firstline=True)
            if data:
                # If we're at a "\r" read one extra character (which might
                # be a "\n") to get a proper line ending. If the stream is
                # temporarily exhausted we return the wrong line ending.
                if data.endswith("\r"):
                    data += self.read(size=1, chars=1)

            line += data
            lines = line.splitlines(True)
            if lines:
                if len(lines) > 1:
                    # More than one line result; the first line is a full line
                    # to return
                    line = lines[0]
                    del lines[0]
                    if len(lines) > 1:
                        # cache the remaining lines
                        lines[-1] += self.charbuffer
                        self.linebuffer = lines
                        self.charbuffer = None
                    else:
                        # only one remaining line, put it back into charbuffer
                        self.charbuffer = lines[0] + self.charbuffer
                    if not keepends:
                        line = line.splitlines(False)[0]
                    break
                line0withend = lines[0]
                line0withoutend = lines[0].splitlines(False)[0]
                if line0withend != line0withoutend: # We really have a line end
                    # Put the rest back together and keep it until the next call
                    self.charbuffer = "".join(lines[1:]) + self.charbuffer
                    if keepends:
                        line = line0withend
                    else:
                        line = line0withoutend
                    break
            # we didn't get anything or this was our only try
            if not data or size is not None:
                if line and not keepends:
                    line = line.splitlines(False)[0]
                break
            if readsize < 8000:
                readsize *= 2
        return line

    def readlines(self, sizehint=None, keepends=True):

        """ Read all lines available on the input stream
            and return them as list of lines.

            Line breaks are implemented using the codec's decoder
            method and are included in the list entries unless
            keepends is false.

            sizehint, if given, is ignored since there is no efficient
            way to finding the true end-of-line.

        """
        data = self.read()
        return data.splitlines(keepends)

    def reset(self):

        """ Resets the codec buffers used for keeping state.

            Note that no stream repositioning should take place.
            This method is primarily intended to be able to recover
            from decoding errors.

        """
        self.bytebuffer = ""
        # Same empty value as in __init__ (a plain "" rather than u""), so
        # that a str->str reader still yields str after a reset.
        self.charbuffer = ""
        self.linebuffer = None

    def seek(self, offset, whence=0):
        """ Set the input stream's current position.

            Resets the codec buffers used for keeping state.
        """
        # Reposition first: if the stream refuses to seek, the exception
        # propagates and the buffered data is left untouched.
        self.stream.seek(offset, whence)
        self.reset()

    def next(self):

        """ Return the next decoded line from the input stream.

            Raises StopIteration once readline() returns an empty line.
        """
        line = self.readline()
        if line:
            return line
        raise StopIteration

    def __iter__(self):
        return self

    def __getattr__(self, name,
                    getattr=getattr):

        """ Inherit all other methods from the underlying stream.
        """
        return getattr(self.stream, name)

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        # leaving the with-block closes the underlying stream
        self.stream.close()
630
Guido van Rossum0612d842000-03-10 23:20:43 +0000631###
632
class StreamReaderWriter:

    """ StreamReaderWriter instances allow wrapping streams which
        work in both read and write modes.

        The design is such that one can use the factory functions
        returned by the codec.lookup() function to construct the
        instance.

    """
    # Optional attributes set by the file wrappers below
    encoding = 'unknown'

    def __init__(self, stream, Reader, Writer, errors='strict'):

        """ Creates a StreamReaderWriter instance.

            stream must be a Stream-like object.

            Reader, Writer must be factory functions or classes
            providing the StreamReader, StreamWriter interface resp.

            Error handling is done in the same way as defined for the
            StreamWriter/Readers.

        """
        self.stream = stream
        # both wrappers share the same underlying stream
        self.reader = Reader(stream, errors)
        self.writer = Writer(stream, errors)
        self.errors = errors

    def read(self, size=-1):

        """ Read and decode data through the reader; size is passed on
            to its read() method.
        """
        return self.reader.read(size)

    def readline(self, size=None):

        """ Return one decoded line from the reader.
        """
        return self.reader.readline(size)

    def readlines(self, sizehint=None):

        """ Return the list of decoded lines produced by the reader.
        """
        return self.reader.readlines(sizehint)

    def next(self):

        """ Return the next decoded line from the input stream."""
        return self.reader.next()

    def __iter__(self):
        return self

    def write(self, data):

        """ Encode data and write it to the stream through the writer.
        """
        return self.writer.write(data)

    def writelines(self, list):

        """ Encode and write a list of strings through the writer.
        """
        return self.writer.writelines(list)

    def reset(self):

        """ Reset the state of both the reader and the writer.
        """
        self.reader.reset()
        self.writer.reset()

    def seek(self, offset, whence=0):

        """ Set the stream's current position and drop stale codec state.

            Without this method seek() would be forwarded straight to
            the underlying stream by __getattr__, and the reader would
            go on returning data it had buffered from the old position.

            Returns whatever the underlying stream's seek() returns.
        """
        result = self.stream.seek(offset, whence)
        self.reader.reset()
        if whence == 0 and offset == 0:
            # back at the very beginning: let the writer start afresh too
            self.writer.reset()
        return result

    def __getattr__(self, name,
                    getattr=getattr):

        """ Inherit all other methods from the underlying stream.
        """
        return getattr(self.stream, name)

    # these are needed to make "with codecs.open(...)" work properly

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.stream.close()
711
Guido van Rossum0612d842000-03-10 23:20:43 +0000712###
713
class StreamRecoder:

    """ StreamRecoder instances provide a frontend - backend
        view of encoding data.

        They use the complete set of APIs returned by the
        codecs.lookup() function to implement their task.

        Data written to the stream is first decoded into an
        intermediate format (which is dependent on the given codec
        combination) and then written to the stream using an instance
        of the provided Writer class.

        In the other direction, data is read from the stream using a
        Reader instance and then returned to the caller in encoded
        form.

    """
    # Optional attributes set by the file wrappers below
    data_encoding = 'unknown'
    file_encoding = 'unknown'

    def __init__(self, stream, encode, decode, Reader, Writer,
                 errors='strict'):

        """ Creates a StreamRecoder instance which implements a two-way
            conversion: encode and decode work on the frontend (the
            output of .read() and the input to .write()) while
            Reader and Writer work on the backend (reading and
            writing to the stream).

            You can use these objects to do transparent direct
            recodings from e.g. latin-1 to utf-8 and back.

            stream must be a file-like object.

            encode, decode must adhere to the Codec interface, Reader,
            Writer must be factory functions or classes providing the
            StreamReader, StreamWriter interface resp.

            encode and decode are needed for the frontend translation,
            Reader and Writer for the backend translation. Unicode is
            used as intermediate encoding.

            Error handling is done in the same way as defined for the
            StreamWriter/Readers.

        """
        self.stream = stream
        self.encode = encode
        self.decode = decode
        self.reader = Reader(stream, errors)
        self.writer = Writer(stream, errors)
        self.errors = errors

    def read(self, size=-1):

        """ Read via the backend Reader and return the data re-encoded
            with the frontend encode function.

            size is passed through to the Reader's .read() method.
        """
        data = self.reader.read(size)
        data, bytesencoded = self.encode(data, self.errors)
        return data

    def readline(self, size=None):

        """ Read one line via the backend Reader and return it
            re-encoded with the frontend encode function.

            size, if given, is passed through to the Reader's
            .readline() method.
        """
        if size is None:
            data = self.reader.readline()
        else:
            data = self.reader.readline(size)
        data, bytesencoded = self.encode(data, self.errors)
        return data

    def readlines(self, sizehint=None):

        """ Read everything the backend Reader delivers, re-encode it
            and return it as a list of lines (line ends are kept).

            sizehint is accepted for file API compatibility but is
            ignored.
        """
        data = self.reader.read()
        data, bytesencoded = self.encode(data, self.errors)
        return data.splitlines(1)

    def next(self):

        """ Return the next line from the input stream, re-encoded with
            the frontend encode function."""
        data = self.reader.next()
        data, bytesencoded = self.encode(data, self.errors)
        return data

    def __iter__(self):
        return self

    def write(self, data):

        """ Decode data with the frontend decode function and hand the
            result to the backend Writer.
        """
        data, bytesdecoded = self.decode(data, self.errors)
        return self.writer.write(data)

    def writelines(self, list):

        """ Join the given strings, decode them with the frontend
            decode function and hand the result to the backend Writer.
        """
        data = ''.join(list)
        data, bytesdecoded = self.decode(data, self.errors)
        return self.writer.write(data)

    def reset(self):

        """ Reset the backend Reader and Writer."""
        self.reader.reset()
        self.writer.reset()

    def seek(self, offset, whence=0):

        """ Reposition the underlying stream and drop stale codec state.

            Without this method the call would fall through
            __getattr__ straight to the stream, leaving data the Reader
            had already buffered in place; the next read would then
            return that stale data instead of the data at the new
            position.

            Returns whatever the stream's own .seek() returns.
        """
        result = self.stream.seek(offset, whence)
        # Anything the reader buffered belongs to the old position.
        self.reader.reset()
        # Only a rewind to the very beginning restarts the output, so
        # only then is the writer put back into its initial state.
        if whence == 0 and offset == 0:
            self.writer.reset()
        return result

    def __getattr__(self, name,
                    getattr=getattr):

        """ Inherit all other methods from the underlying stream.
        """
        return getattr(self.stream, name)

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        # Exception details are not inspected; the stream is simply closed.
        self.stream.close()
827
Guido van Rossum0612d842000-03-10 23:20:43 +0000828### Shortcuts
829
def open(filename, mode='rb', encoding=None, errors='strict', buffering=1):

    """ Open an encoded file using the given mode and return
        a wrapped version providing transparent encoding/decoding.

        Note: The wrapped version will only accept the object format
        defined by the codecs, i.e. Unicode objects for most builtin
        codecs. Output is also codec dependent and will usually be
        Unicode as well.

        When an encoding is given, files are always opened in binary
        mode, even if no binary mode was specified. This is done to
        avoid data loss due to encodings using 8-bit values. The
        default file mode is 'rb' meaning to open the file in binary
        read mode.

        encoding specifies the encoding which is to be used for the
        file. If it is None, the plain file object returned by the
        builtin open() is handed back unwrapped.

        errors may be given to define the error handling. It defaults
        to 'strict' which causes ValueErrors to be raised in case an
        encoding error occurs.

        buffering has the same meaning as for the builtin open() API.
        It defaults to line buffered.

        The returned wrapped file object provides an extra attribute
        .encoding which allows querying the used encoding. This
        attribute is only available if an encoding was specified as
        parameter.

        If the codec lookup or the construction of the wrapper fails,
        the already opened file is closed again before the exception
        is propagated.

    """
    if encoding is not None:
        if 'U' in mode:
            # No automatic conversion of '\n' is done on reading and writing
            mode = mode.strip().replace('U', '')
            # Dropping 'U' may leave the mode without a primary mode
            # character; reading is the implied default.
            if mode[:1] not in set('rwa'):
                mode = 'r' + mode
        if 'b' not in mode:
            # Force opening of the file in binary mode
            mode = mode + 'b'
    file = __builtin__.open(filename, mode, buffering)
    if encoding is None:
        return file
    try:
        info = lookup(encoding)
        srw = StreamReaderWriter(file, info.streamreader, info.streamwriter, errors)
        # Add attributes to simplify introspection
        srw.encoding = encoding
        return srw
    except:
        # Do not leak the open file handle when the wrapper cannot be
        # set up (e.g. unknown encoding); re-raise the original error.
        file.close()
        raise
Guido van Rossum0612d842000-03-10 23:20:43 +0000878
def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'):

    """ Wrap file in a StreamRecoder that translates between two
        encodings.

        Strings written to the wrapper are taken to be in
        data_encoding and end up in the original file in
        file_encoding. Strings read through the wrapper are read from
        the file as file_encoding and handed to the caller in
        data_encoding. The intermediate representation will usually
        be Unicode but depends on the specified codecs.

        file_encoding defaults to data_encoding when it is not given.

        errors selects the error handling; the default 'strict'
        causes ValueErrors to be raised when an encoding error occurs.

        For introspection, the returned object carries the attributes
        .data_encoding and .file_encoding holding the encodings that
        are in effect.

    """
    backend_encoding = data_encoding if file_encoding is None else file_encoding
    # The frontend codec is looked up first, the backend codec second.
    frontend = lookup(data_encoding)
    backend = lookup(backend_encoding)
    recoder = StreamRecoder(
        file,
        frontend.encode,
        frontend.decode,
        backend.streamreader,
        backend.streamwriter,
        errors,
    )
    # Expose the effective encodings on the wrapper.
    recoder.data_encoding = data_encoding
    recoder.file_encoding = backend_encoding
    return recoder
Guido van Rossum0612d842000-03-10 23:20:43 +0000914
Marc-André Lemburgaa32c5a2001-09-19 11:24:48 +0000915### Helpers for codec lookup
916
def getencoder(encoding):

    """ Look up the codec registered for encoding and return its
        stateless encoder function.

        A LookupError is raised if the encoding cannot be found.

    """
    codec_info = lookup(encoding)
    return codec_info.encode
Marc-André Lemburgaa32c5a2001-09-19 11:24:48 +0000926
def getdecoder(encoding):

    """ Look up the codec registered for encoding and return its
        stateless decoder function.

        A LookupError is raised if the encoding cannot be found.

    """
    codec_info = lookup(encoding)
    return codec_info.decode
936
def getincrementalencoder(encoding):

    """ Look up the codec registered for encoding and return its
        IncrementalEncoder class or factory function.

        A LookupError is raised if the encoding cannot be found or if
        the codec does not provide an incremental encoder.

    """
    factory = lookup(encoding).incrementalencoder
    if factory is not None:
        return factory
    # The codec exists but offers no incremental encoder.
    raise LookupError(encoding)
950
def getincrementaldecoder(encoding):

    """ Look up the codec registered for encoding and return its
        IncrementalDecoder class or factory function.

        A LookupError is raised if the encoding cannot be found or if
        the codec does not provide an incremental decoder.

    """
    factory = lookup(encoding).incrementaldecoder
    if factory is not None:
        return factory
    # The codec exists but offers no incremental decoder.
    raise LookupError(encoding)
Marc-André Lemburgaa32c5a2001-09-19 11:24:48 +0000964
def getreader(encoding):

    """ Look up the codec registered for encoding and return its
        StreamReader class or factory function.

        A LookupError is raised if the encoding cannot be found.

    """
    codec_info = lookup(encoding)
    return codec_info.streamreader
Marc-André Lemburgaa32c5a2001-09-19 11:24:48 +0000974
def getwriter(encoding):

    """ Look up the codec registered for encoding and return its
        StreamWriter class or factory function.

        A LookupError is raised if the encoding cannot be found.

    """
    codec_info = lookup(encoding)
    return codec_info.streamwriter
984
def iterencode(iterator, encoding, errors='strict', **kwargs):
    """
    Encoding iterator.

    Feeds every string produced by iterator through an IncrementalEncoder
    for the given encoding and yields each non-empty piece of output.
    After the input is exhausted the encoder is flushed once and any
    remaining output is yielded as well.

    errors and kwargs are passed through to the IncrementalEncoder
    constructor.
    """
    incremental = getincrementalencoder(encoding)(errors, **kwargs)
    for chunk in iterator:
        encoded = incremental.encode(chunk)
        if not encoded:
            continue
        yield encoded
    # Final call with an empty string flushes whatever is still pending.
    tail = incremental.encode("", True)
    if tail:
        yield tail
1002
def iterdecode(iterator, encoding, errors='strict', **kwargs):
    """
    Decoding iterator.

    Feeds every string produced by iterator through an IncrementalDecoder
    for the given encoding and yields each non-empty piece of output.
    After the input is exhausted the decoder is flushed once and any
    remaining output is yielded as well.

    errors and kwargs are passed through to the IncrementalDecoder
    constructor.
    """
    incremental = getincrementaldecoder(encoding)(errors, **kwargs)
    for chunk in iterator:
        decoded = incremental.decode(chunk)
        if not decoded:
            continue
        yield decoded
    # Final call with an empty string flushes whatever is still pending.
    tail = incremental.decode("", True)
    if tail:
        yield tail
Marc-André Lemburgaa32c5a2001-09-19 11:24:48 +00001020
Marc-André Lemburga866df82001-01-03 21:29:14 +00001021### Helpers for charmap-based codecs
1022
def make_identity_dict(rng):

    """ make_identity_dict(rng) -> dict

        Build and return a dictionary in which every element of the
        rng sequence is both key and value.

    """
    return dict((member, member) for member in rng)
1035
def make_encoding_map(decoding_map):

    """ Creates an encoding map from a decoding map.

        Each (key, value) pair of the decoding map becomes a
        (value, key) pair of the result. A value that occurs more
        than once in the decoding map is mapped to None (undefined
        mapping) instead, causing an exception when encountered by
        the charmap codec during translation.

        One example where this happens is cp875.py which decodes
        multiple characters to U+001A.

    """
    encoding_map = {}
    for source, target in decoding_map.items():
        # A repeated target is ambiguous and is marked undefined.
        encoding_map[target] = None if target in encoding_map else source
    return encoding_map
Tim Peters3a2ab1a2001-05-29 06:06:54 +00001056
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001057### error handlers
1058
# Bind the standard error handlers to module-level names. If any of
# them cannot be looked up, all of the names are bound to None instead.
try:
    (strict_errors,
     ignore_errors,
     replace_errors,
     xmlcharrefreplace_errors,
     backslashreplace_errors) = map(lookup_error, (
        "strict",
        "ignore",
        "replace",
        "xmlcharrefreplace",
        "backslashreplace",
    ))
except LookupError:
    # In --disable-unicode builds, these error handlers are missing
    strict_errors = ignore_errors = replace_errors = None
    xmlcharrefreplace_errors = backslashreplace_errors = None
Walter Dörwald3aeb6322002-09-02 13:14:32 +00001072
# Tell modulefinder that using codecs probably needs the encodings
# package.  The import below is never executed because _false is 0;
# NOTE(review): presumably the flag is a variable rather than a literal
# so that the import statement stays in the compiled module -- confirm
# before simplifying.
_false = 0
if _false:
    import encodings
1078
Guido van Rossum0612d842000-03-10 23:20:43 +00001079### Tests
Guido van Rossum1c89b0e2000-04-11 15:41:38 +00001080
if __name__ == '__main__':

    # Demo when run as a script: both standard streams are replaced by
    # recoding wrappers created with EncodedFile().

    # Make stdout translate Latin-1 output into UTF-8 output
    # (data_encoding 'latin-1' for the caller, file_encoding 'utf-8'
    # on the stream)
    sys.stdout = EncodedFile(sys.stdout, 'latin-1', 'utf-8')

    # Have stdin translate Latin-1 input into UTF-8 input
    # (data_encoding 'utf-8' for the caller, file_encoding 'latin-1'
    # on the stream)
    sys.stdin = EncodedFile(sys.stdin, 'utf-8', 'latin-1')