Lib/codecs.py - platform/external/python/cpython2 - Gitiles

 """ codecs -- Python Codec Registry, API and helpers.


 Written by Marc-Andre Lemburg (mal@lemburg.com).

 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

 """#"

 import struct,types,__builtin__

 ### Registry and builtin stateless codec functions

 try:
     from _codecs import *
 except ImportError,why:
     raise SystemError,\
           'Failed to load the builtin codecs: %s' % why

 ### Constants

 #
 # Byte Order Mark (BOM) and its possible values (BOM_BE, BOM_LE)
 #
 # Native byte order mark: U+FEFF packed as an unsigned 16-bit value
 # using the byte order of the running platform ('=' format prefix).
 BOM = struct.pack('=H', 0xFEFF)

 # Two-byte mark in big endian order: the bytes FE FF, i.e. U+FEFF
 # (ZERO WIDTH NO-BREAK SPACE) as stored by UTF-16 on big endian
 # platforms.
 BOM_BE = '\376\377'
 BOM32_BE = BOM_BE

 # Two-byte mark in little endian order: the bytes FF FE, i.e. U+FEFF
 # as stored by UTF-16 on little endian platforms. Read in big endian
 # order the same bytes give U+FFFE, which is defined as being an
 # illegal Unicode character.
 BOM_LE = '\377\376'
 BOM32_LE = BOM_LE

 #
 # Four-byte (UCS-4) Byte Order Marks
 #
 # NOTE: despite the "64" in their names these marks are four bytes
 # (32 bits) long, just like the "32" aliases above are two bytes
 # (16 bits) long.
 #
 # U+0000FEFF stored in big endian order
 BOM64_BE = '\000\000\376\377'
 # U+0000FEFF stored in little endian order
 BOM64_LE = '\377\376\000\000'


 ### Codec base classes (defining the API)

 class Codec:

     """ Interface definition for stateless encoders and decoders.

         Subclasses are expected to override .encode() and .decode();
         the versions defined here only raise NotImplementedError.

         Both methods accept an errors argument which selects the
         error handling scheme. These string values are defined:

          'strict'  - raise a ValueError (or a subclass)
          'ignore'  - skip the offending character and continue with
                      the next one
          'replace' - substitute a suitable replacement character;
                      Python will use the official U+FFFD REPLACEMENT
                      CHARACTER for the builtin Unicode codecs.

     """

     def encode(self, input, errors='strict'):

         """ Encode the object input and return a tuple (output
             object, length consumed).

             errors selects the error handling scheme to apply and
             defaults to 'strict'.

             Implementations may not store state in the Codec
             instance. Codecs which have to keep state in order to
             make encoding efficient should use the stream classes
             (StreamWriter / StreamReader) instead.

             Zero length input must be supported; the result is then
             an empty object of the output object type.

             This base implementation always raises
             NotImplementedError.

         """
         raise NotImplementedError()

     def decode(self, input, errors='strict'):

         """ Decode the object input and return a tuple (output
             object, length consumed).

             input must be an object which provides the bf_getreadbuf
             buffer slot; Python strings, buffer objects and memory
             mapped files are examples of such objects.

             errors selects the error handling scheme to apply and
             defaults to 'strict'.

             Implementations may not store state in the Codec
             instance. Codecs which have to keep state in order to
             make decoding efficient should use the stream classes
             (StreamWriter / StreamReader) instead.

             Zero length input must be supported; the result is then
             an empty object of the output object type.

             This base implementation always raises
             NotImplementedError.

         """
         raise NotImplementedError()

 #
 # The StreamWriter and StreamReader classes provide generic working
 # interfaces which can be used to implement new encoding submodules
 # very easily. See encodings/utf_8.py for an example of how this is
 # done.
 #

 class StreamWriter(Codec):

     """ Codec wrapper which encodes objects and writes the encoded
         data to an underlying stream.

         The encoding itself is done by the .encode() method (see
         Codec), which subclasses have to supply.

     """

     def __init__(self, stream, errors='strict'):

         """ Create a StreamWriter instance.

             stream must be a file-like object open for writing
             (binary) data.

             errors selects the error handling scheme which is passed
             to .encode() by .write(). These values are defined:

              'strict'  - raise a ValueError (or a subclass)
              'ignore'  - ignore the character and continue with the
                          next
              'replace' - replace with a suitable replacement
                          character

         """
         self.stream = stream
         self.errors = errors

     def write(self, object):

         """ Encode object using the stored error handling scheme and
             write the encoded data to self.stream.
         """
         encoded, _consumed = self.encode(object, self.errors)
         self.stream.write(encoded)

     def writelines(self, list):

         """ Concatenate the strings in list and write the result to
             the stream with a single .write() call.
         """
         joined = ''.join(list)
         self.write(joined)

     def reset(self):

         """ Flush and reset the codec buffers used for keeping state.

             Calling this method should ensure that the data on the
             output is put into a clean state which allows appending
             new data without having to rescan the whole stream to
             recover state.

             The base implementation does nothing.

         """
         pass

     def __getattr__(self, name, getattr=getattr):

         """ Look up every attribute not found on the writer itself
             on the underlying stream.

             The getattr default argument makes the builtin available
             as a local name (presumably to save a global lookup).
         """
         stream = self.stream
         return getattr(stream, name)

 ###

 class StreamReader(Codec):

     """ Codec wrapper which reads encoded data from an underlying
         stream and returns it decoded.

         The decoding itself is done by the .decode() method (see
         Codec), which subclasses have to supply. The error handling
         scheme given to the constructor is passed to every .decode()
         call made by the read methods.

     """

     def __init__(self, stream, errors='strict'):

         """ Create a StreamReader instance.

             stream must be a file-like object open for reading
             (binary) data.

             errors selects the error handling scheme which is passed
             to .decode() by the read methods. These values are
             defined:

              'strict'  - raise a ValueError (or a subclass)
              'ignore'  - ignore the character and continue with the
                          next
              'replace' - replace with a suitable replacement
                          character

         """
         self.stream = stream
         self.errors = errors

     def read(self, size=-1):

         """ Decode data from the stream self.stream and return the
             resulting object.

             size indicates the approximate maximum number of bytes to
             read from the stream for decoding purposes. The default
             value -1 (or any negative value) means to read and decode
             everything the stream returns. size is intended to
             prevent having to decode huge files in one step.

             If the data read for a non-negative size cannot be
             decoded, up to ten further attempts are made, each after
             reading one more byte from the stream. The ValueError of
             the last attempt is re-raised if the stream runs out of
             data or all attempts fail.

         """
         # Unsliced reading:
         if size < 0:
             return self.decode(self.stream.read(), self.errors)[0]

         # Sliced reading:
         read = self.stream.read
         decode = self.decode
         errors = self.errors
         data = read(size)
         tries = 0
         while 1:
             try:
                 decoded, decodedbytes = decode(data, errors)
             except ValueError:
                 # The slice may have ended in the middle of a
                 # character. This method is slow but should work
                 # under pretty much all conditions; at most 10
                 # retries are made.
                 tries = tries + 1
                 newdata = read(1)
                 if not newdata or tries > 10:
                     raise
                 data = data + newdata
             else:
                 return decoded

     def readline(self, size=None):

         """ Read one line from the input stream and return the
             decoded data.

             Note: Unlike the .readlines() method, this method inherits
             the line breaking knowledge from the underlying stream's
             .readline() method -- there is currently no support for
             line breaking using the codec decoder due to lack of line
             buffering. Subclasses should however, if possible, try to
             implement this method using their own knowledge of line
             breaking.

             size, if given, is passed as size argument to the stream's
             .readline() method.

         """
         if size is None:
             line = self.stream.readline()
         else:
             line = self.stream.readline(size)
         return self.decode(line, self.errors)[0]

     def readlines(self, sizehint=0):

         """ Read all lines available on the input stream
             and return them as list of lines.

             The data is decoded first and then split using the
             .splitlines() method of the decoded object; line breaks
             are included in the list entries.

             sizehint, if given and positive, is passed as size
             argument to the stream's .read() method. None, 0 (the
             default) and negative values mean to read everything the
             stream returns.

         """
         # A size of 0 would make the stream return no data at all, so
         # it is treated like None: no limit.
         if sizehint is None or sizehint <= 0:
             data = self.stream.read()
         else:
             data = self.stream.read(sizehint)
         return self.decode(data, self.errors)[0].splitlines(1)

     def reset(self):

         """ Reset the codec buffers used for keeping state.

             Note that no stream repositioning should take place.
             This method is primarily intended to be able to recover
             from decoding errors.

             The base implementation does nothing.

         """
         pass

     def __getattr__(self, name, getattr=getattr):

         """ Look up every attribute not found on the reader itself
             on the underlying stream.
         """
         return getattr(self.stream, name)

 ###

 class StreamReaderWriter:

     """ Wrapper for streams which work in both read and write modes.

         Read requests are forwarded to a reader object and write
         requests to a writer object, both of which are created around
         the same stream. The design is such that one can use the
         factory functions returned by the codecs.lookup() function to
         construct the instance.

     """
     # Optional attribute set by the file wrappers below
     encoding = 'unknown'

     def __init__(self, stream, Reader, Writer, errors='strict'):

         """ Create a StreamReaderWriter instance.

             stream must be a Stream-like object.

             Reader, Writer must be factory functions or classes
             providing the StreamReader, StreamWriter interface resp.
             Each one is called with the arguments (stream, errors).

             Error handling is done in the same way as defined for the
             StreamWriter/Readers.

         """
         self.stream = stream
         self.errors = errors
         self.reader = Reader(stream, errors)
         self.writer = Writer(stream, errors)

     def read(self, size=-1):

         """ Return the result of the reader's .read(size).
         """
         reader = self.reader
         return reader.read(size)

     def readline(self, size=None):

         """ Return the result of the reader's .readline(size).
         """
         reader = self.reader
         return reader.readline(size)

     def readlines(self, sizehint=None):

         """ Return the result of the reader's .readlines(sizehint).
         """
         reader = self.reader
         return reader.readlines(sizehint)

     def write(self, data):

         """ Return the result of the writer's .write(data).
         """
         writer = self.writer
         return writer.write(data)

     def writelines(self, list):

         """ Return the result of the writer's .writelines(list).
         """
         writer = self.writer
         return writer.writelines(list)

     def reset(self):

         """ Reset the reader first and the writer second.
         """
         for codec in (self.reader, self.writer):
             codec.reset()

     def __getattr__(self, name, getattr=getattr):

         """ Look up every attribute not found on the wrapper itself
             on the underlying stream.
         """
         stream = self.stream
         return getattr(stream, name)

 ###

 class StreamRecoder:

     """ StreamRecoder instances provide a frontend - backend
         view of encoding data.

         They use the complete set of APIs returned by the
         codecs.lookup() function to implement their task.

         Data written to the recoder is first decoded into an
         intermediate format (which is dependent on the given codec
         combination) and then written to the stream using an instance
         of the provided Writer class.

         In the other direction, data is read from the stream using a
         Reader instance and then returned to the caller in encoded
         form.

     """
     # Optional attributes set by the file wrappers below
     data_encoding = 'unknown'
     file_encoding = 'unknown'

     def __init__(self, stream, encode, decode, Reader, Writer,
                  errors='strict'):

         """ Create a StreamRecoder instance which implements a
             two-way conversion: encode and decode work on the
             frontend (the data returned by .read() and the data
             passed to .write()) while Reader and Writer work on the
             backend (reading from and writing to the stream).

             You can use these objects to do transparent direct
             recodings from e.g. latin-1 to utf-8 and back.

             stream must be a file-like object.

             encode, decode must adhere to the Codec interface, Reader,
             Writer must be factory functions or classes providing the
             StreamReader, StreamWriter interface resp. Reader and
             Writer are each called with the arguments (stream,
             errors).

             Unicode is used as intermediate encoding.

             Error handling is done in the same way as defined for the
             StreamWriter/Readers.

         """
         self.stream = stream
         self.errors = errors
         self.encode = encode
         self.decode = decode
         self.reader = Reader(stream, errors)
         self.writer = Writer(stream, errors)

     def read(self, size=-1):

         """ Read data using the reader's .read(size) and return it
             encoded with the frontend encode function.
         """
         intermediate = self.reader.read(size)
         encoded, _consumed = self.encode(intermediate, self.errors)
         return encoded

     def readline(self, size=None):

         """ Read one line using the reader's .readline() and return
             it encoded with the frontend encode function.

             size is only passed on to the reader if it is not None.
         """
         reader = self.reader
         if size is not None:
             intermediate = reader.readline(size)
         else:
             intermediate = reader.readline()
         encoded, _consumed = self.encode(intermediate, self.errors)
         return encoded

     def readlines(self, sizehint=None):

         """ Read data using the reader's .read(), encode it with the
             frontend encode function and return the result split
             into a list of lines (line breaks included).

             sizehint is only passed on to the reader if it is not
             None.
         """
         reader = self.reader
         if sizehint is not None:
             intermediate = reader.read(sizehint)
         else:
             intermediate = reader.read()
         encoded, _consumed = self.encode(intermediate, self.errors)
         return encoded.splitlines(1)

     def write(self, data):

         """ Decode data with the frontend decode function and pass
             the result to the writer's .write(); returns whatever
             that call returns.
         """
         intermediate, _consumed = self.decode(data, self.errors)
         return self.writer.write(intermediate)

     def writelines(self, list):

         """ Concatenate the strings in list, decode the result with
             the frontend decode function and pass it to the writer's
             .write(); returns whatever that call returns.
         """
         joined = ''.join(list)
         intermediate, _consumed = self.decode(joined, self.errors)
         return self.writer.write(intermediate)

     def reset(self):

         """ Reset the reader first and the writer second.
         """
         for codec in (self.reader, self.writer):
             codec.reset()

     def __getattr__(self, name, getattr=getattr):

         """ Look up every attribute not found on the recoder itself
             on the underlying stream.
         """
         stream = self.stream
         return getattr(stream, name)

 ### Shortcuts

 def open(filename, mode='rb', encoding=None, errors='strict', buffering=1):

     """ Open an encoded file using the given mode and return
         a wrapped version providing transparent encoding/decoding.

         Note: The wrapped version will only accept the object format
         defined by the codecs, i.e. Unicode objects for most builtin
         codecs. Output is also codec dependent and will usually be
         Unicode as well.

         If an encoding is given, the file is always opened in binary
         mode, even if no binary mode was specified. This is done to
         avoid data loss due to encodings using 8-bit values. The
         default file mode is 'rb' meaning to open the file in binary
         read mode.

         encoding specifies the encoding which is to be used for the
         file. If it is None (the default), the file object returned
         by the builtin open() is returned unwrapped.

         errors may be given to define the error handling. It defaults
         to 'strict' which causes ValueErrors to be raised in case an
         encoding error occurs.

         buffering has the same meaning as for the builtin open() API.
         It defaults to line buffered.

         The returned wrapped file object provides an extra attribute
         .encoding which allows querying the used encoding. This
         attribute is only available if an encoding was specified as
         parameter.

         If the codec lookup or the creation of the wrapper fails, the
         file is closed again before the exception is re-raised.

     """
     if encoding is not None and 'b' not in mode:
         # Force opening of the file in binary mode
         mode = mode + 'b'
     file = __builtin__.open(filename, mode, buffering)
     if encoding is None:
         return file
     try:
         # lookup() returns (encode, decode, Reader, Writer); only the
         # stream factories are needed here.
         Reader, Writer = lookup(encoding)[2:]
         srw = StreamReaderWriter(file, Reader, Writer, errors)
     except:
         # Don't leak the already opened file, e.g. when the encoding
         # is unknown; the original exception is passed on unchanged.
         file.close()
         raise
     # Add attributes to simplify introspection
     srw.encoding = encoding
     return srw

 def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'):

     """ Return a wrapped version of file which provides transparent
         encoding translation.

         Strings written to the wrapped file are interpreted according
         to the given data_encoding and then written to the original
         file as string using file_encoding. The intermediate encoding
         will usually be Unicode but depends on the specified codecs.

         Strings are read from the file using file_encoding and then
         passed back to the caller as string using data_encoding.

         If file_encoding is not given, it defaults to data_encoding.

         errors may be given to define the error handling. It defaults
         to 'strict' which causes ValueErrors to be raised in case an
         encoding error occurs.

         The returned wrapped file object is a StreamRecoder instance.
         It provides two extra attributes .data_encoding and
         .file_encoding which reflect the given parameters of the same
         name. The attributes can be used for introspection by Python
         programs.

     """
     if file_encoding is None:
         file_encoding = data_encoding

     # Frontend: the stateless functions of the data codec
     data_codec = lookup(data_encoding)
     encode, decode = data_codec[:2]

     # Backend: the stream factories of the file codec
     file_codec = lookup(file_encoding)
     Reader, Writer = file_codec[2:]

     recoder = StreamRecoder(file, encode, decode, Reader, Writer, errors)

     # Add attributes to simplify introspection
     recoder.data_encoding = data_encoding
     recoder.file_encoding = file_encoding
     return recoder

 ### Tests

 # Only executed when the module is run as a script: replaces the
 # standard streams of the process with recoding wrappers.
 if __name__ == '__main__':

     import sys

     # Strings written to sys.stdout are interpreted as Latin-1 and
     # written to the original stdout as UTF-8
     sys.stdout = EncodedFile(sys.stdout, 'latin-1', 'utf-8')

     # Data read from the original stdin is interpreted as Latin-1 and
     # passed back to the caller as UTF-8
     sys.stdin = EncodedFile(sys.stdin, 'utf-8', 'latin-1')
	""" codecs -- Python Codec Registry, API and helpers.


	Written by Marc-Andre Lemburg (mal@lemburg.com).

	(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

	"""#"

	import struct,types,__builtin__

	### Registry and builtin stateless codec functions

	try:
	from _codecs import *
	except ImportError,why:
	raise SystemError,\
	'Failed to load the builtin codecs: %s' % why

	### Constants

	#
	# Byte Order Mark (BOM) and its possible values (BOM_BE, BOM_LE)
	#
	# Native byte order mark: U+FEFF packed as an unsigned 16-bit value
	# using the byte order of the running platform ('=' format prefix).
	BOM = struct.pack('=H', 0xFEFF)

	# Two-byte mark in big endian order: the bytes FE FF, i.e. U+FEFF
	# (ZERO WIDTH NO-BREAK SPACE) as stored by UTF-16 on big endian
	# platforms.
	BOM_BE = '\376\377'
	BOM32_BE = BOM_BE

	# Two-byte mark in little endian order: the bytes FF FE, i.e. U+FEFF
	# as stored by UTF-16 on little endian platforms. Read in big endian
	# order the same bytes give U+FFFE, which is defined as being an
	# illegal Unicode character.
	BOM_LE = '\377\376'
	BOM32_LE = BOM_LE

	#
	# Four-byte (UCS-4) Byte Order Marks
	#
	# NOTE: despite the "64" in their names these marks are four bytes
	# (32 bits) long, just like the "32" aliases above are two bytes
	# (16 bits) long.
	#
	# U+0000FEFF stored in big endian order
	BOM64_BE = '\000\000\376\377'
	# U+0000FEFF stored in little endian order
	BOM64_LE = '\377\376\000\000'


	### Codec base classes (defining the API)

	class Codec:

	    """ Interface definition for stateless encoders and decoders.

	        Subclasses are expected to override .encode() and .decode();
	        the versions defined here only raise NotImplementedError.

	        Both methods accept an errors argument which selects the
	        error handling scheme. These string values are defined:

	         'strict'  - raise a ValueError (or a subclass)
	         'ignore'  - skip the offending character and continue with
	                     the next one
	         'replace' - substitute a suitable replacement character;
	                     Python will use the official U+FFFD REPLACEMENT
	                     CHARACTER for the builtin Unicode codecs.

	    """

	    def encode(self, input, errors='strict'):

	        """ Encode the object input and return a tuple (output
	            object, length consumed).

	            errors selects the error handling scheme to apply and
	            defaults to 'strict'.

	            Implementations may not store state in the Codec
	            instance. Codecs which have to keep state in order to
	            make encoding efficient should use the stream classes
	            (StreamWriter / StreamReader) instead.

	            Zero length input must be supported; the result is then
	            an empty object of the output object type.

	            This base implementation always raises
	            NotImplementedError.

	        """
	        raise NotImplementedError()

	    def decode(self, input, errors='strict'):

	        """ Decode the object input and return a tuple (output
	            object, length consumed).

	            input must be an object which provides the bf_getreadbuf
	            buffer slot; Python strings, buffer objects and memory
	            mapped files are examples of such objects.

	            errors selects the error handling scheme to apply and
	            defaults to 'strict'.

	            Implementations may not store state in the Codec
	            instance. Codecs which have to keep state in order to
	            make decoding efficient should use the stream classes
	            (StreamWriter / StreamReader) instead.

	            Zero length input must be supported; the result is then
	            an empty object of the output object type.

	            This base implementation always raises
	            NotImplementedError.

	        """
	        raise NotImplementedError()

	#
	# The StreamWriter and StreamReader classes provide generic working
	# interfaces which can be used to implement new encoding submodules
	# very easily. See encodings/utf_8.py for an example of how this is
	# done.
	#

	class StreamWriter(Codec):

	    """ Codec wrapper which encodes objects and writes the encoded
	        data to an underlying stream.

	        The encoding itself is done by the .encode() method (see
	        Codec), which subclasses have to supply.

	    """

	    def __init__(self, stream, errors='strict'):

	        """ Create a StreamWriter instance.

	            stream must be a file-like object open for writing
	            (binary) data.

	            errors selects the error handling scheme which is passed
	            to .encode() by .write(). These values are defined:

	             'strict'  - raise a ValueError (or a subclass)
	             'ignore'  - ignore the character and continue with the
	                         next
	             'replace' - replace with a suitable replacement
	                         character

	        """
	        self.stream = stream
	        self.errors = errors

	    def write(self, object):

	        """ Encode object using the stored error handling scheme and
	            write the encoded data to self.stream.
	        """
	        data, consumed = self.encode(object, self.errors)
	        self.stream.write(data)

	    def writelines(self, list):

	        """ Concatenate the strings in list and write the result to
	            the stream with a single .write() call.
	        """
	        self.write(''.join(list))

	    def reset(self):

	        """ Flush and reset the codec buffers used for keeping state.

	            Calling this method should ensure that the data on the
	            output is put into a clean state which allows appending
	            new data without having to rescan the whole stream to
	            recover state.

	            The base implementation does nothing.

	        """
	        pass

	    def __getattr__(self, name, getattr=getattr):

	        """ Look up every attribute not found on the writer itself
	            on the underlying stream.
	        """
	        return getattr(self.stream, name)

	###

	class StreamReader(Codec):

	    """ Codec wrapper which reads encoded data from an underlying
	        stream and returns it decoded.

	        The decoding itself is done by the .decode() method (see
	        Codec), which subclasses have to supply. The error handling
	        scheme given to the constructor is passed to every .decode()
	        call made by the read methods.

	    """

	    def __init__(self, stream, errors='strict'):

	        """ Create a StreamReader instance.

	            stream must be a file-like object open for reading
	            (binary) data.

	            errors selects the error handling scheme which is passed
	            to .decode() by the read methods. These values are
	            defined:

	             'strict'  - raise a ValueError (or a subclass)
	             'ignore'  - ignore the character and continue with the
	                         next
	             'replace' - replace with a suitable replacement
	                         character

	        """
	        self.stream = stream
	        self.errors = errors

	    def read(self, size=-1):

	        """ Decode data from the stream self.stream and return the
	            resulting object.

	            size indicates the approximate maximum number of bytes to
	            read from the stream for decoding purposes. The default
	            value -1 (or any negative value) means to read and decode
	            everything the stream returns. size is intended to
	            prevent having to decode huge files in one step.

	            If the data read for a non-negative size cannot be
	            decoded, up to ten further attempts are made, each after
	            reading one more byte from the stream. The ValueError of
	            the last attempt is re-raised if the stream runs out of
	            data or all attempts fail.

	        """
	        # Unsliced reading:
	        if size < 0:
	            return self.decode(self.stream.read(), self.errors)[0]

	        # Sliced reading:
	        read = self.stream.read
	        decode = self.decode
	        errors = self.errors
	        data = read(size)
	        tries = 0
	        while 1:
	            try:
	                decoded, decodedbytes = decode(data, errors)
	            except ValueError:
	                # The slice may have ended in the middle of a
	                # character. This method is slow but should work
	                # under pretty much all conditions; at most 10
	                # retries are made.
	                tries = tries + 1
	                newdata = read(1)
	                if not newdata or tries > 10:
	                    raise
	                data = data + newdata
	            else:
	                return decoded

	    def readline(self, size=None):

	        """ Read one line from the input stream and return the
	            decoded data.

	            Note: Unlike the .readlines() method, this method inherits
	            the line breaking knowledge from the underlying stream's
	            .readline() method -- there is currently no support for
	            line breaking using the codec decoder due to lack of line
	            buffering. Subclasses should however, if possible, try to
	            implement this method using their own knowledge of line
	            breaking.

	            size, if given, is passed as size argument to the stream's
	            .readline() method.

	        """
	        if size is None:
	            line = self.stream.readline()
	        else:
	            line = self.stream.readline(size)
	        return self.decode(line, self.errors)[0]

	    def readlines(self, sizehint=0):

	        """ Read all lines available on the input stream
	            and return them as list of lines.

	            The data is decoded first and then split using the
	            .splitlines() method of the decoded object; line breaks
	            are included in the list entries.

	            sizehint, if given and positive, is passed as size
	            argument to the stream's .read() method. None, 0 (the
	            default) and negative values mean to read everything the
	            stream returns.

	        """
	        # A size of 0 would make the stream return no data at all, so
	        # it is treated like None: no limit.
	        if sizehint is None or sizehint <= 0:
	            data = self.stream.read()
	        else:
	            data = self.stream.read(sizehint)
	        return self.decode(data, self.errors)[0].splitlines(1)

	    def reset(self):

	        """ Reset the codec buffers used for keeping state.

	            Note that no stream repositioning should take place.
	            This method is primarily intended to be able to recover
	            from decoding errors.

	            The base implementation does nothing.

	        """
	        pass

	    def __getattr__(self, name, getattr=getattr):

	        """ Look up every attribute not found on the reader itself
	            on the underlying stream.
	        """
	        return getattr(self.stream, name)

	###

	class StreamReaderWriter:

	    """ Wrapper for streams which work in both read and write modes.

	        Read requests are forwarded to a reader object and write
	        requests to a writer object, both of which are created around
	        the same stream. The design is such that one can use the
	        factory functions returned by the codecs.lookup() function to
	        construct the instance.

	    """
	    # Optional attribute set by the file wrappers below
	    encoding = 'unknown'

	    def __init__(self, stream, Reader, Writer, errors='strict'):

	        """ Create a StreamReaderWriter instance.

	            stream must be a Stream-like object.

	            Reader, Writer must be factory functions or classes
	            providing the StreamReader, StreamWriter interface resp.
	            Each one is called with the arguments (stream, errors).

	            Error handling is done in the same way as defined for the
	            StreamWriter/Readers.

	        """
	        self.stream = stream
	        self.reader = Reader(stream, errors)
	        self.writer = Writer(stream, errors)
	        self.errors = errors

	    def read(self, size=-1):

	        """ Return the result of the reader's .read(size).
	        """
	        return self.reader.read(size)

	    def readline(self, size=None):

	        """ Return the result of the reader's .readline(size).
	        """
	        return self.reader.readline(size)

	    def readlines(self, sizehint=None):

	        """ Return the result of the reader's .readlines(sizehint).
	        """
	        return self.reader.readlines(sizehint)

	    def write(self, data):

	        """ Return the result of the writer's .write(data).
	        """
	        return self.writer.write(data)

	    def writelines(self, list):

	        """ Return the result of the writer's .writelines(list).
	        """
	        return self.writer.writelines(list)

	    def reset(self):

	        """ Reset the reader first and the writer second.
	        """
	        self.reader.reset()
	        self.writer.reset()

	    def __getattr__(self, name, getattr=getattr):

	        """ Look up every attribute not found on the wrapper itself
	            on the underlying stream.
	        """
	        return getattr(self.stream, name)

	###

	class StreamRecoder:

	    """ StreamRecoder instances provide a frontend - backend
	        view of encoding data.

	        They use the complete set of APIs returned by the
	        codecs.lookup() function to implement their task.

	        Data written to the recoder is first decoded into an
	        intermediate format (which is dependent on the given codec
	        combination) and then written to the stream using an instance
	        of the provided Writer class.

	        In the other direction, data is read from the stream using a
	        Reader instance and then returned to the caller in encoded
	        form.

	    """
	    # Optional attributes set by the file wrappers below
	    data_encoding = 'unknown'
	    file_encoding = 'unknown'

	    def __init__(self, stream, encode, decode, Reader, Writer,
	                 errors='strict'):

	        """ Create a StreamRecoder instance which implements a
	            two-way conversion: encode and decode work on the
	            frontend (the data returned by .read() and the data
	            passed to .write()) while Reader and Writer work on the
	            backend (reading from and writing to the stream).

	            You can use these objects to do transparent direct
	            recodings from e.g. latin-1 to utf-8 and back.

	            stream must be a file-like object.

	            encode, decode must adhere to the Codec interface, Reader,
	            Writer must be factory functions or classes providing the
	            StreamReader, StreamWriter interface resp. Reader and
	            Writer are each called with the arguments (stream,
	            errors).

	            Unicode is used as intermediate encoding.

	            Error handling is done in the same way as defined for the
	            StreamWriter/Readers.

	        """
	        self.stream = stream
	        self.encode = encode
	        self.decode = decode
	        self.reader = Reader(stream, errors)
	        self.writer = Writer(stream, errors)
	        self.errors = errors

	    def read(self, size=-1):

	        """ Read data using the reader's .read(size) and return it
	            encoded with the frontend encode function.
	        """
	        data = self.reader.read(size)
	        data, bytesencoded = self.encode(data, self.errors)
	        return data

	    def readline(self, size=None):

	        """ Read one line using the reader's .readline() and return
	            it encoded with the frontend encode function.

	            size is only passed on to the reader if it is not None.
	        """
	        if size is None:
	            data = self.reader.readline()
	        else:
	            data = self.reader.readline(size)
	        data, bytesencoded = self.encode(data, self.errors)
	        return data

	    def readlines(self, sizehint=None):

	        """ Read data using the reader's .read(), encode it with the
	            frontend encode function and return the result split
	            into a list of lines (line breaks included).

	            sizehint is only passed on to the reader if it is not
	            None.
	        """
	        if sizehint is None:
	            data = self.reader.read()
	        else:
	            data = self.reader.read(sizehint)
	        data, bytesencoded = self.encode(data, self.errors)
	        return data.splitlines(1)

	    def write(self, data):

	        """ Decode data with the frontend decode function and pass
	            the result to the writer's .write(); returns whatever
	            that call returns.
	        """
	        data, bytesdecoded = self.decode(data, self.errors)
	        return self.writer.write(data)

	    def writelines(self, list):

	        """ Concatenate the strings in list, decode the result with
	            the frontend decode function and pass it to the writer's
	            .write(); returns whatever that call returns.
	        """
	        data = ''.join(list)
	        data, bytesdecoded = self.decode(data, self.errors)
	        return self.writer.write(data)

	    def reset(self):

	        """ Reset the reader first and the writer second.
	        """
	        self.reader.reset()
	        self.writer.reset()

	    def __getattr__(self, name, getattr=getattr):

	        """ Look up every attribute not found on the recoder itself
	            on the underlying stream.
	        """
	        return getattr(self.stream, name)

	### Shortcuts

	def open(filename, mode='rb', encoding=None, errors='strict', buffering=1):

	    """ Open an encoded file using the given mode and return
	        a wrapped version providing transparent encoding/decoding.

	        Note: The wrapped version will only accept the object format
	        defined by the codecs, i.e. Unicode objects for most builtin
	        codecs. Output is also codec dependent and will usually be
	        Unicode as well.

	        If an encoding is given, the file is always opened in binary
	        mode, even if no binary mode was specified. This is done to
	        avoid data loss due to encodings using 8-bit values. The
	        default file mode is 'rb' meaning to open the file in binary
	        read mode.

	        encoding specifies the encoding which is to be used for the
	        file. If it is None (the default), the file object returned
	        by the builtin open() is returned unwrapped.

	        errors may be given to define the error handling. It defaults
	        to 'strict' which causes ValueErrors to be raised in case an
	        encoding error occurs.

	        buffering has the same meaning as for the builtin open() API.
	        It defaults to line buffered.

	        The returned wrapped file object provides an extra attribute
	        .encoding which allows querying the used encoding. This
	        attribute is only available if an encoding was specified as
	        parameter.

	        If the codec lookup or the creation of the wrapper fails, the
	        file is closed again before the exception is re-raised.

	    """
	    if encoding is not None and 'b' not in mode:
	        # Force opening of the file in binary mode
	        mode = mode + 'b'
	    file = __builtin__.open(filename, mode, buffering)
	    if encoding is None:
	        return file
	    try:
	        # lookup() returns (encode, decode, Reader, Writer); only the
	        # stream factories are needed here.
	        Reader, Writer = lookup(encoding)[2:]
	        srw = StreamReaderWriter(file, Reader, Writer, errors)
	    except:
	        # Don't leak the already opened file, e.g. when the encoding
	        # is unknown; the original exception is passed on unchanged.
	        file.close()
	        raise
	    # Add attributes to simplify introspection
	    srw.encoding = encoding
	    return srw

	def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'):

	    """ Return a wrapped version of file which provides transparent
	        encoding translation.

	        Strings written to the wrapped file are interpreted according
	        to the given data_encoding and then written to the original
	        file as string using file_encoding. The intermediate encoding
	        will usually be Unicode but depends on the specified codecs.

	        Strings are read from the file using file_encoding and then
	        passed back to the caller as string using data_encoding.

	        If file_encoding is not given, it defaults to data_encoding.

	        errors may be given to define the error handling. It defaults
	        to 'strict' which causes ValueErrors to be raised in case an
	        encoding error occurs.

	        The returned wrapped file object is a StreamRecoder instance.
	        It provides two extra attributes .data_encoding and
	        .file_encoding which reflect the given parameters of the same
	        name. The attributes can be used for introspection by Python
	        programs.

	    """
	    if file_encoding is None:
	        file_encoding = data_encoding
	    # Frontend: the stateless functions of the data codec
	    encode, decode = lookup(data_encoding)[:2]
	    # Backend: the stream factories of the file codec
	    Reader, Writer = lookup(file_encoding)[2:]
	    sr = StreamRecoder(file,
	                       encode, decode, Reader, Writer,
	                       errors)
	    # Add attributes to simplify introspection
	    sr.data_encoding = data_encoding
	    sr.file_encoding = file_encoding
	    return sr

	### Tests

	# Only executed when the module is run as a script: replaces the
	# standard streams of the process with recoding wrappers.
	if __name__ == '__main__':

	    import sys

	    # Strings written to sys.stdout are interpreted as Latin-1 and
	    # written to the original stdout as UTF-8
	    sys.stdout = EncodedFile(sys.stdout, 'latin-1', 'utf-8')

	    # Data read from the original stdin is interpreted as Latin-1 and
	    # passed back to the caller as UTF-8
	    sys.stdin = EncodedFile(sys.stdin, 'utf-8', 'latin-1')