Issue 19548: update codecs module documentation

- clarified the distinction between text encodings and other codecs
- clarified relationship with builtin open and the io module
- consolidated documentation of error handlers into one section
- clarified type constraints of some behaviours
- added tests for some of the new statements in the docs

commit: b9fdb7a452c2b6f7a628118b5f695bd061b62cc8 [log] [tgz]
author: Nick Coghlan <ncoghlan@gmail.com> Wed Jan 07 00:22:00 2015 +1000
committer: Nick Coghlan <ncoghlan@gmail.com> Wed Jan 07 00:22:00 2015 +1000
tree: 724c4a77f635adc95e78674ce5f24e79aa391fae
parent: fcfed1991382f9697df574fae4115a9f815adca0 [diff] [blame]
diff --git a/Lib/codecs.py b/Lib/codecs.py
index ba9c073..bca3ef3 100644
--- a/Lib/codecs.py
+++ b/Lib/codecs.py

@@ -346,8 +346,7 @@
 
         """ Creates a StreamWriter instance.
 
-            stream must be a file-like object open for writing
-            (binary) data.
+            stream must be a file-like object open for writing.
 
             The StreamWriter may use different error handling
             schemes by providing the errors keyword argument. These
@@ -421,8 +420,7 @@
 
         """ Creates a StreamReader instance.
 
-            stream must be a file-like object open for reading
-            (binary) data.
+            stream must be a file-like object open for reading.
 
             The StreamReader may use different error handling
             schemes by providing the errors keyword argument. These
@@ -450,13 +448,12 @@
         """ Decodes data from the stream self.stream and returns the
             resulting object.
 
-            chars indicates the number of characters to read from the
-            stream. read() will never return more than chars
-            characters, but it might return less, if there are not enough
-            characters available.
+            chars indicates the number of decoded code points or bytes to
+            return. read() will never return more data than requested,
+            but it might return less, if there is not enough available.
 
-            size indicates the approximate maximum number of bytes to
-            read from the stream for decoding purposes. The decoder
+            size indicates the approximate maximum number of decoded
+            bytes or code points to read for decoding. The decoder
             can modify this setting as appropriate. The default value
             -1 indicates to read and decode as much as possible.  size
             is intended to prevent having to decode huge files in one
@@ -467,7 +464,7 @@
             will be returned, the rest of the input will be kept until the
             next call to read().
 
-            The method should use a greedy read strategy meaning that
+            The method should use a greedy read strategy, meaning that
             it should read as much data as is allowed within the
             definition of the encoding and the given size, e.g.  if
             optional encoding endings or state markers are available
@@ -602,7 +599,7 @@
     def readlines(self, sizehint=None, keepends=True):
 
         """ Read all lines available on the input stream
-            and return them as list of lines.
+            and return them as a list.
 
             Line breaks are implemented using the codec's decoder
             method and are included in the list entries.
@@ -750,19 +747,18 @@
 
 class StreamRecoder:
 
-    """ StreamRecoder instances provide a frontend - backend
-        view of encoding data.
+    """ StreamRecoder instances translate data from one encoding to another.
 
         They use the complete set of APIs returned by the
         codecs.lookup() function to implement their task.
 
-        Data written to the stream is first decoded into an
-        intermediate format (which is dependent on the given codec
-        combination) and then written to the stream using an instance
-        of the provided Writer class.
+        Data written to the StreamRecoder is first decoded into an
+        intermediate format (depending on the "decode" codec) and then
+        written to the underlying stream using an instance of the provided
+        Writer class.
 
-        In the other direction, data is read from the stream using a
-        Reader instance and then return encoded data to the caller.
+        In the other direction, data is read from the underlying stream using
+        a Reader instance and then encoded and returned to the caller.
 
     """
     # Optional attributes set by the file wrappers below
@@ -774,22 +770,17 @@
 
         """ Creates a StreamRecoder instance which implements a two-way
             conversion: encode and decode work on the frontend (the
-            input to .read() and output of .write()) while
-            Reader and Writer work on the backend (reading and
-            writing to the stream).
+            data visible to .read() and .write()) while Reader and Writer
+            work on the backend (the data in stream).
 
-            You can use these objects to do transparent direct
-            recodings from e.g. latin-1 to utf-8 and back.
+            You can use these objects to do transparent
+            transcodings from e.g. latin-1 to utf-8 and back.
 
             stream must be a file-like object.
 
-            encode, decode must adhere to the Codec interface, Reader,
+            encode and decode must adhere to the Codec interface; Reader and
             Writer must be factory functions or classes providing the
-            StreamReader, StreamWriter interface resp.
-
-            encode and decode are needed for the frontend translation,
-            Reader and Writer for the backend translation. Unicode is
-            used as intermediate encoding.
+            StreamReader and StreamWriter interfaces resp.
 
             Error handling is done in the same way as defined for the
             StreamWriter/Readers.
@@ -864,7 +855,7 @@
 
 ### Shortcuts
 
-def open(filename, mode='rb', encoding=None, errors='strict', buffering=1):
+def open(filename, mode='r', encoding=None, errors='strict', buffering=1):
 
     """ Open an encoded file using the given mode and return
         a wrapped version providing transparent encoding/decoding.
@@ -874,10 +865,8 @@
         codecs. Output is also codec dependent and will usually be
         Unicode as well.
 
-        Files are always opened in binary mode, even if no binary mode
-        was specified. This is done to avoid data loss due to encodings
-        using 8-bit values. The default file mode is 'rb' meaning to
-        open the file in binary read mode.
+        Underlying encoded files are always opened in binary mode.
+        The default file mode is 'r', meaning to open the file in read mode.
 
         encoding specifies the encoding which is to be used for the
         file.
@@ -913,13 +902,13 @@
     """ Return a wrapped version of file which provides transparent
         encoding translation.
 
-        Strings written to the wrapped file are interpreted according
-        to the given data_encoding and then written to the original
-        file as string using file_encoding. The intermediate encoding
+        Data written to the wrapped file is decoded according
+        to the given data_encoding and then encoded to the underlying
+        file using file_encoding. The intermediate data type
         will usually be Unicode but depends on the specified codecs.
 
-        Strings are read from the file using file_encoding and then
-        passed back to the caller as string using data_encoding.
+        Bytes read from the file are decoded using file_encoding and then
+        passed back to the caller encoded using data_encoding.
 
         If file_encoding is not given, it defaults to data_encoding.
commit	b9fdb7a452c2b6f7a628118b5f695bd061b62cc8	[log] [tgz]
author	Nick Coghlan <ncoghlan@gmail.com>	Wed Jan 07 00:22:00 2015 +1000
committer	Nick Coghlan <ncoghlan@gmail.com>	Wed Jan 07 00:22:00 2015 +1000
tree	724c4a77f635adc95e78674ce5f24e79aa391fae
parent	fcfed1991382f9697df574fae4115a9f815adca0 [diff] [blame]