Merge p3yk branch with the trunk up to revision 45595. This breaks a fair number of tests, all because of the codecs/_multibytecodecs issue described here (it's not a Py3K issue, just something Py3K discovers): http://mail.python.org/pipermail/python-dev/2006-April/064051.html. Hye-Shik Chang promised to look for a fix, so there is no need to fix it here. The tests that are expected to break are: test_codecencodings_cn, test_codecencodings_hk, test_codecencodings_jp, test_codecencodings_kr, test_codecencodings_tw, test_codecs, and test_multibytecodec. This merge fixes an actual test failure (test_weakref) in this branch, though, so I believe merging is the right thing to do anyway.

commit: 49fd7fa4431da299196d74087df4a04f99f9c46f [log] [tgz]
author: Thomas Wouters <thomas@python.org> Fri Apr 21 10:40:58 2006 +0000
committer: Thomas Wouters <thomas@python.org> Fri Apr 21 10:40:58 2006 +0000
tree: 35ace5fe78d3d52c7a9ab356ab9f6dbf8d4b71f4
parent: 9ada3d6e29d5165dadacbe6be07bcd35cfbef59d [diff] [blame]
diff --git a/Lib/codecs.py b/Lib/codecs.py
index 28856c7..1518d75 100644
--- a/Lib/codecs.py
+++ b/Lib/codecs.py

@@ -14,8 +14,7 @@
 try:
     from _codecs import *
 except ImportError, why:
-    raise SystemError,\
-          'Failed to load the builtin codecs: %s' % why
+    raise SystemError('Failed to load the builtin codecs: %s' % why)
 
 __all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE",
            "BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
@@ -156,13 +155,13 @@
 
 class IncrementalEncoder(object):
     """
-    A IncrementalEncoder encodes an input in multiple steps. The input can be
+    An IncrementalEncoder encodes an input in multiple steps. The input can be
     passed piece by piece to the encode() method. The IncrementalEncoder remembers
     the state of the Encoding process between calls to encode().
     """
     def __init__(self, errors='strict'):
         """
-        Creates a IncrementalEncoder instance.
+        Creates an IncrementalEncoder instance.
 
         The IncrementalEncoder may use different error handling schemes by
         providing the errors keyword argument. See the module docstring
@@ -182,6 +181,33 @@
         Resets the encoder to the initial state.
         """
 
+class BufferedIncrementalEncoder(IncrementalEncoder):
+    """
+    This subclass of IncrementalEncoder can be used as the baseclass for an
+    incremental encoder if the encoder must keep some of the output in a
+    buffer between calls to encode().
+    """
+    def __init__(self, errors='strict'):
+        IncrementalEncoder.__init__(self, errors)
+        self.buffer = "" # unencoded input that is kept between calls to encode()
+
+    def _buffer_encode(self, input, errors, final):
+        # Overwrite this method in subclasses: It must encode input
+        # and return an (output, length consumed) tuple
+        raise NotImplementedError
+
+    def encode(self, input, final=False):
+        # encode input (taking the buffer into account)
+        data = self.buffer + input
+        (result, consumed) = self._buffer_encode(data, self.errors, final)
+        # keep unencoded input until the next call
+        self.buffer = data[consumed:]
+        return result
+
+    def reset(self):
+        IncrementalEncoder.reset(self)
+        self.buffer = ""
+
 class IncrementalDecoder(object):
     """
     An IncrementalDecoder decodes an input in multiple steps. The input can be
@@ -234,7 +260,7 @@
 
     def reset(self):
         IncrementalDecoder.reset(self)
-        self.bytebuffer = ""
+        self.buffer = ""
 
 #
 # The StreamWriter and StreamReader class provide generic working
commit	49fd7fa4431da299196d74087df4a04f99f9c46f	[log] [tgz]
author	Thomas Wouters <thomas@python.org>	Fri Apr 21 10:40:58 2006 +0000
committer	Thomas Wouters <thomas@python.org>	Fri Apr 21 10:40:58 2006 +0000
tree	35ace5fe78d3d52c7a9ab356ab9f6dbf8d4b71f4
parent	9ada3d6e29d5165dadacbe6be07bcd35cfbef59d [diff] [blame]