Patch 1137: allow assigning to .buffer_size attribute of PyExpat.parser objects
diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst
index a4cc1d2..9a0f914 100644
--- a/Doc/library/pyexpat.rst
+++ b/Doc/library/pyexpat.rst
@@ -182,11 +182,15 @@
 
 .. attribute:: xmlparser.buffer_size
 
-   The size of the buffer used when :attr:`buffer_text` is true.  This value cannot
-   be changed at this time.
+   The size of the buffer used when :attr:`buffer_text` is true.
+   A new buffer size can be set by assigning a new integer value
+   greater than zero to this attribute.
+   When the size is changed, the buffer will be flushed.
 
    .. versionadded:: 2.3
 
+   .. versionchanged:: 2.6
+      The buffer size can now be changed.
 
 .. attribute:: xmlparser.buffer_text
 
diff --git a/Doc/whatsnew/2.6.rst b/Doc/whatsnew/2.6.rst
index fee298d..4d90d35 100644
--- a/Doc/whatsnew/2.6.rst
+++ b/Doc/whatsnew/2.6.rst
@@ -875,6 +875,13 @@
   changed and :const:`UF_APPEND` to indicate that data can only be appended to the
   file.  (Contributed by M. Levinson.)
 
+* The :mod:`pyexpat` module's :class:`xmlparser` objects now allow setting
+  their :attr:`buffer_size` attribute to change the size of the buffer
+  used to hold character data.
+  (Contributed by Achim Gaedke.)
+
+  .. Patch 1137
+
 * The :mod:`random` module's :class:`Random` objects can
   now be pickled on a 32-bit system and unpickled on a 64-bit
   system, and vice versa.  Unfortunately, this change also means
diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py
index 69cf08c..de5cded 100644
--- a/Lib/test/test_pyexpat.py
+++ b/Lib/test/test_pyexpat.py
@@ -1,7 +1,7 @@
 # XXX TypeErrors on calling handlers, or on bad return values from a
 # handler, are obscure and unhelpful.
 
-import StringIO
+import StringIO, sys
 import unittest
 
 import pyexpat
@@ -434,6 +434,131 @@
 
         self.assertRaises(Exception, parser.Parse, xml)
 
+class ChardataBufferTest(unittest.TestCase):
+    """
+    test setting of chardata buffer size
+    """
+
+    def test_1025_bytes(self):
+        self.assertEquals(self.small_buffer_test(1025), 2)
+
+    def test_1000_bytes(self):
+        self.assertEquals(self.small_buffer_test(1000), 1)
+
+    def test_wrong_size(self):
+        parser = expat.ParserCreate()
+        parser.buffer_text = 1
+        def f(size):
+            parser.buffer_size = size
+
+        self.assertRaises(TypeError, f, sys.maxint+1)
+        self.assertRaises(ValueError, f, -1)
+        self.assertRaises(ValueError, f, 0)
+
+    def test_unchanged_size(self):
+        xml1 = ("<?xml version='1.0' encoding='iso8859'?><s>%s" % ('a' * 512))
+        xml2 = 'a'*512 + '</s>'
+        parser = expat.ParserCreate()
+        parser.CharacterDataHandler = self.counting_handler
+        parser.buffer_size = 512
+        parser.buffer_text = 1
+
+        # Feed 512 bytes of character data: the handler should be called
+        # once.
+        self.n = 0
+        parser.Parse(xml1)
+        self.assertEquals(self.n, 1)
+
+        # Reassign to buffer_size, but assign the same size.
+        parser.buffer_size = parser.buffer_size
+        self.assertEquals(self.n, 1)
+
+        # Try parsing rest of the document
+        parser.Parse(xml2)
+        self.assertEquals(self.n, 2)
+
+
+    def test_disabling_buffer(self):
+        xml1 = "<?xml version='1.0' encoding='iso8859'?><a>%s" % ('a' * 512)
+        xml2 = ('b' * 1024)
+        xml3 = "%s</a>" % ('c' * 1024)
+        parser = expat.ParserCreate()
+        parser.CharacterDataHandler = self.counting_handler
+        parser.buffer_text = 1
+        parser.buffer_size = 1024
+        self.assertEquals(parser.buffer_size, 1024)
+
+        # Parse one chunk of XML
+        self.n = 0
+        parser.Parse(xml1, 0)
+        self.assertEquals(parser.buffer_size, 1024)
+        self.assertEquals(self.n, 1)
+
+        # Turn off buffering and parse the next chunk.
+        parser.buffer_text = 0
+        self.assertFalse(parser.buffer_text)
+        self.assertEquals(parser.buffer_size, 1024)
+        for i in range(10):
+            parser.Parse(xml2, 0)
+        self.assertEquals(self.n, 11)
+
+        parser.buffer_text = 1
+        self.assertTrue(parser.buffer_text)
+        self.assertEquals(parser.buffer_size, 1024)
+        parser.Parse(xml3, 1)
+        self.assertEquals(self.n, 12)
+
+
+
+    def make_document(self, bytes):
+        return ("<?xml version='1.0'?><tag>" + bytes * 'a' + '</tag>')
+
+    def counting_handler(self, text):
+        self.n += 1
+
+    def small_buffer_test(self, buffer_len):
+        xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * buffer_len)
+        parser = expat.ParserCreate()
+        parser.CharacterDataHandler = self.counting_handler
+        parser.buffer_size = 1024
+        parser.buffer_text = 1
+
+        self.n = 0
+        parser.Parse(xml)
+        return self.n
+
+    def test_change_size_1(self):
+        xml1 = "<?xml version='1.0' encoding='iso8859'?><a><s>%s" % ('a' * 1024)
+        xml2 = "aaa</s><s>%s</s></a>" % ('a' * 1025)
+        parser = expat.ParserCreate()
+        parser.CharacterDataHandler = self.counting_handler
+        parser.buffer_text = 1
+        parser.buffer_size = 1024
+        self.assertEquals(parser.buffer_size, 1024)
+
+        self.n = 0
+        parser.Parse(xml1, 0)
+        parser.buffer_size *= 2
+        self.assertEquals(parser.buffer_size, 2048)
+        parser.Parse(xml2, 1)
+        self.assertEquals(self.n, 2)
+
+    def test_change_size_2(self):
+        xml1 = "<?xml version='1.0' encoding='iso8859'?><a>a<s>%s" % ('a' * 1023)
+        xml2 = "aaa</s><s>%s</s></a>" % ('a' * 1025)
+        parser = expat.ParserCreate()
+        parser.CharacterDataHandler = self.counting_handler
+        parser.buffer_text = 1
+        parser.buffer_size = 2048
+        self.assertEquals(parser.buffer_size, 2048)
+
+        self.n=0
+        parser.Parse(xml1, 0)
+        parser.buffer_size /= 2
+        self.assertEquals(parser.buffer_size, 1024)
+        parser.Parse(xml2, 1)
+        self.assertEquals(self.n, 4)
+
 
 def test_main():
     run_unittest(SetAttributeTest,
@@ -443,7 +568,8 @@
                  BufferTextTest,
                  HandlerExceptionTest,
                  PositionTest,
-                 sf1296433Test)
+                 sf1296433Test,
+                 ChardataBufferTest)
 
 if __name__ == "__main__":
     test_main()
diff --git a/Misc/ACKS b/Misc/ACKS
index 268aac9..2ae4528 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -225,6 +225,7 @@
 Peter Funk
 Geoff Furnish
 Ulisses Furquim
+Achim Gaedke
 Lele Gaifax
 Santiago Gala
 Yitzchak Gale
diff --git a/Misc/NEWS b/Misc/NEWS
index 0a01534..bcaf14f 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -348,6 +348,9 @@
 Library
 -------
 
+- pyexpat, patch #1137: allow setting buffer_size attribute 
+  on Parser objects to set the character data buffer size.
+
 - Issue #1757: The hash of a Decimal instance is no longer affected by
   the current context.
 
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index 2d2c4c5..01971b7 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -1649,6 +1649,50 @@
             self->specified_attributes = 0;
         return 0;
     }
+
+    /* buffer_size: replace the character data buffer with one of the
+     * requested size, flushing any text already collected in it. */
+    if (strcmp(name, "buffer_size") == 0) {
+        long new_buffer_size;
+        void *new_buffer;
+
+        if (!PyInt_Check(v)) {
+            PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
+            return -1;
+        }
+        new_buffer_size = PyInt_AS_LONG(v);
+        /* trivial case -- no change */
+        if (new_buffer_size == self->buffer_size)
+            return 0;
+        if (new_buffer_size <= 0) {
+            PyErr_SetString(PyExc_ValueError,
+                            "buffer_size must be greater than zero");
+            return -1;
+        }
+        /* check maximum */
+        if (new_buffer_size > INT_MAX) {
+            PyErr_Format(PyExc_ValueError,
+                         "buffer_size must not be greater than %i", INT_MAX);
+            return -1;
+        }
+
+        /* Flush pending text through the handler before the buffer goes
+         * away; if the handler failed, report that to the caller. */
+        if (self->buffer != NULL && self->buffer_used != 0 &&
+            flush_character_buffer(self) < 0)
+            return -1;
+        /* Allocate before freeing so a failed malloc keeps the old buffer. */
+        new_buffer = malloc(new_buffer_size);
+        if (new_buffer == NULL) {
+            PyErr_NoMemory();
+            return -1;
+        }
+        free(self->buffer);
+        self->buffer = new_buffer;
+        self->buffer_size = new_buffer_size;
+        return 0;
+    }
+
     if (strcmp(name, "CharacterDataHandler") == 0) {
         /* If we're changing the character data handler, flush all
          * cached data with the old handler.  Not sure there's a