Branch merge
diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst
index 2fd5145..c7252ed 100644
--- a/Doc/c-api/exceptions.rst
+++ b/Doc/c-api/exceptions.rst
@@ -525,7 +525,7 @@
Marks a point where a recursive C-level call is about to be performed.
- If :const:`USE_STACKCHECK` is defined, this function checks if the the OS
+ If :const:`USE_STACKCHECK` is defined, this function checks if the OS
stack overflowed using :c:func:`PyOS_CheckStack`. If this is the case, it
sets a :exc:`MemoryError` and returns a nonzero value.
diff --git a/Doc/c-api/method.rst b/Doc/c-api/method.rst
index 27f9576..acc81e4 100644
--- a/Doc/c-api/method.rst
+++ b/Doc/c-api/method.rst
@@ -27,7 +27,7 @@
.. c:function:: PyObject* PyInstanceMethod_New(PyObject *func)
Return a new instance method object, with *func* being any callable object
- *func* is is the function that will be called when the instance method is
+ *func* is the function that will be called when the instance method is
called.
@@ -70,7 +70,7 @@
.. c:function:: PyObject* PyMethod_New(PyObject *func, PyObject *self)
Return a new method object, with *func* being any callable object and *self*
- the instance the method should be bound. *func* is is the function that will
+ the instance the method should be bound. *func* is the function that will
be called when the method is called. *self* must not be *NULL*.
diff --git a/Doc/distutils/apiref.rst b/Doc/distutils/apiref.rst
index 091cba1..97f1bed 100644
--- a/Doc/distutils/apiref.rst
+++ b/Doc/distutils/apiref.rst
@@ -1744,7 +1744,7 @@
Set final values for all the options that this command supports. This is
always called as late as possible, ie. after any option assignments from the
command-line or from other commands have been done. Thus, this is the place
- to to code option dependencies: if *foo* depends on *bar*, then it is safe to
+ to code option dependencies: if *foo* depends on *bar*, then it is safe to
set *foo* from *bar* as long as *foo* still has the same value it was
assigned in :meth:`initialize_options`.
diff --git a/Doc/howto/logging-cookbook.rst b/Doc/howto/logging-cookbook.rst
index c201e87..0bd7bb3 100644
--- a/Doc/howto/logging-cookbook.rst
+++ b/Doc/howto/logging-cookbook.rst
@@ -960,7 +960,7 @@
``.1``. Each of the existing backup files is renamed to increment the suffix
(``.1`` becomes ``.2``, etc.) and the ``.6`` file is erased.
-Obviously this example sets the log length much much too small as an extreme
+Obviously this example sets the log length much too small as an extreme
example. You would want to set *maxBytes* to an appropriate value.
.. _zeromq-handlers:
diff --git a/Doc/howto/pyporting.rst b/Doc/howto/pyporting.rst
index 124ef33..309f3f7 100644
--- a/Doc/howto/pyporting.rst
+++ b/Doc/howto/pyporting.rst
@@ -328,7 +328,7 @@
textual data, people have over the years been rather loose in their delineation
of what ``str`` instances held text compared to bytes. In Python 3 you cannot
be so care-free anymore and need to properly handle the difference. The key
-handling this issue to to make sure that **every** string literal in your
+to handling this issue is to make sure that **every** string literal in your
Python 2 code is either syntactically or functionally marked as either bytes or
text data. After this is done you then need to make sure your APIs are designed
to either handle a specific type or made to be properly polymorphic.
diff --git a/Doc/howto/webservers.rst b/Doc/howto/webservers.rst
index c4ac2b2..74cdf4b 100644
--- a/Doc/howto/webservers.rst
+++ b/Doc/howto/webservers.rst
@@ -264,7 +264,7 @@
* `FastCGI, SCGI, and Apache: Background and Future
<http://www.vmunix.com/mark/blog/archives/2006/01/02/fastcgi-scgi-and-apache-background-and-future/>`_
- is a discussion on why the concept of FastCGI and SCGI is better that that
+ is a discussion on why the concept of FastCGI and SCGI is better than that
of mod_python.
diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst
index 5a211e1..7c7319e 100644
--- a/Doc/library/argparse.rst
+++ b/Doc/library/argparse.rst
@@ -443,7 +443,7 @@
--foo FOO FOO! (default: 42)
:class:`MetavarTypeHelpFormatter` uses the name of the type_ argument for each
-argument as as the display name for its values (rather than using the dest_
+argument as the display name for its values (rather than using the dest_
as the regular formatter does)::
>>> parser = argparse.ArgumentParser(
diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst
index 84593f2..2a7abf9 100644
--- a/Doc/library/codecs.rst
+++ b/Doc/library/codecs.rst
@@ -1280,12 +1280,13 @@
.. module:: encodings.mbcs
:synopsis: Windows ANSI codepage
-Encode operand according to the ANSI codepage (CP_ACP). This codec only
-supports ``'strict'`` and ``'replace'`` error handlers to encode, and
-``'strict'`` and ``'ignore'`` error handlers to decode.
+Encode operand according to the ANSI codepage (CP_ACP).
Availability: Windows only.
+.. versionchanged:: 3.3
+ Support any error handler.
+
.. versionchanged:: 3.2
Before 3.2, the *errors* argument was ignored; ``'replace'`` was always used
to encode, and ``'ignore'`` to decode.
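For a sense of what the relaxed error-handler support looks like after this change, a minimal sketch (Windows only; the sample characters are illustrative and the exact results depend on the active ANSI code page)::

    # Python 3.3+, Windows only: "mbcs" maps to the ANSI code page (CP_ACP).
    data = "caf\u00e9 \u20ac".encode("mbcs", "replace")      # unencodable characters become '?'
    text = b"caf\xe9".decode("mbcs", "ignore")               # undecodable bytes are dropped
    alt  = "\u20ac".encode("mbcs", "backslashreplace")       # handlers beyond replace/ignore now work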
diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst
index 768519e..f4edced 100644
--- a/Doc/library/collections.rst
+++ b/Doc/library/collections.rst
@@ -317,7 +317,7 @@
* The multiset methods are designed only for use cases with positive values.
The inputs may be negative or zero, but only outputs with positive values
are created. There are no type restrictions, but the value type needs to
- support support addition, subtraction, and comparison.
+ support addition, subtraction, and comparison.
* The :meth:`elements` method requires integer counts. It ignores zero and
negative counts.
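As a quick illustration of the positive-only outputs (a minimal sketch)::

    >>> from collections import Counter
    >>> Counter(a=3, b=1) - Counter(a=1, b=2)   # b would be -1, so it is dropped
    Counter({'a': 2})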
diff --git a/Doc/library/concurrent.futures.rst b/Doc/library/concurrent.futures.rst
index f401f93..a88f10c 100644
--- a/Doc/library/concurrent.futures.rst
+++ b/Doc/library/concurrent.futures.rst
@@ -14,7 +14,7 @@
The :mod:`concurrent.futures` module provides a high-level interface for
asynchronously executing callables.
-The asynchronous execution can be be performed with threads, using
+The asynchronous execution can be performed with threads, using
:class:`ThreadPoolExecutor`, or separate processes, using
:class:`ProcessPoolExecutor`. Both implement the same interface, which is
defined by the abstract :class:`Executor` class.
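A minimal sketch of the shared interface (the callable and URL here are purely illustrative)::

    from concurrent.futures import ThreadPoolExecutor

    def fetch_length(url):
        return len(url)          # stand-in for real I/O-bound work

    with ThreadPoolExecutor(max_workers=4) as executor:
        future = executor.submit(fetch_length, 'http://www.python.org')
        print(future.result())

    # Swapping in ProcessPoolExecutor gives the same API, backed by processes.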
diff --git a/Doc/library/ctypes.rst b/Doc/library/ctypes.rst
index 01692fc..8a22e94 100644
--- a/Doc/library/ctypes.rst
+++ b/Doc/library/ctypes.rst
@@ -1966,7 +1966,7 @@
.. function:: string_at(address, size=-1)
- This function returns the C string starting at memory address address as a bytes
+ This function returns the C string starting at memory address *address* as a bytes
object. If size is specified, it is used as size, otherwise the string is assumed
to be zero-terminated.
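For example, reading a zero-terminated buffer back as bytes (a small sketch)::

    >>> from ctypes import create_string_buffer, string_at, addressof
    >>> buf = create_string_buffer(b"hello")
    >>> string_at(addressof(buf))       # reads up to the terminating NUL
    b'hello'
    >>> string_at(addressof(buf), 3)    # explicit size
    b'hel'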
diff --git a/Doc/library/email.message.rst b/Doc/library/email.message.rst
index 1e6a485..3e63258 100644
--- a/Doc/library/email.message.rst
+++ b/Doc/library/email.message.rst
@@ -291,7 +291,7 @@
Content-Disposition: attachment; filename="bud.gif"
- An example with with non-ASCII characters::
+ An example with non-ASCII characters::
msg.add_header('Content-Disposition', 'attachment',
filename=('iso-8859-1', '', 'Fußballer.ppt'))
diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst
index 22e2468..886cb82 100644
--- a/Doc/library/functions.rst
+++ b/Doc/library/functions.rst
@@ -828,7 +828,7 @@
.. note::
Python doesn't depend on the underlying operating system's notion of text
- files; all the the processing is done by Python itself, and is therefore
+ files; all the processing is done by Python itself, and is therefore
platform-independent.
*buffering* is an optional integer used to set the buffering policy. Pass 0
diff --git a/Doc/library/http.client.rst b/Doc/library/http.client.rst
index 9b1ab0f..c1ce15b 100644
--- a/Doc/library/http.client.rst
+++ b/Doc/library/http.client.rst
@@ -435,7 +435,7 @@
Set the host and the port for HTTP Connect Tunnelling. Normally used when it
is required to use a HTTPS Connection through a proxy server.
- The headers argument should be a mapping of extra HTTP headers to to sent
+ The headers argument should be a mapping of extra HTTP headers to send
with the CONNECT request.
.. versionadded:: 3.2
diff --git a/Doc/library/mailbox.rst b/Doc/library/mailbox.rst
index ff8cfea..83a590e 100644
--- a/Doc/library/mailbox.rst
+++ b/Doc/library/mailbox.rst
@@ -780,7 +780,7 @@
There is no requirement that :class:`Message` instances be used to represent
messages retrieved using :class:`Mailbox` instances. In some situations, the
time and memory required to generate :class:`Message` representations might
- not not acceptable. For such situations, :class:`Mailbox` instances also
+ not be acceptable. For such situations, :class:`Mailbox` instances also
offer string and file-like representations, and a custom message factory may
be specified when a :class:`Mailbox` instance is initialized.
diff --git a/Doc/library/mmap.rst b/Doc/library/mmap.rst
index 1598cb8..1a19a7e 100644
--- a/Doc/library/mmap.rst
+++ b/Doc/library/mmap.rst
@@ -263,7 +263,7 @@
.. method:: write_byte(byte)
- Write the the integer *byte* into memory at the current
+ Write the integer *byte* into memory at the current
position of the file pointer; the file position is advanced by ``1``. If
the mmap was created with :const:`ACCESS_READ`, then writing to it will
raise a :exc:`TypeError` exception.
diff --git a/Doc/library/multiprocessing.rst b/Doc/library/multiprocessing.rst
index 5df9851..851b3cf 100644
--- a/Doc/library/multiprocessing.rst
+++ b/Doc/library/multiprocessing.rst
@@ -1518,7 +1518,7 @@
a new shared object -- see documentation for the *method_to_typeid*
argument of :meth:`BaseManager.register`.
- If an exception is raised by the call, then then is re-raised by
+ If an exception is raised by the call, then it is re-raised by
:meth:`_callmethod`. If some other exception is raised in the manager's
process then this is converted into a :exc:`RemoteError` exception and is
raised by :meth:`_callmethod`.
@@ -1655,7 +1655,7 @@
The *chunksize* argument is the same as the one used by the :meth:`.map`
method. For very long iterables using a large value for *chunksize* can
- make make the job complete **much** faster than using the default value of
+ make the job complete **much** faster than using the default value of
``1``.
Also if *chunksize* is ``1`` then the :meth:`!next` method of the iterator
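A hedged sketch of the effect (the worker function and the sizes are illustrative)::

    from multiprocessing import Pool

    def square(x):
        return x * x

    if __name__ == '__main__':
        pool = Pool(4)
        try:
            # A large chunksize amortizes the per-item IPC overhead.
            results = list(pool.imap(square, range(100000), chunksize=1000))
        finally:
            pool.close()
            pool.join()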
diff --git a/Doc/library/os.rst b/Doc/library/os.rst
index fe7e705..6adfdb1 100644
--- a/Doc/library/os.rst
+++ b/Doc/library/os.rst
@@ -1325,7 +1325,7 @@
.. function:: writev(fd, buffers)
- Write the the contents of *buffers* to file descriptor *fd*, where *buffers*
+ Write the contents of *buffers* to file descriptor *fd*, where *buffers*
is an arbitrary sequence of buffers.
Returns the total number of bytes written.
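A short sketch of the call (POSIX only; the file name is illustrative)::

    import os

    fd = os.open("scratch.bin", os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
    try:
        written = os.writev(fd, [b"spam", b" and ", b"eggs"])   # total bytes written
    finally:
        os.close(fd)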
diff --git a/Doc/library/packaging.command.rst b/Doc/library/packaging.command.rst
index 98835c0..6a85351 100644
--- a/Doc/library/packaging.command.rst
+++ b/Doc/library/packaging.command.rst
@@ -74,7 +74,7 @@
Set final values for all the options that this command supports. This is
always called as late as possible, i.e. after any option assignments from the
command line or from other commands have been done. Thus, this is the place
- to to code option dependencies: if *foo* depends on *bar*, then it is safe to
+ to code option dependencies: if *foo* depends on *bar*, then it is safe to
set *foo* from *bar* as long as *foo* still has the same value it was
assigned in :meth:`initialize_options`.
diff --git a/Doc/library/packaging.install.rst b/Doc/library/packaging.install.rst
index b619a98..3e00750 100644
--- a/Doc/library/packaging.install.rst
+++ b/Doc/library/packaging.install.rst
@@ -32,7 +32,7 @@
prefer_final=True)
Return information about what's going to be installed and upgraded.
- *requirements* is a string string containing the requirements for this
+ *requirements* is a string containing the requirements for this
project, for example ``'FooBar 1.1'`` or ``'BarBaz (<1.2)'``.
.. XXX are requirements comma-separated?
diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst
index 299be03..f3acdc5 100644
--- a/Doc/library/sqlite3.rst
+++ b/Doc/library/sqlite3.rst
@@ -243,7 +243,7 @@
.. method:: Connection.commit()
This method commits the current transaction. If you don't call this method,
- anything you did since the last call to ``commit()`` is not visible from from
+ anything you did since the last call to ``commit()`` is not visible from
other database connections. If you wonder why you don't see the data you've
written to the database, please check you didn't forget to call this method.
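A minimal sketch of that visibility rule (the database file name is illustrative)::

    import sqlite3

    con = sqlite3.connect('example.db')
    con.execute("CREATE TABLE IF NOT EXISTS log (msg)")
    con.execute("INSERT INTO log VALUES ('hello')")
    con.commit()    # without this, a second connection would not see the new row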
diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst
index 559921a..783d544 100644
--- a/Doc/library/stdtypes.rst
+++ b/Doc/library/stdtypes.rst
@@ -1121,7 +1121,7 @@
characters and there is at least one character, false
otherwise. Decimal characters are those from general category "Nd". This category
includes digit characters, and all characters
- that that can be used to form decimal-radix numbers, e.g. U+0660,
+ that can be used to form decimal-radix numbers, e.g. U+0660,
ARABIC-INDIC DIGIT ZERO.
diff --git a/Doc/library/string.rst b/Doc/library/string.rst
index 78f2b4d..56a2a34 100644
--- a/Doc/library/string.rst
+++ b/Doc/library/string.rst
@@ -211,7 +211,7 @@
See also the :ref:`formatspec` section.
-The *field_name* itself begins with an *arg_name* that is either either a number or a
+The *field_name* itself begins with an *arg_name* that is either a number or a
keyword. If it's a number, it refers to a positional argument, and if it's a keyword,
it refers to a named keyword argument. If the numerical arg_names in a format string
are 0, 1, 2, ... in sequence, they can all be omitted (not just some)
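For instance (a small sketch)::

    >>> '{0}, {1}, {0}'.format('spam', 'eggs')       # numeric arg_names
    'spam, eggs, spam'
    >>> '{}, {}'.format('spam', 'eggs')              # all indices omitted
    'spam, eggs'
    >>> '{food} is {mood}'.format(food='spam', mood='lovely')
    'spam is lovely'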
diff --git a/Doc/library/threading.rst b/Doc/library/threading.rst
index 504a2fb..66b3501 100644
--- a/Doc/library/threading.rst
+++ b/Doc/library/threading.rst
@@ -886,7 +886,7 @@
Pass the barrier. When all the threads party to the barrier have called
this function, they are all released simultaneously. If a *timeout* is
- provided, is is used in preference to any that was supplied to the class
+ provided, it is used in preference to any that was supplied to the class
constructor.
The return value is an integer in the range 0 to *parties* -- 1, different
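A minimal sketch of the timeout precedence (the thread bodies are illustrative)::

    import threading

    barrier = threading.Barrier(2, timeout=10)

    def worker():
        # This per-call timeout takes precedence over the constructor's 10 s.
        index = barrier.wait(timeout=5)
        print('released with index', index)

    threads = [threading.Thread(target=worker) for _ in range(2)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()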
diff --git a/Doc/library/tkinter.ttk.rst b/Doc/library/tkinter.ttk.rst
index 7bf39b3..f70d163 100644
--- a/Doc/library/tkinter.ttk.rst
+++ b/Doc/library/tkinter.ttk.rst
@@ -1240,7 +1240,7 @@
*layoutspec*, if specified, is expected to be a list or some other
sequence type (excluding strings), where each item should be a tuple and
the first item is the layout name and the second item should have the
- format described described in `Layouts`_.
+ format described in `Layouts`_.
To understand the format, see the following example (it is not
intended to do anything useful)::
diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst
index ecb357e..b69b3b3 100644
--- a/Doc/library/urllib.request.rst
+++ b/Doc/library/urllib.request.rst
@@ -177,7 +177,7 @@
*method* should be a string that indicates the HTTP request method that
will be used (e.g. ``'HEAD'``). Its value is stored in the
- :attr:`Request.method` attribute and is used by :meth:`Request.get_method()`.
+ :attr:`~Request.method` attribute and is used by :meth:`get_method()`.
.. versionchanged:: 3.3
:attr:`Request.method` argument is added to the Request class.
@@ -379,11 +379,11 @@
.. attribute:: Request.method
The HTTP request method to use. This value is used by
- :meth:`Request.get_method` to override the computed HTTP request
- method that would otherwise be returned. This attribute is
- initialized with the value of the *method* argument passed to the constructor.
+ :meth:`~Request.get_method` to override the computed HTTP request
+ method that would otherwise be returned. This attribute is initialized with
+ the value of the *method* argument passed to the constructor.
- ..versionadded:: 3.3
+ .. versionadded:: 3.3
.. method:: Request.add_data(data)
@@ -399,8 +399,8 @@
``'GET'`` if :attr:`Request.data` is ``None``, or ``'POST'`` if it's not.
This is only meaningful for HTTP requests.
- .. versionchanged:: 3.3
- get_method now looks at the value of :attr:`Request.method` first.
+ .. versionchanged:: 3.3
+ get_method now looks at the value of :attr:`Request.method`.
.. method:: Request.has_data()
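A short sketch of how the *method* argument and :meth:`get_method` interact (Python 3.3, after this change)::

    >>> from urllib.request import Request
    >>> Request('http://www.python.org', method='HEAD').get_method()
    'HEAD'
    >>> Request('http://www.python.org', data=b'payload').get_method()
    'POST'
    >>> Request('http://www.python.org').get_method()
    'GET'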
diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst
index 019a894..6f84bcc 100644
--- a/Doc/library/zipfile.rst
+++ b/Doc/library/zipfile.rst
@@ -398,7 +398,7 @@
+-------+--------------------------+
| Index | Value |
+=======+==========================+
- | ``0`` | Year |
+ | ``0`` | Year (>= 1980) |
+-------+--------------------------+
| ``1`` | Month (one-based) |
+-------+--------------------------+
@@ -411,6 +411,10 @@
| ``5`` | Seconds (zero-based) |
+-------+--------------------------+
+ .. note::
+
+ The ZIP file format does not support timestamps before 1980.
+
.. attribute:: ZipInfo.compress_type
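A small sketch of the new lower bound (the file names are illustrative)::

    >>> import zipfile
    >>> zipfile.ZipInfo('ok.txt', date_time=(1984, 1, 24, 12, 0, 0)).date_time
    (1984, 1, 24, 12, 0, 0)
    >>> zipfile.ZipInfo('too-old.txt', date_time=(1979, 12, 31, 23, 59, 59))
    Traceback (most recent call last):
      ...
    ValueError: ZIP does not support timestamps before 1980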
diff --git a/Doc/reference/compound_stmts.rst b/Doc/reference/compound_stmts.rst
index 8151a0a..e7a6f18 100644
--- a/Doc/reference/compound_stmts.rst
+++ b/Doc/reference/compound_stmts.rst
@@ -478,7 +478,7 @@
**Default parameter values are evaluated when the function definition is
executed.** This means that the expression is evaluated once, when the function
-is defined, and that that same "pre-computed" value is used for each call. This
+is defined, and that the same "pre-computed" value is used for each call. This
is especially important to understand when a default parameter is a mutable
object, such as a list or a dictionary: if the function modifies the object
(e.g. by appending an item to a list), the default value is in effect modified.
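The classic illustration of the pitfall::

    def append_to(item, seq=[]):    # the list is created once, when 'def' runs
        seq.append(item)
        return seq

    append_to(1)    # [1]
    append_to(2)    # [1, 2] -- the same list object is reused across calls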
diff --git a/Doc/reference/toplevel_components.rst b/Doc/reference/toplevel_components.rst
index 21f801c..f4bc71f 100644
--- a/Doc/reference/toplevel_components.rst
+++ b/Doc/reference/toplevel_components.rst
@@ -111,6 +111,6 @@
single: input; raw
single: readline() (file method)
-Note: to read 'raw' input line without interpretation, you can use the the
+Note: to read 'raw' input line without interpretation, you can use the
:meth:`readline` method of file objects, including ``sys.stdin``.
diff --git a/Doc/tutorial/floatingpoint.rst b/Doc/tutorial/floatingpoint.rst
index 863fb28..9c3c143 100644
--- a/Doc/tutorial/floatingpoint.rst
+++ b/Doc/tutorial/floatingpoint.rst
@@ -92,7 +92,7 @@
(although some languages may not *display* the difference by default, or in all
output modes).
-For more pleasant output, you may may wish to use string formatting to produce a limited number of significant digits::
+For more pleasant output, you may wish to use string formatting to produce a limited number of significant digits::
>>> format(math.pi, '.12g') # give 12 significant digits
'3.14159265359'
diff --git a/Doc/whatsnew/2.4.rst b/Doc/whatsnew/2.4.rst
index c52b5fb..d94e66f 100644
--- a/Doc/whatsnew/2.4.rst
+++ b/Doc/whatsnew/2.4.rst
@@ -947,7 +947,7 @@
:meth:`__len__` method. (Contributed by Raymond Hettinger.)
* The methods :meth:`list.__getitem__`, :meth:`dict.__getitem__`, and
- :meth:`dict.__contains__` are are now implemented as :class:`method_descriptor`
+ :meth:`dict.__contains__` are now implemented as :class:`method_descriptor`
objects rather than :class:`wrapper_descriptor` objects. This form of access
doubles their performance and makes them more suitable for use as arguments to
functionals: ``map(mydict.__getitem__, keylist)``. (Contributed by Raymond
diff --git a/Doc/whatsnew/3.3.rst b/Doc/whatsnew/3.3.rst
index 945aa97..b433c6c 100644
--- a/Doc/whatsnew/3.3.rst
+++ b/Doc/whatsnew/3.3.rst
@@ -197,6 +197,11 @@
codecs
------
+The :mod:`~encodings.mbcs` codec has been rewritten to correctly handle the
+``replace`` and ``ignore`` error handlers on all Windows versions.  The
+:mod:`~encodings.mbcs` codec now supports all error handlers, instead of only
+``replace`` to encode and ``ignore`` to decode.
+
Multibyte CJK decoders now resynchronize faster. They only ignore the first
byte of an invalid byte sequence. For example, ``b'\xff\n'.decode('gb2312',
'replace')`` now returns a ``\n`` after the replacement character.
@@ -479,7 +484,7 @@
The :class:`~urllib.request.Request` class now accepts a *method* argument
used by :meth:`~urllib.request.Request.get_method` to determine what HTTP method
-should be used. For example, this will send an ``'HEAD'`` request::
+should be used. For example, this will send a ``'HEAD'`` request::
>>> urlopen(Request('http://www.python.org', method='HEAD'))
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index a1725e5..4e492dd 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -452,7 +452,7 @@
/* Return a void pointer to the raw unicode buffer. */
#define _PyUnicode_COMPACT_DATA(op) \
- (PyUnicode_IS_COMPACT_ASCII(op) ? \
+ (PyUnicode_IS_ASCII(op) ? \
((void*)((PyASCIIObject*)(op) + 1)) : \
((void*)((PyCompactUnicodeObject*)(op) + 1)))
@@ -1466,6 +1466,14 @@
Py_ssize_t *consumed /* bytes consumed */
);
+PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful(
+ int code_page, /* code page number */
+ const char *string, /* encoded string */
+ Py_ssize_t length, /* size of string */
+ const char *errors, /* error handling */
+ Py_ssize_t *consumed /* bytes consumed */
+ );
+
PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
PyObject *unicode /* Unicode object */
);
@@ -1473,11 +1481,17 @@
#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
const Py_UNICODE *data, /* Unicode char buffer */
- Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
+ Py_ssize_t length, /* number of Py_UNICODE chars to encode */
const char *errors /* error handling */
);
#endif
+PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage(
+ int code_page, /* code page number */
+ PyObject *unicode, /* Unicode object */
+ const char *errors /* error handling */
+ );
+
#endif /* HAVE_MBCS */
/* --- Decimal Encoder ---------------------------------------------------- */
diff --git a/Lib/cgi.py b/Lib/cgi.py
index 63771fc..b3e32f1 100755
--- a/Lib/cgi.py
+++ b/Lib/cgi.py
@@ -300,7 +300,7 @@
while s[:1] == ';':
s = s[1:]
end = s.find(';')
- while end > 0 and s.count('"', 0, end) % 2:
+ while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
end = s.find(';', end + 1)
if end < 0:
end = len(s)
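A quick sketch of the behaviour this enables (mirroring the test added further down)::

    import cgi

    ctype, params = cgi.parse_header('form-data; name="files"; filename="fo\\"o;bar"')
    # ctype == 'form-data'
    # params['filename'] == 'fo"o;bar'  -- the escaped quote no longer splits the value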
diff --git a/Lib/configparser.py b/Lib/configparser.py
index b843c00..7bc4398 100644
--- a/Lib/configparser.py
+++ b/Lib/configparser.py
@@ -382,7 +382,7 @@
would resolve the "%(dir)s" to the value of dir. All reference
expansions are done late, on demand. If a user needs to use a bare % in
- a configuration file, she can escape it by writing %%. Other other % usage
+ a configuration file, she can escape it by writing %%. Other % usage
is considered a user error and raises `InterpolationSyntaxError'."""
_KEYCRE = re.compile(r"%\(([^)]+)\)s")
diff --git a/Lib/datetime.py b/Lib/datetime.py
index 1f8c8f7..c5eeca4 100644
--- a/Lib/datetime.py
+++ b/Lib/datetime.py
@@ -2053,7 +2053,7 @@
Because we know z.d said z was in daylight time (else [5] would have held and
we would have stopped then), and we know z.d != z'.d (else [8] would have held
-and we we have stopped then), and there are only 2 possible values dst() can
+and we would have stopped then), and there are only 2 possible values dst() can
return in Eastern, it follows that z'.d must be 0 (which it is in the example,
but the reasoning doesn't depend on the example -- it depends on there being
two possible dst() outcomes, one zero and the other non-zero). Therefore
diff --git a/Lib/http/client.py b/Lib/http/client.py
index a490e2b..88da550 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -678,7 +678,10 @@
try:
port = int(host[i+1:])
except ValueError:
- raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
+ if host[i+1:] == "": # http://foo.com:/ == http://foo.com/
+ port = self.default_port
+ else:
+ raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
host = host[:i]
else:
port = self.default_port
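With this change a bare trailing colon falls back to the scheme's default port; a small sketch::

    >>> from http.client import HTTPConnection
    >>> HTTPConnection('www.python.org:').port       # empty port == default
    80
    >>> HTTPConnection('www.python.org:8000').port
    8000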
diff --git a/Lib/http/cookiejar.py b/Lib/http/cookiejar.py
index 9d798e8..97383d6 100644
--- a/Lib/http/cookiejar.py
+++ b/Lib/http/cookiejar.py
@@ -1020,7 +1020,7 @@
(not erhn.startswith(".") and
not ("."+erhn).endswith(domain))):
_debug(" effective request-host %s (even with added "
- "initial dot) does not end end with %s",
+ "initial dot) does not end with %s",
erhn, domain)
return False
if (cookie.version > 0 or
diff --git a/Lib/msilib/schema.py b/Lib/msilib/schema.py
index 379f1ef..a9e1679 100644
--- a/Lib/msilib/schema.py
+++ b/Lib/msilib/schema.py
@@ -958,7 +958,7 @@
('ServiceInstall','StartType','N',0,4,None, None, None, None, 'Type of the service',),
('Shortcut','Name','N',None, None, None, None, 'Filename',None, 'The name of the shortcut to be created.',),
('Shortcut','Description','Y',None, None, None, None, 'Text',None, 'The description for the shortcut.',),
-('Shortcut','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key into the Component table denoting the component whose selection gates the the shortcut creation/deletion.',),
+('Shortcut','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key into the Component table denoting the component whose selection gates the shortcut creation/deletion.',),
('Shortcut','Icon_','Y',None, None, 'Icon',1,'Identifier',None, 'Foreign key into the File table denoting the external icon file for the shortcut.',),
('Shortcut','IconIndex','Y',-32767,32767,None, None, None, None, 'The icon index for the shortcut.',),
('Shortcut','Directory_','N',None, None, 'Directory',1,'Identifier',None, 'Foreign key into the Directory table denoting the directory where the shortcut file is created.',),
diff --git a/Lib/multiprocessing/__init__.py b/Lib/multiprocessing/__init__.py
index deb031c..e6e16c8 100644
--- a/Lib/multiprocessing/__init__.py
+++ b/Lib/multiprocessing/__init__.py
@@ -9,7 +9,7 @@
# wrapper for 'threading'.
#
# Try calling `multiprocessing.doc.main()` to read the html
-# documentation in in a webbrowser.
+# documentation in a webbrowser.
#
#
# Copyright (c) 2006-2008, R Oudkerk
diff --git a/Lib/packaging/tests/test_pypi_simple.py b/Lib/packaging/tests/test_pypi_simple.py
index bd50d01..59204c4 100644
--- a/Lib/packaging/tests/test_pypi_simple.py
+++ b/Lib/packaging/tests/test_pypi_simple.py
@@ -87,7 +87,7 @@
try:
crawler._open_url(url)
except Exception as v:
- self.assertIn('nonnumeric port', str(v))
+ self.assertIn('Download error', str(v))
# issue #160
url = server.full_address
diff --git a/Lib/sched.py b/Lib/sched.py
index f6a699c..a119892 100644
--- a/Lib/sched.py
+++ b/Lib/sched.py
@@ -94,7 +94,7 @@
restarted.
It is legal for both the delay function and the action
- function to to modify the queue or to raise an exception;
+ function to modify the queue or to raise an exception;
exceptions are not caught but the scheduler's state remains
well-defined so run() may be called again.
diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py
index dba7727..cb59008 100644
--- a/Lib/test/test_cgi.py
+++ b/Lib/test/test_cgi.py
@@ -342,6 +342,10 @@
self.assertEqual(
cgi.parse_header('attachment; filename="strange;name";size=123;'),
("attachment", {"filename": "strange;name", "size": "123"}))
+ self.assertEqual(
+ cgi.parse_header('form-data; name="files"; filename="fo\\"o;bar"'),
+ ("form-data", {"name": "files", "filename": 'fo"o;bar'}))
+
BOUNDARY = "---------------------------721837373350705526688164684"
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index e9ce95a..fa257b8 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1744,6 +1744,203 @@
self.assertEqual(sout, b"\x80")
+@unittest.skipUnless(sys.platform == 'win32',
+ 'code pages are specific to Windows')
+class CodePageTest(unittest.TestCase):
+ CP_UTF8 = 65001
+
+ def vista_or_later(self):
+ return (sys.getwindowsversion().major >= 6)
+
+ def test_invalid_code_page(self):
+ self.assertRaises(ValueError, codecs.code_page_encode, -1, 'a')
+ self.assertRaises(ValueError, codecs.code_page_decode, -1, b'a')
+ self.assertRaises(WindowsError, codecs.code_page_encode, 123, 'a')
+ self.assertRaises(WindowsError, codecs.code_page_decode, 123, b'a')
+
+ def test_code_page_name(self):
+ self.assertRaisesRegex(UnicodeEncodeError, 'cp932',
+ codecs.code_page_encode, 932, '\xff')
+ self.assertRaisesRegex(UnicodeDecodeError, 'cp932',
+ codecs.code_page_decode, 932, b'\x81\x00')
+ self.assertRaisesRegex(UnicodeDecodeError, 'CP_UTF8',
+ codecs.code_page_decode, self.CP_UTF8, b'\xff')
+
+ def check_decode(self, cp, tests):
+ for raw, errors, expected in tests:
+ if expected is not None:
+ try:
+ decoded = codecs.code_page_decode(cp, raw, errors)
+ except UnicodeDecodeError as err:
+ self.fail('Unable to decode %a from "cp%s" with '
+ 'errors=%r: %s' % (raw, cp, errors, err))
+ self.assertEqual(decoded[0], expected,
+ '%a.decode("cp%s", %r)=%a != %a'
+ % (raw, cp, errors, decoded[0], expected))
+ # assert 0 <= decoded[1] <= len(raw)
+ self.assertGreaterEqual(decoded[1], 0)
+ self.assertLessEqual(decoded[1], len(raw))
+ else:
+ self.assertRaises(UnicodeDecodeError,
+ codecs.code_page_decode, cp, raw, errors)
+
+ def check_encode(self, cp, tests):
+ for text, errors, expected in tests:
+ if expected is not None:
+ try:
+ encoded = codecs.code_page_encode(cp, text, errors)
+ except UnicodeEncodeError as err:
+ self.fail('Unable to encode %a to "cp%s" with '
+ 'errors=%r: %s' % (text, cp, errors, err))
+ self.assertEqual(encoded[0], expected,
+ '%a.encode("cp%s", %r)=%a != %a'
+ % (text, cp, errors, encoded[0], expected))
+ self.assertEqual(encoded[1], len(text))
+ else:
+ self.assertRaises(UnicodeEncodeError,
+ codecs.code_page_encode, cp, text, errors)
+
+ def test_cp932(self):
+ self.check_encode(932, (
+ ('abc', 'strict', b'abc'),
+ ('\uff44\u9a3e', 'strict', b'\x82\x84\xe9\x80'),
+ # not encodable
+ ('\xff', 'strict', None),
+ ('[\xff]', 'ignore', b'[]'),
+ ('[\xff]', 'replace', b'[y]'),
+ ('[\u20ac]', 'replace', b'[?]'),
+ ))
+ self.check_decode(932, (
+ (b'abc', 'strict', 'abc'),
+ (b'\x82\x84\xe9\x80', 'strict', '\uff44\u9a3e'),
+ # invalid bytes
+ (b'\xff', 'strict', None),
+ (b'\xff', 'ignore', ''),
+ (b'\xff', 'replace', '\ufffd'),
+ (b'\x81\x00abc', 'strict', None),
+ (b'\x81\x00abc', 'ignore', '\x00abc'),
+ (b'\x81\x00abc', 'replace', '\ufffd\x00abc'),
+ ))
+
+ def test_cp1252(self):
+ self.check_encode(1252, (
+ ('abc', 'strict', b'abc'),
+ ('\xe9\u20ac', 'strict', b'\xe9\x80'),
+ ('\xff', 'strict', b'\xff'),
+ ('\u0141', 'strict', None),
+ ('\u0141', 'ignore', b''),
+ ('\u0141', 'replace', b'L'),
+ ))
+ self.check_decode(1252, (
+ (b'abc', 'strict', 'abc'),
+ (b'\xe9\x80', 'strict', '\xe9\u20ac'),
+ (b'\xff', 'strict', '\xff'),
+ ))
+
+ def test_cp_utf7(self):
+ cp = 65000
+ self.check_encode(cp, (
+ ('abc', 'strict', b'abc'),
+ ('\xe9\u20ac', 'strict', b'+AOkgrA-'),
+ ('\U0010ffff', 'strict', b'+2//f/w-'),
+ ('\udc80', 'strict', b'+3IA-'),
+ ('\ufffd', 'strict', b'+//0-'),
+ ))
+ self.check_decode(cp, (
+ (b'abc', 'strict', 'abc'),
+ (b'+AOkgrA-', 'strict', '\xe9\u20ac'),
+ (b'+2//f/w-', 'strict', '\U0010ffff'),
+ (b'+3IA-', 'strict', '\udc80'),
+ (b'+//0-', 'strict', '\ufffd'),
+ # invalid bytes
+ (b'[+/]', 'strict', '[]'),
+ (b'[\xff]', 'strict', '[\xff]'),
+ ))
+
+ def test_cp_utf8(self):
+ cp = self.CP_UTF8
+
+ tests = [
+ ('abc', 'strict', b'abc'),
+ ('\xe9\u20ac', 'strict', b'\xc3\xa9\xe2\x82\xac'),
+ ('\U0010ffff', 'strict', b'\xf4\x8f\xbf\xbf'),
+ ]
+ if self.vista_or_later():
+ tests.append(('\udc80', 'strict', None))
+ tests.append(('\udc80', 'ignore', b''))
+ tests.append(('\udc80', 'replace', b'?'))
+ else:
+ tests.append(('\udc80', 'strict', b'\xed\xb2\x80'))
+ self.check_encode(cp, tests)
+
+ tests = [
+ (b'abc', 'strict', 'abc'),
+ (b'\xc3\xa9\xe2\x82\xac', 'strict', '\xe9\u20ac'),
+ (b'\xf4\x8f\xbf\xbf', 'strict', '\U0010ffff'),
+ (b'\xef\xbf\xbd', 'strict', '\ufffd'),
+ (b'[\xc3\xa9]', 'strict', '[\xe9]'),
+ # invalid bytes
+ (b'[\xff]', 'strict', None),
+ (b'[\xff]', 'ignore', '[]'),
+ (b'[\xff]', 'replace', '[\ufffd]'),
+ ]
+ if self.vista_or_later():
+ tests.extend((
+ (b'[\xed\xb2\x80]', 'strict', None),
+ (b'[\xed\xb2\x80]', 'ignore', '[]'),
+ (b'[\xed\xb2\x80]', 'replace', '[\ufffd\ufffd\ufffd]'),
+ ))
+ else:
+ tests.extend((
+ (b'[\xed\xb2\x80]', 'strict', '[\udc80]'),
+ ))
+ self.check_decode(cp, tests)
+
+ def test_error_handlers(self):
+ self.check_encode(932, (
+ ('\xff', 'backslashreplace', b'\\xff'),
+ ('\xff', 'xmlcharrefreplace', b'ÿ'),
+ ))
+ self.check_decode(932, (
+ (b'\xff', 'surrogateescape', '\udcff'),
+ ))
+ if self.vista_or_later():
+ self.check_encode(self.CP_UTF8, (
+ ('\udc80', 'surrogatepass', b'\xed\xb2\x80'),
+ ))
+
+ def test_multibyte_encoding(self):
+ self.check_decode(932, (
+ (b'\x84\xe9\x80', 'ignore', '\u9a3e'),
+ (b'\x84\xe9\x80', 'replace', '\ufffd\u9a3e'),
+ ))
+ self.check_decode(self.CP_UTF8, (
+ (b'\xff\xf4\x8f\xbf\xbf', 'ignore', '\U0010ffff'),
+ (b'\xff\xf4\x8f\xbf\xbf', 'replace', '\ufffd\U0010ffff'),
+ ))
+ if self.vista_or_later():
+ self.check_encode(self.CP_UTF8, (
+ ('[\U0010ffff\uDC80]', 'ignore', b'[\xf4\x8f\xbf\xbf]'),
+ ('[\U0010ffff\uDC80]', 'replace', b'[\xf4\x8f\xbf\xbf?]'),
+ ))
+
+ def test_incremental(self):
+ decoded = codecs.code_page_decode(932,
+ b'\xe9\x80\xe9', 'strict',
+ False)
+ self.assertEqual(decoded, ('\u9a3e', 2))
+
+ decoded = codecs.code_page_decode(932,
+ b'\xe9\x80\xe9\x80', 'strict',
+ False)
+ self.assertEqual(decoded, ('\u9a3e\u9a3e', 4))
+
+ decoded = codecs.code_page_decode(932,
+ b'abc', 'strict',
+ False)
+ self.assertEqual(decoded, ('abc', 3))
+
+
def test_main():
support.run_unittest(
UTF32Test,
@@ -1772,6 +1969,7 @@
SurrogateEscapeTest,
BomTest,
TransformCodecTest,
+ CodePageTest,
)
diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py
index 1729743..ba97da2 100644
--- a/Lib/test/test_httplib.py
+++ b/Lib/test/test_httplib.py
@@ -161,14 +161,16 @@
def test_host_port(self):
# Check invalid host_port
- for hp in ("www.python.org:abc", "www.python.org:"):
+ for hp in ("www.python.org:abc", "user:password@www.python.org"):
self.assertRaises(client.InvalidURL, client.HTTPConnection, hp)
for hp, h, p in (("[fe80::207:e9ff:fe9b]:8000",
"fe80::207:e9ff:fe9b", 8000),
("www.python.org:80", "www.python.org", 80),
+ ("www.python.org:", "www.python.org", 80),
("www.python.org", "www.python.org", 80),
- ("[fe80::207:e9ff:fe9b]", "fe80::207:e9ff:fe9b", 80)):
+ ("[fe80::207:e9ff:fe9b]", "fe80::207:e9ff:fe9b", 80),
+ ("[fe80::207:e9ff:fe9b]:", "fe80::207:e9ff:fe9b", 80)):
c = client.HTTPConnection(hp)
self.assertEqual(h, c.host)
self.assertEqual(p, c.port)
@@ -539,6 +541,24 @@
self.assertEqual(resp.status, 404)
del server
+ def test_host_port(self):
+ # Check invalid host_port
+
+ for hp in ("www.python.org:abc", "user:password@www.python.org"):
+ self.assertRaises(client.InvalidURL, client.HTTPSConnection, hp)
+
+ for hp, h, p in (("[fe80::207:e9ff:fe9b]:8000",
+ "fe80::207:e9ff:fe9b", 8000),
+ ("www.python.org:443", "www.python.org", 443),
+ ("www.python.org:", "www.python.org", 443),
+ ("www.python.org", "www.python.org", 443),
+ ("[fe80::207:e9ff:fe9b]", "fe80::207:e9ff:fe9b", 443),
+ ("[fe80::207:e9ff:fe9b]:", "fe80::207:e9ff:fe9b",
+ 443)):
+ c = client.HTTPSConnection(hp)
+ self.assertEqual(h, c.host)
+ self.assertEqual(p, c.port)
+
class RequestBodyTest(TestCase):
"""Test cases where a request includes a message body."""
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
index 713a97f..84a058a 100644
--- a/Lib/test/test_urllib2.py
+++ b/Lib/test/test_urllib2.py
@@ -879,7 +879,7 @@
def test_http_doubleslash(self):
# Checks the presence of any unnecessary double slash in url does not
# break anything. Previously, a double slash directly after the host
- # could could cause incorrect parsing.
+ # could cause incorrect parsing.
h = urllib.request.AbstractHTTPHandler()
o = h.parent = MockOpener()
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 4de190b..a6e7ee8 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -9,7 +9,7 @@
RFC3986_BASE = 'http://a/b/c/d;p?q'
SIMPLE_BASE = 'http://a/b/c/d'
-# A list of test cases. Each test case is a a two-tuple that contains
+# A list of test cases. Each test case is a two-tuple that contains
# a string with the query and a dictionary with the expected result.
parse_qsl_test_cases = [
diff --git a/Lib/test/test_xmlrpc.py b/Lib/test/test_xmlrpc.py
index 04e762a..4ccc6ff 100644
--- a/Lib/test/test_xmlrpc.py
+++ b/Lib/test/test_xmlrpc.py
@@ -295,7 +295,7 @@
global ADDR, PORT, URL
ADDR, PORT = serv.socket.getsockname()
#connect to IP address directly. This avoids socket.create_connection()
- #trying to connect to to "localhost" using all address families, which
+ #trying to connect to "localhost" using all address families, which
#causes slowdown e.g. on vista which supports AF_INET6. The server listens
#on AF_INET only.
URL = "http://%s:%d"%(ADDR, PORT)
@@ -354,7 +354,7 @@
global ADDR, PORT, URL
ADDR, PORT = serv.socket.getsockname()
#connect to IP address directly. This avoids socket.create_connection()
- #trying to connect to to "localhost" using all address families, which
+ #trying to connect to "localhost" using all address families, which
#causes slowdown e.g. on vista which supports AF_INET6. The server listens
#on AF_INET only.
URL = "http://%s:%d"%(ADDR, PORT)
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index 4dcb690..0b3a694 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -500,6 +500,18 @@
except zipfile.BadZipFile:
self.assertTrue(zipfp2.fp is None, 'zipfp is not closed')
+ def test_add_file_before_1980(self):
+ # Set atime and mtime to 1970-01-01
+ os.utime(TESTFN, (0, 0))
+ with zipfile.ZipFile(TESTFN2, "w") as zipfp:
+ self.assertRaises(ValueError, zipfp.write, TESTFN)
+
@requires_zlib
def test_unicode_filenames(self):
# bug #10801
@@ -1046,6 +1058,10 @@
f.close()
self.assertRaises(zipfile.BadZipFile, zipfile.ZipFile, TESTFN, 'r')
+ def test_create_zipinfo_before_1980(self):
+ self.assertRaises(ValueError,
+ zipfile.ZipInfo, 'seventies', (1979, 1, 1, 0, 0, 0))
+
def tearDown(self):
unlink(TESTFN)
unlink(TESTFN2)
diff --git a/Lib/threading.py b/Lib/threading.py
index b663232..8d505b7 100644
--- a/Lib/threading.py
+++ b/Lib/threading.py
@@ -425,7 +425,7 @@
# to be cyclic. Threads are not allowed into it until it has fully drained
# since the previous cycle. In addition, a 'resetting' state exists which is
# similar to 'draining' except that threads leave with a BrokenBarrierError,
-# and a 'broken' state in which all threads get get the exception.
+# and a 'broken' state in which all threads get the exception.
class Barrier(_Verbose):
"""
Barrier. Useful for synchronizing a fixed number of threads
diff --git a/Lib/tkinter/tix.py b/Lib/tkinter/tix.py
index be44a09..4884e22 100644
--- a/Lib/tkinter/tix.py
+++ b/Lib/tkinter/tix.py
@@ -1554,8 +1554,8 @@
'''This command is used to indicate whether the entry given by
entryPath has children entries and whether the children are visible. mode
must be one of open, close or none. If mode is set to open, a (+)
- indicator is drawn next the the entry. If mode is set to close, a (-)
- indicator is drawn next the the entry. If mode is set to none, no
+ indicator is drawn next to the entry. If mode is set to close, a (-)
+ indicator is drawn next to the entry. If mode is set to none, no
indicators will be drawn for this entry. The default mode is none. The
open mode indicates the entry has hidden children and this entry can be
opened by the user. The close mode indicates that all the children of the
@@ -1873,13 +1873,13 @@
return self.tk.call(self, 'info', 'bbox', x, y)
def move_column(self, from_, to, offset):
- """Moves the the range of columns from position FROM through TO by
+ """Moves the range of columns from position FROM through TO by
the distance indicated by OFFSET. For example, move_column(2, 4, 1)
moves the columns 2,3,4 to columns 3,4,5."""
self.tk.call(self, 'move', 'column', from_, to, offset)
def move_row(self, from_, to, offset):
- """Moves the the range of rows from position FROM through TO by
+ """Moves the range of rows from position FROM through TO by
the distance indicated by OFFSET.
For example, move_row(2, 4, 1) moves the rows 2,3,4 to rows 3,4,5."""
self.tk.call(self, 'move', 'row', from_, to, offset)
@@ -1938,7 +1938,7 @@
pad0 pixels
Specifies the paddings to the top of a row.
pad1 pixels
- Specifies the paddings to the the bottom of a row.
+ Specifies the paddings to the bottom of a row.
size val
Specifies the height of a row.
Val may be: "auto" -- the height of the row is set the
diff --git a/Lib/tkinter/ttk.py b/Lib/tkinter/ttk.py
index 1a6a9f4..928e1de 100644
--- a/Lib/tkinter/ttk.py
+++ b/Lib/tkinter/ttk.py
@@ -37,7 +37,7 @@
import os
tilelib = os.environ.get('TILE_LIBRARY')
if tilelib:
- # append custom tile path to the the list of directories that
+ # append custom tile path to the list of directories that
# Tcl uses when attempting to resolve packages with the package
# command
master.tk.eval(
diff --git a/Lib/turtle.py b/Lib/turtle.py
index ad75183..71a89e6 100644
--- a/Lib/turtle.py
+++ b/Lib/turtle.py
@@ -96,7 +96,7 @@
docstrings to disc, so it can serve as a template for translations.
Behind the scenes there are some features included with possible
-extensions in in mind. These will be commented and documented elsewhere.
+extensions in mind. These will be commented and documented elsewhere.
"""
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 65ce287..71011bd 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -557,12 +557,11 @@
# For security reasons we don't allow redirection to anything other
# than http, https or ftp.
- if not urlparts.scheme in ('http', 'https', 'ftp'):
- raise HTTPError(newurl, code,
- msg +
- " - Redirection to url '%s' is not allowed" %
- newurl,
- headers, fp)
+ if urlparts.scheme not in ('http', 'https', 'ftp'):
+ raise HTTPError(
+ newurl, code,
+ "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
+ headers, fp)
if not urlparts.path:
urlparts = list(urlparts)
@@ -727,7 +726,7 @@
# uri could be a single URI or a sequence
if isinstance(uri, str):
uri = [uri]
- if not realm in self.passwd:
+ if realm not in self.passwd:
self.passwd[realm] = {}
for default_port in True, False:
reduced_uri = tuple(
@@ -831,7 +830,7 @@
if authreq:
scheme = authreq.split()[0]
- if not scheme.lower() == 'basic':
+ if scheme.lower() != 'basic':
raise ValueError("AbstractBasicAuthHandler does not"
" support the following scheme: '%s'" %
scheme)
@@ -929,7 +928,7 @@
scheme = authreq.split()[0]
if scheme.lower() == 'digest':
return self.retry_http_digest_auth(req, authreq)
- elif not scheme.lower() == 'basic':
+ elif scheme.lower() != 'basic':
raise ValueError("AbstractDigestAuthHandler does not support"
" the following scheme: '%s'" % scheme)
@@ -1839,7 +1838,7 @@
del self.ftpcache[k]
v.close()
try:
- if not key in self.ftpcache:
+ if key not in self.ftpcache:
self.ftpcache[key] = \
ftpwrapper(user, passwd, host, port, dirs)
if not file: type = 'D'
@@ -1954,7 +1953,7 @@
# We are using newer HTTPError with older redirect_internal method
# This older method will get deprecated in 3.3
- if not urlparts.scheme in ('http', 'https', 'ftp'):
+ if urlparts.scheme not in ('http', 'https', 'ftp'):
raise HTTPError(newurl, errcode,
errmsg +
" Redirection to url '%s' is not allowed." % newurl,
@@ -1981,7 +1980,7 @@
retry=False):
"""Error 401 -- authentication required.
This function supports Basic authentication only."""
- if not 'www-authenticate' in headers:
+ if 'www-authenticate' not in headers:
URLopener.http_error_default(self, url, fp,
errcode, errmsg, headers)
stuff = headers['www-authenticate']
@@ -2007,7 +2006,7 @@
retry=False):
"""Error 407 -- proxy authentication required.
This function supports Basic authentication only."""
- if not 'proxy-authenticate' in headers:
+ if 'proxy-authenticate' not in headers:
URLopener.http_error_default(self, url, fp,
errcode, errmsg, headers)
stuff = headers['proxy-authenticate']
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 5cc7816..6ca269f 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -300,6 +300,10 @@
self.filename = filename # Normalized file name
self.date_time = date_time # year, month, day, hour, min, sec
+
+ if date_time[0] < 1980:
+ raise ValueError('ZIP does not support timestamps before 1980')
+
# Standard values:
self.compress_type = ZIP_STORED # Type of compression for the file
self.comment = b"" # Comment for each file
diff --git a/Misc/NEWS b/Misc/NEWS
index d29c9e3..8774334 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,10 @@
Core and Builtins
-----------------
+- Issue #12281: Rewrite the MBCS codec to correctly handle the replace and
+  ignore error handlers on all Windows versions. The MBCS codec now supports
+  all error handlers, instead of only replace to encode and ignore to decode.
+
- Issue #13188: When called without an explicit traceback argument,
generator.throw() now gets the traceback from the passed exception's
``__traceback__`` attribute. Patch by Petri Lehtinen.
@@ -315,6 +319,9 @@
Library
-------
+- Issue #6090: zipfile raises a ValueError when a document with a timestamp
+ earlier than 1980 is provided. Patch contributed by Petri Lehtinen.
+
- Issue #13150: sysconfig no longer parses the Makefile and config.h files
when imported, instead doing it at build time. This makes importing
sysconfig faster and reduces Python startup time by 20%.
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
index 26c8788..be31fd2 100644
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -612,6 +612,31 @@
return codec_tuple(decoded, consumed);
}
+static PyObject *
+code_page_decode(PyObject *self,
+ PyObject *args)
+{
+ Py_buffer pbuf;
+ const char *errors = NULL;
+ int final = 0;
+ Py_ssize_t consumed;
+ PyObject *decoded = NULL;
+ int code_page;
+
+ if (!PyArg_ParseTuple(args, "iy*|zi:code_page_decode",
+ &code_page, &pbuf, &errors, &final))
+ return NULL;
+ consumed = pbuf.len;
+
+ decoded = PyUnicode_DecodeCodePageStateful(code_page,
+ pbuf.buf, pbuf.len, errors,
+ final ? NULL : &consumed);
+ PyBuffer_Release(&pbuf);
+ if (decoded == NULL)
+ return NULL;
+ return codec_tuple(decoded, consumed);
+}
+
#endif /* HAVE_MBCS */
/* --- Encoder ------------------------------------------------------------ */
@@ -1011,6 +1036,29 @@
return v;
}
+static PyObject *
+code_page_encode(PyObject *self,
+ PyObject *args)
+{
+ PyObject *str, *v;
+ const char *errors = NULL;
+ int code_page;
+
+ if (!PyArg_ParseTuple(args, "iO|z:code_page_encode",
+ &code_page, &str, &errors))
+ return NULL;
+
+ str = PyUnicode_FromObject(str);
+ if (str == NULL)
+ return NULL;
+ v = codec_tuple(PyUnicode_EncodeCodePage(code_page,
+ str,
+ errors),
+ PyUnicode_GET_LENGTH(str));
+ Py_DECREF(str);
+ return v;
+}
+
#endif /* HAVE_MBCS */
/* --- Error handler registry --------------------------------------------- */
@@ -1101,6 +1149,8 @@
#ifdef HAVE_MBCS
{"mbcs_encode", mbcs_encode, METH_VARARGS},
{"mbcs_decode", mbcs_decode, METH_VARARGS},
+ {"code_page_encode", code_page_encode, METH_VARARGS},
+ {"code_page_decode", code_page_decode, METH_VARARGS},
#endif
{"register_error", register_error, METH_VARARGS,
register_error__doc__},
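The new functions are reachable from Python through the :mod:`codecs` module (Windows only); a minimal sketch, with the expected values taken from the tests added in this change set::

    import codecs

    encoded = codecs.code_page_encode(1252, '\xe9\u20ac')        # (b'\xe9\x80', 2)
    decoded = codecs.code_page_decode(932, b'\x82\x84\xe9\x80')  # ('\uff44\u9a3e', 4)
    # codecs.code_page_decode(932, b'\x81\x00', 'strict') would raise UnicodeDecodeError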
diff --git a/Modules/_sre.c b/Modules/_sre.c
index 395a120..56f35c3 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -2730,7 +2730,7 @@
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
-#define VTRACE(v)
+#define VTRACE(v) do {} while(0) /* do nothing */
#endif
/* Report failure */
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 5f56cf7..1068916 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -429,6 +429,10 @@
}
#endif
+#ifdef HAVE_MBCS
+static OSVERSIONINFOEX winver;
+#endif
+
/* --- Bloom Filters ----------------------------------------------------- */
/* stuff to implement simple "bloom filters" for Unicode characters.
@@ -777,6 +781,7 @@
_PyUnicode_LENGTH(unicode) = 0;
_PyUnicode_UTF8(unicode) = NULL;
_PyUnicode_UTF8_LENGTH(unicode) = 0;
+ assert(_PyUnicode_CheckConsistency(unicode, 0));
return unicode;
onError:
@@ -6896,130 +6901,307 @@
#define NEED_RETRY
#endif
-/* XXX This code is limited to "true" double-byte encodings, as
- a) it assumes an incomplete character consists of a single byte, and
- b) IsDBCSLeadByte (probably) does not work for non-DBCS multi-byte
- encodings, see IsDBCSLeadByteEx documentation. */
+#ifndef WC_ERR_INVALID_CHARS
+# define WC_ERR_INVALID_CHARS 0x0080
+#endif
+
+static char*
+code_page_name(UINT code_page, PyObject **obj)
+{
+ *obj = NULL;
+ if (code_page == CP_ACP)
+ return "mbcs";
+ if (code_page == CP_UTF7)
+ return "CP_UTF7";
+ if (code_page == CP_UTF8)
+ return "CP_UTF8";
+
+ *obj = PyBytes_FromFormat("cp%u", code_page);
+ if (*obj == NULL)
+ return NULL;
+ return PyBytes_AS_STRING(*obj);
+}
static int
-is_dbcs_lead_byte(const char *s, int offset)
+is_dbcs_lead_byte(UINT code_page, const char *s, int offset)
{
const char *curr = s + offset;
+ const char *prev;
- if (IsDBCSLeadByte(*curr)) {
- const char *prev = CharPrev(s, curr);
- return (prev == curr) || !IsDBCSLeadByte(*prev) || (curr - prev == 2);
- }
+ if (!IsDBCSLeadByteEx(code_page, *curr))
+ return 0;
+
+ prev = CharPrevExA(code_page, s, curr, 0);
+ if (prev == curr)
+ return 1;
+ /* FIXME: This code is limited to "true" double-byte encodings,
+ as it assumes an incomplete character consists of a single
+ byte. */
+ if (curr - prev == 2)
+ return 1;
+ if (!IsDBCSLeadByteEx(code_page, *prev))
+ return 1;
return 0;
}
+static DWORD
+decode_code_page_flags(UINT code_page)
+{
+ if (code_page == CP_UTF7) {
+ /* The CP_UTF7 decoder only supports flags=0 */
+ return 0;
+ }
+ else
+ return MB_ERR_INVALID_CHARS;
+}
+
/*
- * Decode MBCS string into unicode object. If 'final' is set, converts
- * trailing lead-byte too. Returns consumed size if succeed, -1 otherwise.
+ * Decode a byte string from a Windows code page into unicode object in strict
+ * mode.
+ *
+ * Returns the consumed size on success, returns -2 on decode error, or raises
+ * a WindowsError and returns -1 on other errors.
*/
static int
-decode_mbcs(PyUnicodeObject **v,
- const char *s, /* MBCS string */
- int size, /* sizeof MBCS string */
- int final,
- const char *errors)
+decode_code_page_strict(UINT code_page,
+ PyUnicodeObject **v,
+ const char *in,
+ int insize)
{
- Py_UNICODE *p;
- Py_ssize_t n;
- DWORD usize;
- DWORD flags;
-
- assert(size >= 0);
-
- /* check and handle 'errors' arg */
- if (errors==NULL || strcmp(errors, "strict")==0)
- flags = MB_ERR_INVALID_CHARS;
- else if (strcmp(errors, "ignore")==0)
- flags = 0;
- else {
- PyErr_Format(PyExc_ValueError,
- "mbcs encoding does not support errors='%s'",
- errors);
- return -1;
- }
-
- /* Skip trailing lead-byte unless 'final' is set */
- if (!final && size >= 1 && is_dbcs_lead_byte(s, size - 1))
- --size;
+ const DWORD flags = decode_code_page_flags(code_page);
+ Py_UNICODE *out;
+ DWORD outsize;
/* First get the size of the result */
- if (size > 0) {
- usize = MultiByteToWideChar(CP_ACP, flags, s, size, NULL, 0);
- if (usize==0)
- goto mbcs_decode_error;
- } else
- usize = 0;
+ assert(insize > 0);
+ outsize = MultiByteToWideChar(code_page, flags, in, insize, NULL, 0);
+ if (outsize <= 0)
+ goto error;
if (*v == NULL) {
/* Create unicode object */
- *v = _PyUnicode_New(usize);
+ *v = _PyUnicode_New(outsize);
if (*v == NULL)
return -1;
- n = 0;
+ out = PyUnicode_AS_UNICODE(*v);
}
else {
/* Extend unicode object */
- n = PyUnicode_GET_SIZE(*v);
- if (PyUnicode_Resize((PyObject**)v, n + usize) < 0)
+ Py_ssize_t n = PyUnicode_GET_SIZE(*v);
+ if (PyUnicode_Resize((PyObject**)v, n + outsize) < 0)
return -1;
+ out = PyUnicode_AS_UNICODE(*v) + n;
}
/* Do the conversion */
- if (usize > 0) {
- p = PyUnicode_AS_UNICODE(*v) + n;
- if (0 == MultiByteToWideChar(CP_ACP, flags, s, size, p, usize)) {
- goto mbcs_decode_error;
- }
- }
- return size;
+ outsize = MultiByteToWideChar(code_page, flags, in, insize, out, outsize);
+ if (outsize <= 0)
+ goto error;
+ return insize;
-mbcs_decode_error:
- /* If the last error was ERROR_NO_UNICODE_TRANSLATION, then
- we raise a UnicodeDecodeError - else it is a 'generic'
- windows error
- */
- if (GetLastError()==ERROR_NO_UNICODE_TRANSLATION) {
- /* Ideally, we should get reason from FormatMessage - this
- is the Windows 2000 English version of the message
- */
- PyObject *exc = NULL;
- const char *reason = "No mapping for the Unicode character exists "
- "in the target multi-byte code page.";
- make_decode_exception(&exc, "mbcs", s, size, 0, 0, reason);
- if (exc != NULL) {
- PyCodec_StrictErrors(exc);
- Py_DECREF(exc);
- }
- } else {
- PyErr_SetFromWindowsErrWithFilename(0, NULL);
- }
+error:
+ if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION)
+ return -2;
+ PyErr_SetFromWindowsErr(0);
return -1;
}
-PyObject *
-PyUnicode_DecodeMBCSStateful(const char *s,
- Py_ssize_t size,
- const char *errors,
- Py_ssize_t *consumed)
+/*
+ * Decode a byte string from a code page into unicode object with an error
+ * handler.
+ *
+ * Returns the consumed size on success, or raises a WindowsError or
+ * UnicodeDecodeError exception and returns -1 on error.
+ */
+static int
+decode_code_page_errors(UINT code_page,
+ PyUnicodeObject **v,
+ const char *in,
+ int size,
+ const char *errors)
+{
+ const char *startin = in;
+ const char *endin = in + size;
+ const DWORD flags = decode_code_page_flags(code_page);
+ /* Ideally, we should get reason from FormatMessage. This is the Windows
+ 2000 English version of the message. */
+ const char *reason = "No mapping for the Unicode character exists "
+ "in the target code page.";
+ /* each step cannot decode more than 1 character, but a character can be
+ represented as a surrogate pair */
+ wchar_t buffer[2], *startout, *out;
+ int insize, outsize;
+ PyObject *errorHandler = NULL;
+ PyObject *exc = NULL;
+ PyObject *encoding_obj = NULL;
+ char *encoding;
+ DWORD err;
+ int ret = -1;
+
+ assert(size > 0);
+
+ encoding = code_page_name(code_page, &encoding_obj);
+ if (encoding == NULL)
+ return -1;
+
+ if (errors == NULL || strcmp(errors, "strict") == 0) {
+ /* The last error was ERROR_NO_UNICODE_TRANSLATION, then we raise a
+ UnicodeDecodeError. */
+ make_decode_exception(&exc, encoding, in, size, 0, 0, reason);
+ if (exc != NULL) {
+ PyCodec_StrictErrors(exc);
+ Py_CLEAR(exc);
+ }
+ goto error;
+ }
+
+ if (*v == NULL) {
+ /* Create unicode object */
+ if (size > PY_SSIZE_T_MAX / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) {
+ PyErr_NoMemory();
+ goto error;
+ }
+ *v = _PyUnicode_New(size * Py_ARRAY_LENGTH(buffer));
+ if (*v == NULL)
+ goto error;
+ startout = PyUnicode_AS_UNICODE(*v);
+ }
+ else {
+ /* Extend unicode object */
+ Py_ssize_t n = PyUnicode_GET_SIZE(*v);
+ if (size > (PY_SSIZE_T_MAX - n) / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) {
+ PyErr_NoMemory();
+ goto error;
+ }
+ if (PyUnicode_Resize((PyObject**)v, n + size * Py_ARRAY_LENGTH(buffer)) < 0)
+ goto error;
+ startout = PyUnicode_AS_UNICODE(*v) + n;
+ }
+
+ /* Decode the byte string character per character */
+ out = startout;
+ while (in < endin)
+ {
+ /* Decode a character */
+ insize = 1;
+ do
+ {
+ outsize = MultiByteToWideChar(code_page, flags,
+ in, insize,
+ buffer, Py_ARRAY_LENGTH(buffer));
+ if (outsize > 0)
+ break;
+ err = GetLastError();
+ if (err != ERROR_NO_UNICODE_TRANSLATION
+ && err != ERROR_INSUFFICIENT_BUFFER)
+ {
+ PyErr_SetFromWindowsErr(0);
+ goto error;
+ }
+ insize++;
+ }
+ /* 4=maximum length of a UTF-8 sequence */
+ while (insize <= 4 && (in + insize) <= endin);
+
+ if (outsize <= 0) {
+ Py_ssize_t startinpos, endinpos, outpos;
+
+ startinpos = in - startin;
+ endinpos = startinpos + 1;
+ outpos = out - PyUnicode_AS_UNICODE(*v);
+ if (unicode_decode_call_errorhandler(
+ errors, &errorHandler,
+ encoding, reason,
+ &startin, &endin, &startinpos, &endinpos, &exc, &in,
+ v, &outpos, &out))
+ {
+ goto error;
+ }
+ }
+ else {
+ in += insize;
+ memcpy(out, buffer, outsize * sizeof(wchar_t));
+ out += outsize;
+ }
+ }
+
+ /* write a NUL character at the end */
+ *out = 0;
+
+ /* Extend unicode object */
+ outsize = out - startout;
+ assert(outsize <= PyUnicode_WSTR_LENGTH(*v));
+ if (PyUnicode_Resize((PyObject**)v, outsize) < 0)
+ goto error;
+ ret = 0;
+
+error:
+ Py_XDECREF(encoding_obj);
+ Py_XDECREF(errorHandler);
+ Py_XDECREF(exc);
+ return ret;
+}
+
+/*
+ * Decode a byte string from a Windows code page into a Unicode object. If
+ * 'final' is set, a trailing lead byte is decoded too.
+ *
+ * Returns the consumed size on success, or raises a WindowsError or
+ * UnicodeDecodeError exception and returns -1 on error.
+ */
+static int
+decode_code_page(UINT code_page,
+ PyUnicodeObject **v,
+ const char *s, int size,
+ int final, const char *errors)
+{
+ int done;
+
+ if (size == 0) {
+ if (*v == NULL) {
+ Py_INCREF(unicode_empty);
+ *v = (PyUnicodeObject*)unicode_empty;
+ if (*v == NULL)
+ return -1;
+ }
+ return 0;
+ }
+
+ /* Skip trailing lead-byte unless 'final' is set */
+ if (!final && is_dbcs_lead_byte(code_page, s, size - 1))
+ --size;
+
+ done = decode_code_page_strict(code_page, v, s, size);
+ if (done == -2)
+ done = decode_code_page_errors(code_page, v, s, size, errors);
+ return done;
+}
+
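decode_code_page holds back a trailing DBCS lead byte when more input may follow, so an incremental decode never splits a two-byte character across chunks. A rough standalone illustration of that check, assuming is_dbcs_lead_byte behaves like the Win32 IsDBCSLeadByteEx call; usable_size is a made-up helper name.

    #include <windows.h>
    #include <stdio.h>

    /* Return the number of bytes that are safe to decode now: if the last
       byte is a DBCS lead byte and the chunk is not final, hold it back. */
    static int
    usable_size(UINT code_page, const char *s, int size, int final)
    {
        if (!final && size > 0 &&
            IsDBCSLeadByteEx(code_page, (BYTE)s[size - 1]))
            return size - 1;
        return size;
    }

    int
    main(void)
    {
        /* 0x82 is a lead byte in code page 932 (Shift-JIS); the trailing
           byte of that character has not arrived in this chunk yet. */
        const char chunk[] = { 'A', (char)0x82 };
        printf("decode now: %d of %d bytes\n",
               usable_size(932, chunk, 2, 0), 2);
        return 0;
    }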
+static PyObject *
+decode_code_page_stateful(int code_page,
+ const char *s,
+ Py_ssize_t size,
+ const char *errors,
+ Py_ssize_t *consumed)
{
PyUnicodeObject *v = NULL;
int done;
+ if (code_page < 0) {
+ PyErr_SetString(PyExc_ValueError, "invalid code page number");
+ return NULL;
+ }
+
if (consumed)
*consumed = 0;
#ifdef NEED_RETRY
retry:
if (size > INT_MAX)
- done = decode_mbcs(&v, s, INT_MAX, 0, errors);
+ done = decode_code_page(code_page, &v, s, INT_MAX, 0, errors);
else
#endif
- done = decode_mbcs(&v, s, (int)size, !consumed, errors);
+ done = decode_code_page(code_page, &v, s, (int)size, !consumed, errors);
if (done < 0) {
Py_XDECREF(v);
@@ -7036,6 +7218,7 @@
goto retry;
}
#endif
+
#ifndef DONT_MAKE_RESULT_READY
if (_PyUnicode_READY_REPLACE(&v)) {
Py_DECREF(v);
@@ -7047,6 +7230,25 @@
}
PyObject *
+PyUnicode_DecodeCodePageStateful(int code_page,
+ const char *s,
+ Py_ssize_t size,
+ const char *errors,
+ Py_ssize_t *consumed)
+{
+ return decode_code_page_stateful(code_page, s, size, errors, consumed);
+}
+
+PyObject *
+PyUnicode_DecodeMBCSStateful(const char *s,
+ Py_ssize_t size,
+ const char *errors,
+ Py_ssize_t *consumed)
+{
+ return decode_code_page_stateful(CP_ACP, s, size, errors, consumed);
+}
+
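A usage sketch of the new public entry point from embedding code, assuming a Windows build that exports it and an already initialized interpreter; decode_cp932_chunk is an illustrative wrapper, not part of the patch.

    #include <Python.h>

    /* Decode a chunk of Shift-JIS (code page 932) bytes; with a non-NULL
       'consumed', a trailing lead byte is held back and 'consumed' reports
       how many bytes were actually decoded. */
    static PyObject *
    decode_cp932_chunk(const char *data, Py_ssize_t size, Py_ssize_t *consumed)
    {
        return PyUnicode_DecodeCodePageStateful(932, data, size,
                                                "strict", consumed);
    }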
+PyObject *
PyUnicode_DecodeMBCS(const char *s,
Py_ssize_t size,
const char *errors)
@@ -7054,105 +7256,342 @@
return PyUnicode_DecodeMBCSStateful(s, size, errors, NULL);
}
+static DWORD
+encode_code_page_flags(UINT code_page, const char *errors)
+{
+ if (code_page == CP_UTF8) {
+ if (winver.dwMajorVersion >= 6)
+ /* CP_UTF8 supports WC_ERR_INVALID_CHARS on Windows Vista
+ and later */
+ return WC_ERR_INVALID_CHARS;
+ else
+ /* CP_UTF8 only supports flags=0 on Windows older than Vista */
+ return 0;
+ }
+ else if (code_page == CP_UTF7) {
+ /* CP_UTF7 only supports flags=0 */
+ return 0;
+ }
+ else {
+ if (errors != NULL && strcmp(errors, "replace") == 0)
+ return 0;
+ else
+ return WC_NO_BEST_FIT_CHARS;
+ }
+}
+
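The flag choice above determines how unencodable characters are detected: for ANSI/OEM code pages, WC_NO_BEST_FIT_CHARS together with the usedDefaultChar output flags lossy fallbacks, while CP_UTF8 and CP_UTF7 must be called with a NULL usedDefaultChar pointer. A small standalone sketch of that detection, assuming code page 437, which has no mapping for the euro sign.

    #include <windows.h>
    #include <stdio.h>

    int
    main(void)
    {
        const wchar_t text[] = { 0x20AC, 0 };   /* U+20AC, euro sign */
        char out[8];
        BOOL usedDefaultChar = FALSE;

        int n = WideCharToMultiByte(437, WC_NO_BEST_FIT_CHARS,
                                    text, 1,
                                    out, sizeof(out),
                                    NULL, &usedDefaultChar);
        if (n == 0)
            printf("hard failure: %lu\n", GetLastError());
        else if (usedDefaultChar)
            printf("lossy: the default char was substituted\n");
        else
            printf("encoded to %d byte(s)\n", n);
        return 0;
    }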
/*
- * Convert unicode into string object (MBCS).
- * Returns 0 if succeed, -1 otherwise.
+ * Encode a Unicode string as a byte string using a Windows code page, in
+ * strict mode.
+ *
+ * Returns the number of consumed characters on success, returns -2 on an
+ * encoding error, or raises a WindowsError and returns -1 on any other error.
*/
static int
-encode_mbcs(PyObject **repr,
- const Py_UNICODE *p, /* unicode */
- int size, /* size of unicode */
- const char* errors)
+encode_code_page_strict(UINT code_page, PyObject **outbytes,
+ const Py_UNICODE *p, const int size,
+ const char* errors)
{
BOOL usedDefaultChar = FALSE;
- BOOL *pusedDefaultChar;
- int mbcssize;
- Py_ssize_t n;
+ BOOL *pusedDefaultChar = &usedDefaultChar;
+ int outsize;
PyObject *exc = NULL;
- DWORD flags;
+ const DWORD flags = encode_code_page_flags(code_page, NULL);
+ char *out;
- assert(size >= 0);
+ assert(size > 0);
- /* check and handle 'errors' arg */
- if (errors==NULL || strcmp(errors, "strict")==0) {
- flags = WC_NO_BEST_FIT_CHARS;
+ if (code_page != CP_UTF8 && code_page != CP_UTF7)
pusedDefaultChar = &usedDefaultChar;
- } else if (strcmp(errors, "replace")==0) {
- flags = 0;
+ else
pusedDefaultChar = NULL;
- } else {
- PyErr_Format(PyExc_ValueError,
- "mbcs encoding does not support errors='%s'",
- errors);
- return -1;
- }
/* First get the size of the result */
- if (size > 0) {
- mbcssize = WideCharToMultiByte(CP_ACP, flags, p, size, NULL, 0,
- NULL, pusedDefaultChar);
- if (mbcssize == 0) {
- PyErr_SetFromWindowsErrWithFilename(0, NULL);
- return -1;
- }
- /* If we used a default char, then we failed! */
- if (pusedDefaultChar && *pusedDefaultChar)
- goto mbcs_encode_error;
- } else {
- mbcssize = 0;
- }
+ outsize = WideCharToMultiByte(code_page, flags,
+ p, size,
+ NULL, 0,
+ NULL, pusedDefaultChar);
+ if (outsize <= 0)
+ goto error;
+ /* If we used a default char, then we failed! */
+ if (pusedDefaultChar && *pusedDefaultChar)
+ return -2;
- if (*repr == NULL) {
+ if (*outbytes == NULL) {
/* Create string object */
- *repr = PyBytes_FromStringAndSize(NULL, mbcssize);
- if (*repr == NULL)
+ *outbytes = PyBytes_FromStringAndSize(NULL, outsize);
+ if (*outbytes == NULL)
return -1;
- n = 0;
+ out = PyBytes_AS_STRING(*outbytes);
}
else {
/* Extend string object */
- n = PyBytes_Size(*repr);
- if (_PyBytes_Resize(repr, n + mbcssize) < 0)
+ const Py_ssize_t n = PyBytes_Size(*outbytes);
+ if (outsize > PY_SSIZE_T_MAX - n) {
+ PyErr_NoMemory();
return -1;
+ }
+ if (_PyBytes_Resize(outbytes, n + outsize) < 0)
+ return -1;
+ out = PyBytes_AS_STRING(*outbytes) + n;
}
/* Do the conversion */
- if (size > 0) {
- char *s = PyBytes_AS_STRING(*repr) + n;
- if (0 == WideCharToMultiByte(CP_ACP, flags, p, size, s, mbcssize,
- NULL, pusedDefaultChar)) {
- PyErr_SetFromWindowsErrWithFilename(0, NULL);
- return -1;
- }
- if (pusedDefaultChar && *pusedDefaultChar)
- goto mbcs_encode_error;
- }
+ outsize = WideCharToMultiByte(code_page, flags,
+ p, size,
+ out, outsize,
+ NULL, pusedDefaultChar);
+ if (outsize <= 0)
+ goto error;
+ if (pusedDefaultChar && *pusedDefaultChar)
+ return -2;
return 0;
-mbcs_encode_error:
- raise_encode_exception(&exc, "mbcs", p, size, 0, 0, "invalid character");
- Py_XDECREF(exc);
+error:
+ if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION)
+ return -2;
+ PyErr_SetFromWindowsErr(0);
return -1;
}
-PyObject *
-PyUnicode_EncodeMBCS(const Py_UNICODE *p,
- Py_ssize_t size,
- const char *errors)
+/*
+ * Encode a Unicode string as a byte string using a Windows code page and the
+ * given error handler.
+ *
+ * Returns the number of consumed characters on success, or raises a
+ * WindowsError and returns -1 on any other error.
+ */
+static int
+encode_code_page_errors(UINT code_page, PyObject **outbytes,
+ const Py_UNICODE *in, const int insize,
+ const char* errors)
{
- PyObject *repr = NULL;
+ const DWORD flags = encode_code_page_flags(code_page, errors);
+ const Py_UNICODE *startin = in;
+ const Py_UNICODE *endin = in + insize;
+ /* Ideally, we should get the reason from FormatMessage. This is the
+ Windows 2000 English version of the message. */
+ const char *reason = "invalid character";
+ /* 4=maximum length of a UTF-8 sequence */
+ char buffer[4];
+ BOOL usedDefaultChar = FALSE, *pusedDefaultChar;
+ Py_ssize_t outsize;
+ char *out;
+ int charsize;
+ PyObject *errorHandler = NULL;
+ PyObject *exc = NULL;
+ PyObject *encoding_obj = NULL;
+ char *encoding;
+ int err;
+ Py_ssize_t startpos, newpos, newoutsize;
+ PyObject *rep;
+ int ret = -1;
+
+ assert(insize > 0);
+
+ encoding = code_page_name(code_page, &encoding_obj);
+ if (encoding == NULL)
+ return -1;
+
+ if (errors == NULL || strcmp(errors, "strict") == 0) {
+ /* The last error was ERROR_NO_UNICODE_TRANSLATION,
+ so we raise a UnicodeEncodeError. */
+ make_encode_exception(&exc, encoding, in, insize, 0, 0, reason);
+ if (exc != NULL) {
+ PyCodec_StrictErrors(exc);
+ Py_DECREF(exc);
+ }
+ Py_XDECREF(encoding_obj);
+ return -1;
+ }
+
+ if (code_page != CP_UTF8 && code_page != CP_UTF7)
+ pusedDefaultChar = &usedDefaultChar;
+ else
+ pusedDefaultChar = NULL;
+
+ if (Py_ARRAY_LENGTH(buffer) > PY_SSIZE_T_MAX / insize) {
+ PyErr_NoMemory();
+ goto error;
+ }
+ outsize = insize * Py_ARRAY_LENGTH(buffer);
+
+ if (*outbytes == NULL) {
+ /* Create string object */
+ *outbytes = PyBytes_FromStringAndSize(NULL, outsize);
+ if (*outbytes == NULL)
+ goto error;
+ out = PyBytes_AS_STRING(*outbytes);
+ }
+ else {
+ /* Extend string object */
+ Py_ssize_t n = PyBytes_Size(*outbytes);
+ if (n > PY_SSIZE_T_MAX - outsize) {
+ PyErr_NoMemory();
+ goto error;
+ }
+ if (_PyBytes_Resize(outbytes, n + outsize) < 0)
+ goto error;
+ out = PyBytes_AS_STRING(*outbytes) + n;
+ }
+
+ /* Encode the string character per character */
+ while (in < endin)
+ {
+ if ((in + 2) <= endin
+ && 0xD800 <= in[0] && in[0] <= 0xDBFF
+ && 0xDC00 <= in[1] && in[1] <= 0xDFFF)
+ charsize = 2;
+ else
+ charsize = 1;
+
+ outsize = WideCharToMultiByte(code_page, flags,
+ in, charsize,
+ buffer, Py_ARRAY_LENGTH(buffer),
+ NULL, pusedDefaultChar);
+ if (outsize > 0) {
+ if (pusedDefaultChar == NULL || !(*pusedDefaultChar))
+ {
+ in += charsize;
+ memcpy(out, buffer, outsize);
+ out += outsize;
+ continue;
+ }
+ }
+ else if (GetLastError() != ERROR_NO_UNICODE_TRANSLATION) {
+ PyErr_SetFromWindowsErr(0);
+ goto error;
+ }
+
+ charsize = Py_MAX(charsize - 1, 1);
+ startpos = in - startin;
+ rep = unicode_encode_call_errorhandler(
+ errors, &errorHandler, encoding, reason,
+ startin, insize, &exc,
+ startpos, startpos + charsize, &newpos);
+ if (rep == NULL)
+ goto error;
+ in = startin + newpos;
+
+ if (PyBytes_Check(rep)) {
+ outsize = PyBytes_GET_SIZE(rep);
+ if (outsize != 1) {
+ Py_ssize_t offset = out - PyBytes_AS_STRING(*outbytes);
+ newoutsize = PyBytes_GET_SIZE(*outbytes) + (outsize - 1);
+ if (_PyBytes_Resize(outbytes, newoutsize) < 0) {
+ Py_DECREF(rep);
+ goto error;
+ }
+ out = PyBytes_AS_STRING(*outbytes) + offset;
+ }
+ memcpy(out, PyBytes_AS_STRING(rep), outsize);
+ out += outsize;
+ }
+ else {
+ Py_ssize_t i;
+ enum PyUnicode_Kind kind;
+ void *data;
+
+ if (PyUnicode_READY(rep) < 0) {
+ Py_DECREF(rep);
+ goto error;
+ }
+
+ outsize = PyUnicode_GET_LENGTH(rep);
+ if (outsize != 1) {
+ Py_ssize_t offset = out - PyBytes_AS_STRING(*outbytes);
+ newoutsize = PyBytes_GET_SIZE(*outbytes) + (outsize - 1);
+ if (_PyBytes_Resize(outbytes, newoutsize) < 0) {
+ Py_DECREF(rep);
+ goto error;
+ }
+ out = PyBytes_AS_STRING(*outbytes) + offset;
+ }
+ kind = PyUnicode_KIND(rep);
+ data = PyUnicode_DATA(rep);
+ for (i=0; i < outsize; i++) {
+ Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+ if (ch > 127) {
+ raise_encode_exception(&exc,
+ encoding,
+ startin, insize,
+ startpos, startpos + charsize,
+ "unable to encode error handler result to ASCII");
+ Py_DECREF(rep);
+ goto error;
+ }
+ *out = (unsigned char)ch;
+ out++;
+ }
+ }
+ Py_DECREF(rep);
+ }
+ /* write a NUL byte */
+ *out = 0;
+ outsize = out - PyBytes_AS_STRING(*outbytes);
+ assert(outsize <= PyBytes_GET_SIZE(*outbytes));
+ if (_PyBytes_Resize(outbytes, outsize) < 0)
+ goto error;
+ ret = 0;
+
+error:
+ Py_XDECREF(encoding_obj);
+ Py_XDECREF(errorHandler);
+ Py_XDECREF(exc);
+ return ret;
+}
+
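In the error path above, charsize is 2 whenever the current position starts a UTF-16 surrogate pair, so both halves are passed to WideCharToMultiByte as one unit. A minimal sketch of that classification, with utf16_char_size as a made-up helper name.

    #include <stdio.h>
    #include <wchar.h>

    /* Return 2 if in[0], in[1] form a UTF-16 surrogate pair (high then low),
       1 otherwise.  'remaining' is the number of wchar_t units left. */
    static int
    utf16_char_size(const wchar_t *in, size_t remaining)
    {
        if (remaining >= 2
            && 0xD800 <= in[0] && in[0] <= 0xDBFF
            && 0xDC00 <= in[1] && in[1] <= 0xDFFF)
            return 2;
        return 1;
    }

    int
    main(void)
    {
        /* U+1F600 encoded as the surrogate pair D83D DE00. */
        const wchar_t emoji[] = { 0xD83D, 0xDE00, 0 };
        printf("%d\n", utf16_char_size(emoji, 2));   /* prints 2 */
        return 0;
    }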
+/*
+ * Encode a Unicode string as a byte string using a Windows code page.
+ *
+ * Returns the number of consumed characters on success, or raises a
+ * WindowsError and returns -1 on any other error.
+ */
+static int
+encode_code_page_chunk(UINT code_page, PyObject **outbytes,
+ const Py_UNICODE *p, int size,
+ const char* errors)
+{
+ int done;
+
+ if (size == 0) {
+ if (*outbytes == NULL) {
+ *outbytes = PyBytes_FromStringAndSize(NULL, 0);
+ if (*outbytes == NULL)
+ return -1;
+ }
+ return 0;
+ }
+
+ done = encode_code_page_strict(code_page, outbytes, p, size, errors);
+ if (done == -2)
+ done = encode_code_page_errors(code_page, outbytes, p, size, errors);
+ return done;
+}
+
+static PyObject *
+encode_code_page(int code_page,
+ const Py_UNICODE *p, Py_ssize_t size,
+ const char *errors)
+{
+ PyObject *outbytes = NULL;
int ret;
+ if (code_page < 0) {
+ PyErr_SetString(PyExc_ValueError, "invalid code page number");
+ return NULL;
+ }
+
#ifdef NEED_RETRY
retry:
if (size > INT_MAX)
- ret = encode_mbcs(&repr, p, INT_MAX, errors);
+ ret = encode_code_page_chunk(code_page, &outbytes, p, INT_MAX, errors);
else
#endif
- ret = encode_mbcs(&repr, p, (int)size, errors);
+ ret = encode_code_page_chunk(code_page, &outbytes, p, (int)size, errors);
if (ret < 0) {
- Py_XDECREF(repr);
+ Py_XDECREF(outbytes);
return NULL;
}
@@ -7164,7 +7603,28 @@
}
#endif
- return repr;
+ return outbytes;
+}
+
+PyObject *
+PyUnicode_EncodeMBCS(const Py_UNICODE *p,
+ Py_ssize_t size,
+ const char *errors)
+{
+ return encode_code_page(CP_ACP, p, size, errors);
+}
+
+PyObject *
+PyUnicode_EncodeCodePage(int code_page,
+ PyObject *unicode,
+ const char *errors)
+{
+ const Py_UNICODE *p;
+ Py_ssize_t size;
+ p = PyUnicode_AsUnicodeAndSize(unicode, &size);
+ if (p == NULL)
+ return NULL;
+ return encode_code_page(code_page, p, size, errors);
}
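A matching usage sketch for the encode direction, again assuming embedding code on a Windows build with an initialized interpreter; encode_cp1252 is an illustrative wrapper, not part of the patch.

    #include <Python.h>

    /* Encode a str object to bytes in code page 1252, substituting
       unencodable characters via the "replace" error handler. */
    static PyObject *
    encode_cp1252(PyObject *text)
    {
        return PyUnicode_EncodeCodePage(1252, text, "replace");
    }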
PyObject *
@@ -13434,7 +13894,7 @@
/* Initialize the Unicode implementation */
-void _PyUnicode_Init(void)
+int _PyUnicode_Init(void)
{
int i;
@@ -13467,6 +13927,15 @@
Py_ARRAY_LENGTH(linebreak));
PyType_Ready(&EncodingMapType);
+
+#ifdef HAVE_MBCS
+ winver.dwOSVersionInfoSize = sizeof(winver);
+ if (!GetVersionEx((OSVERSIONINFO*)&winver)) {
+ PyErr_SetFromWindowsErr(0);
+ return -1;
+ }
+#endif
+ return 0;
}
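_PyUnicode_Init now caches the OS version so the UTF-8 flag selection above (winver.dwMajorVersion >= 6) can enable WC_ERR_INVALID_CHARS only on Windows Vista or later. A standalone sketch of that probe, assuming the same OSVERSIONINFOEX-typed winver global; note that GetVersionEx is deprecated in newer SDKs but is the call this patch uses.

    #include <windows.h>
    #include <stdio.h>

    static OSVERSIONINFOEX winver;   /* mirrors the global added by the patch */

    int
    main(void)
    {
        winver.dwOSVersionInfoSize = sizeof(winver);
        if (!GetVersionEx((OSVERSIONINFO*)&winver)) {
            fprintf(stderr, "GetVersionEx failed: %lu\n", GetLastError());
            return 1;
        }
        printf("Windows %lu.%lu: %s\n",
               winver.dwMajorVersion, winver.dwMinorVersion,
               winver.dwMajorVersion >= 6
                   ? "WC_ERR_INVALID_CHARS available for CP_UTF8"
                   : "fall back to flags=0 for CP_UTF8");
        return 0;
    }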
/* Finalize the Unicode implementation */
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index a6e7c46..0f2f050 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -67,7 +67,7 @@
static void call_py_exitfuncs(void);
static void wait_for_thread_shutdown(void);
static void call_ll_exitfuncs(void);
-extern void _PyUnicode_Init(void);
+extern int _PyUnicode_Init(void);
extern void _PyUnicode_Fini(void);
extern int _PyLong_Init(void);
extern void PyLong_Fini(void);
@@ -261,7 +261,8 @@
Py_FatalError("Py_Initialize: can't make modules_reloading dictionary");
/* Init Unicode implementation; relies on the codec registry */
- _PyUnicode_Init();
+ if (_PyUnicode_Init() < 0)
+ Py_FatalError("Py_Initialize: can't initialize unicode");
bimod = _PyBuiltin_Init();
if (bimod == NULL)