Buffered I/O: optimize lock acquisition in the common uncontended case by trying a non-blocking acquire first and releasing the GIL only when the lock is already held.
diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c
index 040f3bf..9aa7d4b 100644
--- a/Modules/_io/bufferedio.c
+++ b/Modules/_io/bufferedio.c
@@ -260,9 +260,11 @@
 
 #ifdef WITH_THREAD
 #define ENTER_BUFFERED(self) \
-    Py_BEGIN_ALLOW_THREADS \
-    PyThread_acquire_lock(self->lock, 1); \
-    Py_END_ALLOW_THREADS
+    if (!PyThread_acquire_lock(self->lock, 0)) { /* fast path: non-blocking attempt, GIL kept */ \
+        Py_BEGIN_ALLOW_THREADS /* attempt failed: release the GIL while waiting */ \
+        PyThread_acquire_lock(self->lock, 1); /* blocking acquire */ \
+        Py_END_ALLOW_THREADS \
+    } /* NOTE(review): expands to a bare if-block; a following `else` would bind to it */
 
 #define LEAVE_BUFFERED(self) \
     PyThread_release_lock(self->lock);