Use Py_LOCAL in the string and unicode objects too, replacing their private LOCAL macros
diff --git a/Include/pyport.h b/Include/pyport.h
index 7d51d0a..9d46470 100644
--- a/Include/pyport.h
+++ b/Include/pyport.h
@@ -139,12 +139,17 @@
 
 /* PY_LOCAL can be used instead of static to get the fastest possible calling
  * convention for functions that are local to a given module.  It also enables
- * inlining, where suitable. */
+ * inlining, where suitable.
+ *
+ * NOTE: You can only use this for functions that are entirely local to a
+ * module; functions that are exported via method tables, callbacks, etc.,
+ * should keep using static (Py_LOCAL may change the calling convention).
+ */
 
 #undef USE_INLINE /* XXX - set via configure? */
 
 #if defined(_MSC_VER)
- /* ignore warnings if the compiler decides not to inline a function */ 
+/* ignore warnings if the compiler decides not to inline a function */ 
 #pragma warning(disable: 4710)
 /* fastest possible local call under MSVC */
 #define Py_LOCAL(type) static __inline type __fastcall
diff --git a/Objects/stringobject.c b/Objects/stringobject.c
index b4c8950..b186594 100644
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -5,18 +5,6 @@
 
 #include <ctype.h>
 
-#undef USE_INLINE /* XXX - set via configure? */
-
-#if defined(_MSC_VER) /* this is taken from _sre.c */
-#pragma warning(disable: 4710)
-/* fastest possible local call under MSVC */
-#define LOCAL(type) static __inline type __fastcall
-#elif defined(USE_INLINE)
-#define LOCAL(type) static inline type
-#else
-#define LOCAL(type) static type
-#endif
-
 #ifdef COUNT_ALLOCS
 int null_strings, one_strings;
 #endif
@@ -798,7 +786,7 @@
 #define FAST_COUNT 0
 #define FAST_SEARCH 1
 
-LOCAL(Py_ssize_t)
+Py_LOCAL(Py_ssize_t)
 fastsearch(const char* s, Py_ssize_t n, const char* p, Py_ssize_t m, int mode)
 {
 	long mask;
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 0dea75d..ab638350 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -49,18 +49,6 @@
 #include <windows.h>
 #endif
 
-#undef USE_INLINE /* XXX - set via configure? */
-
-#if defined(_MSC_VER) /* this is taken from _sre.c */
-#pragma warning(disable: 4710)
-/* fastest possible local call under MSVC */
-#define LOCAL(type) static __inline type __fastcall
-#elif defined(USE_INLINE)
-#define LOCAL(type) static inline type
-#else
-#define LOCAL(type) static type
-#endif
-
 /* Limit for the Unicode object free list */
 
 #define MAX_UNICODE_FREELIST_SIZE       1024
@@ -153,7 +141,7 @@
 #define BLOOM_LINEBREAK(ch)\
     (BLOOM(bloom_linebreak, (ch)) && Py_UNICODE_ISLINEBREAK((ch)))
 
-LOCAL(BLOOM_MASK) make_bloom_mask(Py_UNICODE* ptr, Py_ssize_t len)
+Py_LOCAL(BLOOM_MASK) make_bloom_mask(Py_UNICODE* ptr, Py_ssize_t len)
 {
     /* calculate simple bloom-style bitmask for a given unicode string */
 
@@ -167,7 +155,7 @@
     return mask;
 }
 
-LOCAL(int) unicode_member(Py_UNICODE chr, Py_UNICODE* set, Py_ssize_t setlen)
+Py_LOCAL(int) unicode_member(Py_UNICODE chr, Py_UNICODE* set, Py_ssize_t setlen)
 {
     Py_ssize_t i;
 
@@ -2027,9 +2015,9 @@
 
 */
 
-LOCAL(const Py_UNICODE *) findchar(const Py_UNICODE *s,
-                                   Py_ssize_t size,
-                                   Py_UNICODE ch)
+Py_LOCAL(const Py_UNICODE *) findchar(const Py_UNICODE *s,
+                                      Py_ssize_t size,
+                                      Py_UNICODE ch)
 {
     /* like wcschr, but doesn't stop at NULL characters */
 
@@ -3880,7 +3868,7 @@
 #define FAST_COUNT 0
 #define FAST_SEARCH 1
 
-LOCAL(Py_ssize_t)
+Py_LOCAL(Py_ssize_t)
 fastsearch(Py_UNICODE* s, Py_ssize_t n, Py_UNICODE* p, Py_ssize_t m, int mode)
 {
     long mask;
@@ -3955,10 +3943,10 @@
     return count;
 }
 
-LOCAL(Py_ssize_t) count(PyUnicodeObject *self,
-		 Py_ssize_t start,
-		 Py_ssize_t end,
-		 PyUnicodeObject *substring)
+Py_LOCAL(Py_ssize_t) count(PyUnicodeObject *self,
+                           Py_ssize_t start,
+                           Py_ssize_t end,
+                           PyUnicodeObject *substring)
 {
     Py_ssize_t count = 0;