diff --git a/gyp/core.gypi b/gyp/core.gypi
index c74c487..84f2f51 100644
--- a/gyp/core.gypi
+++ b/gyp/core.gypi
@@ -293,7 +293,6 @@
         '<(skia_include_path)/core/SkTSearch.h',
         '<(skia_include_path)/core/SkTemplates.h',
         '<(skia_include_path)/core/SkThread.h',
-        '<(skia_include_path)/core/SkThread_platform.h',
         '<(skia_include_path)/core/SkTime.h',
         '<(skia_include_path)/core/SkTLazy.h',
         '<(skia_include_path)/core/SkTrace.h',
diff --git a/gyp/ports.gyp b/gyp/ports.gyp
index 26003de..a627910 100644
--- a/gyp/ports.gyp
+++ b/gyp/ports.gyp
@@ -23,6 +23,10 @@
         '../src/utils',
       ],
       'sources': [
+        '../src/ports/SkAtomics_android.h',
+        '../src/ports/SkAtomics_none.h',
+        '../src/ports/SkAtomics_sync.h',
+        '../src/ports/SkAtomics_win.h',
         '../src/ports/SkDebug_nacl.cpp',
         '../src/ports/SkDebug_stdio.cpp',
         '../src/ports/SkDebug_win.cpp',
@@ -34,14 +38,14 @@
 
         '../src/ports/SkGlobalInitialization_default.cpp',
         '../src/ports/SkMemory_malloc.cpp',
+        '../src/ports/SkMutex_none.h',
+        '../src/ports/SkMutex_pthread.h',
+        '../src/ports/SkMutex_win.h',
         '../src/ports/SkOSFile_posix.cpp',
         '../src/ports/SkOSFile_stdio.cpp',
         '../src/ports/SkOSFile_win.cpp',
         '../src/ports/SkDiscardableMemory_none.cpp',
         '../src/ports/SkPurgeableMemoryBlock_none.cpp',
-       #'../src/ports/SkThread_none.cpp',
-        '../src/ports/SkThread_pthread.cpp',
-        '../src/ports/SkThread_win.cpp',
         '../src/ports/SkTime_Unix.cpp',
         '../src/ports/SkTime_win.cpp',
        #'../src/ports/SkTLS_none.cpp',
@@ -121,7 +125,6 @@
           'sources!': [ # these are used everywhere but windows
             '../src/ports/SkDebug_stdio.cpp',
             '../src/ports/SkOSFile_posix.cpp',
-            '../src/ports/SkThread_pthread.cpp',
             '../src/ports/SkTime_Unix.cpp',
             '../src/ports/SkTLS_pthread.cpp',
           ],
@@ -145,7 +148,6 @@
             '../src/ports/SkFontMgr_default_gdi.cpp',
             '../src/ports/SkFontMgr_default_dw.cpp',
             '../src/ports/SkOSFile_win.cpp',
-            '../src/ports/SkThread_win.cpp',
             '../src/ports/SkTime_win.cpp',
             '../src/ports/SkTLS_win.cpp',
           ],
diff --git a/gyp/public_headers.gypi b/gyp/public_headers.gypi
index b8c5016..a9ef4dd 100644
--- a/gyp/public_headers.gypi
+++ b/gyp/public_headers.gypi
@@ -227,7 +227,6 @@
       'core/SkChecksum.h',
       'core/SkMath.h',
       'core/SkDrawLooper.h',
-      'core/SkThread_platform.h',
       'core/SkFlattenableBuffers.h',
       'core/SkTemplates.h',
       'core/SkMask.h',
diff --git a/include/config/SkUserConfig.h b/include/config/SkUserConfig.h
index 72994e4..534c79d 100644
--- a/include/config/SkUserConfig.h
+++ b/include/config/SkUserConfig.h
@@ -186,4 +186,13 @@
  */
 //#define SK_PDF_USE_PATHOPS
 
+/* Skia uses these defines as the target of include preprocessor directives.
+ * The header files pointed to by these defines provide declarations and
+ * possibly inline implementations of threading primitives.
+ *
+ * See SkThread.h for documentation on what these includes must contain.
+ */
+//#define SK_ATOMICS_PLATFORM_H "SkAtomics_xxx.h"
+//#define SK_MUTEX_PLATFORM_H "SkMutex_xxx.h"
+
 #endif
diff --git a/include/core/SkInstCnt.h b/include/core/SkInstCnt.h
index e38c42d..89bbfa1 100644
--- a/include/core/SkInstCnt.h
+++ b/include/core/SkInstCnt.h
@@ -21,7 +21,7 @@
 
 #if SK_ENABLE_INST_COUNT
 #include "SkTArray.h"
-#include "SkThread_platform.h"
+#include "SkThread.h"
 
 extern bool gPrintInstCount;
 
diff --git a/include/core/SkPostConfig.h b/include/core/SkPostConfig.h
index 0d904ea..323d1e8 100644
--- a/include/core/SkPostConfig.h
+++ b/include/core/SkPostConfig.h
@@ -371,4 +371,24 @@
 #  define SK_ALLOW_STATIC_GLOBAL_INITIALIZERS 1
 #endif
 
+//////////////////////////////////////////////////////////////////////
+
+#ifndef SK_ATOMICS_PLATFORM_H
+#  if defined(SK_BUILD_FOR_WIN)
+#    define SK_ATOMICS_PLATFORM_H "../../src/ports/SkAtomics_win.h"
+#  elif defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
+#    define SK_ATOMICS_PLATFORM_H "../../src/ports/SkAtomics_android.h"
+#  else
+#    define SK_ATOMICS_PLATFORM_H "../../src/ports/SkAtomics_sync.h"
+#  endif
+#endif
+
+#ifndef SK_MUTEX_PLATFORM_H
+#  if defined(SK_BUILD_FOR_WIN)
+#    define SK_MUTEX_PLATFORM_H "../../src/ports/SkMutex_win.h"
+#  else
+#    define SK_MUTEX_PLATFORM_H "../../src/ports/SkMutex_pthread.h"
+#  endif
+#endif
+
 #endif // SkPostConfig_DEFINED
diff --git a/include/core/SkRefCnt.h b/include/core/SkRefCnt.h
index b010faf..2859192 100644
--- a/include/core/SkRefCnt.h
+++ b/include/core/SkRefCnt.h
@@ -50,7 +50,7 @@
     bool unique() const {
         bool const unique = (1 == fRefCnt);
         if (unique) {
-            // Aquire barrier (L/SL), if not provided by load of fRefCnt.
+            // Acquire barrier (L/SL), if not provided by load of fRefCnt.
             // Prevents user's 'unique' code from happening before decrements.
             //TODO: issue the barrier.
         }
@@ -72,9 +72,9 @@
         SkASSERT(fRefCnt > 0);
         // Release barrier (SL/S), if not provided below.
         if (sk_atomic_dec(&fRefCnt) == 1) {
-            // Aquire barrier (L/SL), if not provided above.
+            // Acquire barrier (L/SL), if not provided above.
             // Prevents code in dispose from happening before the decrement.
-            sk_membar_aquire__after_atomic_dec();
+            sk_membar_acquire__after_atomic_dec();
             internal_dispose();
         }
     }
diff --git a/include/core/SkThread.h b/include/core/SkThread.h
index 487c2bd..412ace3 100644
--- a/include/core/SkThread.h
+++ b/include/core/SkThread.h
@@ -1,4 +1,3 @@
-
 /*
  * Copyright 2006 The Android Open Source Project
  *
@@ -6,30 +5,67 @@
  * found in the LICENSE file.
  */
 
-
 #ifndef SkThread_DEFINED
 #define SkThread_DEFINED
 
 #include "SkTypes.h"
-#include "SkThread_platform.h"
 
-/****** SkThread_platform needs to define the following...
+// SK_ATOMICS_PLATFORM_H must provide inline implementations for the following declarations.
 
-int32_t sk_atomic_inc(int32_t*);
-int32_t sk_atomic_add(int32_t*, int32_t);
-int32_t sk_atomic_dec(int32_t*);
-int32_t sk_atomic_conditional_inc(int32_t*);
+/** Atomically adds one to the int referenced by addr and returns the previous value.
+ *  No additional memory barrier is required; this must act as a compiler barrier.
+ */
+static int32_t sk_atomic_inc(int32_t* addr);
 
-class SkMutex {
+/** Atomically adds inc to the int referenced by addr and returns the previous value.
+ *  No additional memory barrier is required; this must act as a compiler barrier.
+ */
+static int32_t sk_atomic_add(int32_t* addr, int32_t inc);
+
+/** Atomically subtracts one from the int referenced by addr and returns the previous value.
+ *  This must act as a release (SL/S) memory barrier and as a compiler barrier.
+ */
+static int32_t sk_atomic_dec(int32_t* addr);
+
+/** Atomically adds one to the int referenced by addr iff the referenced int was not 0
+ *  and returns the previous value.
+ *  No additional memory barrier is required; this must act as a compiler barrier.
+ */
+static int32_t sk_atomic_conditional_inc(int32_t* addr);
+
+/** If sk_atomic_dec does not act as an acquire (L/SL) barrier,
+ *  this must act as an acquire (L/SL) memory barrier and as a compiler barrier.
+ */
+static void sk_membar_acquire__after_atomic_dec();
+
+/** If sk_atomic_conditional_inc does not act as an acquire (L/SL) barrier,
+ *  this must act as an acquire (L/SL) memory barrier and as a compiler barrier.
+ */
+static void sk_membar_acquire__after_atomic_conditional_inc();
+
+#include SK_ATOMICS_PLATFORM_H
+
+
+/** SK_MUTEX_PLATFORM_H must provide the following (or equivalent) declarations.
+
+class SkBaseMutex {
+public:
+    void acquire();
+    void release();
+};
+
+class SkMutex : public SkBaseMutex {
 public:
     SkMutex();
     ~SkMutex();
-
-    void    acquire();
-    void    release();
 };
 
-****************/
+#define SK_DECLARE_STATIC_MUTEX(name) static SkBaseMutex name = ...
+#define SK_DECLARE_GLOBAL_MUTEX(name) SkBaseMutex name = ...
+*/
+
+#include SK_MUTEX_PLATFORM_H
+
 
 class SkAutoMutexAcquire : SkNoncopyable {
 public:
@@ -38,22 +74,20 @@
         mutex.acquire();
     }
 
-    SkAutoMutexAcquire(SkBaseMutex* mutex) : fMutex(mutex) {
+    explicit SkAutoMutexAcquire(SkBaseMutex* mutex) : fMutex(mutex) {
         if (mutex) {
             mutex->acquire();
         }
     }
 
-    /** If the mutex has not been release, release it now.
-    */
+    /** If the mutex has not been released, release it now. */
     ~SkAutoMutexAcquire() {
         if (fMutex) {
             fMutex->release();
         }
     }
 
-    /** If the mutex has not been release, release it now.
-    */
+    /** If the mutex has not been released, release it now. */
     void release() {
         if (fMutex) {
             fMutex->release();
diff --git a/include/core/SkThread_platform.h b/include/core/SkThread_platform.h
deleted file mode 100644
index 7df778c..0000000
--- a/include/core/SkThread_platform.h
+++ /dev/null
@@ -1,194 +0,0 @@
-
-/*
- * Copyright 2006 The Android Open Source Project
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-
-#ifndef SkThread_platform_DEFINED
-#define SkThread_platform_DEFINED
-
-#if defined(SK_BUILD_FOR_ANDROID)
-
-#if !defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
-
-#include <stdint.h>
-
-/* Just use the GCC atomic intrinsics. They're supported by the NDK toolchain,
- * have reasonable performance, and provide full memory barriers
- */
-static inline __attribute__((always_inline)) int32_t sk_atomic_inc(int32_t *addr) {
-    return __sync_fetch_and_add(addr, 1);
-}
-
-static inline __attribute__((always_inline)) int32_t sk_atomic_add(int32_t *addr, int32_t inc) {
-    return __sync_fetch_and_add(addr, inc);
-}
-
-static inline __attribute__((always_inline)) int32_t sk_atomic_dec(int32_t *addr) {
-    return __sync_fetch_and_add(addr, -1);
-}
-static inline __attribute__((always_inline)) void sk_membar_aquire__after_atomic_dec() { }
-
-static inline __attribute__((always_inline)) int32_t sk_atomic_conditional_inc(int32_t* addr) {
-    int32_t value = *addr;
-
-    while (true) {
-        if (value == 0) {
-            return 0;
-        }
-
-        int32_t before = __sync_val_compare_and_swap(addr, value, value + 1);
-
-        if (before == value) {
-            return value;
-        } else {
-            value = before;
-        }
-    }
-}
-static inline __attribute__((always_inline)) void sk_membar_aquire__after_atomic_conditional_inc() { }
-
-#else // SK_BUILD_FOR_ANDROID_FRAMEWORK
-
-/* The platform atomics operations are slightly more efficient than the
- * GCC built-ins, so use them.
- */
-#include <utils/Atomic.h>
-
-#define sk_atomic_inc(addr)         android_atomic_inc(addr)
-#define sk_atomic_add(addr, inc)    android_atomic_add(inc, addr)
-#define sk_atomic_dec(addr)         android_atomic_dec(addr)
-
-static inline __attribute__((always_inline)) void sk_membar_aquire__after_atomic_dec() {
-    //HACK: Android is actually using full memory barriers.
-    //      Should this change, uncomment below.
-    //int dummy;
-    //android_atomic_aquire_store(0, &dummy);
-}
-static inline __attribute__((always_inline)) int32_t sk_atomic_conditional_inc(int32_t* addr) {
-    while (true) {
-        int32_t value = *addr;
-        if (value == 0) {
-            return 0;
-        }
-        if (0 == android_atomic_release_cas(value, value + 1, addr)) {
-            return value;
-        }
-    }
-}
-static inline __attribute__((always_inline)) void sk_membar_aquire__after_atomic_conditional_inc() {
-    //HACK: Android is actually using full memory barriers.
-    //      Should this change, uncomment below.
-    //int dummy;
-    //android_atomic_aquire_store(0, &dummy);
-}
-
-#endif // SK_BUILD_FOR_ANDROID_FRAMEWORK
-
-#else  // !SK_BUILD_FOR_ANDROID
-
-/** Implemented by the porting layer, this function adds one to the int
-    specified by the address (in a thread-safe manner), and returns the
-    previous value.
-    No additional memory barrier is required.
-    This must act as a compiler barrier.
-*/
-SK_API int32_t sk_atomic_inc(int32_t* addr);
-
-/** Implemented by the porting layer, this function adds inc to the int
-    specified by the address (in a thread-safe manner), and returns the
-    previous value.
-    No additional memory barrier is required.
-    This must act as a compiler barrier.
- */
-SK_API int32_t sk_atomic_add(int32_t* addr, int32_t inc);
-
-/** Implemented by the porting layer, this function subtracts one from the int
-    specified by the address (in a thread-safe manner), and returns the
-    previous value.
-    Expected to act as a release (SL/S) memory barrier and a compiler barrier.
-*/
-SK_API int32_t sk_atomic_dec(int32_t* addr);
-/** If sk_atomic_dec does not act as an aquire (L/SL) barrier, this is expected
-    to act as an aquire (L/SL) memory barrier and as a compiler barrier.
-*/
-SK_API void sk_membar_aquire__after_atomic_dec();
-
-/** Implemented by the porting layer, this function adds one to the int
-    specified by the address iff the int specified by the address is not zero
-    (in a thread-safe manner), and returns the previous value.
-    No additional memory barrier is required.
-    This must act as a compiler barrier.
-*/
-SK_API int32_t sk_atomic_conditional_inc(int32_t*);
-/** If sk_atomic_conditional_inc does not act as an aquire (L/SL) barrier, this
-    is expected to act as an aquire (L/SL) memory barrier and as a compiler
-    barrier.
-*/
-SK_API void sk_membar_aquire__after_atomic_conditional_inc();
-
-#endif // !SK_BUILD_FOR_ANDROID
-
-#ifdef SK_USE_POSIX_THREADS
-
-#include <pthread.h>
-
-// A SkBaseMutex is a POD structure that can be directly initialized
-// at declaration time with SK_DECLARE_STATIC/GLOBAL_MUTEX. This avoids the
-// generation of a static initializer in the final machine code (and
-// a corresponding static finalizer).
-//
-struct SkBaseMutex {
-    void    acquire() { pthread_mutex_lock(&fMutex); }
-    void    release() { pthread_mutex_unlock(&fMutex); }
-    pthread_mutex_t  fMutex;
-};
-
-// Using POD-style initialization prevents the generation of a static initializer
-// and keeps the acquire() implementation small and fast.
-#define SK_DECLARE_STATIC_MUTEX(name)   static SkBaseMutex  name = { PTHREAD_MUTEX_INITIALIZER }
-
-// Special case used when the static mutex must be available globally.
-#define SK_DECLARE_GLOBAL_MUTEX(name)   SkBaseMutex  name = { PTHREAD_MUTEX_INITIALIZER }
-
-// A normal mutex that requires to be initialized through normal C++ construction,
-// i.e. when it's a member of another class, or allocated on the heap.
-class SK_API SkMutex : public SkBaseMutex, SkNoncopyable {
-public:
-    SkMutex();
-    ~SkMutex();
-};
-
-#else // !SK_USE_POSIX_THREADS
-
-// In the generic case, SkBaseMutex and SkMutex are the same thing, and we
-// can't easily get rid of static initializers.
-//
-class SK_API SkMutex : SkNoncopyable {
-public:
-    SkMutex();
-    ~SkMutex();
-
-    void    acquire();
-    void    release();
-
-private:
-    bool fIsGlobal;
-    enum {
-        kStorageIntCount = 64
-    };
-    uint32_t    fStorage[kStorageIntCount];
-};
-
-typedef SkMutex SkBaseMutex;
-
-#define SK_DECLARE_STATIC_MUTEX(name)           static SkBaseMutex  name
-#define SK_DECLARE_GLOBAL_MUTEX(name)           SkBaseMutex  name
-
-#endif // !SK_USE_POSIX_THREADS
-
-
-#endif
diff --git a/include/core/SkWeakRefCnt.h b/include/core/SkWeakRefCnt.h
index e2a7308..210dcc9 100644
--- a/include/core/SkWeakRefCnt.h
+++ b/include/core/SkWeakRefCnt.h
@@ -89,9 +89,9 @@
     */
     bool SK_WARN_UNUSED_RESULT try_ref() const {
         if (sk_atomic_conditional_inc(&fRefCnt) != 0) {
-            // Aquire barrier (L/SL), if not provided above.
+            // Acquire barrier (L/SL), if not provided above.
             // Prevents subsequent code from happening before the increment.
-            sk_membar_aquire__after_atomic_conditional_inc();
+            sk_membar_acquire__after_atomic_conditional_inc();
             return true;
         }
         return false;
@@ -115,9 +115,9 @@
         SkASSERT(fWeakCnt > 0);
         // Release barrier (SL/S), if not provided below.
         if (sk_atomic_dec(&fWeakCnt) == 1) {
-            // Aquire barrier (L/SL), if not provided above.
+            // Acquire barrier (L/SL), if not provided above.
             // Prevents code in destructor from happening before the decrement.
-            sk_membar_aquire__after_atomic_dec();
+            sk_membar_acquire__after_atomic_dec();
 #ifdef SK_DEBUG
             // so our destructor won't complain
             fWeakCnt = 1;
diff --git a/include/gpu/GrBackendEffectFactory.h b/include/gpu/GrBackendEffectFactory.h
index b3f52fb..d115a54 100644
--- a/include/gpu/GrBackendEffectFactory.h
+++ b/include/gpu/GrBackendEffectFactory.h
@@ -10,7 +10,7 @@
 
 #include "GrTypes.h"
 #include "SkTemplates.h"
-#include "SkThread_platform.h"
+#include "SkThread.h"
 #include "SkTypes.h"
 
 /** Given a GrEffect of a particular type, creates the corresponding graphics-backend-specific
diff --git a/src/ports/SkAtomics_android.h b/src/ports/SkAtomics_android.h
new file mode 100644
index 0000000..5cc8ba5
--- /dev/null
+++ b/src/ports/SkAtomics_android.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2013 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkAtomics_android_DEFINED
+#define SkAtomics_android_DEFINED
+
+/** Android framework atomics. */
+
+#include <cutils/atomic.h>
+#include <stdint.h>
+
+#define sk_atomic_inc(addr)         android_atomic_inc(addr)
+#define sk_atomic_add(addr, inc)    android_atomic_add(inc, addr)
+#define sk_atomic_dec(addr)         android_atomic_dec(addr)
+
+static inline __attribute__((always_inline)) void sk_membar_acquire__after_atomic_dec() {
+    //HACK: Android is actually using full memory barriers.
+    //      Should this change, uncomment below.
+    //int dummy;
+    //android_atomic_acquire_store(0, &dummy);
+}
+
+static inline __attribute__((always_inline)) int32_t sk_atomic_conditional_inc(int32_t* addr) {
+    while (true) {  // Retry until the count is seen as zero or the CAS below reports 0.
+        int32_t value = *addr;  // Fresh snapshot on every attempt.
+        if (value == 0) {
+            return 0;  // Never increment a zero count.
+        }
+        if (0 == android_atomic_release_cas(value, value + 1, addr)) {
+            return value;  // A zero result is treated as a successful swap.
+        }
+    }
+}
+
+static inline __attribute__((always_inline)) void sk_membar_acquire__after_atomic_conditional_inc() {
+    //HACK: Android is actually using full memory barriers.
+    //      Should this change, uncomment below.
+    //int dummy;
+    //android_atomic_acquire_store(0, &dummy);
+}
+
+#endif
diff --git a/src/ports/SkAtomics_none.h b/src/ports/SkAtomics_none.h
new file mode 100644
index 0000000..345cb09
--- /dev/null
+++ b/src/ports/SkAtomics_none.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2013 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkAtomics_none_DEFINED
+#define SkAtomics_none_DEFINED
+
+/** Non-atomic "atomics" (plain loads and stores) for builds where threads never race. */
+
+#include <stdint.h>
+
+static inline int32_t sk_atomic_inc(int32_t* addr) {
+    int32_t value = *addr;
+    *addr = value + 1;
+    return value;
+}
+
+static inline int32_t sk_atomic_add(int32_t* addr, int32_t inc) {
+    int32_t value = *addr;
+    *addr = value + inc;
+    return value;
+}
+
+static inline int32_t sk_atomic_dec(int32_t* addr) {
+    int32_t value = *addr;
+    *addr = value - 1;
+    return value;
+}
+
+static inline void sk_membar_acquire__after_atomic_dec() { }
+
+static inline int32_t sk_atomic_conditional_inc(int32_t* addr) {
+    int32_t value = *addr;
+    if (value != 0) ++*addr;
+    return value;
+}
+
+static inline void sk_membar_acquire__after_atomic_conditional_inc() { }
+
+#endif
diff --git a/src/ports/SkAtomics_sync.h b/src/ports/SkAtomics_sync.h
new file mode 100644
index 0000000..fccd4f5
--- /dev/null
+++ b/src/ports/SkAtomics_sync.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2013 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkAtomics_sync_DEFINED
+#define SkAtomics_sync_DEFINED
+
+/** GCC/Clang __sync based atomics. */
+
+#include <stdint.h>
+
+static inline __attribute__((always_inline)) int32_t sk_atomic_inc(int32_t* addr) {
+    return __sync_fetch_and_add(addr, 1);
+}
+
+static inline __attribute__((always_inline)) int32_t sk_atomic_add(int32_t* addr, int32_t inc) {
+    return __sync_fetch_and_add(addr, inc);
+}
+
+static inline __attribute__((always_inline)) int32_t sk_atomic_dec(int32_t* addr) {
+    return __sync_fetch_and_add(addr, -1);
+}
+
+static inline __attribute__((always_inline)) void sk_membar_acquire__after_atomic_dec() { }
+
+static inline __attribute__((always_inline)) int32_t sk_atomic_conditional_inc(int32_t* addr) {
+    int32_t value = *addr;  // Initial guess at the current count.
+
+    while (true) {
+        if (value == 0) {
+            return 0;  // Never increment a zero count.
+        }
+
+        int32_t before = __sync_val_compare_and_swap(addr, value, value + 1);
+
+        if (before == value) {
+            return value;  // The guess matched, so value + 1 was stored.
+        } else {
+            value = before;  // Guess was stale; retry with what the CAS observed.
+        }
+    }
+}
+
+static inline __attribute__((always_inline)) void sk_membar_acquire__after_atomic_conditional_inc() { }
+
+#endif
diff --git a/src/ports/SkAtomics_win.h b/src/ports/SkAtomics_win.h
new file mode 100644
index 0000000..4fb5e17
--- /dev/null
+++ b/src/ports/SkAtomics_win.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2013 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkAtomics_win_DEFINED
+#define SkAtomics_win_DEFINED
+
+/** Windows Interlocked atomics. */
+
+#include <intrin.h>
+#include <stdint.h>
+
+//MSDN says in order to declare an interlocked function for use as an
+//intrinsic, include intrin.h and put the function in a #pragma intrinsic
+//directive.
+//The pragma appears to be unnecessary, but doesn't hurt.
+#pragma intrinsic(_InterlockedIncrement, _InterlockedExchangeAdd, _InterlockedDecrement)
+#pragma intrinsic(_InterlockedCompareExchange)
+
+static inline int32_t sk_atomic_inc(int32_t* addr) {
+    // InterlockedIncrement returns the new value, we want to return the old.
+    return _InterlockedIncrement(reinterpret_cast<long*>(addr)) - 1;
+}
+
+static inline int32_t sk_atomic_add(int32_t* addr, int32_t inc) {
+    return _InterlockedExchangeAdd(reinterpret_cast<long*>(addr), static_cast<long>(inc));
+}
+
+static inline int32_t sk_atomic_dec(int32_t* addr) {
+    // InterlockedDecrement returns the new value, we want to return the old.
+    return _InterlockedDecrement(reinterpret_cast<long*>(addr)) + 1;
+}
+
+static inline void sk_membar_acquire__after_atomic_dec() { }
+
+static inline int32_t sk_atomic_conditional_inc(int32_t* addr) {
+    long value = *addr;  // Initial guess at the current count.
+    while (true) {
+        if (value == 0) {
+            return 0;  // Never increment a zero count.
+        }
+
+        long before = _InterlockedCompareExchange(reinterpret_cast<long*>(addr), value + 1, value);
+
+        if (before == value) {
+            return value;  // The guess matched, so value + 1 was stored.
+        } else {
+            value = before;  // Guess was stale; retry with what the exchange observed.
+        }
+    }
+}
+
+static inline void sk_membar_acquire__after_atomic_conditional_inc() { }
+
+#endif
diff --git a/src/ports/SkMutex_none.h b/src/ports/SkMutex_none.h
new file mode 100644
index 0000000..568ec83
--- /dev/null
+++ b/src/ports/SkMutex_none.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2013 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkMutex_none_DEFINED
+#define SkMutex_none_DEFINED
+
+/** No-op mutex (acquire/release do nothing) for builds where threads never race. */
+
+struct SkBaseMutex {
+    void acquire() { }
+    void release() { }
+};
+
+class SkMutex : public SkBaseMutex {
+public:
+    SkMutex() { }
+    ~SkMutex() { }
+
+private:
+    SkMutex(const SkMutex&);
+    SkMutex& operator=(const SkMutex&);
+};
+
+// Using POD-style initialization prevents the generation of a static initializer.
+#define SK_DECLARE_STATIC_MUTEX(name) static SkBaseMutex name = { }
+
+// Special case used when the static mutex must be available globally.
+#define SK_DECLARE_GLOBAL_MUTEX(name) SkBaseMutex name = { }
+
+#endif
diff --git a/src/ports/SkMutex_pthread.h b/src/ports/SkMutex_pthread.h
new file mode 100644
index 0000000..d9f1ae3
--- /dev/null
+++ b/src/ports/SkMutex_pthread.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2013 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkMutex_pthread_DEFINED
+#define SkMutex_pthread_DEFINED
+
+/** Posix pthread_mutex based mutex. */
+
+#ifdef SK_DEBUG_PTHREAD_MUTEX
+#include "SkTypes.h"
+#define SkDEBUGCODE_PTHREAD_MUTEX(code) code
+#else
+#define SkDEBUGCODE_PTHREAD_MUTEX(code)
+#ifndef SkDebugf
+    void SkDebugf(const char format[], ...);
+#endif
+#endif
+
+#include <errno.h>
+#include <pthread.h>
+
+// A SkBaseMutex is a POD structure that can be directly initialized
+// at declaration time with SK_DECLARE_STATIC/GLOBAL_MUTEX. This avoids the
+// generation of a static initializer in the final machine code (and
+// a corresponding static finalizer).
+struct SkBaseMutex {
+    void acquire() { pthread_mutex_lock(&fMutex); }    // Return code is not checked.
+    void release() { pthread_mutex_unlock(&fMutex); }  // Return code is not checked.
+    pthread_mutex_t fMutex;  // Sole member, so the struct can be brace-initialized.
+};
+
+// A normal mutex that must be initialized through normal C++ construction,
+// i.e. when it's a member of another class, or allocated on the heap.
+class SkMutex : public SkBaseMutex {
+public:
+    SkMutex() {  // Initializes fMutex with default attributes (NULL attr).
+        SkDEBUGCODE_PTHREAD_MUTEX(int status = )pthread_mutex_init(&fMutex, NULL);
+        SkDEBUGCODE_PTHREAD_MUTEX(
+            if (status != 0) {
+                print_pthread_error(status);
+                SkASSERT(0 == status);
+            }
+        )
+    }
+
+    ~SkMutex() {  // Destroys fMutex; status is checked only with SK_DEBUG_PTHREAD_MUTEX.
+        SkDEBUGCODE_PTHREAD_MUTEX(int status = )pthread_mutex_destroy(&fMutex);
+        SkDEBUGCODE_PTHREAD_MUTEX(
+            if (status != 0) {
+                print_pthread_error(status);
+                SkASSERT(0 == status);
+            }
+        )
+    }
+
+private:
+    SkMutex(const SkMutex&);             // Private and not defined in this header.
+    SkMutex& operator=(const SkMutex&);  // Private and not defined in this header.
+
+    static void print_pthread_error(int status) {  // Logs a pthread status code via SkDebugf.
+        switch (status) {
+        case 0: // success
+            break;
+        case EINVAL:
+            SkDebugf("pthread error [%d] EINVAL\n", status);
+            break;
+        case EBUSY:
+            SkDebugf("pthread error [%d] EBUSY\n", status);
+            break;
+        default:  // Any other code is reported as unknown.
+            SkDebugf("pthread error [%d] unknown\n", status);
+            break;
+        }
+    }
+};
+
+// Using POD-style initialization prevents the generation of a static initializer.
+#define SK_DECLARE_STATIC_MUTEX(name) static SkBaseMutex name = { PTHREAD_MUTEX_INITIALIZER }
+
+// Special case used when the static mutex must be available globally.
+#define SK_DECLARE_GLOBAL_MUTEX(name) SkBaseMutex name = { PTHREAD_MUTEX_INITIALIZER }
+
+#endif
diff --git a/src/ports/SkMutex_win.h b/src/ports/SkMutex_win.h
new file mode 100644
index 0000000..f38e504
--- /dev/null
+++ b/src/ports/SkMutex_win.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2013 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkMutex_win_DEFINED
+#define SkMutex_win_DEFINED
+
+/** Windows CriticalSection based mutex. */
+
+#ifndef WIN32_LEAN_AND_MEAN
+#  define WIN32_LEAN_AND_MEAN
+#  define WIN32_IS_MEAN_WAS_LOCALLY_DEFINED
+#endif
+#ifndef NOMINMAX
+#  define NOMINMAX
+#  define NOMINMAX_WAS_LOCALLY_DEFINED
+#endif
+#
+#include <windows.h>
+#
+#ifdef WIN32_IS_MEAN_WAS_LOCALLY_DEFINED
+#  undef WIN32_IS_MEAN_WAS_LOCALLY_DEFINED
+#  undef WIN32_LEAN_AND_MEAN
+#endif
+#ifdef NOMINMAX_WAS_LOCALLY_DEFINED
+#  undef NOMINMAX_WAS_LOCALLY_DEFINED
+#  undef NOMINMAX
+#endif
+
+// TODO: this exists because SK_DECLARE_STATIC_ONCE in methods is currently
+// relying on a compiler bug which allows the '=' to work.
+// All use of SK_DECLARE_STATIC_ONCE in methods is unsafe, and must be removed.
+// To find these cases, make SkMutex's copy and assignment private directly.
+class SkNoncopyableMutex {
+public:
+    SkNoncopyableMutex() { }
+
+private:
+    SkNoncopyableMutex(const SkNoncopyableMutex&);
+    SkNoncopyableMutex& operator=(const SkNoncopyableMutex&);
+};
+
+// On Windows, SkBaseMutex and SkMutex are the same thing;
+// we can't easily get rid of static initializers.
+class SkMutex : SkNoncopyableMutex {
+public:
+    SkMutex() {
+        InitializeCriticalSection(&fStorage);
+    }
+
+    ~SkMutex() {
+        DeleteCriticalSection(&fStorage);
+    }
+
+    void acquire() {
+        EnterCriticalSection(&fStorage);
+    }
+
+    void release() {
+        LeaveCriticalSection(&fStorage);
+    }
+
+private:
+    CRITICAL_SECTION fStorage;
+};
+
+typedef SkMutex SkBaseMutex;
+
+// Windows currently provides no documented means of POD initializing a CRITICAL_SECTION.
+#define SK_DECLARE_STATIC_MUTEX(name) static SkBaseMutex name
+#define SK_DECLARE_GLOBAL_MUTEX(name) SkBaseMutex name
+
+#endif
diff --git a/src/ports/SkThread_none.cpp b/src/ports/SkThread_none.cpp
deleted file mode 100644
index 638d7d0..0000000
--- a/src/ports/SkThread_none.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright 2006 The Android Open Source Project
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#include "SkThread.h"
-
-int32_t sk_atomic_inc(int32_t* addr) {
-    int32_t value = *addr;
-    *addr = value + 1;
-    return value;
-}
-
-int32_t sk_atomic_add(int32_t* addr, int32_t inc) {
-    int32_t value = *addr;
-    *addr = value + inc;
-    return value;
-}
-
-int32_t sk_atomic_dec(int32_t* addr) {
-    int32_t value = *addr;
-    *addr = value - 1;
-    return value;
-}
-void sk_membar_aquire__after_atomic_dec() { }
-
-int32_t sk_atomic_conditional_inc(int32_t* addr) {
-    int32_t value = *addr;
-    if (value != 0) ++*addr;
-    return value;
-}
-void sk_membar_aquire__after_atomic_conditional_inc() { }
-
-SkMutex::SkMutex() {}
-
-SkMutex::~SkMutex() {}
-
-#ifndef SK_USE_POSIX_THREADS
-void SkMutex::acquire() {}
-void SkMutex::release() {}
-#endif
diff --git a/src/ports/SkThread_pthread.cpp b/src/ports/SkThread_pthread.cpp
deleted file mode 100644
index a78c7b2..0000000
--- a/src/ports/SkThread_pthread.cpp
+++ /dev/null
@@ -1,197 +0,0 @@
-
-/*
- * Copyright 2011 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-#include "SkThread.h"
-
-#include <pthread.h>
-#include <errno.h>
-
-#ifndef SK_BUILD_FOR_ANDROID
-
-/**
- We prefer the GCC intrinsic implementation of the atomic operations over the
- SkMutex-based implementation. The SkMutex version suffers from static
- destructor ordering problems.
- Note clang also defines the GCC version macros and implements the intrinsics.
- TODO: Verify that gcc-style __sync_* intrinsics work on ARM
- According to this the intrinsics are supported on ARM in LLVM 2.7+
- http://llvm.org/releases/2.7/docs/ReleaseNotes.html
-*/
-#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4
-    #if (defined(__x86_64) || defined(__i386__))
-        #define GCC_INTRINSIC
-    #endif
-#endif
-
-#if defined(GCC_INTRINSIC)
-
-int32_t sk_atomic_inc(int32_t* addr)
-{
-    return __sync_fetch_and_add(addr, 1);
-}
-
-int32_t sk_atomic_add(int32_t* addr, int32_t inc)
-{
-    return __sync_fetch_and_add(addr, inc);
-}
-
-int32_t sk_atomic_dec(int32_t* addr)
-{
-    return __sync_fetch_and_add(addr, -1);
-}
-void sk_membar_aquire__after_atomic_dec() { }
-
-int32_t sk_atomic_conditional_inc(int32_t* addr)
-{
-    int32_t value = *addr;
-
-    while (true) {
-        if (value == 0) {
-            return 0;
-        }
-
-        int32_t before = __sync_val_compare_and_swap(addr, value, value + 1);
-
-        if (before == value) {
-            return value;
-        } else {
-            value = before;
-        }
-    }
-}
-void sk_membar_aquire__after_atomic_conditional_inc() { }
-
-#else
-
-SkMutex gAtomicMutex;
-
-int32_t sk_atomic_inc(int32_t* addr)
-{
-    SkAutoMutexAcquire ac(gAtomicMutex);
-
-    int32_t value = *addr;
-    *addr = value + 1;
-    return value;
-}
-
-int32_t sk_atomic_add(int32_t* addr, int32_t inc)
-{
-    SkAutoMutexAcquire ac(gAtomicMutex);
-
-    int32_t value = *addr;
-    *addr = value + inc;
-    return value;
-}
-
-int32_t sk_atomic_dec(int32_t* addr)
-{
-    SkAutoMutexAcquire ac(gAtomicMutex);
-
-    int32_t value = *addr;
-    *addr = value - 1;
-    return value;
-}
-void sk_membar_aquire__after_atomic_dec() { }
-
-int32_t sk_atomic_conditional_inc(int32_t* addr)
-{
-    SkAutoMutexAcquire ac(gAtomicMutex);
-
-    int32_t value = *addr;
-    if (value != 0) ++*addr;
-    return value;
-}
-void sk_membar_aquire__after_atomic_conditional_inc() { }
-
-#endif
-
-#endif // SK_BUILD_FOR_ANDROID
-
-//////////////////////////////////////////////////////////////////////////////
-
-static void print_pthread_error(int status) {
-    switch (status) {
-    case 0: // success
-        break;
-    case EINVAL:
-        SkDebugf("pthread error [%d] EINVAL\n", status);
-        break;
-    case EBUSY:
-        SkDebugf("pthread error [%d] EBUSY\n", status);
-        break;
-    default:
-        SkDebugf("pthread error [%d] unknown\n", status);
-        break;
-    }
-}
-
-#ifdef SK_USE_POSIX_THREADS
-
-SkMutex::SkMutex() {
-    int status;
-
-    status = pthread_mutex_init(&fMutex, NULL);
-    if (status != 0) {
-        print_pthread_error(status);
-        SkASSERT(0 == status);
-    }
-}
-
-SkMutex::~SkMutex() {
-    int status = pthread_mutex_destroy(&fMutex);
-
-    // only report errors on non-global mutexes
-    if (status != 0) {
-        print_pthread_error(status);
-        SkASSERT(0 == status);
-    }
-}
-
-#else // !SK_USE_POSIX_THREADS
-
-SkMutex::SkMutex() {
-    if (sizeof(pthread_mutex_t) > sizeof(fStorage)) {
-        SkDEBUGF(("pthread mutex size = %d\n", sizeof(pthread_mutex_t)));
-        SkDEBUGFAIL("mutex storage is too small");
-    }
-
-    int status;
-    pthread_mutexattr_t attr;
-
-    status = pthread_mutexattr_init(&attr);
-    print_pthread_error(status);
-    SkASSERT(0 == status);
-
-    status = pthread_mutex_init((pthread_mutex_t*)fStorage, &attr);
-    print_pthread_error(status);
-    SkASSERT(0 == status);
-}
-
-SkMutex::~SkMutex() {
-    int status = pthread_mutex_destroy((pthread_mutex_t*)fStorage);
-#if 0
-    // only report errors on non-global mutexes
-    if (!fIsGlobal) {
-        print_pthread_error(status);
-        SkASSERT(0 == status);
-    }
-#endif
-}
-
-void SkMutex::acquire() {
-    int status = pthread_mutex_lock((pthread_mutex_t*)fStorage);
-    print_pthread_error(status);
-    SkASSERT(0 == status);
-}
-
-void SkMutex::release() {
-    int status = pthread_mutex_unlock((pthread_mutex_t*)fStorage);
-    print_pthread_error(status);
-    SkASSERT(0 == status);
-}
-
-#endif // !SK_USE_POSIX_THREADS
diff --git a/src/ports/SkThread_win.cpp b/src/ports/SkThread_win.cpp
deleted file mode 100644
index 708db24..0000000
--- a/src/ports/SkThread_win.cpp
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright 2008 The Android Open Source Project
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#include <windows.h>
-#include <intrin.h>
-#include "SkThread.h"
-
-//MSDN says in order to declare an interlocked function for use as an
-//intrinsic, include intrin.h and put the function in a #pragma intrinsic
-//directive.
-//The pragma appears to be unnecessary, but doesn't hurt.
-#pragma intrinsic(_InterlockedIncrement, _InterlockedExchangeAdd, _InterlockedDecrement)
-#pragma intrinsic(_InterlockedCompareExchange)
-
-int32_t sk_atomic_inc(int32_t* addr) {
-    // InterlockedIncrement returns the new value, we want to return the old.
-    return _InterlockedIncrement(reinterpret_cast<LONG*>(addr)) - 1;
-}
-
-int32_t sk_atomic_add(int32_t* addr, int32_t inc) {
-    return _InterlockedExchangeAdd(reinterpret_cast<LONG*>(addr),
-                                   static_cast<LONG>(inc));
-}
-
-int32_t sk_atomic_dec(int32_t* addr) {
-    return _InterlockedDecrement(reinterpret_cast<LONG*>(addr)) + 1;
-}
-void sk_membar_aquire__after_atomic_dec() { }
-
-int32_t sk_atomic_conditional_inc(int32_t* addr) {
-    while (true) {
-        LONG value = static_cast<int32_t const volatile&>(*addr);
-        if (value == 0) {
-            return 0;
-        }
-        if (_InterlockedCompareExchange(reinterpret_cast<LONG*>(addr),
-                                        value + 1,
-                                        value) == value) {
-            return value;
-        }
-    }
-}
-void sk_membar_aquire__after_atomic_conditional_inc() { }
-
-SkMutex::SkMutex() {
-    SK_COMPILE_ASSERT(sizeof(fStorage) > sizeof(CRITICAL_SECTION),
-                      NotEnoughSizeForCriticalSection);
-    InitializeCriticalSection(reinterpret_cast<CRITICAL_SECTION*>(&fStorage));
-}
-
-SkMutex::~SkMutex() {
-    DeleteCriticalSection(reinterpret_cast<CRITICAL_SECTION*>(&fStorage));
-}
-
-void SkMutex::acquire() {
-    EnterCriticalSection(reinterpret_cast<CRITICAL_SECTION*>(&fStorage));
-}
-
-void SkMutex::release() {
-    LeaveCriticalSection(reinterpret_cast<CRITICAL_SECTION*>(&fStorage));
-}
