small-object optimization for SkFunction

Any functor whose size is <= sizeof(void*) is stored inline in the SkFunction, avoiding a heap allocation.

BUG=skia:

Review URL: https://codereview.chromium.org/1048243002
diff --git a/src/core/SkFunction.h b/src/core/SkFunction.h
index 9ec421e..3009593 100644
--- a/src/core/SkFunction.h
+++ b/src/core/SkFunction.h
@@ -11,6 +11,7 @@
 // TODO: document
 
 #include "SkTypes.h"
+#include "SkTLogic.h"
 
 template <typename> class SkFunction;
 
@@ -23,31 +24,41 @@
     }
 
     template <typename Fn>
-    explicit SkFunction(Fn fn) : fVTable(GetVTable<Fn>()) {
-        // We've got a functor.  The basic thing we can always do is copy it onto the heap.
+    explicit SkFunction(Fn fn, SK_WHEN_C((sizeof(Fn) > sizeof(void*)), void*) = nullptr)
+            : fVTable(GetOutlineVTable<Fn>()) {
+        // We've got a functor larger than a pointer.  We've got to copy it onto the heap.
         fFunction = SkNEW_ARGS(Fn, (fn));
     }
 
-    ~SkFunction() { fVTable.fDelete(fFunction); }
+    template <typename Fn>
+    explicit SkFunction(Fn fn, SK_WHEN_C((sizeof(Fn) <= sizeof(void*)), void*) = nullptr)
+            : fVTable(GetInlineVTable<Fn>()) {
+        // We've got a functor that fits in a pointer.  We copy it right inline.
+        SkNEW_PLACEMENT_ARGS(&fFunction, Fn, (fn));
+    }
+
+    ~SkFunction() { fVTable.fCleanUp(fFunction); }
 
     R operator()(Args... args) { return fVTable.fCall(fFunction, args...); }
 
 private:
     struct VTable {
         R (*fCall)(void*, Args...);
-        void (*fDelete)(void*);
+        void (*fCleanUp)(void*);
     };
 
+    // Used when fFunction is a function pointer of type R(*)(Args...).
     static const VTable& GetFunctionPointerVTable() {
         static const VTable vtable = {
             [](void* fn, Args... args) { return reinterpret_cast<R(*)(Args...)>(fn)(args...); },
-            [](void*) { /* Don't delete function pointers. */ },
+            [](void*) { /* Nothing to clean up for function pointers. */ }
         };
         return vtable;
     }
 
+    // Used when fFunction is a pointer to a functor of type Fn on the heap (we own it).
     template <typename Fn>
-    static const VTable& GetVTable() {
+    static const VTable& GetOutlineVTable() {
         static const VTable vtable = {
             [](void* fn, Args... args) { return (*static_cast<Fn*>(fn))(args...); },
             [](void* fn) { SkDELETE(static_cast<Fn*>(fn)); },
@@ -55,7 +66,25 @@
         return vtable;
     }
 
-    void* fFunction;        // Either a function pointer, or a pointer to a functor.
+    // Used when fFunction _is_ a functor of type Fn, not a pointer to the functor.
+    template <typename Fn>
+    static const VTable& GetInlineVTable() {
+        static const VTable vtable = {
+            [](void* fn, Args... args) {
+                union { void* p; Fn f; } pun = { fn };
+                return pun.f(args...);
+            },
+            [](void* fn) {
+                union { void* p; Fn f; } pun = { fn };
+                pun.f.~Fn();
+                (void)(pun.f);   // Otherwise, when ~Fn() is trivial, MSVC complains pun is unused.
+            }
+        };
+        return vtable;
+    }
+
+
+    void* fFunction;        // A function pointer, a pointer to a functor, or an inlined functor.
     const VTable& fVTable;  // How to call, delete (and one day copy, move) fFunction.
 };
 
@@ -65,6 +94,5 @@
 //   - make SkFunction copyable
 //   - emulate std::forward for moveable functors (e.g. lambdas)
 //   - forward args too?
-//   - implement small-object optimization to store functors inline
 
 #endif//SkFunction_DEFINED
diff --git a/src/utils/SkTLogic.h b/src/utils/SkTLogic.h
index 2b5df0b..d188242 100644
--- a/src/utils/SkTLogic.h
+++ b/src/utils/SkTLogic.h
@@ -82,6 +82,7 @@
  *  SK_WHEN(!SkTrue, int) f(void* ptr) { return 2; }
  */
 #define SK_WHEN(cond_prefix, T) typename SkTEnableIf_c<cond_prefix::value, T>::type
+#define SK_WHEN_C(cond, T) typename SkTEnableIf_c<cond, T>::type
 
 // See http://en.wikibooks.org/wiki/More_C++_Idioms/Member_Detector
 #define SK_CREATE_MEMBER_DETECTOR(member)                                           \
diff --git a/tests/FunctionTest.cpp b/tests/FunctionTest.cpp
index 5611b80..6fffcbd 100644
--- a/tests/FunctionTest.cpp
+++ b/tests/FunctionTest.cpp
@@ -9,6 +9,7 @@
 #include "Test.h"
 
 static void test_add_five(skiatest::Reporter* r, SkFunction<int(int)>&& f) {
+    REPORTER_ASSERT(r, f(3) == 8);
     REPORTER_ASSERT(r, f(4) == 9);
 }
 
@@ -24,4 +25,9 @@
     test_add_five(r, SkFunction<int(int)>(&add_five));
     test_add_five(r, SkFunction<int(int)>(AddFive()));
     test_add_five(r, SkFunction<int(int)>([](int x) { return x + 5; }));
+
+    // AddFive and the lambda above are both small enough to test small-object optimization.
+    // Now test a lambda that's much too large for the small-object optimization.
+    int a = 1, b = 1, c = 1, d = 1, e = 1;
+    test_add_five(r, SkFunction<int(int)>([&](int x) { return x + a + b + c + d + e; }));
 }