speed up SkDynamicMemoryWStream

- move the bytesWritten calculation to query the tail, making write() faster since it no longer has to update anything extra per write.
- enforce that all blocks are a multiple of 4 bytes in size
- update the minimum block size to 4K

Before: 30ms
After:  23ms for non-4-byte writes
        13ms for 4-byte writes

BUG=skia:

Change-Id: Id06ecad3b9fe426747e02accf1393595e3356ce3
Reviewed-on: https://skia-review.googlesource.com/6087
Reviewed-by: Mike Klein <mtklein@chromium.org>
Commit-Queue: Mike Reed <reed@google.com>
diff --git a/bench/StreamBench.cpp b/bench/StreamBench.cpp
index 0650a99..e89b207 100644
--- a/bench/StreamBench.cpp
+++ b/bench/StreamBench.cpp
@@ -9,10 +9,11 @@
 #include "SkStream.h"
 
 class StreamBench : public Benchmark {
-    SkString fName;
+    SkString    fName;
+    const bool  fTestWrite4;
 public:
-    StreamBench()  {
-        fName.printf("wstream");
+    StreamBench(bool testWrite4) : fTestWrite4(testWrite4) {
+        fName.printf("wstream_%d", testWrite4);
     }
 
     bool isSuitableFor(Backend backend) override {
@@ -23,11 +24,18 @@
     const char* onGetName() override { return fName.c_str(); }
 
     void onDraw(int loops, SkCanvas* canvas) override {
+        const char t3[] = { 1, 2, 3 };
+        const char t5[] = { 1, 2, 3, 4, 5 };
         for (int i = 0; i < loops*100; ++i) {
             SkDynamicMemoryWStream stream;
-            for (int j = 0; j < 100000; ++j) {
-                stream.write32(j);
-                stream.write32(j+j);
+            for (int j = 0; j < 10000; ++j) {
+                if (fTestWrite4) {
+                    stream.write32(j);
+                    stream.write32(j+j);
+                } else {
+                    stream.write(t3, 3);
+                    stream.write(t5, 5);
+                }
             }
         }
     }
@@ -38,4 +46,5 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 
-DEF_BENCH(return new StreamBench;)
+DEF_BENCH(return new StreamBench(false);)
+DEF_BENCH(return new StreamBench(true);)