Merge pull request #1274 from ctiller/inline-slice-buffer-for-realsy

Inline initial slice buffer allocation
diff --git a/include/grpc/support/slice_buffer.h b/include/grpc/support/slice_buffer.h
index c7e5dbc..1545dbf 100644
--- a/include/grpc/support/slice_buffer.h
+++ b/include/grpc/support/slice_buffer.h
@@ -40,6 +40,8 @@
 extern "C" {
 #endif
 
+#define GRPC_SLICE_BUFFER_INLINE_ELEMENTS 8 /* size of gpr_slice_buffer.inlined[] */
+
 /* Represents an expandable array of slices, to be interpreted as a single item
    TODO(ctiller): inline some small number of elements into the struct, to
                   avoid per-call allocations */
@@ -52,6 +54,8 @@
   size_t capacity;
   /* the combined length of all slices in the array */
   size_t length;
+  /* inlined elements to avoid allocations */
+  gpr_slice inlined[GRPC_SLICE_BUFFER_INLINE_ELEMENTS];
 } gpr_slice_buffer;
 
 /* initialize a slice buffer */
@@ -78,9 +82,11 @@
 void gpr_slice_buffer_pop(gpr_slice_buffer *sb);
 /* clear a slice buffer, unref all elements */
 void gpr_slice_buffer_reset_and_unref(gpr_slice_buffer *sb);
+/* swap the contents of two slice buffers */
+void gpr_slice_buffer_swap(gpr_slice_buffer *a, gpr_slice_buffer *b);
 
 #ifdef __cplusplus
 }
 #endif
 
-#endif  /* GRPC_SUPPORT_SLICE_BUFFER_H */
+#endif /* GRPC_SUPPORT_SLICE_BUFFER_H */
diff --git a/src/core/support/slice_buffer.c b/src/core/support/slice_buffer.c
index b280e4b..3b1daa0 100644
--- a/src/core/support/slice_buffer.c
+++ b/src/core/support/slice_buffer.c
@@ -38,21 +38,34 @@
 #include <grpc/support/alloc.h>
 #include <grpc/support/log.h>
 
-/* initial allocation size (# of slices) */
-#define INITIAL_CAPACITY 4
-/* grow a buffer; requires INITIAL_CAPACITY > 1 */
+/* grow a buffer; requires GRPC_SLICE_BUFFER_INLINE_ELEMENTS > 1 */
 #define GROW(x) (3 * (x) / 2)
 
+static void maybe_embiggen(gpr_slice_buffer *sb) { /* if full, grow so one more slice fits */
+  if (sb->count == sb->capacity) {
+    sb->capacity = GROW(sb->capacity);
+    GPR_ASSERT(sb->capacity > sb->count); /* GROW must strictly increase the capacity */
+    if (sb->slices == sb->inlined) { /* inlined[] is a struct member: copy out to a new heap array */
+      sb->slices = gpr_malloc(sb->capacity * sizeof(gpr_slice));
+      memcpy(sb->slices, sb->inlined, sb->count * sizeof(gpr_slice));
+    } else { /* already a separately allocated array: resize it */
+      sb->slices = gpr_realloc(sb->slices, sb->capacity * sizeof(gpr_slice));
+    }
+  }
+}
+
 void gpr_slice_buffer_init(gpr_slice_buffer *sb) {
   sb->count = 0;
   sb->length = 0;
-  sb->capacity = INITIAL_CAPACITY;
-  sb->slices = gpr_malloc(sizeof(gpr_slice) * INITIAL_CAPACITY);
+  sb->capacity = GRPC_SLICE_BUFFER_INLINE_ELEMENTS; /* capacity of inlined[] */
+  sb->slices = sb->inlined; /* start in the embedded array; nothing is allocated here */
 }
 
 void gpr_slice_buffer_destroy(gpr_slice_buffer *sb) {
   gpr_slice_buffer_reset_and_unref(sb);
-  gpr_free(sb->slices);
+  if (sb->slices != sb->inlined) { /* inlined[] is part of *sb and must not be freed */
+    gpr_free(sb->slices);
+  }
 }
 
 gpr_uint8 *gpr_slice_buffer_tiny_add(gpr_slice_buffer *sb, unsigned n) {
@@ -71,11 +84,7 @@
   return out;
 
 add_new:
-  if (sb->count == sb->capacity) {
-    sb->capacity = GROW(sb->capacity);
-    GPR_ASSERT(sb->capacity > sb->count);
-    sb->slices = gpr_realloc(sb->slices, sb->capacity * sizeof(gpr_slice));
-  }
+  maybe_embiggen(sb);
   back = &sb->slices[sb->count];
   sb->count++;
   back->refcount = NULL;
@@ -85,11 +94,7 @@
 
 size_t gpr_slice_buffer_add_indexed(gpr_slice_buffer *sb, gpr_slice s) {
   size_t out = sb->count;
-  if (out == sb->capacity) {
-    sb->capacity = GROW(sb->capacity);
-    GPR_ASSERT(sb->capacity > sb->count);
-    sb->slices = gpr_realloc(sb->slices, sb->capacity * sizeof(gpr_slice));
-  }
+  maybe_embiggen(sb);
   sb->slices[out] = s;
   sb->length += GPR_SLICE_LENGTH(s);
   sb->count = out + 1;
@@ -116,12 +121,7 @@
         memcpy(back->data.inlined.bytes + back->data.inlined.length,
                s.data.inlined.bytes, cp1);
         back->data.inlined.length = GPR_SLICE_INLINED_SIZE;
-        if (n == sb->capacity) {
-          sb->capacity = GROW(sb->capacity);
-          GPR_ASSERT(sb->capacity > sb->count);
-          sb->slices =
-              gpr_realloc(sb->slices, sb->capacity * sizeof(gpr_slice));
-        }
+        maybe_embiggen(sb);
         back = &sb->slices[n];
         sb->count = n + 1;
         back->refcount = NULL;
@@ -160,3 +160,16 @@
   sb->count = 0;
   sb->length = 0;
 }
+
+void gpr_slice_buffer_swap(gpr_slice_buffer *a, gpr_slice_buffer *b) {
+  gpr_slice_buffer temp = *a; /* whole-struct copies: inlined[] contents move too */
+  *a = *b;
+  *b = temp;
+  /* a copied 'slices' may still point at the other struct's inlined[]; re-point it */
+  if (a->slices == b->inlined) {
+    a->slices = a->inlined;
+  }
+  if (b->slices == a->inlined) {
+    b->slices = b->inlined;
+  }
+}
diff --git a/src/core/transport/chttp2_transport.c b/src/core/transport/chttp2_transport.c
index 4c0394d..110a4b5 100644
--- a/src/core/transport/chttp2_transport.c
+++ b/src/core/transport/chttp2_transport.c
@@ -834,13 +834,10 @@
 
 static int prepare_write(transport *t) {
   stream *s;
-  gpr_slice_buffer tempbuf;
   gpr_uint32 window_delta;
 
   /* simple writes are queued to qbuf, and flushed here */
-  tempbuf = t->qbuf;
-  t->qbuf = t->outbuf;
-  t->outbuf = tempbuf;
+  gpr_slice_buffer_swap(&t->qbuf, &t->outbuf);
   GPR_ASSERT(t->qbuf.count == 0);
 
   if (t->dirtied_local_settings && !t->sent_local_settings) {