Skia Compute core files

Bug: skia:
Change-Id: I4bba49cf20eff013e581800a3f114c85acd8498c
Reviewed-on: https://skia-review.googlesource.com/135782
Reviewed-by: Mike Klein <mtklein@google.com>
Reviewed-by: Mike Reed <reed@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
diff --git a/src/compute/skc/suballocator.c b/src/compute/skc/suballocator.c
new file mode 100644
index 0000000..382e818
--- /dev/null
+++ b/src/compute/skc/suballocator.c
@@ -0,0 +1,381 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can
+ * be found in the LICENSE file.
+ *
+ */
+
+//
+//
+//
+
+#include <assert.h>
+#include <string.h> // memset()
+
+#include "runtime_cl_12.h"
+#include "scheduler.h"
+
+//
+//
+//
+
+#ifndef NDEBUG
+
+#include <stdio.h>
+
+#define SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,subbuf_id,ss)         \
+  fprintf(stderr,                                                       \
+          "suballocator %s : [ %4u ] : alloc( %9u ) @ %4u = %u\n",      \
+          suballocator->name,                                           \
+          suballocator->rem.avail,                                      \
+          (skc_uint)ss,                                                 \
+          subbuf_id,                                                    \
+          (skc_uint)suballocator->total)
+
+#define SKC_SUBALLOCATOR_DEBUG_FREE(suballocator,subbuf_id,ss)          \
+  fprintf(stderr,                                                       \
+          "suballocator %s : [ %4u ] : free ( %9u ) @ %4u = %u\n",      \
+          suballocator->name,                                           \
+          suballocator->rem.avail,                                      \
+          (skc_uint)ss,                                                 \
+          subbuf_id,                                                    \
+          (skc_uint)suballocator->total)
+
+#else
+
+#define SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,subbuf_id,ss)
+#define SKC_SUBALLOCATOR_DEBUG_FREE(suballocator,subbuf_id,ss)
+
+#endif
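+
+//
+// a trace line from the alloc macro then looks like this (values
+// illustrative):
+//
+//   suballocator HOST : [    3 ] : alloc(      1024 ) @    7 = 4096
+//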
+
+//
+//
+//
+
+void
+skc_suballocator_create(struct skc_runtime      * const runtime,
+                        struct skc_suballocator * const suballocator,
+                        char              const * const name,
+                        skc_uint                  const subbufs,
+                        size_t                    const align,
+                        size_t                    const size)
+{
+  size_t const subbufs_size = sizeof(*suballocator->subbufs) * subbufs;
+
+  // allocate array of subbuf records
+  suballocator->subbufs = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE,subbufs_size);
+
+  // zero subbufs
+  memset(suballocator->subbufs,0,subbufs_size);
+
+  // initialize starting subbuf
+  suballocator->subbufs[0].size = (skc_subbuf_size_t)size;
+
+  // allocate array of ids
+  suballocator->ids = skc_runtime_host_perm_alloc(runtime,
+                                                  SKC_MEM_FLAGS_READ_WRITE,
+                                                  sizeof(*suballocator->ids) * subbufs);
+  for (skc_uint ii=0; ii<subbufs; ii++)
+    suballocator->ids[ii] = ii;
+
+  suballocator->rem.avail = 1;
+  suballocator->rem.spare = subbufs - 1;
+
+  suballocator->align     = (skc_uint)align;
+  suballocator->count     = subbufs;
+
+  suballocator->size      = (skc_subbuf_size_t)size;
+  suballocator->total     = 0;
+
+  suballocator->name      = name;
+}
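+
+//
+// hypothetical usage sketch -- the runtime, sizes, and name below are
+// illustrative only:
+//
+//   struct skc_suballocator sa;
+//
+//   // carve a 64 MB extent into at most 128 subbuffers on 256-byte
+//   // boundaries
+//   skc_suballocator_create(runtime,&sa,"HOST",128,256,64<<20);
+//   ...
+//   skc_suballocator_dispose(runtime,&sa);
+//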
+
+void
+skc_suballocator_dispose(struct skc_runtime      * const runtime,
+                         struct skc_suballocator * const suballocator)
+{
+  skc_runtime_host_perm_free(runtime,suballocator->ids);
+  skc_runtime_host_perm_free(runtime,suballocator->subbufs);
+}
+
+//
+// Sets id and returns origin
+//
+
+size_t
+skc_suballocator_subbuf_alloc(struct skc_suballocator * const suballocator,
+                              struct skc_scheduler    * const scheduler,
+                              size_t                    const size,
+                              skc_subbuf_id_t         * const subbuf_id,
+                              size_t                  * const subbuf_size)
+{
+  //
+  // Note that we can't deadlock here because everything allocated is
+  // expected to be freed within msecs.  Worst case, we wait for
+  // resources to become available while a fully utilized GPU makes
+  // forward progress on its kernels.
+  //
+  // This behavior should guide the sizing of the suballocator's
+  // number of subbuffers and extent.
+  //
+  // We want to allocate a large enough extent and enough subbuffer
+  // records so that the CPU/GPU is never starved.
+  //
+
+  // round up the size
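+  // (e.g. with align=256, a 1000 byte request rounds up to 1024)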
+  skc_subbuf_size_t const size_ru = (skc_subbuf_size_t)SKC_ROUND_UP_POW2(size,suballocator->align);
+
+  // save it
+  if (subbuf_size != NULL)
+    *subbuf_size = size_ru;
+
+  //
+  // We precheck that at least one region of memory is available but
+  // we don't check for a spare record -- an exact fit doesn't need
+  // one, so we simply keep searching until a fit is found.
+  //
+  skc_subbuf_id_t * const ids = suballocator->ids;
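+
+  //
+  // ids[] is kept as two stacks growing toward each other: available
+  // ids at the front, spare record ids at the back, and currently
+  // in-use ids untracked in between:
+  //
+  //   [ avail ids | (in use) | spare ids ]
+  //   0       rem.avail   count-rem.spare   count
+  //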
+
+  while (true)
+    {
+      skc_uint avail_rem = suballocator->rem.avail;
+      skc_uint spare_rem = suballocator->rem.spare;
+
+      for (skc_uint avail_idx=0; avail_idx<avail_rem; avail_idx++)
+        {
+          skc_subbuf_id_t     const avail_id = ids[avail_idx];
+          struct skc_subbuf * const avail    = suballocator->subbufs + avail_id;
+
+          assert(avail->inuse == 0);
+
+          if (avail->size == size_ru) // size matches exactly
+            {
+              suballocator->total += size_ru;
+
+              // return this id
+              *subbuf_id = avail_id;
+
+              SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,avail_id,size_ru);
+
+              // mark the subbuffer as in use
+              avail->inuse += 1;
+
+              assert(avail->inuse == 1);
+
+              // update rem avail count
+              suballocator->rem.avail = --avail_rem;
+
+              // replace now inuse id with last avail id
+              if ((avail_rem > 0) && (avail_idx != avail_rem))
+                {
+                  skc_subbuf_id_t     const last_id = ids[avail_rem];
+                  struct skc_subbuf * const last    = suballocator->subbufs + last_id;
+
+                  ids[avail_idx] = last_id;   // move id
+                  last->idx      = avail_idx; // update idx[]
+                }
+
+              // return origin
+              return avail->origin;
+            }
+          else if ((avail->size > size_ru) && (spare_rem > 0)) // requested is less than available so split it
+            {
+              suballocator->total += size_ru;
+
+              skc_uint                  spare_idx = suballocator->count - spare_rem;
+              skc_subbuf_id_t     const spare_id  = ids[spare_idx];
+              struct skc_subbuf * const spare     = suballocator->subbufs + spare_id;
+
+              assert(spare->inuse == 0);
+
+              // simple -- we're popping the top-of-stack of spares
+              suballocator->rem.spare -= 1;
+
+              // return id
+              *subbuf_id = spare_id;
+
+              SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,spare_id,size_ru);
+
+              // get prev
+              struct skc_subbuf * const prev = avail->prev;
+
+              if (prev != NULL)
+                prev->next = spare;
+
+              // init spare
+              spare->prev    = prev;
+              spare->next    = avail;
+              spare->size    = size_ru;
+              spare->origin  = avail->origin;
+              spare->idx     = SKC_UINT_MAX; // defensive
+              spare->inuse  += 1;
+
+              // update curr
+              avail->prev    = spare;
+              avail->size   -= size_ru;
+              avail->origin += size_ru;
+
+              assert(suballocator->rem.avail > 0);
+
+              return spare->origin;
+            }
+        }
+
+      // uh oh... couldn't find enough memory
+      skc_scheduler_wait(scheduler);
+    }
+}
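+
+//
+// hypothetical call site -- assumes a suballocator sa and a scheduler
+// are already set up:
+//
+//   skc_subbuf_id_t id;
+//   size_t          size_ru;
+//
+//   size_t const origin = skc_suballocator_subbuf_alloc(&sa,scheduler,
+//                                                       1000,&id,&size_ru);
+//
+//   // origin is the subbuffer's byte offset within the extent; the id
+//   // is later returned with skc_suballocator_subbuf_free(&sa,id)
+//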
+
+//
+// FIXME -- simplify this with a merge-with-prev() primitive
+//
+
+void
+skc_suballocator_subbuf_free(struct skc_suballocator * const suballocator,
+                             skc_subbuf_id_t                 subbuf_id)
+{
+  // get subbuf for id
+  struct skc_subbuf * const subbuf = suballocator->subbufs + subbuf_id;
+
+  assert(subbuf->inuse == 1);
+
+  suballocator->total -= subbuf->size;
+
+  SKC_SUBALLOCATOR_DEBUG_FREE(suballocator,subbuf_id,subbuf->size);
+
+  //
+  // try to merge subbuf with left and maybe right and then dispose
+  //
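+  // the merge cases (S = subbuf being freed, * = in use):
+  //
+  //   [ prev ][ S ][ next ]  ->  [ prev+S+next ]      merge left+right
+  //   [ prev ][ S ][ next*]  ->  [ prev+S ][ next*]   merge left
+  //   [ prev*][ S ][ next ]  ->  [ prev*][ S+next ]   merge right
+  //
+  // otherwise S is simply returned to the available list
+  //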
+  struct skc_subbuf * prev;
+  struct skc_subbuf * next;
+
+  if (((prev = subbuf->prev) != NULL) && !prev->inuse)
+    {
+      next = subbuf->next;
+
+      if ((next != NULL) && !next->inuse)
+        {
+          subbuf->inuse -= 1;
+
+          assert(next->inuse == 0);
+
+          // increment size
+          prev->size += (subbuf->size + next->size);
+
+          struct skc_subbuf * const nextnext = next->next;
+
+          // update next link
+          prev->next = nextnext;
+
+          // update prev link
+          if (nextnext != NULL)
+            nextnext->prev = prev;
+
+          //
+          // both subbuf and next are now spare, which means we need
+          // to move the final available subbuffer into next's old
+          // position -- unless they're already the same slot
+          //
+          skc_uint const last_idx = --suballocator->rem.avail;
+          skc_uint const next_idx = next->idx;
+
+          assert(suballocator->rem.avail > 0);
+
+          if (last_idx != next_idx)
+            {
+              skc_subbuf_id_t     const last_id = suballocator->ids[last_idx];
+              struct skc_subbuf * const last    = suballocator->subbufs + last_id;
+
+              suballocator->ids[next_idx]       = last_id;
+              last->idx                         = next_idx;
+            }
+
+          skc_subbuf_id_t  const next_id   = (skc_subbuf_id_t)(next - suballocator->subbufs);
+
+          skc_uint         const spare_rem = suballocator->rem.spare + 2;
+          skc_uint         const spare_idx = suballocator->count - spare_rem;
+
+          suballocator->rem.spare          = spare_rem;
+          suballocator->ids[spare_idx + 0] = subbuf_id;
+          suballocator->ids[spare_idx + 1] = next_id;
+        }
+      else
+        {
+          prev->size += subbuf->size;
+          prev->next  = next;
+
+          if (next != NULL)
+            next->prev = prev;
+
+          subbuf->inuse -= 1;
+
+          assert(subbuf->inuse == 0);
+          assert(suballocator->rem.avail > 0);
+
+          suballocator->ids[suballocator->count - ++suballocator->rem.spare] = subbuf_id;
+        }
+    }
+  //
+  // try to merge with right
+  //
+  else if (((next = subbuf->next) != NULL) && !next->inuse)
+    {
+      subbuf->inuse -= 1;
+
+      assert(subbuf->inuse == 0);
+      assert(suballocator->rem.avail > 0);
+
+      next->prev     = prev;
+      next->origin   = subbuf->origin;
+      next->size    += subbuf->size;
+
+      if (prev != NULL)
+        prev->next = next;
+
+      // subbuf is now spare
+      suballocator->ids[suballocator->count - ++suballocator->rem.spare] = subbuf_id;
+    }
+  else // couldn't merge with a neighbor
+    {
+      skc_uint avail_idx = suballocator->rem.avail++;
+
+      // subbuf is now available
+      subbuf->idx    = avail_idx;
+      subbuf->inuse -= 1;
+
+      assert(subbuf->inuse == 0);
+      assert(suballocator->rem.avail > 0);
+
+      suballocator->ids[avail_idx] = subbuf_id;
+    }
+}
+
+//
+//
+//
+
+#if 0
+
+//
+// At some point there might be a reason to sort the available
+// subbuffers into some useful order -- presumably to binary search
+// for the closest match or to chip away at the largest available
+// subbuffer
+//
+
+static
+void
+skc_suballocator_optimize(struct skc_suballocator * const suballocator)
+{
+  ;
+}
+
+#endif
+
+//
+//
+//